--- /dev/null
+diff -Nurb linux-2.6.22-570/.config.orig linux-2.6.22-591/.config.orig
+--- linux-2.6.22-570/.config.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/.config.orig 2007-12-21 15:54:46.000000000 -0500
+@@ -0,0 +1,1693 @@
++#
++# Automatically generated make config: don't edit
++# Linux kernel version: 2.6.22-prep
++# Fri Dec 21 15:54:46 2007
++#
++CONFIG_X86_32=y
++CONFIG_GENERIC_TIME=y
++CONFIG_CLOCKSOURCE_WATCHDOG=y
++CONFIG_GENERIC_CLOCKEVENTS=y
++CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
++CONFIG_LOCKDEP_SUPPORT=y
++CONFIG_STACKTRACE_SUPPORT=y
++CONFIG_SEMAPHORE_SLEEPERS=y
++CONFIG_X86=y
++CONFIG_MMU=y
++CONFIG_ZONE_DMA=y
++CONFIG_QUICKLIST=y
++CONFIG_GENERIC_ISA_DMA=y
++CONFIG_GENERIC_IOMAP=y
++CONFIG_GENERIC_BUG=y
++CONFIG_GENERIC_HWEIGHT=y
++CONFIG_ARCH_MAY_HAVE_PC_FDC=y
++CONFIG_DMI=y
++CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
++
++#
++# Code maturity level options
++#
++CONFIG_EXPERIMENTAL=y
++CONFIG_LOCK_KERNEL=y
++CONFIG_INIT_ENV_ARG_LIMIT=32
++
++#
++# General setup
++#
++CONFIG_LOCALVERSION=""
++CONFIG_LOCALVERSION_AUTO=y
++CONFIG_SWAP=y
++CONFIG_SYSVIPC=y
++CONFIG_SYSVIPC_SYSCTL=y
++CONFIG_POSIX_MQUEUE=y
++# CONFIG_BSD_PROCESS_ACCT is not set
++# CONFIG_TASKSTATS is not set
++# CONFIG_USER_NS is not set
++# CONFIG_AUDIT is not set
++CONFIG_IKCONFIG=y
++CONFIG_IKCONFIG_PROC=y
++CONFIG_LOG_BUF_SHIFT=18
++CONFIG_OOM_PANIC=y
++# CONFIG_CONTAINER_DEBUG is not set
++# CONFIG_CPUSETS is not set
++CONFIG_SYSFS_DEPRECATED=y
++# CONFIG_CONTAINER_CPUACCT is not set
++# CONFIG_CONTAINER_NS is not set
++# CONFIG_RELAY is not set
++CONFIG_BLK_DEV_INITRD=y
++CONFIG_INITRAMFS_SOURCE=""
++CONFIG_CC_OPTIMIZE_FOR_SIZE=y
++CONFIG_SYSCTL=y
++# CONFIG_EMBEDDED is not set
++CONFIG_UID16=y
++CONFIG_SYSCTL_SYSCALL=y
++CONFIG_KALLSYMS=y
++CONFIG_KALLSYMS_ALL=y
++# CONFIG_KALLSYMS_EXTRA_PASS is not set
++CONFIG_HOTPLUG=y
++CONFIG_PRINTK=y
++CONFIG_BUG=y
++CONFIG_ELF_CORE=y
++CONFIG_BASE_FULL=y
++CONFIG_FUTEX=y
++CONFIG_ANON_INODES=y
++CONFIG_EPOLL=y
++CONFIG_SIGNALFD=y
++CONFIG_EVENTFD=y
++CONFIG_SHMEM=y
++CONFIG_VM_EVENT_COUNTERS=y
++CONFIG_SLAB=y
++# CONFIG_SLUB is not set
++# CONFIG_SLOB is not set
++CONFIG_PROC_SMAPS=y
++CONFIG_PROC_CLEAR_REFS=y
++CONFIG_PROC_PAGEMAP=y
++CONFIG_RT_MUTEXES=y
++# CONFIG_TINY_SHMEM is not set
++CONFIG_BASE_SMALL=0
++CONFIG_PAGE_GROUP_BY_MOBILITY=y
++
++#
++# Loadable module support
++#
++CONFIG_MODULES=y
++CONFIG_MODULE_UNLOAD=y
++CONFIG_MODULE_FORCE_UNLOAD=y
++# CONFIG_MODVERSIONS is not set
++# CONFIG_MODULE_SRCVERSION_ALL is not set
++# CONFIG_KMOD is not set
++CONFIG_STOP_MACHINE=y
++
++#
++# Block layer
++#
++CONFIG_BLOCK=y
++CONFIG_LBD=y
++# CONFIG_BLK_DEV_IO_TRACE is not set
++# CONFIG_LSF is not set
++
++#
++# IO Schedulers
++#
++CONFIG_IOSCHED_NOOP=y
++CONFIG_IOSCHED_AS=y
++CONFIG_IOSCHED_DEADLINE=y
++CONFIG_IOSCHED_CFQ=y
++CONFIG_DEFAULT_AS=y
++# CONFIG_DEFAULT_DEADLINE is not set
++# CONFIG_DEFAULT_CFQ is not set
++# CONFIG_DEFAULT_NOOP is not set
++CONFIG_DEFAULT_IOSCHED="anticipatory"
++
++#
++# Processor type and features
++#
++# CONFIG_TICK_ONESHOT is not set
++# CONFIG_NO_HZ is not set
++# CONFIG_HIGH_RES_TIMERS is not set
++CONFIG_SMP=y
++# CONFIG_X86_PC is not set
++# CONFIG_X86_ELAN is not set
++# CONFIG_X86_VOYAGER is not set
++# CONFIG_X86_NUMAQ is not set
++# CONFIG_X86_SUMMIT is not set
++# CONFIG_X86_BIGSMP is not set
++# CONFIG_X86_VISWS is not set
++CONFIG_X86_GENERICARCH=y
++# CONFIG_X86_ES7000 is not set
++# CONFIG_PARAVIRT is not set
++CONFIG_X86_CYCLONE_TIMER=y
++# CONFIG_M386 is not set
++# CONFIG_M486 is not set
++# CONFIG_M586 is not set
++# CONFIG_M586TSC is not set
++# CONFIG_M586MMX is not set
++# CONFIG_M686 is not set
++# CONFIG_MPENTIUMII is not set
++CONFIG_MPENTIUMIII=y
++# CONFIG_MPENTIUMM is not set
++# CONFIG_MCORE2 is not set
++# CONFIG_MPENTIUM4 is not set
++# CONFIG_MK6 is not set
++# CONFIG_MK7 is not set
++# CONFIG_MK8 is not set
++# CONFIG_MCRUSOE is not set
++# CONFIG_MEFFICEON is not set
++# CONFIG_MWINCHIPC6 is not set
++# CONFIG_MWINCHIP2 is not set
++# CONFIG_MWINCHIP3D is not set
++# CONFIG_MGEODEGX1 is not set
++# CONFIG_MGEODE_LX is not set
++# CONFIG_MCYRIXIII is not set
++# CONFIG_MVIAC3_2 is not set
++# CONFIG_MVIAC7 is not set
++CONFIG_X86_GENERIC=y
++CONFIG_X86_CMPXCHG=y
++CONFIG_X86_L1_CACHE_SHIFT=7
++CONFIG_X86_XADD=y
++CONFIG_RWSEM_XCHGADD_ALGORITHM=y
++# CONFIG_ARCH_HAS_ILOG2_U32 is not set
++# CONFIG_ARCH_HAS_ILOG2_U64 is not set
++CONFIG_GENERIC_CALIBRATE_DELAY=y
++CONFIG_X86_WP_WORKS_OK=y
++CONFIG_X86_INVLPG=y
++CONFIG_X86_BSWAP=y
++CONFIG_X86_POPAD_OK=y
++CONFIG_X86_GOOD_APIC=y
++CONFIG_X86_INTEL_USERCOPY=y
++CONFIG_X86_USE_PPRO_CHECKSUM=y
++CONFIG_X86_TSC=y
++CONFIG_X86_CMOV=y
++CONFIG_X86_MINIMUM_CPU_MODEL=4
++CONFIG_HPET_TIMER=y
++CONFIG_HPET_EMULATE_RTC=y
++CONFIG_NR_CPUS=32
++CONFIG_SCHED_SMT=y
++CONFIG_SCHED_MC=y
++# CONFIG_PREEMPT_NONE is not set
++CONFIG_PREEMPT_VOLUNTARY=y
++# CONFIG_PREEMPT is not set
++CONFIG_PREEMPT_BKL=y
++CONFIG_X86_LOCAL_APIC=y
++CONFIG_X86_IO_APIC=y
++CONFIG_X86_MCE=y
++CONFIG_X86_MCE_NONFATAL=y
++CONFIG_X86_MCE_P4THERMAL=y
++CONFIG_VM86=y
++# CONFIG_TOSHIBA is not set
++# CONFIG_I8K is not set
++# CONFIG_X86_REBOOTFIXUPS is not set
++CONFIG_MICROCODE=y
++CONFIG_MICROCODE_OLD_INTERFACE=y
++CONFIG_X86_MSR=y
++CONFIG_X86_CPUID=y
++
++#
++# Firmware Drivers
++#
++# CONFIG_EDD is not set
++# CONFIG_DELL_RBU is not set
++# CONFIG_DCDBAS is not set
++# CONFIG_NOHIGHMEM is not set
++CONFIG_HIGHMEM4G=y
++# CONFIG_HIGHMEM64G is not set
++CONFIG_PAGE_OFFSET=0xC0000000
++CONFIG_HIGHMEM=y
++CONFIG_ARCH_POPULATES_NODE_MAP=y
++CONFIG_SELECT_MEMORY_MODEL=y
++CONFIG_FLATMEM_MANUAL=y
++# CONFIG_DISCONTIGMEM_MANUAL is not set
++# CONFIG_SPARSEMEM_MANUAL is not set
++CONFIG_FLATMEM=y
++CONFIG_FLAT_NODE_MEM_MAP=y
++# CONFIG_SPARSEMEM_STATIC is not set
++CONFIG_SPLIT_PTLOCK_CPUS=4
++CONFIG_RESOURCES_64BIT=y
++CONFIG_ZONE_DMA_FLAG=1
++CONFIG_NR_QUICK=1
++# CONFIG_HIGHPTE is not set
++# CONFIG_MATH_EMULATION is not set
++CONFIG_MTRR=y
++# CONFIG_EFI is not set
++# CONFIG_IRQBALANCE is not set
++CONFIG_SECCOMP=y
++# CONFIG_HZ_100 is not set
++CONFIG_HZ_250=y
++# CONFIG_HZ_300 is not set
++# CONFIG_HZ_1000 is not set
++CONFIG_HZ=250
++CONFIG_KEXEC=y
++# CONFIG_CRASH_DUMP is not set
++CONFIG_PHYSICAL_START=0x100000
++# CONFIG_RELOCATABLE is not set
++CONFIG_PHYSICAL_ALIGN=0x100000
++# CONFIG_HOTPLUG_CPU is not set
++CONFIG_COMPAT_VDSO=y
++CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
++
++#
++# Power management options (ACPI, APM)
++#
++CONFIG_PM=y
++CONFIG_PM_LEGACY=y
++# CONFIG_PM_DEBUG is not set
++CONFIG_PM_SYSFS_DEPRECATED=y
++
++#
++# ACPI (Advanced Configuration and Power Interface) Support
++#
++CONFIG_ACPI=y
++CONFIG_ACPI_PROCFS=y
++CONFIG_ACPI_AC=y
++CONFIG_ACPI_BATTERY=y
++CONFIG_ACPI_BUTTON=y
++CONFIG_ACPI_FAN=y
++# CONFIG_ACPI_DOCK is not set
++CONFIG_ACPI_PROCESSOR=y
++CONFIG_ACPI_THERMAL=y
++# CONFIG_ACPI_ASUS is not set
++# CONFIG_ACPI_TOSHIBA is not set
++CONFIG_ACPI_BLACKLIST_YEAR=2001
++CONFIG_ACPI_DEBUG=y
++# CONFIG_ACPI_DEBUG_FUNC_TRACE is not set
++CONFIG_ACPI_EC=y
++CONFIG_ACPI_POWER=y
++CONFIG_ACPI_SYSTEM=y
++CONFIG_X86_PM_TIMER=y
++# CONFIG_ACPI_CONTAINER is not set
++# CONFIG_ACPI_SBS is not set
++# CONFIG_APM is not set
++
++#
++# CPU Frequency scaling
++#
++CONFIG_CPU_FREQ=y
++CONFIG_CPU_FREQ_TABLE=y
++CONFIG_CPU_FREQ_DEBUG=y
++CONFIG_CPU_FREQ_STAT=y
++# CONFIG_CPU_FREQ_STAT_DETAILS is not set
++CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
++# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
++CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
++# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
++CONFIG_CPU_FREQ_GOV_USERSPACE=y
++CONFIG_CPU_FREQ_GOV_ONDEMAND=y
++# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
++
++#
++# CPUFreq processor drivers
++#
++CONFIG_X86_ACPI_CPUFREQ=y
++# CONFIG_X86_POWERNOW_K6 is not set
++# CONFIG_X86_POWERNOW_K7 is not set
++CONFIG_X86_POWERNOW_K8=y
++CONFIG_X86_POWERNOW_K8_ACPI=y
++# CONFIG_X86_GX_SUSPMOD is not set
++# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
++# CONFIG_X86_SPEEDSTEP_ICH is not set
++# CONFIG_X86_SPEEDSTEP_SMI is not set
++# CONFIG_X86_P4_CLOCKMOD is not set
++# CONFIG_X86_CPUFREQ_NFORCE2 is not set
++# CONFIG_X86_LONGRUN is not set
++# CONFIG_X86_LONGHAUL is not set
++# CONFIG_X86_E_POWERSAVER is not set
++
++#
++# shared options
++#
++CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
++# CONFIG_X86_SPEEDSTEP_LIB is not set
++
++#
++# CPU idle PM support
++#
++# CONFIG_CPU_IDLE is not set
++
++#
++# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
++#
++CONFIG_PCI=y
++# CONFIG_PCI_GOBIOS is not set
++# CONFIG_PCI_GOMMCONFIG is not set
++# CONFIG_PCI_GODIRECT is not set
++CONFIG_PCI_GOANY=y
++CONFIG_PCI_BIOS=y
++CONFIG_PCI_DIRECT=y
++CONFIG_PCI_MMCONFIG=y
++# CONFIG_PCIEPORTBUS is not set
++CONFIG_ARCH_SUPPORTS_MSI=y
++CONFIG_PCI_MSI=y
++# CONFIG_PCI_DEBUG is not set
++# CONFIG_HT_IRQ is not set
++CONFIG_ISA_DMA_API=y
++# CONFIG_ISA is not set
++# CONFIG_MCA is not set
++# CONFIG_SCx200 is not set
++
++#
++# PCCARD (PCMCIA/CardBus) support
++#
++# CONFIG_PCCARD is not set
++# CONFIG_HOTPLUG_PCI is not set
++
++#
++# Executable file formats
++#
++CONFIG_BINFMT_ELF=y
++# CONFIG_BINFMT_AOUT is not set
++# CONFIG_BINFMT_MISC is not set
++
++#
++# Networking
++#
++CONFIG_NET=y
++
++#
++# Networking options
++#
++# CONFIG_NET_NS is not set
++CONFIG_PACKET=y
++# CONFIG_PACKET_MMAP is not set
++CONFIG_UNIX=y
++CONFIG_XFRM=y
++# CONFIG_XFRM_USER is not set
++# CONFIG_XFRM_SUB_POLICY is not set
++# CONFIG_XFRM_MIGRATE is not set
++# CONFIG_NET_KEY is not set
++CONFIG_INET=y
++CONFIG_IP_MULTICAST=y
++# CONFIG_IP_ADVANCED_ROUTER is not set
++CONFIG_IP_FIB_HASH=y
++CONFIG_IP_PNP=y
++CONFIG_IP_PNP_DHCP=y
++# CONFIG_IP_PNP_BOOTP is not set
++# CONFIG_IP_PNP_RARP is not set
++# CONFIG_NET_IPIP is not set
++# CONFIG_NET_IPGRE is not set
++# CONFIG_IP_MROUTE is not set
++# CONFIG_ARPD is not set
++# CONFIG_SYN_COOKIES is not set
++# CONFIG_INET_AH is not set
++# CONFIG_INET_ESP is not set
++# CONFIG_INET_IPCOMP is not set
++# CONFIG_INET_XFRM_TUNNEL is not set
++CONFIG_INET_TUNNEL=y
++CONFIG_INET_XFRM_MODE_TRANSPORT=y
++CONFIG_INET_XFRM_MODE_TUNNEL=y
++# CONFIG_INET_XFRM_MODE_BEET is not set
++CONFIG_INET_DIAG=y
++CONFIG_INET_TCP_DIAG=y
++# CONFIG_TCP_CONG_ADVANCED is not set
++CONFIG_TCP_CONG_CUBIC=y
++CONFIG_DEFAULT_TCP_CONG="cubic"
++# CONFIG_TCP_MD5SIG is not set
++# CONFIG_IP_VS is not set
++# CONFIG_ICMP_IPOD is not set
++CONFIG_IPV6=y
++# CONFIG_IPV6_PRIVACY is not set
++# CONFIG_IPV6_ROUTER_PREF is not set
++# CONFIG_IPV6_OPTIMISTIC_DAD is not set
++# CONFIG_INET6_AH is not set
++# CONFIG_INET6_ESP is not set
++# CONFIG_INET6_IPCOMP is not set
++# CONFIG_IPV6_MIP6 is not set
++# CONFIG_INET6_XFRM_TUNNEL is not set
++# CONFIG_INET6_TUNNEL is not set
++CONFIG_INET6_XFRM_MODE_TRANSPORT=y
++CONFIG_INET6_XFRM_MODE_TUNNEL=y
++# CONFIG_INET6_XFRM_MODE_BEET is not set
++# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
++CONFIG_IPV6_SIT=y
++# CONFIG_IPV6_TUNNEL is not set
++# CONFIG_IPV6_MULTIPLE_TABLES is not set
++# CONFIG_NETWORK_SECMARK is not set
++CONFIG_NETFILTER=y
++# CONFIG_NETFILTER_DEBUG is not set
++
++#
++# Core Netfilter Configuration
++#
++# CONFIG_NETFILTER_NETLINK is not set
++CONFIG_NF_CONNTRACK_ENABLED=m
++CONFIG_NF_CONNTRACK=m
++# CONFIG_NF_CT_ACCT is not set
++# CONFIG_NF_CONNTRACK_MARK is not set
++# CONFIG_NF_CONNTRACK_EVENTS is not set
++# CONFIG_NF_CT_PROTO_SCTP is not set
++# CONFIG_NF_CONNTRACK_AMANDA is not set
++# CONFIG_NF_CONNTRACK_FTP is not set
++# CONFIG_NF_CONNTRACK_H323 is not set
++# CONFIG_NF_CONNTRACK_IRC is not set
++# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
++# CONFIG_NF_CONNTRACK_PPTP is not set
++# CONFIG_NF_CONNTRACK_SANE is not set
++# CONFIG_NF_CONNTRACK_SIP is not set
++# CONFIG_NF_CONNTRACK_TFTP is not set
++CONFIG_NETFILTER_XTABLES=m
++# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
++# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
++# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
++# CONFIG_NETFILTER_XT_TARGET_MARK is not set
++# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
++# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
++# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
++# CONFIG_NETFILTER_XT_TARGET_SETXID is not set
++# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
++# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
++# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set
++# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
++# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
++# CONFIG_NETFILTER_XT_MATCH_DSCP is not set
++# CONFIG_NETFILTER_XT_MATCH_ESP is not set
++# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
++# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
++# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set
++# CONFIG_NETFILTER_XT_MATCH_MAC is not set
++# CONFIG_NETFILTER_XT_MATCH_MARK is not set
++# CONFIG_NETFILTER_XT_MATCH_POLICY is not set
++# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set
++# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
++# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
++# CONFIG_NETFILTER_XT_MATCH_REALM is not set
++# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
++# CONFIG_NETFILTER_XT_MATCH_STATE is not set
++# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
++# CONFIG_NETFILTER_XT_MATCH_STRING is not set
++# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
++# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
++
++#
++# IP: Netfilter Configuration
++#
++CONFIG_NF_CONNTRACK_IPV4=m
++CONFIG_NF_CONNTRACK_PROC_COMPAT=y
++# CONFIG_IP_NF_QUEUE is not set
++CONFIG_IP_NF_IPTABLES=m
++# CONFIG_IP_NF_MATCH_IPRANGE is not set
++# CONFIG_IP_NF_MATCH_TOS is not set
++# CONFIG_IP_NF_MATCH_RECENT is not set
++# CONFIG_IP_NF_MATCH_ECN is not set
++# CONFIG_IP_NF_MATCH_AH is not set
++# CONFIG_IP_NF_MATCH_TTL is not set
++# CONFIG_IP_NF_MATCH_OWNER is not set
++# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
++CONFIG_IP_NF_FILTER=m
++# CONFIG_IP_NF_TARGET_REJECT is not set
++# CONFIG_IP_NF_TARGET_LOG is not set
++# CONFIG_IP_NF_TARGET_ULOG is not set
++CONFIG_NF_NAT=m
++CONFIG_NF_NAT_NEEDED=y
++# CONFIG_IP_NF_TARGET_MASQUERADE is not set
++# CONFIG_IP_NF_TARGET_REDIRECT is not set
++# CONFIG_IP_NF_TARGET_NETMAP is not set
++# CONFIG_IP_NF_TARGET_SAME is not set
++# CONFIG_NF_NAT_SNMP_BASIC is not set
++# CONFIG_NF_NAT_FTP is not set
++# CONFIG_NF_NAT_IRC is not set
++# CONFIG_NF_NAT_TFTP is not set
++# CONFIG_NF_NAT_AMANDA is not set
++# CONFIG_NF_NAT_PPTP is not set
++# CONFIG_NF_NAT_H323 is not set
++# CONFIG_NF_NAT_SIP is not set
++CONFIG_IP_NF_MANGLE=m
++# CONFIG_IP_NF_TARGET_TOS is not set
++# CONFIG_IP_NF_TARGET_ECN is not set
++# CONFIG_IP_NF_TARGET_TTL is not set
++# CONFIG_IP_NF_TARGET_CLUSTERIP is not set
++# CONFIG_IP_NF_RAW is not set
++# CONFIG_IP_NF_ARPTABLES is not set
++# CONFIG_IP_NF_SET is not set
++
++#
++# IPv6: Netfilter Configuration (EXPERIMENTAL)
++#
++# CONFIG_NF_CONNTRACK_IPV6 is not set
++# CONFIG_IP6_NF_QUEUE is not set
++# CONFIG_IP6_NF_IPTABLES is not set
++# CONFIG_IP_DCCP is not set
++# CONFIG_IP_SCTP is not set
++# CONFIG_TIPC is not set
++# CONFIG_ATM is not set
++# CONFIG_BRIDGE is not set
++# CONFIG_VLAN_8021Q is not set
++# CONFIG_DECNET is not set
++# CONFIG_LLC2 is not set
++# CONFIG_IPX is not set
++# CONFIG_ATALK is not set
++# CONFIG_X25 is not set
++# CONFIG_LAPB is not set
++# CONFIG_ECONET is not set
++# CONFIG_WAN_ROUTER is not set
++
++#
++# QoS and/or fair queueing
++#
++# CONFIG_NET_SCHED is not set
++
++#
++# Network testing
++#
++CONFIG_NET_PKTGEN=m
++# CONFIG_NET_TCPPROBE is not set
++# CONFIG_HAMRADIO is not set
++# CONFIG_IRDA is not set
++# CONFIG_BT is not set
++# CONFIG_AF_RXRPC is not set
++
++#
++# Wireless
++#
++# CONFIG_CFG80211 is not set
++# CONFIG_WIRELESS_EXT is not set
++# CONFIG_MAC80211 is not set
++# CONFIG_IEEE80211 is not set
++# CONFIG_RFKILL is not set
++
++#
++# Device Drivers
++#
++
++#
++# Generic Driver Options
++#
++CONFIG_STANDALONE=y
++CONFIG_PREVENT_FIRMWARE_BUILD=y
++CONFIG_FW_LOADER=y
++# CONFIG_DEBUG_DRIVER is not set
++# CONFIG_DEBUG_DEVRES is not set
++# CONFIG_SYS_HYPERVISOR is not set
++
++#
++# Connector - unified userspace <-> kernelspace linker
++#
++CONFIG_CONNECTOR=m
++# CONFIG_MTD is not set
++
++#
++# Parallel port support
++#
++# CONFIG_PARPORT is not set
++
++#
++# Plug and Play support
++#
++CONFIG_PNP=y
++# CONFIG_PNP_DEBUG is not set
++
++#
++# Protocols
++#
++CONFIG_PNPACPI=y
++
++#
++# Block devices
++#
++CONFIG_BLK_DEV_FD=y
++# CONFIG_BLK_CPQ_DA is not set
++# CONFIG_BLK_CPQ_CISS_DA is not set
++# CONFIG_BLK_DEV_DAC960 is not set
++# CONFIG_BLK_DEV_UMEM is not set
++# CONFIG_BLK_DEV_COW_COMMON is not set
++CONFIG_BLK_DEV_LOOP=y
++# CONFIG_BLK_DEV_CRYPTOLOOP is not set
++# CONFIG_BLK_DEV_NBD is not set
++# CONFIG_BLK_DEV_SX8 is not set
++# CONFIG_BLK_DEV_UB is not set
++CONFIG_BLK_DEV_RAM=y
++CONFIG_BLK_DEV_RAM_COUNT=16
++CONFIG_BLK_DEV_RAM_SIZE=4096
++CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
++# CONFIG_CDROM_PKTCDVD is not set
++# CONFIG_ATA_OVER_ETH is not set
++
++#
++# Misc devices
++#
++# CONFIG_IBM_ASM is not set
++# CONFIG_PHANTOM is not set
++# CONFIG_SGI_IOC4 is not set
++# CONFIG_TIFM_CORE is not set
++# CONFIG_SONY_LAPTOP is not set
++# CONFIG_THINKPAD_ACPI is not set
++CONFIG_IDE=y
++CONFIG_BLK_DEV_IDE=y
++
++#
++# Please see Documentation/ide.txt for help/info on IDE drives
++#
++# CONFIG_BLK_DEV_IDE_SATA is not set
++# CONFIG_BLK_DEV_HD_IDE is not set
++CONFIG_BLK_DEV_IDEDISK=y
++CONFIG_IDEDISK_MULTI_MODE=y
++CONFIG_BLK_DEV_IDECD=y
++# CONFIG_BLK_DEV_IDETAPE is not set
++# CONFIG_BLK_DEV_IDEFLOPPY is not set
++# CONFIG_BLK_DEV_IDESCSI is not set
++# CONFIG_BLK_DEV_IDEACPI is not set
++# CONFIG_IDE_TASK_IOCTL is not set
++CONFIG_IDE_PROC_FS=y
++
++#
++# IDE chipset support/bugfixes
++#
++CONFIG_IDE_GENERIC=y
++# CONFIG_BLK_DEV_CMD640 is not set
++# CONFIG_BLK_DEV_IDEPNP is not set
++CONFIG_BLK_DEV_IDEPCI=y
++# CONFIG_IDEPCI_SHARE_IRQ is not set
++CONFIG_IDEPCI_PCIBUS_ORDER=y
++# CONFIG_BLK_DEV_OFFBOARD is not set
++# CONFIG_BLK_DEV_GENERIC is not set
++# CONFIG_BLK_DEV_OPTI621 is not set
++# CONFIG_BLK_DEV_RZ1000 is not set
++CONFIG_BLK_DEV_IDEDMA_PCI=y
++# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
++# CONFIG_IDEDMA_ONLYDISK is not set
++# CONFIG_BLK_DEV_AEC62XX is not set
++# CONFIG_BLK_DEV_ALI15X3 is not set
++CONFIG_BLK_DEV_AMD74XX=y
++# CONFIG_BLK_DEV_ATIIXP is not set
++# CONFIG_BLK_DEV_CMD64X is not set
++# CONFIG_BLK_DEV_TRIFLEX is not set
++# CONFIG_BLK_DEV_CY82C693 is not set
++# CONFIG_BLK_DEV_CS5520 is not set
++# CONFIG_BLK_DEV_CS5530 is not set
++# CONFIG_BLK_DEV_CS5535 is not set
++# CONFIG_BLK_DEV_HPT34X is not set
++# CONFIG_BLK_DEV_HPT366 is not set
++# CONFIG_BLK_DEV_JMICRON is not set
++# CONFIG_BLK_DEV_SC1200 is not set
++CONFIG_BLK_DEV_PIIX=y
++# CONFIG_BLK_DEV_IT8213 is not set
++# CONFIG_BLK_DEV_IT821X is not set
++# CONFIG_BLK_DEV_NS87415 is not set
++# CONFIG_BLK_DEV_PDC202XX_OLD is not set
++# CONFIG_BLK_DEV_PDC202XX_NEW is not set
++# CONFIG_BLK_DEV_SVWKS is not set
++# CONFIG_BLK_DEV_SIIMAGE is not set
++# CONFIG_BLK_DEV_SIS5513 is not set
++# CONFIG_BLK_DEV_SLC90E66 is not set
++# CONFIG_BLK_DEV_TRM290 is not set
++# CONFIG_BLK_DEV_VIA82CXXX is not set
++# CONFIG_BLK_DEV_TC86C001 is not set
++# CONFIG_IDE_ARM is not set
++CONFIG_BLK_DEV_IDEDMA=y
++# CONFIG_IDEDMA_IVB is not set
++# CONFIG_BLK_DEV_HD is not set
++
++#
++# SCSI device support
++#
++# CONFIG_RAID_ATTRS is not set
++CONFIG_SCSI=y
++# CONFIG_SCSI_TGT is not set
++CONFIG_SCSI_NETLINK=y
++# CONFIG_SCSI_PROC_FS is not set
++
++#
++# SCSI support type (disk, tape, CD-ROM)
++#
++CONFIG_BLK_DEV_SD=y
++# CONFIG_CHR_DEV_ST is not set
++# CONFIG_CHR_DEV_OSST is not set
++CONFIG_BLK_DEV_SR=y
++# CONFIG_BLK_DEV_SR_VENDOR is not set
++CONFIG_CHR_DEV_SG=y
++# CONFIG_CHR_DEV_SCH is not set
++
++#
++# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
++#
++# CONFIG_SCSI_MULTI_LUN is not set
++# CONFIG_SCSI_CONSTANTS is not set
++# CONFIG_SCSI_LOGGING is not set
++# CONFIG_SCSI_SCAN_ASYNC is not set
++CONFIG_SCSI_WAIT_SCAN=m
++
++#
++# SCSI Transports
++#
++CONFIG_SCSI_SPI_ATTRS=y
++CONFIG_SCSI_FC_ATTRS=y
++# CONFIG_SCSI_ISCSI_ATTRS is not set
++# CONFIG_SCSI_SAS_ATTRS is not set
++# CONFIG_SCSI_SAS_LIBSAS is not set
++
++#
++# SCSI low-level drivers
++#
++# CONFIG_ISCSI_TCP is not set
++CONFIG_BLK_DEV_3W_XXXX_RAID=y
++# CONFIG_SCSI_3W_9XXX is not set
++# CONFIG_SCSI_ACARD is not set
++# CONFIG_SCSI_AACRAID is not set
++CONFIG_SCSI_AIC7XXX=y
++CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
++CONFIG_AIC7XXX_RESET_DELAY_MS=5000
++CONFIG_AIC7XXX_DEBUG_ENABLE=y
++CONFIG_AIC7XXX_DEBUG_MASK=0
++CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
++# CONFIG_SCSI_AIC7XXX_OLD is not set
++CONFIG_SCSI_AIC79XX=y
++CONFIG_AIC79XX_CMDS_PER_DEVICE=32
++CONFIG_AIC79XX_RESET_DELAY_MS=4000
++# CONFIG_AIC79XX_DEBUG_ENABLE is not set
++CONFIG_AIC79XX_DEBUG_MASK=0
++# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
++# CONFIG_SCSI_AIC94XX is not set
++# CONFIG_SCSI_DPT_I2O is not set
++# CONFIG_SCSI_ADVANSYS is not set
++# CONFIG_SCSI_ARCMSR is not set
++# CONFIG_MEGARAID_NEWGEN is not set
++# CONFIG_MEGARAID_LEGACY is not set
++# CONFIG_MEGARAID_SAS is not set
++# CONFIG_SCSI_HPTIOP is not set
++# CONFIG_SCSI_BUSLOGIC is not set
++# CONFIG_SCSI_DMX3191D is not set
++# CONFIG_SCSI_EATA is not set
++# CONFIG_SCSI_FUTURE_DOMAIN is not set
++# CONFIG_SCSI_GDTH is not set
++# CONFIG_SCSI_IPS is not set
++# CONFIG_SCSI_INITIO is not set
++# CONFIG_SCSI_INIA100 is not set
++# CONFIG_SCSI_STEX is not set
++# CONFIG_SCSI_SYM53C8XX_2 is not set
++# CONFIG_SCSI_IPR is not set
++# CONFIG_SCSI_QLOGIC_1280 is not set
++# CONFIG_SCSI_QLA_FC is not set
++# CONFIG_SCSI_QLA_ISCSI is not set
++# CONFIG_SCSI_LPFC is not set
++# CONFIG_SCSI_DC395x is not set
++# CONFIG_SCSI_DC390T is not set
++# CONFIG_SCSI_NSP32 is not set
++# CONFIG_SCSI_DEBUG is not set
++# CONFIG_SCSI_SRP is not set
++CONFIG_ATA=y
++# CONFIG_ATA_NONSTANDARD is not set
++CONFIG_ATA_ACPI=y
++CONFIG_SATA_AHCI=y
++CONFIG_SATA_SVW=y
++CONFIG_ATA_PIIX=y
++# CONFIG_SATA_MV is not set
++CONFIG_SATA_NV=y
++# CONFIG_PDC_ADMA is not set
++# CONFIG_SATA_QSTOR is not set
++# CONFIG_SATA_PROMISE is not set
++# CONFIG_SATA_SX4 is not set
++CONFIG_SATA_SIL=y
++# CONFIG_SATA_SIL24 is not set
++# CONFIG_SATA_SIS is not set
++# CONFIG_SATA_ULI is not set
++CONFIG_SATA_VIA=y
++# CONFIG_SATA_VITESSE is not set
++# CONFIG_SATA_INIC162X is not set
++# CONFIG_PATA_ALI is not set
++# CONFIG_PATA_AMD is not set
++# CONFIG_PATA_ARTOP is not set
++# CONFIG_PATA_ATIIXP is not set
++# CONFIG_PATA_CMD640_PCI is not set
++# CONFIG_PATA_CMD64X is not set
++# CONFIG_PATA_CS5520 is not set
++# CONFIG_PATA_CS5530 is not set
++# CONFIG_PATA_CS5535 is not set
++# CONFIG_PATA_CYPRESS is not set
++# CONFIG_PATA_EFAR is not set
++# CONFIG_ATA_GENERIC is not set
++# CONFIG_PATA_HPT366 is not set
++# CONFIG_PATA_HPT37X is not set
++# CONFIG_PATA_HPT3X2N is not set
++# CONFIG_PATA_HPT3X3 is not set
++# CONFIG_PATA_IT821X is not set
++# CONFIG_PATA_IT8213 is not set
++# CONFIG_PATA_JMICRON is not set
++# CONFIG_PATA_TRIFLEX is not set
++# CONFIG_PATA_MARVELL is not set
++# CONFIG_PATA_MPIIX is not set
++# CONFIG_PATA_OLDPIIX is not set
++# CONFIG_PATA_NETCELL is not set
++# CONFIG_PATA_NS87410 is not set
++# CONFIG_PATA_OPTI is not set
++# CONFIG_PATA_OPTIDMA is not set
++# CONFIG_PATA_PDC_OLD is not set
++# CONFIG_PATA_RADISYS is not set
++# CONFIG_PATA_RZ1000 is not set
++# CONFIG_PATA_SC1200 is not set
++# CONFIG_PATA_SERVERWORKS is not set
++# CONFIG_PATA_PDC2027X is not set
++# CONFIG_PATA_SIL680 is not set
++# CONFIG_PATA_SIS is not set
++# CONFIG_PATA_VIA is not set
++# CONFIG_PATA_WINBOND is not set
++
++#
++# Multi-device support (RAID and LVM)
++#
++CONFIG_MD=y
++# CONFIG_BLK_DEV_MD is not set
++CONFIG_BLK_DEV_DM=y
++# CONFIG_DM_DEBUG is not set
++# CONFIG_DM_CRYPT is not set
++# CONFIG_DM_SNAPSHOT is not set
++# CONFIG_DM_MIRROR is not set
++# CONFIG_DM_ZERO is not set
++# CONFIG_DM_MULTIPATH is not set
++# CONFIG_DM_DELAY is not set
++# CONFIG_DM_NETLINK is not set
++
++#
++# Fusion MPT device support
++#
++CONFIG_FUSION=y
++CONFIG_FUSION_SPI=y
++# CONFIG_FUSION_FC is not set
++# CONFIG_FUSION_SAS is not set
++CONFIG_FUSION_MAX_SGE=128
++# CONFIG_FUSION_CTL is not set
++
++#
++# IEEE 1394 (FireWire) support
++#
++# CONFIG_FIREWIRE is not set
++CONFIG_IEEE1394=y
++
++#
++# Subsystem Options
++#
++# CONFIG_IEEE1394_VERBOSEDEBUG is not set
++
++#
++# Controllers
++#
++
++#
++# Texas Instruments PCILynx requires I2C
++#
++CONFIG_IEEE1394_OHCI1394=y
++
++#
++# Protocols
++#
++# CONFIG_IEEE1394_VIDEO1394 is not set
++# CONFIG_IEEE1394_SBP2 is not set
++# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
++# CONFIG_IEEE1394_ETH1394 is not set
++# CONFIG_IEEE1394_DV1394 is not set
++CONFIG_IEEE1394_RAWIO=y
++
++#
++# I2O device support
++#
++# CONFIG_I2O is not set
++# CONFIG_MACINTOSH_DRIVERS is not set
++
++#
++# Network device support
++#
++CONFIG_NETDEVICES=y
++# CONFIG_DUMMY is not set
++# CONFIG_BONDING is not set
++# CONFIG_EQUALIZER is not set
++CONFIG_TUN=m
++# CONFIG_ETUN is not set
++# CONFIG_NET_SB1000 is not set
++# CONFIG_ARCNET is not set
++# CONFIG_PHYLIB is not set
++
++#
++# Ethernet (10 or 100Mbit)
++#
++CONFIG_NET_ETHERNET=y
++CONFIG_MII=y
++# CONFIG_HAPPYMEAL is not set
++# CONFIG_SUNGEM is not set
++# CONFIG_CASSINI is not set
++# CONFIG_NET_VENDOR_3COM is not set
++
++#
++# Tulip family network device support
++#
++CONFIG_NET_TULIP=y
++# CONFIG_DE2104X is not set
++CONFIG_TULIP=y
++# CONFIG_TULIP_MWI is not set
++# CONFIG_TULIP_MMIO is not set
++# CONFIG_TULIP_NAPI is not set
++# CONFIG_DE4X5 is not set
++# CONFIG_WINBOND_840 is not set
++# CONFIG_DM9102 is not set
++# CONFIG_ULI526X is not set
++# CONFIG_HP100 is not set
++CONFIG_NET_PCI=y
++# CONFIG_PCNET32 is not set
++# CONFIG_AMD8111_ETH is not set
++# CONFIG_ADAPTEC_STARFIRE is not set
++CONFIG_B44=y
++CONFIG_FORCEDETH=y
++# CONFIG_FORCEDETH_NAPI is not set
++# CONFIG_DGRS is not set
++# CONFIG_EEPRO100 is not set
++CONFIG_E100=y
++# CONFIG_FEALNX is not set
++# CONFIG_NATSEMI is not set
++# CONFIG_NE2K_PCI is not set
++CONFIG_8139CP=y
++CONFIG_8139TOO=y
++# CONFIG_8139TOO_PIO is not set
++# CONFIG_8139TOO_TUNE_TWISTER is not set
++# CONFIG_8139TOO_8129 is not set
++# CONFIG_8139_OLD_RX_RESET is not set
++# CONFIG_SIS900 is not set
++# CONFIG_EPIC100 is not set
++# CONFIG_SUNDANCE is not set
++# CONFIG_TLAN is not set
++# CONFIG_VIA_RHINE is not set
++# CONFIG_SC92031 is not set
++CONFIG_NETDEV_1000=y
++# CONFIG_ACENIC is not set
++# CONFIG_DL2K is not set
++CONFIG_E1000=y
++# CONFIG_E1000_NAPI is not set
++# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
++# CONFIG_E1000E is not set
++# CONFIG_NS83820 is not set
++# CONFIG_HAMACHI is not set
++# CONFIG_YELLOWFIN is not set
++CONFIG_R8169=y
++# CONFIG_R8169_NAPI is not set
++# CONFIG_SIS190 is not set
++# CONFIG_SKGE is not set
++CONFIG_SKY2=y
++# CONFIG_SK98LIN is not set
++# CONFIG_VIA_VELOCITY is not set
++CONFIG_TIGON3=y
++CONFIG_BNX2=y
++# CONFIG_QLA3XXX is not set
++# CONFIG_ATL1 is not set
++CONFIG_NETDEV_10000=y
++# CONFIG_CHELSIO_T1 is not set
++# CONFIG_CHELSIO_T3 is not set
++# CONFIG_IXGB is not set
++# CONFIG_S2IO is not set
++# CONFIG_MYRI10GE is not set
++# CONFIG_NETXEN_NIC is not set
++# CONFIG_MLX4_CORE is not set
++# CONFIG_TR is not set
++
++#
++# Wireless LAN
++#
++# CONFIG_WLAN_PRE80211 is not set
++# CONFIG_WLAN_80211 is not set
++
++#
++# USB Network Adapters
++#
++# CONFIG_USB_CATC is not set
++# CONFIG_USB_KAWETH is not set
++# CONFIG_USB_PEGASUS is not set
++# CONFIG_USB_RTL8150 is not set
++# CONFIG_USB_USBNET_MII is not set
++# CONFIG_USB_USBNET is not set
++# CONFIG_WAN is not set
++# CONFIG_FDDI is not set
++# CONFIG_HIPPI is not set
++CONFIG_PPP=m
++# CONFIG_PPP_MULTILINK is not set
++# CONFIG_PPP_FILTER is not set
++# CONFIG_PPP_ASYNC is not set
++# CONFIG_PPP_SYNC_TTY is not set
++# CONFIG_PPP_DEFLATE is not set
++# CONFIG_PPP_BSDCOMP is not set
++# CONFIG_PPP_MPPE is not set
++# CONFIG_PPPOE is not set
++# CONFIG_SLIP is not set
++CONFIG_SLHC=m
++# CONFIG_NET_FC is not set
++# CONFIG_SHAPER is not set
++CONFIG_NETCONSOLE=y
++CONFIG_NETPOLL=y
++# CONFIG_NETPOLL_TRAP is not set
++CONFIG_NET_POLL_CONTROLLER=y
++
++#
++# ISDN subsystem
++#
++# CONFIG_ISDN is not set
++
++#
++# Telephony Support
++#
++# CONFIG_PHONE is not set
++
++#
++# Input device support
++#
++CONFIG_INPUT=y
++# CONFIG_INPUT_FF_MEMLESS is not set
++# CONFIG_INPUT_POLLDEV is not set
++
++#
++# Userland interfaces
++#
++CONFIG_INPUT_MOUSEDEV=y
++CONFIG_INPUT_MOUSEDEV_PSAUX=y
++CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
++CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
++# CONFIG_INPUT_JOYDEV is not set
++# CONFIG_INPUT_TSDEV is not set
++CONFIG_INPUT_EVDEV=y
++# CONFIG_INPUT_EVBUG is not set
++
++#
++# Input Device Drivers
++#
++CONFIG_INPUT_KEYBOARD=y
++CONFIG_KEYBOARD_ATKBD=y
++# CONFIG_KEYBOARD_SUNKBD is not set
++# CONFIG_KEYBOARD_LKKBD is not set
++# CONFIG_KEYBOARD_XTKBD is not set
++# CONFIG_KEYBOARD_NEWTON is not set
++# CONFIG_KEYBOARD_STOWAWAY is not set
++CONFIG_INPUT_MOUSE=y
++CONFIG_MOUSE_PS2=y
++CONFIG_MOUSE_PS2_ALPS=y
++CONFIG_MOUSE_PS2_LOGIPS2PP=y
++CONFIG_MOUSE_PS2_SYNAPTICS=y
++CONFIG_MOUSE_PS2_LIFEBOOK=y
++CONFIG_MOUSE_PS2_TRACKPOINT=y
++# CONFIG_MOUSE_PS2_TOUCHKIT is not set
++# CONFIG_MOUSE_SERIAL is not set
++# CONFIG_MOUSE_APPLETOUCH is not set
++# CONFIG_MOUSE_VSXXXAA is not set
++# CONFIG_INPUT_JOYSTICK is not set
++# CONFIG_INPUT_TABLET is not set
++# CONFIG_INPUT_TOUCHSCREEN is not set
++# CONFIG_INPUT_MISC is not set
++
++#
++# Hardware I/O ports
++#
++CONFIG_SERIO=y
++CONFIG_SERIO_I8042=y
++# CONFIG_SERIO_SERPORT is not set
++# CONFIG_SERIO_CT82C710 is not set
++# CONFIG_SERIO_PCIPS2 is not set
++CONFIG_SERIO_LIBPS2=y
++# CONFIG_SERIO_RAW is not set
++# CONFIG_GAMEPORT is not set
++
++#
++# Character devices
++#
++CONFIG_VT=y
++CONFIG_VT_CONSOLE=y
++CONFIG_HW_CONSOLE=y
++# CONFIG_VT_HW_CONSOLE_BINDING is not set
++# CONFIG_SERIAL_NONSTANDARD is not set
++
++#
++# Serial drivers
++#
++CONFIG_SERIAL_8250=y
++CONFIG_SERIAL_8250_CONSOLE=y
++CONFIG_SERIAL_8250_PCI=y
++CONFIG_SERIAL_8250_PNP=y
++CONFIG_SERIAL_8250_NR_UARTS=4
++CONFIG_SERIAL_8250_RUNTIME_UARTS=4
++# CONFIG_SERIAL_8250_EXTENDED is not set
++
++#
++# Non-8250 serial port support
++#
++CONFIG_SERIAL_CORE=y
++CONFIG_SERIAL_CORE_CONSOLE=y
++# CONFIG_SERIAL_JSM is not set
++CONFIG_UNIX98_PTYS=y
++CONFIG_LEGACY_PTYS=y
++CONFIG_LEGACY_PTY_COUNT=256
++
++#
++# IPMI
++#
++# CONFIG_IPMI_HANDLER is not set
++# CONFIG_WATCHDOG is not set
++CONFIG_HW_RANDOM=y
++CONFIG_HW_RANDOM_INTEL=y
++CONFIG_HW_RANDOM_AMD=y
++CONFIG_HW_RANDOM_GEODE=y
++CONFIG_HW_RANDOM_VIA=y
++# CONFIG_NVRAM is not set
++CONFIG_RTC=y
++# CONFIG_R3964 is not set
++# CONFIG_APPLICOM is not set
++# CONFIG_SONYPI is not set
++# CONFIG_AGP is not set
++# CONFIG_DRM is not set
++# CONFIG_MWAVE is not set
++# CONFIG_PC8736x_GPIO is not set
++# CONFIG_NSC_GPIO is not set
++# CONFIG_CS5535_GPIO is not set
++CONFIG_RAW_DRIVER=y
++CONFIG_MAX_RAW_DEVS=256
++CONFIG_HPET=y
++# CONFIG_HPET_RTC_IRQ is not set
++CONFIG_HPET_MMAP=y
++CONFIG_HANGCHECK_TIMER=y
++
++#
++# TPM devices
++#
++# CONFIG_TCG_TPM is not set
++# CONFIG_TELCLOCK is not set
++CONFIG_DEVPORT=y
++# CONFIG_I2C is not set
++
++#
++# SPI support
++#
++# CONFIG_SPI is not set
++# CONFIG_SPI_MASTER is not set
++
++#
++# Dallas's 1-wire bus
++#
++# CONFIG_W1 is not set
++# CONFIG_HWMON is not set
++
++#
++# Multifunction device drivers
++#
++# CONFIG_MFD_SM501 is not set
++
++#
++# Multimedia devices
++#
++# CONFIG_VIDEO_DEV is not set
++# CONFIG_DVB_CORE is not set
++CONFIG_DAB=y
++# CONFIG_USB_DABUSB is not set
++
++#
++# Graphics support
++#
++# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
++
++#
++# Display device support
++#
++# CONFIG_DISPLAY_SUPPORT is not set
++# CONFIG_VGASTATE is not set
++CONFIG_VIDEO_OUTPUT_CONTROL=m
++# CONFIG_FB is not set
++
++#
++# Console display driver support
++#
++CONFIG_VGA_CONSOLE=y
++CONFIG_VGACON_SOFT_SCROLLBACK=y
++CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
++CONFIG_VIDEO_SELECT=y
++CONFIG_DUMMY_CONSOLE=y
++
++#
++# Sound
++#
++CONFIG_SOUND=y
++
++#
++# Advanced Linux Sound Architecture
++#
++# CONFIG_SND is not set
++
++#
++# Open Sound System
++#
++CONFIG_SOUND_PRIME=y
++# CONFIG_OSS_OBSOLETE is not set
++# CONFIG_SOUND_TRIDENT is not set
++# CONFIG_SOUND_MSNDCLAS is not set
++# CONFIG_SOUND_MSNDPIN is not set
++# CONFIG_SOUND_OSS is not set
++
++#
++# HID Devices
++#
++CONFIG_HID=y
++# CONFIG_HID_DEBUG is not set
++
++#
++# USB Input Devices
++#
++CONFIG_USB_HID=y
++# CONFIG_USB_HIDINPUT_POWERBOOK is not set
++# CONFIG_HID_FF is not set
++# CONFIG_USB_HIDDEV is not set
++
++#
++# USB support
++#
++CONFIG_USB_ARCH_HAS_HCD=y
++CONFIG_USB_ARCH_HAS_OHCI=y
++CONFIG_USB_ARCH_HAS_EHCI=y
++CONFIG_USB=y
++# CONFIG_USB_DEBUG is not set
++
++#
++# Miscellaneous USB options
++#
++CONFIG_USB_DEVICEFS=y
++CONFIG_USB_DEVICE_CLASS=y
++# CONFIG_USB_DYNAMIC_MINORS is not set
++# CONFIG_USB_SUSPEND is not set
++# CONFIG_USB_OTG is not set
++
++#
++# USB Host Controller Drivers
++#
++CONFIG_USB_EHCI_HCD=y
++# CONFIG_USB_EHCI_SPLIT_ISO is not set
++# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
++# CONFIG_USB_EHCI_TT_NEWSCHED is not set
++# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
++# CONFIG_USB_ISP116X_HCD is not set
++CONFIG_USB_OHCI_HCD=y
++# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
++# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
++CONFIG_USB_OHCI_LITTLE_ENDIAN=y
++CONFIG_USB_UHCI_HCD=y
++# CONFIG_USB_SL811_HCD is not set
++
++#
++# USB Device Class drivers
++#
++# CONFIG_USB_ACM is not set
++CONFIG_USB_PRINTER=y
++
++#
++# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
++#
++
++#
++# may also be needed; see USB_STORAGE Help for more information
++#
++CONFIG_USB_STORAGE=y
++# CONFIG_USB_STORAGE_DEBUG is not set
++# CONFIG_USB_STORAGE_DATAFAB is not set
++# CONFIG_USB_STORAGE_FREECOM is not set
++# CONFIG_USB_STORAGE_ISD200 is not set
++# CONFIG_USB_STORAGE_DPCM is not set
++# CONFIG_USB_STORAGE_USBAT is not set
++# CONFIG_USB_STORAGE_SDDR09 is not set
++# CONFIG_USB_STORAGE_SDDR55 is not set
++# CONFIG_USB_STORAGE_JUMPSHOT is not set
++# CONFIG_USB_STORAGE_ALAUDA is not set
++# CONFIG_USB_STORAGE_KARMA is not set
++# CONFIG_USB_LIBUSUAL is not set
++
++#
++# USB Imaging devices
++#
++# CONFIG_USB_MDC800 is not set
++# CONFIG_USB_MICROTEK is not set
++CONFIG_USB_MON=y
++
++#
++# USB port drivers
++#
++
++#
++# USB Serial Converter support
++#
++# CONFIG_USB_SERIAL is not set
++
++#
++# USB Miscellaneous drivers
++#
++# CONFIG_USB_EMI62 is not set
++# CONFIG_USB_EMI26 is not set
++# CONFIG_USB_ADUTUX is not set
++# CONFIG_USB_AUERSWALD is not set
++# CONFIG_USB_RIO500 is not set
++# CONFIG_USB_LEGOTOWER is not set
++# CONFIG_USB_LCD is not set
++# CONFIG_USB_BERRY_CHARGE is not set
++# CONFIG_USB_LED is not set
++# CONFIG_USB_CYPRESS_CY7C63 is not set
++# CONFIG_USB_CYTHERM is not set
++# CONFIG_USB_PHIDGET is not set
++# CONFIG_USB_IDMOUSE is not set
++# CONFIG_USB_FTDI_ELAN is not set
++# CONFIG_USB_APPLEDISPLAY is not set
++# CONFIG_USB_SISUSBVGA is not set
++# CONFIG_USB_LD is not set
++# CONFIG_USB_TRANCEVIBRATOR is not set
++# CONFIG_USB_IOWARRIOR is not set
++# CONFIG_USB_TEST is not set
++
++#
++# USB DSL modem support
++#
++
++#
++# USB Gadget Support
++#
++# CONFIG_USB_GADGET is not set
++# CONFIG_MMC is not set
++
++#
++# LED devices
++#
++# CONFIG_NEW_LEDS is not set
++
++#
++# LED drivers
++#
++
++#
++# LED Triggers
++#
++
++#
++# InfiniBand support
++#
++# CONFIG_INFINIBAND is not set
++
++#
++# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
++#
++# CONFIG_EDAC is not set
++
++#
++# Real Time Clock
++#
++# CONFIG_RTC_CLASS is not set
++
++#
++# DMA Engine support
++#
++# CONFIG_DMA_ENGINE is not set
++
++#
++# DMA Clients
++#
++
++#
++# DMA Devices
++#
++
++#
++# Virtualization
++#
++# CONFIG_KVM is not set
++
++#
++# File systems
++#
++CONFIG_EXT2_FS=y
++CONFIG_EXT2_FS_XATTR=y
++CONFIG_EXT2_FS_POSIX_ACL=y
++# CONFIG_EXT2_FS_SECURITY is not set
++# CONFIG_EXT2_FS_XIP is not set
++CONFIG_EXT3_FS=y
++CONFIG_EXT3_FS_XATTR=y
++CONFIG_EXT3_FS_POSIX_ACL=y
++# CONFIG_EXT3_FS_SECURITY is not set
++# CONFIG_EXT4DEV_FS is not set
++CONFIG_JBD=y
++# CONFIG_JBD_DEBUG is not set
++CONFIG_FS_MBCACHE=y
++CONFIG_REISERFS_FS=y
++# CONFIG_REISERFS_CHECK is not set
++# CONFIG_REISERFS_PROC_INFO is not set
++CONFIG_REISERFS_FS_XATTR=y
++CONFIG_REISERFS_FS_POSIX_ACL=y
++# CONFIG_REISERFS_FS_SECURITY is not set
++# CONFIG_JFS_FS is not set
++CONFIG_FS_POSIX_ACL=y
++# CONFIG_XFS_FS is not set
++# CONFIG_GFS2_FS is not set
++# CONFIG_OCFS2_FS is not set
++# CONFIG_MINIX_FS is not set
++# CONFIG_ROMFS_FS is not set
++CONFIG_INOTIFY=y
++CONFIG_INOTIFY_USER=y
++# CONFIG_QUOTA is not set
++CONFIG_DNOTIFY=y
++# CONFIG_AUTOFS_FS is not set
++CONFIG_AUTOFS4_FS=y
++CONFIG_FUSE_FS=m
++CONFIG_GENERIC_ACL=y
++
++#
++# CD-ROM/DVD Filesystems
++#
++CONFIG_ISO9660_FS=y
++# CONFIG_JOLIET is not set
++# CONFIG_ZISOFS is not set
++CONFIG_UDF_FS=m
++CONFIG_UDF_NLS=y
++
++#
++# DOS/FAT/NT Filesystems
++#
++CONFIG_FAT_FS=y
++CONFIG_MSDOS_FS=y
++CONFIG_VFAT_FS=y
++CONFIG_FAT_DEFAULT_CODEPAGE=437
++CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
++CONFIG_NTFS_FS=m
++# CONFIG_NTFS_DEBUG is not set
++CONFIG_NTFS_RW=y
++
++#
++# Pseudo filesystems
++#
++CONFIG_PROC_FS=y
++CONFIG_PROC_KCORE=y
++CONFIG_PROC_SYSCTL=y
++CONFIG_SYSFS=y
++CONFIG_TMPFS=y
++CONFIG_TMPFS_POSIX_ACL=y
++CONFIG_HUGETLBFS=y
++CONFIG_HUGETLB_PAGE=y
++CONFIG_RAMFS=y
++# CONFIG_CONFIGFS_FS is not set
++
++#
++# Layered filesystems
++#
++# CONFIG_UNION_FS is not set
++
++#
++# Miscellaneous filesystems
++#
++# CONFIG_ADFS_FS is not set
++# CONFIG_AFFS_FS is not set
++# CONFIG_HFS_FS is not set
++# CONFIG_HFSPLUS_FS is not set
++# CONFIG_BEFS_FS is not set
++# CONFIG_BFS_FS is not set
++# CONFIG_EFS_FS is not set
++# CONFIG_CRAMFS is not set
++# CONFIG_VXFS_FS is not set
++# CONFIG_HPFS_FS is not set
++# CONFIG_QNX4FS_FS is not set
++# CONFIG_SYSV_FS is not set
++# CONFIG_UFS_FS is not set
++
++#
++# Network File Systems
++#
++CONFIG_NFS_FS=y
++CONFIG_NFS_V3=y
++# CONFIG_NFS_V3_ACL is not set
++# CONFIG_NFS_V4 is not set
++# CONFIG_NFS_DIRECTIO is not set
++CONFIG_NFSD=y
++CONFIG_NFSD_V3=y
++# CONFIG_NFSD_V3_ACL is not set
++# CONFIG_NFSD_V4 is not set
++CONFIG_NFSD_TCP=y
++CONFIG_ROOT_NFS=y
++CONFIG_LOCKD=y
++CONFIG_LOCKD_V4=y
++CONFIG_EXPORTFS=y
++CONFIG_NFS_COMMON=y
++CONFIG_SUNRPC=y
++# CONFIG_SUNRPC_BIND34 is not set
++# CONFIG_RPCSEC_GSS_KRB5 is not set
++# CONFIG_RPCSEC_GSS_SPKM3 is not set
++CONFIG_SMB_FS=m
++# CONFIG_SMB_NLS_DEFAULT is not set
++# CONFIG_CIFS is not set
++# CONFIG_NCP_FS is not set
++# CONFIG_CODA_FS is not set
++# CONFIG_AFS_FS is not set
++# CONFIG_9P_FS is not set
++
++#
++# Partition Types
++#
++# CONFIG_PARTITION_ADVANCED is not set
++CONFIG_MSDOS_PARTITION=y
++
++#
++# Native Language Support
++#
++CONFIG_NLS=y
++CONFIG_NLS_DEFAULT="iso8859-1"
++CONFIG_NLS_CODEPAGE_437=y
++# CONFIG_NLS_CODEPAGE_737 is not set
++# CONFIG_NLS_CODEPAGE_775 is not set
++CONFIG_NLS_CODEPAGE_850=y
++CONFIG_NLS_CODEPAGE_852=y
++# CONFIG_NLS_CODEPAGE_855 is not set
++# CONFIG_NLS_CODEPAGE_857 is not set
++# CONFIG_NLS_CODEPAGE_860 is not set
++# CONFIG_NLS_CODEPAGE_861 is not set
++# CONFIG_NLS_CODEPAGE_862 is not set
++# CONFIG_NLS_CODEPAGE_863 is not set
++# CONFIG_NLS_CODEPAGE_864 is not set
++# CONFIG_NLS_CODEPAGE_865 is not set
++# CONFIG_NLS_CODEPAGE_866 is not set
++# CONFIG_NLS_CODEPAGE_869 is not set
++# CONFIG_NLS_CODEPAGE_936 is not set
++# CONFIG_NLS_CODEPAGE_950 is not set
++# CONFIG_NLS_CODEPAGE_932 is not set
++# CONFIG_NLS_CODEPAGE_949 is not set
++# CONFIG_NLS_CODEPAGE_874 is not set
++# CONFIG_NLS_ISO8859_8 is not set
++# CONFIG_NLS_CODEPAGE_1250 is not set
++# CONFIG_NLS_CODEPAGE_1251 is not set
++CONFIG_NLS_ASCII=y
++CONFIG_NLS_ISO8859_1=y
++CONFIG_NLS_ISO8859_2=y
++# CONFIG_NLS_ISO8859_3 is not set
++# CONFIG_NLS_ISO8859_4 is not set
++# CONFIG_NLS_ISO8859_5 is not set
++# CONFIG_NLS_ISO8859_6 is not set
++# CONFIG_NLS_ISO8859_7 is not set
++# CONFIG_NLS_ISO8859_9 is not set
++# CONFIG_NLS_ISO8859_13 is not set
++# CONFIG_NLS_ISO8859_14 is not set
++CONFIG_NLS_ISO8859_15=y
++# CONFIG_NLS_KOI8_R is not set
++# CONFIG_NLS_KOI8_U is not set
++CONFIG_NLS_UTF8=y
++
++#
++# Distributed Lock Manager
++#
++# CONFIG_DLM is not set
++
++#
++# Instrumentation Support
++#
++CONFIG_PROFILING=y
++CONFIG_OPROFILE=y
++CONFIG_KPROBES=y
++
++#
++# Kernel hacking
++#
++CONFIG_TRACE_IRQFLAGS_SUPPORT=y
++CONFIG_PRINTK_TIME=y
++# CONFIG_ENABLE_MUST_CHECK is not set
++CONFIG_MAGIC_SYSRQ=y
++CONFIG_UNUSED_SYMBOLS=y
++# CONFIG_DEBUG_FS is not set
++# CONFIG_HEADERS_CHECK is not set
++CONFIG_DEBUG_KERNEL=y
++# CONFIG_DEBUG_SHIRQ is not set
++CONFIG_DETECT_SOFTLOCKUP=y
++# CONFIG_SCHEDSTATS is not set
++# CONFIG_TIMER_STATS is not set
++# CONFIG_DEBUG_SLAB is not set
++# CONFIG_DEBUG_RT_MUTEXES is not set
++# CONFIG_RT_MUTEX_TESTER is not set
++# CONFIG_DEBUG_SPINLOCK is not set
++# CONFIG_DEBUG_MUTEXES is not set
++# CONFIG_DEBUG_LOCK_ALLOC is not set
++# CONFIG_PROVE_LOCKING is not set
++# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
++# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
++# CONFIG_DEBUG_KOBJECT is not set
++# CONFIG_DEBUG_HIGHMEM is not set
++CONFIG_DEBUG_BUGVERBOSE=y
++CONFIG_DEBUG_INFO=y
++# CONFIG_DEBUG_VM is not set
++# CONFIG_DEBUG_LIST is not set
++# CONFIG_FRAME_POINTER is not set
++# CONFIG_UNWIND_INFO is not set
++# CONFIG_FORCED_INLINING is not set
++# CONFIG_RCU_TORTURE_TEST is not set
++# CONFIG_LKDTM is not set
++# CONFIG_FAULT_INJECTION is not set
++# CONFIG_WANT_EXTRA_DEBUG_INFORMATION is not set
++# CONFIG_KGDB is not set
++CONFIG_EARLY_PRINTK=y
++CONFIG_DEBUG_STACKOVERFLOW=y
++# CONFIG_DEBUG_STACK_USAGE is not set
++# CONFIG_DEBUG_RODATA is not set
++# CONFIG_4KSTACKS is not set
++CONFIG_X86_FIND_SMP_CONFIG=y
++CONFIG_X86_MPPARSE=y
++CONFIG_DOUBLEFAULT=y
++
++#
++# Linux VServer
++#
++CONFIG_VSERVER_FILESHARING=y
++CONFIG_VSERVER_AUTO_LBACK=y
++CONFIG_VSERVER_AUTO_SINGLE=y
++CONFIG_VSERVER_COWBL=y
++# CONFIG_VSERVER_VTIME is not set
++# CONFIG_VSERVER_DEVICE is not set
++CONFIG_VSERVER_PROC_SECURE=y
++CONFIG_VSERVER_HARDCPU=y
++CONFIG_VSERVER_IDLETIME=y
++# CONFIG_VSERVER_IDLELIMIT is not set
++# CONFIG_TAGGING_NONE is not set
++# CONFIG_TAGGING_UID16 is not set
++# CONFIG_TAGGING_GID16 is not set
++CONFIG_TAGGING_ID24=y
++# CONFIG_TAGGING_INTERN is not set
++# CONFIG_TAG_NFSD is not set
++# CONFIG_VSERVER_PRIVACY is not set
++CONFIG_VSERVER_CONTEXTS=256
++CONFIG_VSERVER_WARN=y
++# CONFIG_VSERVER_DEBUG is not set
++CONFIG_VSERVER=y
++
++#
++# Security options
++#
++# CONFIG_KEYS is not set
++# CONFIG_SECURITY is not set
++
++#
++# Cryptographic options
++#
++# CONFIG_CRYPTO is not set
++
++#
++# Library routines
++#
++CONFIG_BITREVERSE=y
++CONFIG_CRC_CCITT=y
++CONFIG_CRC16=y
++# CONFIG_CRC_ITU_T is not set
++CONFIG_CRC32=y
++CONFIG_LIBCRC32C=y
++CONFIG_ZLIB_INFLATE=y
++CONFIG_PLIST=y
++CONFIG_HAS_IOMEM=y
++CONFIG_HAS_IOPORT=y
++CONFIG_HAS_DMA=y
++CONFIG_GENERIC_HARDIRQS=y
++CONFIG_GENERIC_IRQ_PROBE=y
++CONFIG_GENERIC_PENDING_IRQ=y
++CONFIG_X86_SMP=y
++CONFIG_X86_HT=y
++CONFIG_X86_BIOS_REBOOT=y
++CONFIG_X86_TRAMPOLINE=y
++CONFIG_KTIME_SCALAR=y
+diff -Nurb linux-2.6.22-570/Documentation/DocBook/Makefile linux-2.6.22-591/Documentation/DocBook/Makefile
+--- linux-2.6.22-570/Documentation/DocBook/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/DocBook/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -11,7 +11,7 @@
+ procfs-guide.xml writing_usb_driver.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml \
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+- genericirq.xml
++ genericirq.xml kgdb.xml
+
+ ###
+ # The build process is as follows (targets):
+diff -Nurb linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl
+--- linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,250 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
++ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
++
++<book id="kgdbInternals">
++ <bookinfo>
++ <title>KGDB Internals</title>
++
++ <authorgroup>
++ <author>
++ <firstname>Tom</firstname>
++ <surname>Rini</surname>
++ <affiliation>
++ <address>
++ <email>trini@kernel.crashing.org</email>
++ </address>
++ </affiliation>
++ </author>
++ </authorgroup>
++
++ <authorgroup>
++ <author>
++ <firstname>Amit S.</firstname>
++ <surname>Kale</surname>
++ <affiliation>
++ <address>
++ <email>amitkale@linsyssoft.com</email>
++ </address>
++ </affiliation>
++ </author>
++ </authorgroup>
++
++ <copyright>
++ <year>2004-2005</year>
++ <holder>MontaVista Software, Inc.</holder>
++ </copyright>
++ <copyright>
++ <year>2004</year>
++ <holder>Amit S. Kale</holder>
++ </copyright>
++
++ <legalnotice>
++ <para>
++ This file is licensed under the terms of the GNU General Public License
++ version 2. This program is licensed "as is" without any warranty of any
++ kind, whether express or implied.
++ </para>
++
++ </legalnotice>
++ </bookinfo>
++
++<toc></toc>
++ <chapter id="Introduction">
++ <title>Introduction</title>
++ <para>
++    kgdb is a source-level debugger for the Linux kernel. It is used along
++    with gdb to debug a Linux kernel. With kgdb, kernel developers can
++    debug a kernel much as they would an application program: they can
++    place breakpoints in kernel code, step through the code and observe
++    variables.
++ </para>
++ <para>
++ Two machines are required for using kgdb. One of these machines is a
++ development machine and the other is a test machine. The machines are
++    typically connected through a serial line: a null-modem cable which
++    connects their serial ports. It is also possible, however, to use an
++    ethernet connection between the machines. The kernel to be debugged
++    runs on the test machine. gdb runs on the development machine. The
++    serial line or ethernet connection is used by gdb to communicate with
++    the kernel being debugged.
++ </para>
++ </chapter>
++ <chapter id="CompilingAKernel">
++ <title>Compiling a kernel</title>
++ <para>
++    To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
++    menu and then select "KGDB: kernel debugging with remote gdb".
++ </para>
++ <para>
++ The first choice for I/O is <symbol>CONFIG_KGDB_ONLY_MODULES</symbol>.
++ This means that you will only be able to use KGDB after loading a
++ kernel module that defines how you want to be able to talk with
++ KGDB. There are two other choices (more on some architectures) that
++ can be enabled as modules later, if not picked here.
++ </para>
++ <para>The first of these is <symbol>CONFIG_KGDB_8250_NOMODULE</symbol>.
++ This has sub-options such as <symbol>CONFIG_KGDB_SIMPLE_SERIAL</symbol>
++    which toggles between choosing the serial port by ttyS number and
++    specifying a port and IRQ number.
++ </para>
++ <para>
++    The second of these I/O choices on most systems is
++ <symbol>CONFIG_KGDBOE</symbol>. This requires that the machine to be
++ debugged has an ethernet card which supports the netpoll API, such as
++ the cards supported by <symbol>CONFIG_E100</symbol>. There are no
++ sub-options for this, but a kernel command line option is required.
++ </para>
++ </chapter>
++ <chapter id="BootingTheKernel">
++ <title>Booting the kernel</title>
++ <para>
++    The kernel command line option <constant>kgdbwait</constant> makes kgdb
++    wait for a gdb connection during booting of a kernel. If the
++    <symbol>CONFIG_KGDB_8250</symbol> driver is used (or, if applicable,
++    another serial driver), this breakpoint will happen very early on, before
++    console output. If you wish to change serial port information and you
++    have enabled both <symbol>CONFIG_KGDB_8250</symbol> and
++    <symbol>CONFIG_KGDB_SIMPLE_SERIAL</symbol>, then you must pass the option
++    <constant>kgdb8250=&lt;io or mmio&gt;,&lt;address&gt;,&lt;baud
++    rate&gt;,&lt;irq&gt;</constant> before <constant>kgdbwait</constant>.
++    The value <constant>io</constant> or <constant>mmio</constant> indicates
++    whether the address being passed next needs to be memory mapped
++    (<constant>mmio</constant>) or not. The <constant>address</constant> must
++    be passed in hex; it is the hardware address, and it will be remapped if
++    passed as <constant>mmio</constant>. The values
++    <constant>baud rate</constant> and <constant>irq</constant> are base-10.
++ The supported values for <constant>baud rate</constant> are
++ <constant>9600</constant>, <constant>19200</constant>,
++ <constant>38400</constant>, <constant>57600</constant>, and
++ <constant>115200</constant>.
++ </para>
++ <para>
++ To have KGDB stop the kernel and wait, with the compiled values for the
++ serial driver, pass in: <constant>kgdbwait</constant>.
++ </para>
++ <para>
++ To specify the values of the SH SCI(F) serial port at boot:
++ <constant>kgdbsci=0,115200</constant>.
++ </para>
++ <para>
++ To specify the values of the serial port at boot:
++ <constant>kgdb8250=io,3f8,115200,3</constant>.
++ On IA64 this could also be:
++    <constant>kgdb8250=mmio,0xff5e0000,115200,74</constant>.
++    To have KGDB also stop the kernel and wait for GDB to connect, pass in
++    <constant>kgdbwait</constant> after this argument.
++ </para>
++ <para>
++ To configure the <symbol>CONFIG_KGDBOE</symbol> driver, pass in
++    <constant>kgdboe=[src-port]@&lt;src-ip&gt;/[dev],[tgt-port]@&lt;tgt-ip&gt;/[tgt-macaddr]</constant>
++ where:
++ <itemizedlist>
++ <listitem><para>src-port (optional): source for UDP packets (defaults to <constant>6443</constant>)</para></listitem>
++ <listitem><para>src-ip: source IP to use (interface address)</para></listitem>
++ <listitem><para>dev (optional): network interface (<constant>eth0</constant>)</para></listitem>
++ <listitem><para>tgt-port (optional): port GDB will use (defaults to <constant>6442</constant>)</para></listitem>
++ <listitem><para>tgt-ip: IP address GDB will be connecting from</para></listitem>
++ <listitem><para>tgt-macaddr (optional): ethernet MAC address for logging agent (default is broadcast)</para></listitem>
++ </itemizedlist>
++ </para>
++ <para>
++    The <symbol>CONFIG_KGDBOE</symbol> driver can be reconfigured at run
++    time, if <symbol>CONFIG_SYSFS</symbol> and
++    <symbol>CONFIG_MODULES</symbol> are enabled, by echoing a new config
++    string to <constant>/sys/module/kgdboe/parameters/kgdboe</constant>. The
++ driver can be unconfigured with the special string
++ <constant>not_configured</constant>.
++ </para>
++ </chapter>
++ <chapter id="ConnectingGDB">
++ <title>Connecting gdb</title>
++ <para>
++    If you have used any of the methods described in the previous chapter
++    to have KGDB stop and create an initial breakpoint, kgdb prints
++    the message "Waiting for connection from remote gdb..." on the console
++    and waits for a connection from gdb. At this point you connect gdb to kgdb.
++ </para>
++ <para>
++ Example (serial):
++ </para>
++ <programlisting>
++ % gdb ./vmlinux
++ (gdb) set remotebaud 115200
++ (gdb) target remote /dev/ttyS0
++ </programlisting>
++ <para>
++ Example (ethernet):
++ </para>
++ <programlisting>
++ % gdb ./vmlinux
++ (gdb) target remote udp:192.168.2.2:6443
++ </programlisting>
++ <para>
++ Once connected, you can debug a kernel the way you would debug an
++ application program.
++ </para>
++ </chapter>
++ <chapter id="ArchitectureNotes">
++ <title>Architecture specific notes</title>
++ <para>
++ SuperH: The NMI switch found on some boards can be used to trigger an
++    initial breakpoint. Subsequent triggers do nothing. If the console
++    is enabled on the SCI(F) serial port, and that is the port being used
++ for KGDB, then you must trigger a breakpoint via sysrq, NMI, or
++ some other method prior to connecting, or echo a control-c to the
++ serial port. Also, to use the SCI(F) port for KGDB, the
++ <symbol>CONFIG_SERIAL_SH_SCI</symbol> driver must be enabled.
++ </para>
++ </chapter>
++ <chapter id="CommonBackEndReq">
++ <title>The common backend (required)</title>
++ <para>
++ There are a few flags which must be set on every architecture in
++    their &lt;asm/kgdb.h&gt; file. These are:
++ <itemizedlist>
++ <listitem>
++ <para>
++ NUMREGBYTES: The size in bytes of all of the registers, so
++ that we can ensure they will all fit into a packet.
++ </para>
++ <para>
++ BUFMAX: The size in bytes of the buffer GDB will read into.
++ This must be larger than NUMREGBYTES.
++ </para>
++ <para>
++          CACHE_FLUSH_IS_SAFE: Set to one if it is always safe to call
++ flush_cache_range or flush_icache_range. On some architectures,
++ these functions may not be safe to call on SMP since we keep other
++ CPUs in a holding pattern.
++ </para>
++ </listitem>
++ </itemizedlist>
++ </para>
++ <para>
++    There are also the following functions, used by the common backend
++    in kernel/kgdb.c, that must be supplied by the
++    architecture-specific backend. No weak version of these is provided.
++ </para>
++!Iinclude/linux/kgdb.h
++ </chapter>
++ <chapter id="CommonBackEndOpt">
++ <title>The common backend (optional)</title>
++ <para>
++ These functions are part of the common backend, found in kernel/kgdb.c
++ and are optionally implemented. Some functions (with _hw_ in the name)
++ end up being required on arches which use hardware breakpoints.
++ </para>
++!Ikernel/kgdb.c
++ </chapter>
++ <chapter id="DriverSpecificFunctions">
++ <title>Driver-Specific Functions</title>
++ <para>
++    Some of the I/O drivers have additional, driver-specific functions
++    that can be called. Calls from other places
++ to these functions must be wrapped in #ifdefs for the driver in
++ question.
++ </para>
++!Idrivers/serial/8250_kgdb.c
++ </chapter>
++</book>
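
The "common backend (required)" chapter above names three per-architecture
constants but does not show what a port's definitions look like. As a rough
sketch only, a hypothetical <asm/kgdb.h> fragment for a 32-bit architecture
with sixteen 32-bit registers might read as follows; the macro names come
from the chapter, while the values are invented for illustration and are not
taken from any real port.

/*
 * Hypothetical <asm/kgdb.h> fragment; illustrative values only.
 */
#ifndef _ASM_KGDB_H
#define _ASM_KGDB_H

/* Size in bytes of all registers in a GDB 'g' packet. */
#define NUMREGBYTES		(16 * 4)

/*
 * GDB packet buffer; must be larger than NUMREGBYTES, since register
 * contents travel hex-encoded (two characters per byte) plus framing.
 */
#define BUFMAX			((NUMREGBYTES * 2) + 128)

/*
 * Nonzero if flush_cache_range()/flush_icache_range() are safe to call
 * while other CPUs are held in the debugger's holding pattern.
 */
#define CACHE_FLUSH_IS_SAFE	1

#endif /* _ASM_KGDB_H */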
+diff -Nurb linux-2.6.22-570/Documentation/accounting/getdelays.c linux-2.6.22-591/Documentation/accounting/getdelays.c
+--- linux-2.6.22-570/Documentation/accounting/getdelays.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/accounting/getdelays.c 2007-12-21 15:36:11.000000000 -0500
+@@ -49,6 +49,7 @@
+ int dbg;
+ int print_delays;
+ int print_io_accounting;
++int print_task_context_switch_counts;
+ __u64 stime, utime;
+
+ #define PRINTF(fmt, arg...) { \
+@@ -195,7 +196,7 @@
+ "IO %15s%15s\n"
+ " %15llu%15llu\n"
+ "MEM %15s%15s\n"
+- " %15llu%15llu\n\n",
++ " %15llu%15llu\n",
+ "count", "real total", "virtual total", "delay total",
+ t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
+ t->cpu_delay_total,
+@@ -204,6 +205,14 @@
+ "count", "delay total", t->swapin_count, t->swapin_delay_total);
+ }
+
++void task_context_switch_counts(struct taskstats *t)
++{
++ printf("\n\nTask %15s%15s\n"
++ " %15llu%15llu\n",
++ "voluntary", "nonvoluntary",
++ t->nvcsw, t->nivcsw);
++}
++
+ void print_ioacct(struct taskstats *t)
+ {
+ printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
+@@ -235,7 +244,7 @@
+ struct msgtemplate msg;
+
+ while (1) {
+- c = getopt(argc, argv, "diw:r:m:t:p:vl");
++ c = getopt(argc, argv, "qdiw:r:m:t:p:vl");
+ if (c < 0)
+ break;
+
+@@ -248,6 +257,10 @@
+ printf("printing IO accounting\n");
+ print_io_accounting = 1;
+ break;
++ case 'q':
++ printf("printing task/process context switch rates\n");
++ print_task_context_switch_counts = 1;
++ break;
+ case 'w':
+ logfile = strdup(optarg);
+ printf("write to file %s\n", logfile);
+@@ -389,6 +402,8 @@
+ print_delayacct((struct taskstats *) NLA_DATA(na));
+ if (print_io_accounting)
+ print_ioacct((struct taskstats *) NLA_DATA(na));
++ if (print_task_context_switch_counts)
++ task_context_switch_counts((struct taskstats *) NLA_DATA(na));
+ if (fd) {
+ if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
+ err(1,"write error\n");
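
The hunks above teach getdelays a new -q flag that reports per-task
voluntary and involuntary context switch counts delivered via taskstats. As
a quick cross-check that avoids the netlink plumbing, the same counters can
also be read from /proc/<pid>/status; the sketch below assumes the kernel
exposes the voluntary_ctxt_switches and nonvoluntary_ctxt_switches lines
there (kernels of roughly this vintage and later do).

#include <stdio.h>
#include <string.h>

/* Print this process's context switch counters from /proc/self/status. */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f) {
		perror("/proc/self/status");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "ctxt_switches"))
			fputs(line, stdout);	/* e.g. "voluntary_ctxt_switches: 5" */
	fclose(f);
	return 0;
}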
+diff -Nurb linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt
+--- linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -22,6 +22,8 @@
+ /* Extended accounting fields end */
+ Their values are collected if CONFIG_TASK_XACCT is set.
+
++4) Per-task and per-thread context switch count statistics
++
+ Future extension should add fields to the end of the taskstats struct, and
+ should not change the relative position of each field within the struct.
+
+@@ -158,4 +160,8 @@
+
+ /* Extended accounting fields end */
+
++4) Per-task and per-thread context switch count statistics
++	__u64	nvcsw;			/* Voluntary context switch counter */
++	__u64	nivcsw;			/* Involuntary context switch counter */
++
+ }
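
Because new fields are only ever appended to struct taskstats, a consumer
should gate access to late additions such as nvcsw/nivcsw on the version
field the kernel fills in, not on compile-time assumptions. A minimal
sketch of that convention follows; the version number 4 used here for the
context switch fields is an assumption for the example and should be
verified against the TASKSTATS_VERSION history.

#include <stdio.h>
#include <linux/taskstats.h>

/* Print the context switch counters only if this kernel supplied them. */
static void print_ctxt_switches(const struct taskstats *t)
{
	if (t->version < 4) {	/* assumed version that added nvcsw/nivcsw */
		fprintf(stderr, "taskstats v%u lacks switch counts\n",
			t->version);
		return;
	}
	printf("voluntary=%llu nonvoluntary=%llu\n",
	       (unsigned long long)t->nvcsw,
	       (unsigned long long)t->nivcsw);
}

int main(void)
{
	/* Normally filled in from a taskstats netlink reply. */
	struct taskstats t = { .version = TASKSTATS_VERSION };

	print_ctxt_switches(&t);
	return 0;
}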
+diff -Nurb linux-2.6.22-570/Documentation/cachetlb.txt linux-2.6.22-591/Documentation/cachetlb.txt
+--- linux-2.6.22-570/Documentation/cachetlb.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/cachetlb.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -253,7 +253,7 @@
+
+ The first of these two routines is invoked after map_vm_area()
+ has installed the page table entries. The second is invoked
+- before unmap_vm_area() deletes the page table entries.
++ before unmap_kernel_range() deletes the page table entries.
+
+ There exists another whole class of cpu cache issues which currently
+ require a whole different set of interfaces to handle properly.
+diff -Nurb linux-2.6.22-570/Documentation/containers.txt linux-2.6.22-591/Documentation/containers.txt
+--- linux-2.6.22-570/Documentation/containers.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/containers.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,543 @@
++ CONTAINERS
++ -------
++
++Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
++
++Original copyright statements from cpusets.txt:
++Portions Copyright (C) 2004 BULL SA.
++Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
++Modified by Paul Jackson <pj@sgi.com>
++Modified by Christoph Lameter <clameter@sgi.com>
++
++CONTENTS:
++=========
++
++1. Containers
++ 1.1 What are containers ?
++ 1.2 Why are containers needed ?
++ 1.3 How are containers implemented ?
++ 1.4 What does notify_on_release do ?
++ 1.5 How do I use containers ?
++2. Usage Examples and Syntax
++ 2.1 Basic Usage
++ 2.2 Attaching processes
++3. Kernel API
++ 3.1 Overview
++ 3.2 Synchronization
++ 3.3 Subsystem API
++4. Questions
++
++1. Containers
++==========
++
++1.1 What are containers ?
++----------------------
++
++Containers provide a mechanism for aggregating/partitioning sets of
++tasks, and all their future children, into hierarchical groups with
++specialized behaviour.
++
++Definitions:
++
++A *container* associates a set of tasks with a set of parameters for one
++or more subsystems.
++
++A *subsystem* is a module that makes use of the task grouping
++facilities provided by containers to treat groups of tasks in
++particular ways. A subsystem is typically a "resource controller" that
++schedules a resource or applies per-container limits, but it may be
++anything that wants to act on a group of processes, e.g. a
++virtualization subsystem.
++
++A *hierarchy* is a set of containers arranged in a tree, such that
++every task in the system is in exactly one of the containers in the
++hierarchy, and a set of subsystems; each subsystem has subsystem-specific
++state attached to each container in the hierarchy. Each hierarchy has
++an instance of the container virtual filesystem associated with it.
++
++At any one time there may be multiple active hierarchies of task
++containers. Each hierarchy is a partition of all tasks in the system.
++
++User level code may create and destroy containers by name in an
++instance of the container virtual file system, specify and query to
++which container a task is assigned, and list the task pids assigned to
++a container. Those creations and assignments only affect the hierarchy
++associated with that instance of the container file system.
++
++On their own, the only use for containers is for simple job
++tracking. The intention is that other subsystems hook into the generic
++container support to provide new attributes for containers, such as
++accounting/limiting the resources which processes in a container can
++access. For example, cpusets (see Documentation/cpusets.txt) allows
++you to associate a set of CPUs and a set of memory nodes with the
++tasks in each container.
++
++1.2 Why are containers needed ?
++----------------------------
++
++There are multiple efforts to provide process aggregations in the
++Linux kernel, mainly for resource tracking purposes. Such efforts
++include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
++namespaces. These all require the basic notion of a
++grouping/partitioning of processes, with newly forked processes ending
++up in the same group (container) as their parent process.
++
++The kernel container patch provides the minimum essential kernel
++mechanisms required to efficiently implement such groups. It has
++minimal impact on the system fast paths, and provides hooks for
++specific subsystems such as cpusets to provide additional behaviour as
++desired.
++
++Multiple hierarchy support is provided to allow for situations where
++the division of tasks into containers is distinctly different for
++different subsystems - having parallel hierarchies allows each
++hierarchy to be a natural division of tasks, without having to handle
++complex combinations of tasks that would be present if several
++unrelated subsystems needed to be forced into the same tree of
++containers.
++
++At one extreme, each resource controller or subsystem could be in a
++separate hierarchy; at the other extreme, all subsystems
++would be attached to the same hierarchy.
++
++As an example of a scenario (originally proposed by vatsa@in.ibm.com)
++that can benefit from multiple hierarchies, consider a large
++university server with various users - students, professors, system
++tasks etc. The resource planning for this server could be along the
++following lines:
++
++ CPU :               Top cpuset
++                     /        \
++             CPUSet1            CPUSet2
++                |                  |
++             (Profs)           (Students)
++
++        In addition (system tasks) are attached to topcpuset (so
++        that they can run anywhere) with a limit of 20%
++
++ Memory : Professors (50%), students (30%), system (20%)
++
++ Disk : Prof (50%), students (30%), system (20%)
++
++ Network : WWW browsing (20%), Network File System (60%), others (20%)
++                              /          \
++             Prof (15%)                students (5%)
++
++Browsers like firefox/lynx go into the WWW network class, while (k)nfsd
++goes into the NFS network class.
++
++At the same time firefox/lynx will share an appropriate CPU/Memory class
++depending on who launched it (prof/student).
++
++With the ability to classify tasks differently for different resources
++(by putting those resource subsystems in different hierarchies), the
++admin can easily set up a script which receives exec notifications and,
++depending on who is launching the browser, does:
++
++ # echo browser_pid > /mnt/<restype>/<userclass>/tasks
++
++With only a single hierarchy, the admin would potentially have to
++create a separate container for every browser launched, and associate
++it with the appropriate network and other resource classes. This may
++lead to a proliferation of such containers.
++
++Also, let's say that the administrator would like to temporarily give
++enhanced network access to a student's browser (since it is night and
++the user wants to do online gaming :)), or give one of the student's
++simulation apps enhanced CPU power.
++
++With the ability to write pids directly to resource classes, it's just
++a matter of:
++
++ # echo pid > /mnt/network/<new_class>/tasks
++ (after some time)
++ # echo pid > /mnt/network/<orig_class>/tasks
++
++Without this ability, the admin would have to split the container into
++multiple separate ones and then associate the new containers with the
++new resource classes.
++
++
++
++1.3 How are containers implemented ?
++---------------------------------
++
++Containers extend the kernel as follows:
++
++ - Each task in the system has a reference-counted pointer to a
++ css_group.
++
++ - A css_group contains a set of reference-counted pointers to
++ container_subsys_state objects, one for each container subsystem
++ registered in the system. There is no direct link from a task to
++ the container of which it's a member in each hierarchy, but this
++ can be determined by following pointers through the
++ container_subsys_state objects. This is because accessing the
++ subsystem state is something that's expected to happen frequently
++ and in performance-critical code, whereas operations that require a
++ task's actual container assignments (in particular, moving between
++ containers) are less common. A linked list runs through the cg_list
++ field of each task_struct using the css_group, anchored at
++ css_group->tasks.
++
++ - A container hierarchy filesystem can be mounted for browsing and
++ manipulation from user space.
++
++ - You can list all the tasks (by pid) attached to any container.
++
++The implementation of containers requires a few, simple hooks
++into the rest of the kernel, none in performance critical paths:
++
++ - in init/main.c, to initialize the root containers and initial
++ css_group at system boot.
++
++ - in fork and exit, to attach and detach a task from its css_group.
++
++In addition, a new file system of type "container" may be mounted, to
++enable browsing and modifying the containers presently known to the
++kernel. When mounting a container hierarchy, you may specify a
++comma-separated list of subsystems to mount as the filesystem mount
++options. By default, mounting the container filesystem attempts to
++mount a hierarchy containing all registered subsystems.
++
++If an active hierarchy with exactly the same set of subsystems already
++exists, it will be reused for the new mount. If no existing hierarchy
++matches, and any of the requested subsystems are in use in an existing
++hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
++is activated, associated with the requested subsystems.
++
++It's not currently possible to bind a new subsystem to an active
++container hierarchy, or to unbind a subsystem from an active container
++hierarchy. This may be possible in future, but is fraught with nasty
++error-recovery issues.
++
++When a container filesystem is unmounted, if there are any
++subcontainers created below the top-level container, that hierarchy
++will remain active even though unmounted; if there are no
++subcontainers then the hierarchy will be deactivated.
++
++No new system calls are added for containers - all support for
++querying and modifying containers is via this container file system.
++
++Each task under /proc has an added file named 'container' displaying,
++for each active hierarchy, the subsystem names and the container name
++as the path relative to the root of the container file system.
++
++Each container is represented by a directory in the container file system
++containing the following files describing that container:
++
++ - tasks: list of tasks (by pid) attached to that container
++ - notify_on_release flag: run /sbin/container_release_agent on exit?
++
++Other subsystems such as cpusets may add additional files in each
++container directory.
++
++New containers are created using the mkdir system call or shell
++command. The properties of a container, such as its flags, are
++modified by writing to the appropriate file in that container's
++directory, as listed above.
++
++The named hierarchical structure of nested containers allows partitioning
++a large system into nested, dynamically changeable, "soft-partitions".
++
++The attachment of each task, automatically inherited at fork by any
++children of that task, to a container allows organizing the work load
++on a system into related sets of tasks. A task may be re-attached to
++any other container, if allowed by the permissions on the necessary
++container file system directories.
++
++When a task is moved from one container to another, it gets a new
++css_group pointer - if there's an already existing css_group with the
++desired collection of containers then that group is reused, else a new
++css_group is allocated. Note that the current implementation uses a
++linear search to locate an appropriate existing css_group, so isn't
++very efficient. A future version will use a hash table for better
++performance.
++
++To allow access from a container to the css_groups (and hence tasks)
++that comprise it, a set of cg_container_link objects form a lattice;
++each cg_container_link is linked into a list of cg_container_links for
++a single container on its cont_link_list field, and a list of
++cg_container_links for a single css_group on its cg_link_list.
++
++Thus the set of tasks in a container can be listed by iterating over
++each css_group that references the container, and sub-iterating over
++each css_group's task set.
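++
++As a rough C sketch of this linkage (the field and constant names here
++are simplified from the description above and are illustrative only;
++the authoritative definitions live in the container patches
++themselves):
++
++	struct css_group {
++		struct kref ref;	/* reference count */
++		/* one state pointer per registered subsystem */
++		struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
++		struct list_head tasks;		/* tasks using this css_group */
++		struct list_head cg_links;	/* cg_container_links for us */
++	};
++
++	struct cg_container_link {
++		struct list_head cont_link_list; /* list for one container */
++		struct list_head cg_link_list;	 /* list for one css_group */
++		struct css_group *cg;
++		struct container *cont;
++	};
++
++Listing the tasks in a container then amounts to walking the
++container's list of cg_container_links and, for each linked css_group,
++walking its tasks list.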
++
++The use of a Linux virtual file system (vfs) to represent the
++container hierarchy provides for a familiar permission and name space
++for containers, with a minimum of additional kernel code.
++
++1.4 What does notify_on_release do ?
++------------------------------------
++
++*** notify_on_release is disabled in the current patch set. It will be
++*** reactivated in a future patch in a less-intrusive manner
++
++If the notify_on_release flag is enabled (1) in a container, then
++whenever the last task in the container leaves (exits or attaches to
++some other container) and the last child container of that container
++is removed, then the kernel runs the command specified by the contents
++of the "release_agent" file in that hierarchy's root directory,
++supplying the pathname (relative to the mount point of the container
++file system) of the abandoned container. This enables automatic
++removal of abandoned containers. The default value of
++notify_on_release in the root container at system boot is disabled
++(0). The default value of other containers at creation is the current
++value of their parent's notify_on_release setting. The default value of
++a container hierarchy's release_agent path is empty.
++
++1.5 How do I use containers ?
++--------------------------
++
++To start a new job that is to be contained within a container, using
++the "cpuset" container subsystem, the steps are something like:
++
++ 1) mkdir /dev/container
++ 2) mount -t container -ocpuset cpuset /dev/container
++ 3) Create the new container by doing mkdir's and write's (or echo's) in
++ the /dev/container virtual file system.
++ 4) Start a task that will be the "founding father" of the new job.
++ 5) Attach that task to the new container by writing its pid to the
++ /dev/container tasks file for that container.
++ 6) fork, exec or clone the job tasks from this founding father task.
++
++For example, the following sequence of commands will set up a container
++named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
++and then start a subshell 'sh' in that container:
++
++ mount -t container cpuset -ocpuset /dev/container
++ cd /dev/container
++ mkdir Charlie
++ cd Charlie
++ /bin/echo $$ > tasks
++ sh
++ # The subshell 'sh' is now running in container Charlie
++ # The next line should display '/Charlie'
++ cat /proc/self/container
++
++2. Usage Examples and Syntax
++============================
++
++2.1 Basic Usage
++---------------
++
++Creating, modifying, and using containers can be done through the container
++virtual filesystem.
++
++To mount a container hierarchy with all available subsystems, type:
++# mount -t container xxx /dev/container
++
++The "xxx" is not interpreted by the container code, but will appear in
++/proc/mounts, so it may be any useful identifying string that you like.
++
++To mount a container hierarchy with just the cpuset and numtasks
++subsystems, type:
++# mount -t container -o cpuset,numtasks hier1 /dev/container
++
++To change the set of subsystems bound to a mounted hierarchy, just
++remount with different options:
++
++# mount -o remount,cpuset,ns /dev/container
++
++Note that changing the set of subsystems is currently only supported
++when the hierarchy consists of a single (root) container. Supporting
++the ability to arbitrarily bind/unbind subsystems from an existing
++container hierarchy is intended to be implemented in the future.
++
++Then under /dev/container you can find a tree that corresponds to the
++tree of the containers in the system. For instance, /dev/container
++is the container that holds the whole system.
++
++If you want to create a new container under /dev/container:
++# cd /dev/container
++# mkdir my_container
++
++Now you want to do something with this container.
++# cd my_container
++
++In this directory you can find several files:
++# ls
++notify_on_release release_agent tasks
++(plus whatever files are added by the attached subsystems)
++
++Now attach your shell to this container:
++# /bin/echo $$ > tasks
++
++You can also create containers inside your container by using mkdir in this
++directory.
++# mkdir my_sub_cs
++
++To remove a container, just use rmdir:
++# rmdir my_sub_cs
++
++This will fail if the container is in use (has containers inside, or
++has processes attached, or is held alive by another subsystem-specific
++reference).
++
++2.2 Attaching processes
++-----------------------
++
++# /bin/echo PID > tasks
++
++Note that it is PID, not PIDs. You can only attach ONE task at a time.
++If you have several tasks to attach, you have to do it one after another:
++
++# /bin/echo PID1 > tasks
++# /bin/echo PID2 > tasks
++ ...
++# /bin/echo PIDn > tasks
++
++3. Kernel API
++=============
++
++3.1 Overview
++------------
++
++Each kernel subsystem that wants to hook into the generic container
++system needs to create a container_subsys object. This contains
++various methods, which are callbacks from the container system, along
++with a subsystem id which will be assigned by the container system.
++
++Other fields in the container_subsys object include:
++
++- subsys_id: a unique array index for the subsystem, indicating which
++ entry in container->subsys[] this subsystem should be
++ managing. Initialized by container_register_subsys(); prior to this
++ it should be initialized to -1.
++
++- hierarchy: an index indicating which hierarchy, if any, this
++ subsystem is currently attached to. If this is -1, then the
++ subsystem is not attached to any hierarchy, and all tasks should be
++ considered to be members of the subsystem's top_container. It should
++ be initialized to -1.
++
++- name: should be initialized to a unique subsystem name prior to
++ calling container_register_subsystem. Should be no longer than
++ MAX_CONTAINER_TYPE_NAMELEN.
++
++Each container object created by the system has an array of pointers,
++indexed by subsystem id; each pointer is entirely managed by the
++owning subsystem; the generic container code will never touch it.
++
++3.2 Synchronization
++-------------------
++
++There is a global mutex, container_mutex, used by the container
++system. This should be taken by anything that wants to modify a
++container. It may also be taken to prevent containers from being
++modified, but more specific locks may be more appropriate in that
++situation.
++
++See kernel/container.c for more details.
++
++Subsystems can take/release the container_mutex via the functions
++container_lock()/container_unlock().
++
++Accessing a task's container pointer may be done in the following ways:
++- while holding container_mutex
++- while holding the task's alloc_lock (via task_lock())
++- inside an rcu_read_lock() section via rcu_dereference()
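++
++For example, an RCU-protected read might look like this sketch (the
++task_struct field name 'containers' is illustrative; see the container
++patches for the real accessors):
++
++	struct css_group *cg;
++
++	rcu_read_lock();
++	cg = rcu_dereference(tsk->containers);
++	/* use cg here; do not block inside the RCU section */
++	rcu_read_unlock();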
++
++3.3 Subsystem API
++--------------------------
++
++Each subsystem should:
++
++- add an entry in linux/container_subsys.h
++- define a container_subsys object called <name>_subsys
++
++Each subsystem may export the following methods. The only mandatory
++methods are create/destroy. Any others that are null are presumed to
++be successful no-ops.
++
++int create(struct container *cont)
++LL=container_mutex
++
++Called to create a subsystem state object for a container. The
++subsystem should set its subsystem pointer for the passed container,
++returning 0 on success or a negative error code. On success, the
++subsystem pointer should point to a structure of type
++container_subsys_state (typically embedded in a larger
++subsystem-specific object), which will be initialized by the container
++system. Note that this will be called at initialization to create the
++root subsystem state for this subsystem; this case can be identified
++by the passed container object having a NULL parent (since it's the
++root of the hierarchy) and may be an appropriate place for
++initialization code.
++
++void destroy(struct container *cont)
++LL=container_mutex
++
++The container system is about to destroy the passed container; the
++subsystem should do any necessary cleanup.
++
++int can_attach(struct container_subsys *ss, struct container *cont,
++ struct task_struct *task)
++LL=container_mutex
++
++Called prior to moving a task into a container; if the subsystem
++returns an error, this will abort the attach operation. If a NULL
++task is passed, then a successful result indicates that *any*
++unspecified task can be moved into the container. Note that this isn't
++called on a fork. If this method returns 0 (success) then this should
++remain valid while the caller holds container_mutex.
++
++void attach(struct container_subsys *ss, struct container *cont,
++ struct container *old_cont, struct task_struct *task)
++LL=container_mutex
++
++
++Called after the task has been attached to the container, to allow any
++post-attachment activity that requires memory allocations or blocking.
++
++void fork(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex, maybe read_lock(tasklist_lock)
++
++Called when a task is forked into a container. Also called during
++registration for all existing tasks.
++
++void exit(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex
++
++Called during task exit.
++
++int populate(struct container_subsys *ss, struct container *cont)
++LL=none
++
++Called after creation of a container to allow a subsystem to populate
++the container directory with file entries. The subsystem should make
++calls to container_add_file() with objects of type cftype (see
++include/linux/container.h for details). Note that although this
++method can return an error code, the error code is currently not
++always handled well.
++
++void post_clone(struct container_subsys *ss, struct container *cont)
++
++Called at the end of container_clone() to do any parameter
++initialization which might be required before a task could attach. For
++example in cpusets, no task may attach before 'cpus' and 'mems' are set
++up.
++
++void bind(struct container_subsys *ss, struct container *root)
++LL=callback_mutex
++
++Called when a container subsystem is rebound to a different hierarchy
++and root container. Currently this will only involve movement between
++the default hierarchy (which never has sub-containers) and a hierarchy
++that is being created/destroyed (and hence has no sub-containers).
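++
++Putting the mandatory pieces together, a minimal subsystem skeleton
++might look like the following sketch. It uses only the signatures
++documented above; the container_subsys field names and foo_subsys_id
++(the id assigned at registration) are assumptions for illustration:
++
++	struct foo_container {
++		struct container_subsys_state css;
++		/* subsystem-specific state goes here */
++	};
++
++	static int foo_create(struct container *cont)
++	{
++		struct foo_container *foo = kzalloc(sizeof(*foo), GFP_KERNEL);
++		if (!foo)
++			return -ENOMEM;
++		cont->subsys[foo_subsys_id] = &foo->css;
++		return 0;
++	}
++
++	static void foo_destroy(struct container *cont)
++	{
++		kfree(container_of(cont->subsys[foo_subsys_id],
++				   struct foo_container, css));
++	}
++
++	struct container_subsys foo_subsys = {
++		.name = "foo",
++		.subsys_id = -1,	/* assigned by the container system */
++		.create = foo_create,
++		.destroy = foo_destroy,
++	};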
++
++4. Questions
++============
++
++Q: what's up with this '/bin/echo' ?
++A: bash's builtin 'echo' command does not check calls to write() against
++ errors. If you use it in the container file system, you won't be
++ able to tell whether a command succeeded or failed.
++
++Q: When I attach processes, only the first pid on the line really gets attached!
++A: We can only return one error code per call to write(). So you should
++ write only ONE pid per call.
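++
++As a follow-up to the '/bin/echo' answer: a tiny user-space helper that
++attaches one pid and actually checks the result of write() might look
++like this sketch (the mount point path is just an example):
++
++	#include <stdio.h>
++	#include <fcntl.h>
++	#include <unistd.h>
++	#include <sys/types.h>
++
++	int attach_pid(const char *container_dir, pid_t pid)
++	{
++		char path[256], buf[32];
++		int fd, n;
++
++		snprintf(path, sizeof(path), "%s/tasks", container_dir);
++		fd = open(path, O_WRONLY);
++		if (fd < 0)
++			return -1;
++		n = snprintf(buf, sizeof(buf), "%d", pid);
++		/* one pid per write(); check the result, unlike bash's echo */
++		if (write(fd, buf, n) != n) {
++			close(fd);
++			return -1;
++		}
++		return close(fd);
++	}
++
++	/* e.g. attach_pid("/dev/container/Charlie", getpid()); */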
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/core.txt linux-2.6.22-591/Documentation/cpuidle/core.txt
+--- linux-2.6.22-570/Documentation/cpuidle/core.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/core.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,17 @@
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle
++
++General Information:
++
++Various CPUs today support multiple idle levels that are differentiated
++by varying exit latencies and power consumption during idle.
++cpuidle is a generic in-kernel infrastructure that separates
++idle policy (governor) from idle mechanism (driver) and provides a
++standardized infrastructure to support independent development of
++governors and drivers.
++
++cpuidle resides under drivers/cpuidle.
++
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/driver.txt linux-2.6.22-591/Documentation/cpuidle/driver.txt
+--- linux-2.6.22-570/Documentation/cpuidle/driver.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/driver.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,24 @@
++
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle drivers
++
++
++
++
++A cpuidle driver supports capability detection for a particular system.
++The init and exit routines will be called for each online CPU, with a
++per-CPU cpuidle_driver object, and the driver should fill in the
++cpuidle_states inside cpuidle_driver according to the CPU's capabilities.
++
++A driver can handle dynamic state changes (like battery<->AC) by calling
++the force_redetect interface.
++
++It is possible to have more than one driver registered at the same time,
++and the user can switch between drivers using the sysfs interface.
++
++Interfaces:
++int cpuidle_register_driver(struct cpuidle_driver *drv);
++void cpuidle_unregister_driver(struct cpuidle_driver *drv);
++int cpuidle_force_redetect(struct cpuidle_device *dev);
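++
++A registration sketch using only the interfaces above (the fields of
++struct cpuidle_driver are omitted here because their layout is defined
++by the cpuidle headers, not by this document):
++
++	static struct cpuidle_driver my_idle_driver = {
++		/* name, init/exit callbacks, etc. go here */
++	};
++
++	static int __init my_idle_init(void)
++	{
++		return cpuidle_register_driver(&my_idle_driver);
++	}
++
++	static void __exit my_idle_exit(void)
++	{
++		cpuidle_unregister_driver(&my_idle_driver);
++	}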
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/governor.txt linux-2.6.22-591/Documentation/cpuidle/governor.txt
+--- linux-2.6.22-570/Documentation/cpuidle/governor.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/governor.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,24 @@
++
++
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle governors
++
++
++
++
++A cpuidle governor is a policy routine that decides which idle state to
++enter at any given time. The cpuidle core uses different callbacks into
++the governor while handling idle entry:
++* select_state callback, where the governor determines the next idle state
++  to enter
++* prepare_idle callback, called before entering an idle state
++* scan callback, called after a driver forces redetection of the states
++
++More than one governor can be registered at the same time, and the user
++can switch between governors using the sysfs interface.
++
++Interfaces:
++int cpuidle_register_governor(struct cpuidle_governor *gov);
++void cpuidle_unregister_governor(struct cpuidle_governor *gov);
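++
++Registration follows the same pattern as for drivers; a sketch (the
++governor's callback fields are assumed from the callback names listed
++above):
++
++	static struct cpuidle_governor my_governor = {
++		/* name plus select_state/prepare_idle/scan callbacks */
++	};
++
++	static int __init my_governor_init(void)
++	{
++		return cpuidle_register_governor(&my_governor);
++	}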
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/sysfs.txt linux-2.6.22-591/Documentation/cpuidle/sysfs.txt
+--- linux-2.6.22-570/Documentation/cpuidle/sysfs.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/sysfs.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,27 @@
++
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle sysfs
++
++System-global cpuidle information is under
++/sys/devices/system/cpu/cpuidle
++
++The current interfaces in this directory have self-explanatory names:
++* available_drivers
++* available_governors
++* current_driver
++* current_governor
++
++Per-logical-CPU cpuidle information is under
++/sys/devices/system/cpu/cpuX/cpuidle
++for each online CPU X.
++
++Under this per-CPU directory, there is a directory for each idle state
++supported by the driver; each state directory in turn contains:
++* latency
++* power
++* time
++* usage
++
++
+diff -Nurb linux-2.6.22-570/Documentation/cpusets.txt linux-2.6.22-591/Documentation/cpusets.txt
+--- linux-2.6.22-570/Documentation/cpusets.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/cpusets.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -7,6 +7,7 @@
+ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+ Modified by Paul Jackson <pj@sgi.com>
+ Modified by Christoph Lameter <clameter@sgi.com>
++Modified by Paul Menage <menage@google.com>
+
+ CONTENTS:
+ =========
+@@ -16,10 +17,9 @@
+ 1.2 Why are cpusets needed ?
+ 1.3 How are cpusets implemented ?
+ 1.4 What are exclusive cpusets ?
+- 1.5 What does notify_on_release do ?
+- 1.6 What is memory_pressure ?
+- 1.7 What is memory spread ?
+- 1.8 How do I use cpusets ?
++ 1.5 What is memory_pressure ?
++ 1.6 What is memory spread ?
++ 1.7 How do I use cpusets ?
+ 2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Adding/removing cpus
+@@ -43,18 +43,19 @@
+ hooks, beyond what is already present, required to manage dynamic
+ job placement on large systems.
+
+-Each task has a pointer to a cpuset. Multiple tasks may reference
+-the same cpuset. Requests by a task, using the sched_setaffinity(2)
+-system call to include CPUs in its CPU affinity mask, and using the
+-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes
+-in its memory policy, are both filtered through that tasks cpuset,
+-filtering out any CPUs or Memory Nodes not in that cpuset. The
+-scheduler will not schedule a task on a CPU that is not allowed in
+-its cpus_allowed vector, and the kernel page allocator will not
+-allocate a page on a node that is not allowed in the requesting tasks
+-mems_allowed vector.
++Cpusets use the generic container subsystem described in
++Documentation/container.txt.
+
+-User level code may create and destroy cpusets by name in the cpuset
++Requests by a task, using the sched_setaffinity(2) system call to
++include CPUs in its CPU affinity mask, and using the mbind(2) and
++set_mempolicy(2) system calls to include Memory Nodes in its memory
++policy, are both filtered through that task's cpuset, filtering out any
++CPUs or Memory Nodes not in that cpuset. The scheduler will not
++schedule a task on a CPU that is not allowed in its cpus_allowed
++vector, and the kernel page allocator will not allocate a page on a
++node that is not allowed in the requesting task's mems_allowed vector.
++
++User level code may create and destroy cpusets by name in the container
+ virtual file system, manage the attributes and permissions of these
+ cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+ specify and query to which cpuset a task is assigned, and list the
+@@ -86,9 +87,6 @@
+ and a database), or
+ * NUMA systems running large HPC applications with demanding
+ performance characteristics.
+- * Also cpu_exclusive cpusets are useful for servers running orthogonal
+- workloads such as RT applications requiring low latency and HPC
+- applications that are throughput sensitive
+
+ These subsets, or "soft partitions" must be able to be dynamically
+ adjusted, as the job mix changes, without impacting other concurrently
+@@ -117,7 +115,7 @@
+ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+ kernel.
+ - Each task in the system is attached to a cpuset, via a pointer
+- in the task structure to a reference counted cpuset structure.
++ in the task structure to a reference counted container structure.
+ - Calls to sched_setaffinity are filtered to just those CPUs
+ allowed in that tasks cpuset.
+ - Calls to mbind and set_mempolicy are filtered to just
+@@ -131,8 +129,6 @@
+ - A cpuset may be marked exclusive, which ensures that no other
+ cpuset (except direct ancestors and descendents) may contain
+ any overlapping CPUs or Memory Nodes.
+- Also a cpu_exclusive cpuset would be associated with a sched
+- domain.
+ - You can list all the tasks (by pid) attached to any cpuset.
+
+ The implementation of cpusets requires a few, simple hooks
+@@ -144,23 +140,15 @@
+ allowed in that tasks cpuset.
+ - in sched.c migrate_all_tasks(), to keep migrating tasks within
+ the CPUs allowed by their cpuset, if possible.
+- - in sched.c, a new API partition_sched_domains for handling
+- sched domain changes associated with cpu_exclusive cpusets
+- and related changes in both sched.c and arch/ia64/kernel/domain.c
+ - in the mbind and set_mempolicy system calls, to mask the requested
+ Memory Nodes by what's allowed in that tasks cpuset.
+ - in page_alloc.c, to restrict memory to allowed nodes.
+ - in vmscan.c, to restrict page recovery to the current cpuset.
+
+-In addition a new file system, of type "cpuset" may be mounted,
+-typically at /dev/cpuset, to enable browsing and modifying the cpusets
+-presently known to the kernel. No new system calls are added for
+-cpusets - all support for querying and modifying cpusets is via
+-this cpuset file system.
+-
+-Each task under /proc has an added file named 'cpuset', displaying
+-the cpuset name, as the path relative to the root of the cpuset file
+-system.
++You should mount the "container" filesystem type in order to enable
++browsing and modifying the cpusets presently known to the kernel. No
++new system calls are added for cpusets - all support for querying and
++modifying cpusets is via this cpuset file system.
+
+ The /proc/<pid>/status file for each task has two added lines,
+ displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
+@@ -170,16 +158,15 @@
+ Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
+ Mems_allowed: ffffffff,ffffffff
+
+-Each cpuset is represented by a directory in the cpuset file system
+-containing the following files describing that cpuset:
++Each cpuset is represented by a directory in the container file system
++containing (on top of the standard container files) the following
++files describing that cpuset:
+
+ - cpus: list of CPUs in that cpuset
+ - mems: list of Memory Nodes in that cpuset
+ - memory_migrate flag: if set, move pages to cpusets nodes
+ - cpu_exclusive flag: is cpu placement exclusive?
+ - mem_exclusive flag: is memory placement exclusive?
+- - tasks: list of tasks (by pid) attached to that cpuset
+- - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
+ - memory_pressure: measure of how much paging pressure in cpuset
+
+ In addition, the root cpuset only has the following file:
+@@ -231,15 +218,6 @@
+ a direct ancestor or descendent, may share any of the same CPUs or
+ Memory Nodes.
+
+-A cpuset that is cpu_exclusive has a scheduler (sched) domain
+-associated with it. The sched domain consists of all CPUs in the
+-current cpuset that are not part of any exclusive child cpusets.
+-This ensures that the scheduler load balancing code only balances
+-against the CPUs that are in the sched domain as defined above and
+-not all of the CPUs in the system. This removes any overhead due to
+-load balancing code trying to pull tasks outside of the cpu_exclusive
+-cpuset only to be prevented by the tasks' cpus_allowed mask.
+-
+ A cpuset that is mem_exclusive restricts kernel allocations for
+ page, buffer and other data commonly shared by the kernel across
+ multiple users. All cpusets, whether mem_exclusive or not, restrict
+@@ -253,21 +231,7 @@
+ outside even a mem_exclusive cpuset.
+
+
+-1.5 What does notify_on_release do ?
+-------------------------------------
+-
+-If the notify_on_release flag is enabled (1) in a cpuset, then whenever
+-the last task in the cpuset leaves (exits or attaches to some other
+-cpuset) and the last child cpuset of that cpuset is removed, then
+-the kernel runs the command /sbin/cpuset_release_agent, supplying the
+-pathname (relative to the mount point of the cpuset file system) of the
+-abandoned cpuset. This enables automatic removal of abandoned cpusets.
+-The default value of notify_on_release in the root cpuset at system
+-boot is disabled (0). The default value of other cpusets at creation
+-is the current value of their parents notify_on_release setting.
+-
+-
+-1.6 What is memory_pressure ?
++1.5 What is memory_pressure ?
+ -----------------------------
+ The memory_pressure of a cpuset provides a simple per-cpuset metric
+ of the rate that the tasks in a cpuset are attempting to free up in
+@@ -324,7 +288,7 @@
+ times 1000.
+
+
+-1.7 What is memory spread ?
++1.6 What is memory spread ?
+ ---------------------------
+ There are two boolean flag files per cpuset that control where the
+ kernel allocates pages for the file system buffers and related in
+@@ -395,7 +359,7 @@
+ can become very uneven.
+
+
+-1.8 How do I use cpusets ?
++1.7 How do I use cpusets ?
+ --------------------------
+
+ In order to minimize the impact of cpusets on critical kernel
+@@ -485,7 +449,7 @@
+ To start a new job that is to be contained within a cpuset, the steps are:
+
+ 1) mkdir /dev/cpuset
+- 2) mount -t cpuset none /dev/cpuset
++ 2) mount -t container -ocpuset cpuset /dev/cpuset
+ 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+ the /dev/cpuset virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+@@ -497,7 +461,7 @@
+ named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+ and then start a subshell 'sh' in that cpuset:
+
+- mount -t cpuset none /dev/cpuset
++ mount -t container -ocpuset cpuset /dev/cpuset
+ cd /dev/cpuset
+ mkdir Charlie
+ cd Charlie
+@@ -529,7 +493,7 @@
+ virtual filesystem.
+
+ To mount it, type:
+-# mount -t cpuset none /dev/cpuset
++# mount -t container -o cpuset cpuset /dev/cpuset
+
+ Then under /dev/cpuset you can find a tree that corresponds to the
+ tree of the cpusets in the system. For instance, /dev/cpuset
+@@ -572,6 +536,18 @@
+ This will fail if the cpuset is in use (has cpusets inside, or has
+ processes attached).
+
++Note that for legacy reasons, the "cpuset" filesystem exists as a
++wrapper around the container filesystem.
++
++The command
++
++mount -t cpuset X /dev/cpuset
++
++is equivalent to
++
++mount -t container -ocpuset X /dev/cpuset
++echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
++
+ 2.2 Adding/removing cpus
+ ------------------------
+
+diff -Nurb linux-2.6.22-570/Documentation/feature-removal-schedule.txt linux-2.6.22-591/Documentation/feature-removal-schedule.txt
+--- linux-2.6.22-570/Documentation/feature-removal-schedule.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/feature-removal-schedule.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -162,6 +162,33 @@
+
+ ---------------------------
+
++What: filemap_nopage, filemap_populate
++When: April 2007
++Why: These legacy interfaces no longer have any callers in the kernel and
++ any functionality provided can be provided with filemap_fault. The
++ removal schedule is short because they are a big maintenance burden
++ and have some bugs.
++Who: Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
++What: vm_ops.populate, install_page
++When: April 2007
++Why: These legacy interfaces no longer have any callers in the kernel and
++ any functionality provided can be provided with vm_ops.fault.
++Who: Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
++What: vm_ops.nopage
++When: February 2008, provided in-kernel callers have been converted
++Why: This interface is replaced by vm_ops.fault, but it has been around
++ forever, is used by a lot of drivers, and doesn't cost much to
++ maintain.
++Who: Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
+ What: Interrupt only SA_* flags
+ When: September 2007
+ Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
+@@ -280,25 +307,6 @@
+
+ ---------------------------
+
+-What: Multipath cached routing support in ipv4
+-When: in 2.6.23
+-Why: Code was merged, then submitter immediately disappeared leaving
+- us with no maintainer and lots of bugs. The code should not have
+- been merged in the first place, and many aspects of it's
+- implementation are blocking more critical core networking
+- development. It's marked EXPERIMENTAL and no distribution
+- enables it because it cause obscure crashes due to unfixable bugs
+- (interfaces don't return errors so memory allocation can't be
+- handled, calling contexts of these interfaces make handling
+- errors impossible too because they get called after we've
+- totally commited to creating a route object, for example).
+- This problem has existed for years and no forward progress
+- has ever been made, and nobody steps up to try and salvage
+- this code, so we're going to finally just get rid of it.
+-Who: David S. Miller <davem@davemloft.net>
+-
+----------------------------
+-
+ What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
+ When: December 2007
+ Why: These functions are a leftover from 2.4 times. They have several
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/00-INDEX linux-2.6.22-591/Documentation/filesystems/00-INDEX
+--- linux-2.6.22-570/Documentation/filesystems/00-INDEX 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/filesystems/00-INDEX 2007-12-21 15:36:11.000000000 -0500
+@@ -84,6 +84,8 @@
+ - info and mount options for the UDF filesystem.
+ ufs.txt
+ - info on the ufs filesystem.
++unionfs/
++ - info on the unionfs filesystem.
+ vfat.txt
+ - info on using the VFAT filesystem used in Windows NT and Windows 95
+ vfs.txt
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/Locking linux-2.6.22-591/Documentation/filesystems/Locking
+--- linux-2.6.22-570/Documentation/filesystems/Locking 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/filesystems/Locking 2007-12-21 15:36:11.000000000 -0500
+@@ -510,12 +510,14 @@
+ prototypes:
+ void (*open)(struct vm_area_struct*);
+ void (*close)(struct vm_area_struct*);
++ struct page *(*fault)(struct vm_area_struct*, struct fault_data *);
+ struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
+
+ locking rules:
+ BKL mmap_sem
+ open: no yes
+ close: no yes
++fault: no yes
+ nopage: no yes
+
+ ================================================================================
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt
+--- linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -238,6 +238,8 @@
+ struct config_group *(*make_group)(struct config_group *group,
+ const char *name);
+ int (*commit_item)(struct config_item *item);
++ void (*disconnect_notify)(struct config_group *group,
++ struct config_item *item);
+ void (*drop_item)(struct config_group *group,
+ struct config_item *item);
+ };
+@@ -268,6 +270,16 @@
+ for the item to actually disappear from the subsystem's usage. But it
+ is gone from configfs.
+
++When drop_item() is called, the item's linkage has already been torn
++down. It no longer has a reference on its parent and has no place in
++the item hierarchy. If a client needs to do some cleanup before this
++teardown happens, the subsystem can implement the
++ct_group_ops->disconnect_notify() method. The method is called after
++configfs has removed the item from the filesystem view but before the
++item is removed from its parent group. Like drop_item(),
++disconnect_notify() is void and cannot fail. Client subsystems should
++not drop any references here, as they still must do it in drop_item().
++
+ A config_group cannot be removed while it still has child items. This
+ is implemented in the configfs rmdir(2) code. ->drop_item() will not be
+ called, as the item has not been dropped. rmdir(2) will fail, as the
+@@ -386,6 +398,33 @@
+ rmdir(2). They also are not considered when rmdir(2) on the parent
+ group is checking for children.
+
++[Dependent Subsystems]
++
++Sometimes other drivers depend on particular configfs items. For
++example, ocfs2 mounts depend on a heartbeat region item. If that
++region item is removed with rmdir(2), the ocfs2 mount must BUG or go
++readonly. Not happy.
++
++configfs provides two additional API calls: configfs_depend_item() and
++configfs_undepend_item(). A client driver can call
++configfs_depend_item() on an existing item to tell configfs that it is
++depended on. configfs will then return -EBUSY from rmdir(2) for that
++item. When the item is no longer depended on, the client driver calls
++configfs_undepend_item() on it.
++
++These APIs cannot be called underneath any configfs callbacks, as
++they will conflict. They can block and allocate. A client driver
++probably shouldn't call them on its own initiative; rather, it should
++provide an API that external subsystems call.
++
++How does this work? Imagine the ocfs2 mount process. When it mounts,
++it asks for a heartbeat region item. This is done via a call into the
++heartbeat code. Inside the heartbeat code, the region item is looked
++up. Here, the heartbeat code calls configfs_depend_item(). If it
++succeeds, then heartbeat knows the region is safe to give to ocfs2.
++If it fails, it was being torn down anyway, and heartbeat can gracefully
++pass up an error.
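++
++As a sketch of that flow (the names hb_subsys and region are
++illustrative, and the argument pair is assumed from the configfs
++headers of this kernel generation):
++
++	/* heartbeat code, before handing the region to ocfs2 */
++	ret = configfs_depend_item(&hb_subsys, &region->item);
++	if (ret)
++		return ret;	/* region was already being torn down */
++
++	/* later, when ocfs2 no longer needs the region */
++	configfs_undepend_item(&hb_subsys, &region->item);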
++
+ [Committable Items]
+
+ NOTE: Committable items are currently unimplemented.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,10 @@
++00-INDEX
++ - this file.
++concepts.txt
++ - A brief introduction of concepts.
++issues.txt
++ - A summary of known issues with unionfs.
++rename.txt
++ - Information regarding rename operations.
++usage.txt
++ - Usage information and examples.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,75 @@
++Unionfs 2.0 CONCEPTS:
++=====================
++
++This file describes the concepts needed by a namespace unification file
++system.
++
++Branch Priority:
++================
++
++Each branch is assigned a unique priority - starting from 0 (highest
++priority). No two branches can have the same priority.
++
++
++Branch Mode:
++============
++
++Each branch is assigned a mode - read-write or read-only. This allows
++directories on media mounted read-write to be used in a read-only manner.
++
++
++Whiteouts:
++==========
++
++A whiteout removes a file name from the namespace. Whiteouts are needed when
++one attempts to remove a file on a read-only branch.
++
++Suppose we have a two-branch union, where branch 0 is read-write and branch
++1 is read-only. And a file 'foo' on branch 1:
++
++./b0/
++./b1/
++./b1/foo
++
++The unified view would simply be:
++
++./union/
++./union/foo
++
++Since 'foo' is stored on a read-only branch, it cannot be removed. A
++whiteout is used to remove the name 'foo' from the unified namespace. Again,
++since branch 1 is read-only, the whiteout cannot be created there. So, we
++try on a higher priority (lower numerically) branch and create the whiteout
++there.
++
++./b0/
++./b0/.wh.foo
++./b1/
++./b1/foo
++
++Later, when Unionfs traverses branches (due to lookup or readdir), it
++eliminates 'foo' from the namespace (as well as the whiteout itself).
++
++
++Duplicate Elimination:
++======================
++
++It is possible for files on different branches to have the same name.
++Unionfs then has to select which instance of the file to show to the user.
++Given the fact that each branch has a priority associated with it, the
++simplest solution is to take the instance from the highest priority
++(numerically lowest value) and "hide" the others.
++
++
++Copyup:
++=======
++
++When a change is made to a file's data or meta-data, it has to be
++stored somewhere. The best way is to create a copy of the original
++file on a branch that is writable, and then redirect the write
++through to this copy. The copy must be made on a higher priority branch so
++that lookup and readdir return this newer "version" of the file rather than
++the original (see duplicate elimination).
++
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,39 @@
++KNOWN Unionfs 2.0 ISSUES:
++=========================
++
++1. The NFS server returns -EACCES for read-only exports, instead of -EROFS.
++ This means we can't reliably detect a read-only NFS export.
++
++2. Modifying a Unionfs branch directly, while the union is mounted, is
++ currently unsupported, because it could cause a cache incoherency between
++ the union layer and the lower file systems (for that reason, Unionfs
++ currently prohibits using branches which overlap with each other, even
++ partially). We have tested Unionfs under such conditions, and fixed any
++ bugs we found (Unionfs comes with an extensive regression test suite).
++ However, it may still be possible that changes made to lower branches
++ directly could cause cache incoherency which, in the worst case, may cause
++ an oops.
++
++ Unionfs 2.0 has a temporary workaround for this. You can force Unionfs
++ to increase the superblock generation number, and hence purge all cached
++ Unionfs objects, which would then be re-gotten from the lower branches.
++ This should ensure cache consistency. To increase the generation number,
++ execute the command:
++
++ mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++ Note that the older way of incrementing the generation number using an
++ ioctl is no longer supported in Unionfs 2.0. Ioctls in general are not
++ encouraged. Plus, an ioctl is a per-file concept, whereas the generation
++ number is a per-file-system concept. Worse, such an ioctl requires an
++ open file, which then has to be invalidated by the very nature of the
++ generation number increase (read: the old generation increase ioctl was
++ pretty racy).
++
++3. Unionfs should not use lookup_one_len() on the underlying f/s as it
++ confuses NFS. Currently, unionfs_lookup() passes lookup intents to the
++ lower file system; this eliminates part of the problem. The remaining
++ calls to lookup_one_len may need to be changed to pass an intent.
++
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,31 @@
++Rename is a complex beast. The following table shows which rename(2) operations
++should succeed and which should fail.
++
++o: success
++E: error (either unionfs or vfs)
++X: EXDEV
++
++none = file does not exist
++file = file is a file
++dir = file is an empty directory
++child= file is a non-empty directory
++wh = file is a directory containing only whiteouts; this makes it logically
++ empty
++
++        none   file   dir    child  wh
++file    o      o      E      E      E
++dir     o      E      o      E      o
++child   X      E      X      E      X
++wh      o      E      o      E      o
++
++
++Renaming directories:
++=====================
++
++Whenever an empty (either physically or logically) directory is being renamed,
++the following sequence of events should take place:
++
++1) Remove whiteouts from both source and destination directory
++2) Rename source to destination
++3) Make destination opaque to prevent anything under it from showing up
++
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,90 @@
++Unionfs is a stackable unification file system, which can appear to merge
++the contents of several directories (branches), while keeping their physical
++content separate. Unionfs is useful for unified source tree management,
++merged contents of split CD-ROMs, merged separate software package
++directories, data grids, and more. Unionfs allows any mix of read-only and
++read-write branches, as well as insertion and deletion of branches anywhere
++in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
++duplicates, partial-error conditions, and more.
++
++# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
++
++The available branch-option for the mount command is:
++
++ dirs=branch[=ro|=rw][:...]
++
++specifies a colon-separated list of which directories compose the union.
++Directories that come earlier in the list have a higher precedence than
++those which come later. Additionally, read-only or read-write permissions of
++the branch can be specified by appending =ro or =rw (default) to each
++directory.
++
++Syntax:
++
++ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
++
++Example:
++
++ dirs=/writable_branch=rw:/read-only_branch=ro
++
++
++DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
++======================================
++
++You can remount a union and change its overall mode, or reconfigure the
++branches, as follows.
++
++To downgrade a union from read-write to read-only:
++
++# mount -t unionfs -o remount,ro none MOUNTPOINT
++
++To upgrade a union from read-only to read-write:
++
++# mount -t unionfs -o remount,rw none MOUNTPOINT
++
++To delete a branch /foo, regardless of where it is in the current union:
++
++# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
++
++To insert (add) a branch /foo before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
++
++To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
++
++To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
++new highest-priority branch), you can use the above syntax, or use a
++shorthand version as follows:
++
++# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch):
++
++# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch), in
++read-only mode:
++
++# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
++
++Finally, to change the mode of one existing branch, say /foo, from read-only
++to read-write, and change /bar from read-write to read-only:
++
++# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
++
++
++CACHE CONSISTENCY
++=================
++
++If you modify any file on any of the lower branches directly, while there is
++a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
++to purge its caches and re-get the objects. To do that, you have to
++increment the generation number of the superblock using the following
++command:
++
++# mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c
+--- linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-12-21 15:36:11.000000000 -0500
+@@ -78,6 +78,7 @@
+ firmware_loading_show, firmware_loading_store);
+
+ static ssize_t firmware_data_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct class_device *class_dev = to_class_dev(kobj);
+@@ -88,6 +89,7 @@
+ return count;
+ }
+ static ssize_t firmware_data_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct class_device *class_dev = to_class_dev(kobj);
+diff -Nurb linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt
+--- linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,160 @@
++Freezing of tasks
++ (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
++
++I. What is the freezing of tasks?
++
++The freezing of tasks is a mechanism by which user space processes and some
++kernel threads are controlled during hibernation or system-wide suspend (on some
++architectures).
++
++II. How does it work?
++
++There are four per-task flags used for that, PF_NOFREEZE, PF_FROZEN, TIF_FREEZE
++and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have
++PF_NOFREEZE unset (all user space processes and some kernel threads) are
++regarded as 'freezable' and treated in a special way before the system enters a
++suspend state as well as before a hibernation image is created (in what follows
++we only consider hibernation, but the description also applies to suspend).
++
++Namely, as the first step of the hibernation procedure the function
++freeze_processes() (defined in kernel/power/process.c) is called. It executes
++try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and
++sends a fake signal to each of them. A task that receives such a signal and has
++TIF_FREEZE set, should react to it by calling the refrigerator() function
++(defined in kernel/power/process.c), which sets the task's PF_FROZEN flag,
++changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is
++cleared for it. Then, we say that the task is 'frozen' and therefore the set of
++functions handling this mechanism is called 'the freezer' (these functions are
++defined in kernel/power/process.c and include/linux/freezer.h). User space
++processes are generally frozen before kernel threads.
++
++It is not recommended to call refrigerator() directly. Instead, it is
++recommended to use the try_to_freeze() function (defined in
++include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the
++task enter refrigerator() if the flag is set.
++
++For user space processes, try_to_freeze() is called automatically from the
++signal-handling code, but the freezable kernel threads need to call it
++explicitly in suitable places. The code to do this may look like the following:
++
++ do {
++ hub_events();
++ wait_event_interruptible(khubd_wait,
++ !list_empty(&hub_event_list));
++ try_to_freeze();
++ } while (!signal_pending(current));
++
++(from drivers/usb/core/hub.c::hub_thread()).
++
++If a freezable kernel thread fails to call try_to_freeze() after the freezer has
++set TIF_FREEZE for it, the freezing of tasks will fail and the entire
++hibernation operation will be cancelled. For this reason, freezable kernel
++threads must call try_to_freeze() somewhere.
++
++After the system memory state has been restored from a hibernation image and
++devices have been reinitialized, the function thaw_processes() is called in
++order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that
++have been frozen leave refrigerator() and continue running.
++
++III. Which kernel threads are freezable?
++
++Kernel threads are not freezable by default. However, a kernel thread may clear
++PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
++directly is strongly discouraged). From this point it is regarded as freezable
++and must call try_to_freeze() in a suitable place.
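++
++As an illustration (a sketch, not code taken from the kernel tree), a
++freezable kernel thread created with kthread_run() might be structured
++like this:
++
++	static int my_thread(void *unused)
++	{
++		set_freezable();	/* clear PF_NOFREEZE for this thread */
++
++		while (!kthread_should_stop()) {
++			try_to_freeze();	/* enter refrigerator() if TIF_FREEZE is set */
++			/* ... do the thread's normal work here ... */
++			schedule_timeout_interruptible(HZ);
++		}
++		return 0;
++	}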
++
++IV. Why do we do that?
++
++Generally speaking, there are a couple of reasons to use the freezing of tasks:
++
++1. The principal reason is to prevent filesystems from being damaged after
++hibernation. At the moment we have no simple means of checkpointing
++filesystems, so if there are any modifications made to filesystem data and/or
++metadata on disks, we cannot bring them back to the state from before the
++modifications. At the same time each hibernation image contains some
++filesystem-related information that must be consistent with the state of the
++on-disk data and metadata after the system memory state has been restored from
++the image (otherwise the filesystems will be damaged in a nasty way, usually
++making them almost impossible to repair). We therefore freeze tasks that might
++cause the on-disk filesystems' data and metadata to be modified after the
++hibernation image has been created and before the system is finally powered off.
++The majority of these are user space processes, but if any of the kernel threads
++may cause something like this to happen, they have to be freezable.
++
++2. The second reason is to prevent user space processes and some kernel threads
++from interfering with the suspending and resuming of devices. A user space
++process running on a second CPU while we are suspending devices may, for
++example, be troublesome and without the freezing of tasks we would need some
++safeguards against race conditions that might occur in such a case.
++
++Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
++of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
++
++"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
++
++Linus: In many ways, 'at all'.
++
++I _do_ realize the IO request queue issues, and that we cannot actually do
++s2ram with some devices in the middle of a DMA. So we want to be able to
++avoid *that*, there's no question about that. And I suspect that stopping
++user threads and then waiting for a sync is practically one of the easier
++ways to do so.
++
++So in practice, the 'at all' may become a 'why freeze kernel threads?' and
++freezing user threads I don't find really objectionable."
++
++Still, there are kernel threads that may want to be freezable. For example, if
++a kernel thread that belongs to a device driver accesses the device directly,
++it in principle needs to know when the device is suspended, so that it doesn't try to
++access it at that time. However, if the kernel thread is freezable, it will be
++frozen before the driver's .suspend() callback is executed and it will be
++thawed after the driver's .resume() callback has run, so it won't be accessing
++the device while it's suspended.
++
++3. Another reason for freezing tasks is to prevent user space processes from
++realizing that a hibernation (or suspend) operation is taking place. Ideally, user
++space processes should not notice that such a system-wide operation has occurred
++and should continue running without any problems after the restore (or resume
++from suspend). Unfortunately, in the most general case this is quite difficult
++to achieve without the freezing of tasks. Consider, for example, a process
++that depends on all CPUs being online while it's running. Since we need to
++disable nonboot CPUs during the hibernation, if this process is not frozen, it
++may notice that the number of CPUs has changed and may start to work incorrectly
++because of that.
++
++V. Are there any problems related to the freezing of tasks?
++
++Yes, there are.
++
++First of all, the freezing of kernel threads may be tricky if they depend on
++one another. For example, if kernel thread A waits for a completion (in the
++TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
++and B is frozen in the meantime, then A will be blocked until B is thawed, which
++may be undesirable. That's why kernel threads are not freezable by default.
++
++Second, there are the following two problems related to the freezing of user
++space processes:
++1. Putting processes into an uninterruptible sleep distorts the load average.
++2. Now that we have FUSE, plus the framework for doing device drivers in
++userspace, it gets even more complicated because some userspace processes are
++now doing the sorts of things that kernel threads do
++(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
++
++Problem 1 seems to be fixable, although it hasn't been fixed so far. The
++other one is more serious, but it seems that we can work around it by using
++hibernation (and suspend) notifiers (in that case, though, we won't be able to
++avoid the realization by the user space processes that the hibernation is taking
++place).
++
++There are also problems that the freezing of tasks tends to expose, although
++they are not directly related to it. For example, if request_firmware() is
++called from a device driver's .resume() routine, it will time out and eventually
++fail, because the userland process that should respond to the request is frozen
++at this point. So, seemingly, the failure is due to the freezing of tasks.
++Suppose, however, that the firmware file is located on a filesystem accessible
++only through another device that hasn't been resumed yet. In that case,
++request_firmware() will fail regardless of whether or not the freezing of tasks
++is used. Consequently, the problem is not really related to the freezing of
++tasks, since it generally exists anyway. [The solution to this particular
++problem is to keep the firmware in memory after it's loaded for the first time
++and upload it from memory to the device whenever necessary.]
+diff -Nurb linux-2.6.22-570/Documentation/power/kernel_threads.txt linux-2.6.22-591/Documentation/power/kernel_threads.txt
+--- linux-2.6.22-570/Documentation/power/kernel_threads.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/power/kernel_threads.txt 1969-12-31 19:00:00.000000000 -0500
+@@ -1,40 +0,0 @@
+-KERNEL THREADS
+-
+-
+-Freezer
+-
+-Upon entering a suspended state the system will freeze all
+-tasks. This is done by delivering pseudosignals. This affects
+-kernel threads, too. To successfully freeze a kernel thread
+-the thread has to check for the pseudosignal and enter the
+-refrigerator. Code to do this looks like this:
+-
+- do {
+- hub_events();
+- wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list));
+- try_to_freeze();
+- } while (!signal_pending(current));
+-
+-from drivers/usb/core/hub.c::hub_thread()
+-
+-
+-The Unfreezable
+-
+-Some kernel threads however, must not be frozen. The kernel must
+-be able to finish pending IO operations and later on be able to
+-write the memory image to disk. Kernel threads needed to do IO
+-must stay awake. Such threads must mark themselves unfreezable
+-like this:
+-
+- /*
+- * This thread doesn't need any user-level access,
+- * so get rid of all our resources.
+- */
+- daemonize("usb-storage");
+-
+- current->flags |= PF_NOFREEZE;
+-
+-from drivers/usb/storage/usb.c::usb_stor_control_thread()
+-
+-Such drivers are themselves responsible for staying quiet during
+-the actual snapshotting.
+diff -Nurb linux-2.6.22-570/Documentation/power/swsusp.txt linux-2.6.22-591/Documentation/power/swsusp.txt
+--- linux-2.6.22-570/Documentation/power/swsusp.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/power/swsusp.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -140,21 +140,11 @@
+ website, and not to the Linux Kernel Mailing List. We are working
+ toward merging suspend2 into the mainline kernel.
+
+-Q: A kernel thread must voluntarily freeze itself (call 'refrigerator').
+-I found some kernel threads that don't do it, and they don't freeze
+-so the system can't sleep. Is this a known behavior?
+-
+-A: All such kernel threads need to be fixed, one by one. Select the
+-place where the thread is safe to be frozen (no kernel semaphores
+-should be held at that point and it must be safe to sleep there), and
+-add:
+-
+- try_to_freeze();
+-
+-If the thread is needed for writing the image to storage, you should
+-instead set the PF_NOFREEZE process flag when creating the thread (and
+-be very careful).
++Q: What is the freezing of tasks and why are we using it?
+
++A: The freezing of tasks is a mechanism by which user space processes and some
++kernel threads are controlled during hibernation or system-wide suspend (on some
++architectures). See freezing-of-tasks.txt for details.
+
+ Q: What is the difference between "platform" and "shutdown"?
+
+diff -Nurb linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt
+--- linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,450 @@
++                             SCSI FC Transport
++ =============================================
++
++Date: 4/12/2007
++Kernel Revisions for features:
++ rports : <<TBS>>
++ vports : 2.6.22 (? TBD)
++
++
++Introduction
++============
++This file documents the features and components of the SCSI FC Transport.
++It also documents the API between the transport and FC LLDDs.
++The FC transport can be found at:
++ drivers/scsi/scsi_transport_fc.c
++ include/scsi/scsi_transport_fc.h
++ include/scsi/scsi_netlink_fc.h
++
++This file is found at Documentation/scsi/scsi_fc_transport.txt
++
++
++FC Remote Ports (rports)
++========================================================================
++<< To Be Supplied >>
++
++
++FC Virtual Ports (vports)
++========================================================================
++
++Overview:
++-------------------------------
++
++  New FC standards have defined mechanisms which allow a single physical
++  port to appear as multiple communication ports. Using the N_Port Id
++ Virtualization (NPIV) mechanism, a point-to-point connection to a Fabric
++ can be assigned more than 1 N_Port_ID. Each N_Port_ID appears as a
++ separate port to other endpoints on the fabric, even though it shares one
++ physical link to the switch for communication. Each N_Port_ID can have a
++ unique view of the fabric based on fabric zoning and array lun-masking
++ (just like a normal non-NPIV adapter). Using the Virtual Fabric (VF)
++ mechanism, adding a fabric header to each frame allows the port to
++ interact with the Fabric Port to join multiple fabrics. The port will
++ obtain an N_Port_ID on each fabric it joins. Each fabric will have its
++ own unique view of endpoints and configuration parameters. NPIV may be
++ used together with VF so that the port can obtain multiple N_Port_IDs
++ on each virtual fabric.
++
++  The FC transport now recognizes a new object - a vport. A vport is
++ an entity that has a world-wide unique World Wide Port Name (wwpn) and
++ World Wide Node Name (wwnn). The transport also allows for the FC4's to
++ be specified for the vport, with FCP_Initiator being the primary role
++ expected. Once instantiated by one of the above methods, it will have a
++ distinct N_Port_ID and view of fabric endpoints and storage entities.
++ The fc_host associated with the physical adapter will export the ability
++ to create vports. The transport will create the vport object within the
++ Linux device tree, and instruct the fc_host's driver to instantiate the
++ virtual port. Typically, the driver will create a new scsi_host instance
++ on the vport, resulting in a unique <H,C,T,L> namespace for the vport.
++  Thus, whether an FC port is based on a physical port or on a virtual port,
++ each will appear as a unique scsi_host with its own target and lun space.
++
++ Note: At this time, the transport is written to create only NPIV-based
++ vports. However, consideration was given to VF-based vports and it
++ should be a minor change to add support if needed. The remaining
++ discussion will concentrate on NPIV.
++
++ Note: World Wide Name assignment (and uniqueness guarantees) are left
++    up to an administrative entity controlling the vport. For example,
++    if vports are to be associated with virtual machines, a Xen management
++    utility would be responsible for creating wwpn/wwnn's for the vport,
++    using its own naming authority and OUI. (Note: it already does this
++ for virtual MAC addresses).
++
++
++Device Trees and Vport Objects:
++-------------------------------
++
++ Today, the device tree typically contains the scsi_host object,
++ with rports and scsi target objects underneath it. Currently the FC
++ transport creates the vport object and places it under the scsi_host
++ object corresponding to the physical adapter. The LLDD will allocate
++  a new scsi_host for the vport and link its object under the vport.
++  The remainder of the tree under the vport's scsi_host is the same
++  as in the non-NPIV case. The transport is currently written to easily
++ allow the parent of the vport to be something other than the scsi_host.
++ This could be used in the future to link the object onto a vm-specific
++ device tree. If the vport's parent is not the physical port's scsi_host,
++ a symbolic link to the vport object will be placed in the physical
++ port's scsi_host.
++
++  Here's what to expect in the device tree:
++ The typical Physical Port's Scsi_Host:
++ /sys/devices/.../host17/
++    and it has the typical descendant tree:
++ /sys/devices/.../host17/rport-17:0-0/target17:0:0/17:0:0:0:
++ and then the vport is created on the Physical Port:
++ /sys/devices/.../host17/vport-17:0-0
++ and the vport's Scsi_Host is then created:
++ /sys/devices/.../host17/vport-17:0-0/host18
++ and then the rest of the tree progresses, such as:
++ /sys/devices/.../host17/vport-17:0-0/host18/rport-18:0-0/target18:0:0/18:0:0:0:
++
++  Here's what to expect in the sysfs tree:
++ scsi_hosts:
++ /sys/class/scsi_host/host17 physical port's scsi_host
++ /sys/class/scsi_host/host18 vport's scsi_host
++ fc_hosts:
++ /sys/class/fc_host/host17 physical port's fc_host
++ /sys/class/fc_host/host18 vport's fc_host
++ fc_vports:
++ /sys/class/fc_vports/vport-17:0-0 the vport's fc_vport
++ fc_rports:
++ /sys/class/fc_remote_ports/rport-17:0-0 rport on the physical port
++ /sys/class/fc_remote_ports/rport-18:0-0 rport on the vport
++
++
++Vport Attributes:
++-------------------------------
++
++ The new fc_vport class object has the following attributes
++
++ node_name: Read_Only
++ The WWNN of the vport
++
++ port_name: Read_Only
++ The WWPN of the vport
++
++ roles: Read_Only
++ Indicates the FC4 roles enabled on the vport.
++
++ symbolic_name: Read_Write
++ A string, appended to the driver's symbolic port name string, which
++ is registered with the switch to identify the vport. For example,
++ a hypervisor could set this string to "Xen Domain 2 VM 5 Vport 2",
++ and this set of identifiers can be seen on switch management screens
++ to identify the port.
++
++ vport_delete: Write_Only
++ When written with a "1", will tear down the vport.
++
++ vport_disable: Write_Only
++    When written with a "1", will transition the vport to a disabled
++ state. The vport will still be instantiated with the Linux kernel,
++ but it will not be active on the FC link.
++ When written with a "0", will enable the vport.
++
++ vport_last_state: Read_Only
++ Indicates the previous state of the vport. See the section below on
++ "Vport States".
++
++ vport_state: Read_Only
++ Indicates the state of the vport. See the section below on
++ "Vport States".
++
++ vport_type: Read_Only
++ Reflects the FC mechanism used to create the virtual port.
++ Only NPIV is supported currently.
++
++
++ For the fc_host class object, the following attributes are added for vports:
++
++ max_npiv_vports: Read_Only
++ Indicates the maximum number of NPIV-based vports that the
++ driver/adapter can support on the fc_host.
++
++ npiv_vports_inuse: Read_Only
++ Indicates how many NPIV-based vports have been instantiated on the
++ fc_host.
++
++ vport_create: Write_Only
++ A "simple" create interface to instantiate a vport on an fc_host.
++ A "<WWPN>:<WWNN>" string is written to the attribute. The transport
++ then instantiates the vport object and calls the LLDD to create the
++ vport with the role of FCP_Initiator. Each WWN is specified as 16
++ hex characters and may *not* contain any prefixes (e.g. 0x, x, etc).
++
++ vport_delete: Write_Only
++ A "simple" delete interface to teardown a vport. A "<WWPN>:<WWNN>"
++ string is written to the attribute. The transport will locate the
++ vport on the fc_host with the same WWNs and tear it down. Each WWN
++ is specified as 16 hex characters and may *not* contain any prefixes
++ (e.g. 0x, x, etc).
++
++
++Vport States:
++-------------------------------
++
++ Vport instantiation consists of two parts:
++ - Creation with the kernel and LLDD. This means all transport and
++ driver data structures are built up, and device objects created.
++ This is equivalent to a driver "attach" on an adapter, which is
++ independent of the adapter's link state.
++ - Instantiation of the vport on the FC link via ELS traffic, etc.
++     This is equivalent to a "link up" and successful link initialization.
++  Further information can be found in the interfaces section below for
++ Vport Creation.
++
++ Once a vport has been instantiated with the kernel/LLDD, a vport state
++ can be reported via the sysfs attribute. The following states exist:
++
++ FC_VPORT_UNKNOWN - Unknown
++    A temporary state, typically set only while the vport is being
++ instantiated with the kernel and LLDD.
++
++ FC_VPORT_ACTIVE - Active
++    The vport has been successfully created on the FC link.
++ It is fully functional.
++
++ FC_VPORT_DISABLED - Disabled
++    The vport is instantiated, but "disabled". The vport is not instantiated
++ on the FC link. This is equivalent to a physical port with the
++ link "down".
++
++ FC_VPORT_LINKDOWN - Linkdown
++ The vport is not operational as the physical link is not operational.
++
++ FC_VPORT_INITIALIZING - Initializing
++ The vport is in the process of instantiating on the FC link.
++ The LLDD will set this state just prior to starting the ELS traffic
++ to create the vport. This state will persist until the vport is
++ successfully created (state becomes FC_VPORT_ACTIVE) or it fails
++ (state is one of the values below). As this state is transitory,
++ it will not be preserved in the "vport_last_state".
++
++ FC_VPORT_NO_FABRIC_SUPP - No Fabric Support
++    The vport is not operational. One of the following conditions was
++ encountered:
++ - The FC topology is not Point-to-Point
++ - The FC port is not connected to an F_Port
++ - The F_Port has indicated that NPIV is not supported.
++
++ FC_VPORT_NO_FABRIC_RSCS - No Fabric Resources
++ The vport is not operational. The Fabric failed FDISC with a status
++ indicating that it does not have sufficient resources to complete
++ the operation.
++
++ FC_VPORT_FABRIC_LOGOUT - Fabric Logout
++ The vport is not operational. The Fabric has LOGO'd the N_Port_ID
++ associated with the vport.
++
++ FC_VPORT_FABRIC_REJ_WWN - Fabric Rejected WWN
++ The vport is not operational. The Fabric failed FDISC with a status
++    indicating that the WWNs are not valid.
++
++ FC_VPORT_FAILED - VPort Failed
++ The vport is not operational. This is a catchall for all other
++ error conditions.
++
++
++ The following state table indicates the different state transitions:
++
++ State Event New State
++ --------------------------------------------------------------------
++ n/a Initialization Unknown
++ Unknown: Link Down Linkdown
++ Link Up & Loop No Fabric Support
++ Link Up & no Fabric No Fabric Support
++ Link Up & FLOGI response No Fabric Support
++ indicates no NPIV support
++ Link Up & FDISC being sent Initializing
++ Disable request Disable
++ Linkdown: Link Up Unknown
++ Initializing: FDISC ACC Active
++ FDISC LS_RJT w/ no resources No Fabric Resources
++ FDISC LS_RJT w/ invalid Fabric Rejected WWN
++ pname or invalid nport_id
++ FDISC LS_RJT failed for Vport Failed
++ other reasons
++ Link Down Linkdown
++ Disable request Disable
++ Disable: Enable request Unknown
++ Active: LOGO received from fabric Fabric Logout
++ Link Down Linkdown
++ Disable request Disable
++ Fabric Logout: Link still up Unknown
++
++ The following 4 error states all have the same transitions:
++ No Fabric Support:
++ No Fabric Resources:
++ Fabric Rejected WWN:
++ Vport Failed:
++ Disable request Disable
++ Link goes down Linkdown
++
++
++Transport <-> LLDD Interfaces:
++-------------------------------
++
++Vport support by LLDD:
++
++ The LLDD indicates support for vports by supplying a vport_create()
++  function in the transport template. The presence of this function will
++ cause the creation of the new attributes on the fc_host. As part of
++ the physical port completing its initialization relative to the
++ transport, it should set the max_npiv_vports attribute to indicate the
++ maximum number of vports the driver and/or adapter supports.
++
++
++Vport Creation:
++
++ The LLDD vport_create() syntax is:
++
++ int vport_create(struct fc_vport *vport, bool disable)
++
++ where:
++   vport:    The newly allocated vport object
++   disable:  If "true", the vport is to be created in a disabled state.
++             If "false", the vport is to be enabled upon creation.
++
++ When a request is made to create a new vport (via sgio/netlink, or the
++ vport_create fc_host attribute), the transport will validate that the LLDD
++ can support another vport (e.g. max_npiv_vports > npiv_vports_inuse).
++ If not, the create request will be failed. If space remains, the transport
++ will increment the vport count, create the vport object, and then call the
++ LLDD's vport_create() function with the newly allocated vport object.
++
++ As mentioned above, vport creation is divided into two parts:
++ - Creation with the kernel and LLDD. This means all transport and
++ driver data structures are built up, and device objects created.
++ This is equivalent to a driver "attach" on an adapter, which is
++ independent of the adapter's link state.
++ - Instantiation of the vport on the FC link via ELS traffic, etc.
++     This is equivalent to a "link up" and successful link initialization.
++
++ The LLDD's vport_create() function will not synchronously wait for both
++ parts to be fully completed before returning. It must validate that the
++ infrastructure exists to support NPIV, and complete the first part of
++ vport creation (data structure build up) before returning. We do not
++ hinge vport_create() on the link-side operation mainly because:
++ - The link may be down. It is not a failure if it is. It simply
++ means the vport is in an inoperable state until the link comes up.
++ This is consistent with the link bouncing post vport creation.
++ - The vport may be created in a disabled state.
++    - This is consistent with a model where the vport equates to an
++      FC adapter. The vport_create is synonymous with driver attachment
++ to the adapter, which is independent of link state.
++
++ Note: special error codes have been defined to delineate infrastructure
++ failure cases for quicker resolution.
++
++ The expected behavior for the LLDD's vport_create() function is:
++ - Validate Infrastructure:
++ - If the driver or adapter cannot support another vport, whether
++ due to improper firmware, (a lie about) max_npiv, or a lack of
++ some other resource - return VPCERR_UNSUPPORTED.
++ - If the driver validates the WWN's against those already active on
++ the adapter and detects an overlap - return VPCERR_BAD_WWN.
++ - If the driver detects the topology is loop, non-fabric, or the
++ FLOGI did not support NPIV - return VPCERR_NO_FABRIC_SUPP.
++ - Allocate data structures. If errors are encountered, such as out
++ of memory conditions, return the respective negative Exxx error code.
++  - If the role is FCP Initiator, the LLDD is to:
++ - Call scsi_host_alloc() to allocate a scsi_host for the vport.
++ - Call scsi_add_host(new_shost, &vport->dev) to start the scsi_host
++ and bind it as a child of the vport device.
++      - Initialize the fc_host attribute values.
++  - Kick off further vport state transitions based on the disable flag and
++ link state - and return success (zero).
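++
++  For illustration only, a skeletal vport_create() following the flow above
++  might look like this (the foo_* names and the private scsi_host_template
++  are hypothetical, not part of the transport API):
++
++    static int foo_vport_create(struct fc_vport *vport, bool disable)
++    {
++        struct Scsi_Host *shost;
++
++        /* validate infrastructure (driver-specific checks) */
++        if (!foo_npiv_supported(vport))        /* hypothetical helper */
++            return VPCERR_NO_FABRIC_SUPP;
++
++        /* allocate and register a scsi_host as a child of the vport */
++        shost = scsi_host_alloc(&foo_vport_template, sizeof(struct foo_vport));
++        if (!shost)
++            return -ENOMEM;
++        if (scsi_add_host(shost, &vport->dev)) {
++            scsi_host_put(shost);
++            return -EIO;
++        }
++
++        /* initialize the fc_host attributes, then, unless "disable" was
++         * set, kick off the link-side (FDISC) state machine */
++        return 0;
++    }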
++
++ LLDD Implementers Notes:
++   - It is suggested that there be different fc_function_templates for
++ the physical port and the virtual port. The physical port's template
++ would have the vport_create, vport_delete, and vport_disable functions,
++     while the vports' templates would not.
++ - It is suggested that there be different scsi_host_templates
++ for the physical port and virtual port. Likely, there are driver
++ attributes, embedded into the scsi_host_template, that are applicable
++ for the physical port only (link speed, topology setting, etc). This
++ ensures that the attributes are applicable to the respective scsi_host.
++
++
++Vport Disable/Enable:
++
++ The LLDD vport_disable() syntax is:
++
++ int vport_disable(struct fc_vport *vport, bool disable)
++
++ where:
++   vport:    The vport to be enabled or disabled
++ disable: If "true", the vport is to be disabled.
++ If "false", the vport is to be enabled.
++
++ When a request is made to change the disabled state on a vport, the
++ transport will validate the request against the existing vport state.
++ If the request is to disable and the vport is already disabled, the
++ request will fail. Similarly, if the request is to enable, and the
++ vport is not in a disabled state, the request will fail. If the request
++ is valid for the vport state, the transport will call the LLDD to
++ change the vport's state.
++
++ Within the LLDD, if a vport is disabled, it remains instantiated with
++ the kernel and LLDD, but it is not active or visible on the FC link in
++  any way (see Vport Creation and the two-part instantiation discussion).
++ The vport will remain in this state until it is deleted or re-enabled.
++ When enabling a vport, the LLDD reinstantiates the vport on the FC
++  link - essentially restarting the LLDD state machine (see Vport States
++ above).
++
++
++Vport Deletion:
++
++ The LLDD vport_delete() syntax is:
++
++ int vport_delete(struct fc_vport *vport)
++
++ where:
++   vport:    The vport to be deleted
++
++ When a request is made to delete a vport (via sgio/netlink, or via the
++ fc_host or fc_vport vport_delete attributes), the transport will call
++  the LLDD to terminate the vport on the FC link, and tear down all other
++  data structures and references. If the LLDD completes successfully,
++  the transport will tear down the vport objects and complete the vport
++ removal. If the LLDD delete request fails, the vport object will remain,
++ but will be in an indeterminate state.
++
++ Within the LLDD, the normal code paths for a scsi_host teardown should
++  be followed. E.g., if the vport has an FCP Initiator role, the LLDD
++  will call fc_remove_host() for the vport's scsi_host, followed by
++  scsi_remove_host() and scsi_host_put() for the vport's scsi_host.
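++
++  For illustration, the teardown sequence described above might look like
++  this (the foo_vport private structure holding the scsi_host pointer is
++  hypothetical):
++
++    static int foo_vport_delete(struct fc_vport *vport)
++    {
++        struct foo_vport *fv = vport->dd_data;  /* LLDD private data */
++
++        fc_remove_host(fv->shost);   /* tear down rports under the vport */
++        scsi_remove_host(fv->shost);
++        scsi_host_put(fv->shost);
++        return 0;
++    }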
++
++
++Other:
++ fc_host port_type attribute:
++ There is a new fc_host port_type value - FC_PORTTYPE_NPIV. This value
++ must be set on all vport-based fc_hosts. Normally, on a physical port,
++ the port_type attribute would be set to NPORT, NLPORT, etc based on the
++ topology type and existence of the fabric. As this is not applicable to
++ a vport, it makes more sense to report the FC mechanism used to create
++ the vport.
++
++ Driver unload:
++ FC drivers are required to call fc_remove_host() prior to calling
++ scsi_remove_host(). This allows the fc_host to tear down all remote
++  ports prior to the scsi_host being torn down. The fc_remove_host() call
++ was updated to remove all vports for the fc_host as well.
++
++
++Credits
++=======
++The following people have contributed to this document:
++
++
++
++
++
++
++James Smart
++james.smart@emulex.com
++
+diff -Nurb linux-2.6.22-570/Documentation/sysctl/kernel.txt linux-2.6.22-591/Documentation/sysctl/kernel.txt
+--- linux-2.6.22-570/Documentation/sysctl/kernel.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/sysctl/kernel.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -29,6 +29,7 @@
+ - java-interpreter [ binfmt_java, obsolete ]
+ - kstack_depth_to_print [ X86 only ]
+ - l2cr [ PPC only ]
++- mmap_min_addr
+ - modprobe ==> Documentation/kmod.txt
+ - msgmax
+ - msgmnb
+@@ -178,6 +179,19 @@
+
+ ==============================================================
+
++mmap_min_addr
++
++This file indicates the amount of address space which a user process will be
++restricted from mmapping. Since kernel NULL dereference bugs could
++accidentally operate based on the information in the first couple of pages of
++memory, userspace processes should not be allowed to write to them. By default
++this value is set to 0, and no protections will be enforced by the security
++module. Setting this value to something like 64k will allow the vast majority
++of applications to work correctly and provide defense in depth against future
++potential kernel bugs.
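++
++For example, to reserve the first 64k of address space (path assuming the
++tunable is exposed under /proc/sys/kernel, as its listing above suggests):
++
++# echo 65536 > /proc/sys/kernel/mmap_min_addr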
++
++==============================================================
++
+ osrelease, ostype & version:
+
+ # cat osrelease
+diff -Nurb linux-2.6.22-570/Documentation/sysfs-rules.txt linux-2.6.22-591/Documentation/sysfs-rules.txt
+--- linux-2.6.22-570/Documentation/sysfs-rules.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/sysfs-rules.txt 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,166 @@
++Rules on how to access information in the Linux kernel sysfs
++
++The kernel-exported sysfs exposes internal kernel implementation details
++and depends on internal kernel structures and layout. It is agreed upon
++by the kernel developers that the Linux kernel does not provide a stable
++internal API. As sysfs is a direct export of kernel internal
++structures, the sysfs interface cannot provide a stable interface either;
++it may always change along with internal kernel changes.
++
++To minimize the risk that a new kernel release breaks users of sysfs, which
++are in most cases low-level userspace applications, the users
++of sysfs must follow some rules and use an as-abstract-as-possible way to
++access this filesystem. The current udev and HAL programs already
++implement this and users are encouraged to plug, if possible, into the
++abstractions these programs provide instead of accessing sysfs
++directly.
++
++But if you really do want or need to access sysfs directly, please follow
++the rules below, and your programs should then work with future
++versions of the sysfs interface.
++
++- Do not use libsysfs
++ It makes assumptions about sysfs which are not true. Its API does not
++ offer any abstraction, it exposes all the kernel driver-core
++ implementation details in its own API. Therefore it is not better than
++ reading directories and opening the files yourself.
++ Also, it is not actively maintained, in the sense of reflecting the
++  current kernel development. The goal of providing a stable interface
++  to sysfs has failed; it causes more problems than it solves. It
++ violates many of the rules in this document.
++
++- sysfs is always at /sys
++ Parsing /proc/mounts is a waste of time. Other mount points are a
++ system configuration bug you should not try to solve. For test cases,
++  possibly support a SYSFS_PATH environment variable to override the
++  application's behavior, but never try to search for sysfs. Never try
++  to mount it if you are not an early boot script.
++
++- devices are only "devices"
++  There is no such thing as class, bus, or physical devices,
++  interfaces, and such that you can rely on in userspace. Everything is
++  simply a "device". Class-, bus-, physical, ... types are just
++ kernel implementation details, which should not be expected by
++ applications that look for devices in sysfs.
++
++ The properties of a device are:
++ o devpath (/devices/pci0000:00/0000:00:1d.1/usb2/2-2/2-2:1.0)
++ - identical to the DEVPATH value in the event sent from the kernel
++ at device creation and removal
++ - the unique key to the device at that point in time
++    - the kernel's path to the device directory, without the leading
++      /sys, and always starting with a slash
++ - all elements of a devpath must be real directories. Symlinks
++ pointing to /sys/devices must always be resolved to their real
++ target, and the target path must be used to access the device.
++ That way the devpath to the device matches the devpath of the
++ kernel used at event time.
++ - using or exposing symlink values as elements in a devpath string
++ is a bug in the application
++
++ o kernel name (sda, tty, 0000:00:1f.2, ...)
++ - a directory name, identical to the last element of the devpath
++ - applications need to handle spaces and characters like '!' in
++ the name
++
++ o subsystem (block, tty, pci, ...)
++ - simple string, never a path or a link
++ - retrieved by reading the "subsystem"-link and using only the
++ last element of the target path
++
++ o driver (tg3, ata_piix, uhci_hcd)
++ - a simple string, which may contain spaces, never a path or a
++ link
++ - it is retrieved by reading the "driver"-link and using only the
++ last element of the target path
++    - devices which do not have a "driver"-link just do not have a
++      driver; copying the driver value into a child device context is a
++ bug in the application
++
++ o attributes
++    - the files in the device directory or files below subdirectories
++ of the same device directory
++ - accessing attributes reached by a symlink pointing to another device,
++ like the "device"-link, is a bug in the application
++
++  Everything else is just a kernel driver-core implementation detail
++  that should not be assumed to be stable across kernel releases.
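++
++  As an illustration only (not part of any library API), the subsystem of
++  a device can be retrieved by resolving the "subsystem"-link and keeping
++  only the last element of the target path:
++
++    /* illustrative userspace sketch; the devpath is an example value */
++    #include <limits.h>
++    #include <stdio.h>
++    #include <string.h>
++    #include <unistd.h>
++
++    int main(void)
++    {
++        const char *devpath = "/devices/pci0000:00/0000:00:1d.1/usb2/2-2/2-2:1.0";
++        char link[PATH_MAX], target[PATH_MAX];
++        ssize_t len;
++        char *sub;
++
++        snprintf(link, sizeof(link), "/sys%s/subsystem", devpath);
++        len = readlink(link, target, sizeof(target) - 1);
++        if (len < 0)
++            return 1;
++        target[len] = '\0';
++        sub = strrchr(target, '/');    /* keep only the last path element */
++        printf("subsystem: %s\n", sub ? sub + 1 : target);
++        return 0;
++    }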
++
++- Properties of parent devices never belong in a child device.
++ Always look at the parent devices themselves for determining device
++ context properties. If the device 'eth0' or 'sda' does not have a
++ "driver"-link, then this device does not have a driver. Its value is empty.
++ Never copy any property of the parent-device into a child-device. Parent
++ device-properties may change dynamically without any notice to the
++ child device.
++
++- Hierarchy in a single device-tree
++ There is only one valid place in sysfs where hierarchy can be examined
++  and this is below /sys/devices.
++  It is planned that all device directories will end up in the tree
++ below this directory.
++
++- Classification by subsystem
++ There are currently three places for classification of devices:
++ /sys/block, /sys/class and /sys/bus. It is planned that these will
++ not contain any device-directories themselves, but only flat lists of
++ symlinks pointing to the unified /sys/devices tree.
++ All three places have completely different rules on how to access
++ device information. It is planned to merge all three
++ classification-directories into one place at /sys/subsystem,
++ following the layout of the bus-directories. All buses and
++ classes, including the converted block-subsystem, will show up
++ there.
++ The devices belonging to a subsystem will create a symlink in the
++ "devices" directory at /sys/subsystem/<name>/devices.
++
++ If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be
++  ignored. If it does not exist, you always have to scan all three
++ places, as the kernel is free to move a subsystem from one place to
++ the other, as long as the devices are still reachable by the same
++ subsystem name.
++
++ Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or
++ /sys/block and /sys/class/block are not interchangeable, is a bug in
++ the application.
++
++- Block
++ The converted block-subsystem at /sys/class/block, or
++ /sys/subsystem/block will contain the links for disks and partitions
++  at the same level, never in a hierarchy. Assuming the block-subsystem to
++ contain only disks and not partition-devices in the same flat list is
++ a bug in the application.
++
++- "device"-link and <subsystem>:<kernel name>-links
++ Never depend on the "device"-link. The "device"-link is a workaround
++ for the old layout, where class-devices are not created in
++ /sys/devices/ like the bus-devices. If the link-resolving of a
++ device-directory does not end in /sys/devices/, you can use the
++ "device"-link to find the parent devices in /sys/devices/. That is the
++ single valid use of the "device"-link, it must never appear in any
++ path as an element. Assuming the existence of the "device"-link for
++ a device in /sys/devices/ is a bug in the application.
++ Accessing /sys/class/net/eth0/device is a bug in the application.
++
++ Never depend on the class-specific links back to the /sys/class
++ directory. These links are also a workaround for the design mistake
++ that class-devices are not created in /sys/devices. If a device
++ directory does not contain directories for child devices, these links
++ may be used to find the child devices in /sys/class. That is the single
++ valid use of these links, they must never appear in any path as an
++ element. Assuming the existence of these links for devices which are
++ real child device directories in the /sys/devices tree, is a bug in
++ the application.
++
++  It is planned to remove all these links when all class-device
++ directories live in /sys/devices.
++
++- Position of devices along device chain can change.
++ Never depend on a specific parent device position in the devpath,
++ or the chain of parent devices. The kernel is free to insert devices into
++ the chain. You must always request the parent device you are looking for
++ by its subsystem value. You need to walk up the chain until you find
++ the device that matches the expected subsystem. Depending on a specific
++ position of a parent device, or exposing relative paths, using "../" to
++ access the chain of parents, is a bug in the application.
++
+diff -Nurb linux-2.6.22-570/MAINTAINERS linux-2.6.22-591/MAINTAINERS
+--- linux-2.6.22-570/MAINTAINERS 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/MAINTAINERS 2007-12-21 15:36:11.000000000 -0500
+@@ -232,15 +232,15 @@
+ S: Supported
+
+ ACPI BATTERY DRIVERS
+-P: Vladimir P. Lebedev
+-M: vladimir.p.lebedev@intel.com
++P: Alexey Starikovskiy
++M: astarikovskiy@suse.de
+ L: linux-acpi@vger.kernel.org
+ W: http://acpi.sourceforge.net/
+ S: Supported
+
+ ACPI EC DRIVER
+ P: Alexey Starikovskiy
+-M: alexey.y.starikovskiy@linux.intel.com
++M: astarikovskiy@suse.de
+ L: linux-acpi@vger.kernel.org
+ W: http://acpi.sourceforge.net/
+ S: Supported
+@@ -2127,6 +2127,15 @@
+ L: kexec@lists.infradead.org
+ S: Maintained
+
++KGDB
++P: Jason Wessel
++M: jason.wessel@windriver.com
++P: Amit S. Kale
++M: amitkale@linsyssoft.com
++W: http://sourceforge.net/projects/kgdb
++L: kgdb-bugreport@lists.sourceforge.net
++S: Maintained
++
+ KPROBES
+ P: Prasanna S Panchamukhi
+ M: prasanna@in.ibm.com
+@@ -3593,6 +3602,15 @@
+ W: http://www.kernel.dk
+ S: Maintained
+
++UNIONFS
++P: Erez Zadok
++M: ezk@cs.sunysb.edu
++P: Josef "Jeff" Sipek
++M: jsipek@cs.sunysb.edu
++L: unionfs@filesystems.org
++W: http://unionfs.filesystems.org
++S: Maintained
++
+ USB ACM DRIVER
+ P: Oliver Neukum
+ M: oliver@neukum.name
+diff -Nurb linux-2.6.22-570/Makefile linux-2.6.22-591/Makefile
+--- linux-2.6.22-570/Makefile 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/Makefile 2007-12-21 15:36:16.000000000 -0500
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 22
+-EXTRAVERSION = .14-vs2.3.0.29
++EXTRAVERSION = -prep
+ NAME = Holy Dancing Manatees, Batman!
+
+ # *DOCUMENTATION*
+@@ -496,6 +496,11 @@
+ CFLAGS += -fomit-frame-pointer
+ endif
+
++ifdef CONFIG_UNWIND_INFO
++CFLAGS += -fasynchronous-unwind-tables
++LDFLAGS_vmlinux += --eh-frame-hdr
++endif
++
+ ifdef CONFIG_DEBUG_INFO
+ CFLAGS += -g
+ endif
+diff -Nurb linux-2.6.22-570/Makefile.orig linux-2.6.22-591/Makefile.orig
+--- linux-2.6.22-570/Makefile.orig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/Makefile.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,1493 +0,0 @@
+-VERSION = 2
+-PATCHLEVEL = 6
+-SUBLEVEL = 22
+-EXTRAVERSION = .14
+-NAME = Holy Dancing Manatees, Batman!
+-
+-# *DOCUMENTATION*
+-# To see a list of typical targets execute "make help"
+-# More info can be located in ./README
+-# Comments in this file are targeted only to the developer, do not
+-# expect to learn how to build the kernel reading this file.
+-
+-# Do not:
+-# o use make's built-in rules and variables
+-# (this increases performance and avoid hard-to-debug behavour);
+-# o print "Entering directory ...";
+-MAKEFLAGS += -rR --no-print-directory
+-
+-# We are using a recursive build, so we need to do a little thinking
+-# to get the ordering right.
+-#
+-# Most importantly: sub-Makefiles should only ever modify files in
+-# their own directory. If in some directory we have a dependency on
+-# a file in another dir (which doesn't happen often, but it's often
+-# unavoidable when linking the built-in.o targets which finally
+-# turn into vmlinux), we will call a sub make in that other dir, and
+-# after that we are sure that everything which is in that other dir
+-# is now up to date.
+-#
+-# The only cases where we need to modify files which have global
+-# effects are thus separated out and done before the recursive
+-# descending is started. They are now explicitly listed as the
+-# prepare rule.
+-
+-# To put more focus on warnings, be less verbose as default
+-# Use 'make V=1' to see the full commands
+-
+-ifdef V
+- ifeq ("$(origin V)", "command line")
+- KBUILD_VERBOSE = $(V)
+- endif
+-endif
+-ifndef KBUILD_VERBOSE
+- KBUILD_VERBOSE = 0
+-endif
+-
+-# Call a source code checker (by default, "sparse") as part of the
+-# C compilation.
+-#
+-# Use 'make C=1' to enable checking of only re-compiled files.
+-# Use 'make C=2' to enable checking of *all* source files, regardless
+-# of whether they are re-compiled or not.
+-#
+-# See the file "Documentation/sparse.txt" for more details, including
+-# where to get the "sparse" utility.
+-
+-ifdef C
+- ifeq ("$(origin C)", "command line")
+- KBUILD_CHECKSRC = $(C)
+- endif
+-endif
+-ifndef KBUILD_CHECKSRC
+- KBUILD_CHECKSRC = 0
+-endif
+-
+-# Use make M=dir to specify directory of external module to build
+-# Old syntax make ... SUBDIRS=$PWD is still supported
+-# Setting the environment variable KBUILD_EXTMOD take precedence
+-ifdef SUBDIRS
+- KBUILD_EXTMOD ?= $(SUBDIRS)
+-endif
+-ifdef M
+- ifeq ("$(origin M)", "command line")
+- KBUILD_EXTMOD := $(M)
+- endif
+-endif
+-
+-
+-# kbuild supports saving output files in a separate directory.
+-# To locate output files in a separate directory two syntaxes are supported.
+-# In both cases the working directory must be the root of the kernel src.
+-# 1) O=
+-# Use "make O=dir/to/store/output/files/"
+-#
+-# 2) Set KBUILD_OUTPUT
+-# Set the environment variable KBUILD_OUTPUT to point to the directory
+-# where the output files shall be placed.
+-# export KBUILD_OUTPUT=dir/to/store/output/files/
+-# make
+-#
+-# The O= assignment takes precedence over the KBUILD_OUTPUT environment
+-# variable.
+-
+-
+-# KBUILD_SRC is set on invocation of make in OBJ directory
+-# KBUILD_SRC is not intended to be used by the regular user (for now)
+-ifeq ($(KBUILD_SRC),)
+-
+-# OK, Make called in directory where kernel src resides
+-# Do we want to locate output files in a separate directory?
+-ifdef O
+- ifeq ("$(origin O)", "command line")
+- KBUILD_OUTPUT := $(O)
+- endif
+-endif
+-
+-# That's our default target when none is given on the command line
+-PHONY := _all
+-_all:
+-
+-ifneq ($(KBUILD_OUTPUT),)
+-# Invoke a second make in the output directory, passing relevant variables
+-# check that the output directory actually exists
+-saved-output := $(KBUILD_OUTPUT)
+-KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd)
+-$(if $(KBUILD_OUTPUT),, \
+- $(error output directory "$(saved-output)" does not exist))
+-
+-PHONY += $(MAKECMDGOALS)
+-
+-$(filter-out _all,$(MAKECMDGOALS)) _all:
+- $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \
+- KBUILD_SRC=$(CURDIR) \
+- KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@
+-
+-# Leave processing to above invocation of make
+-skip-makefile := 1
+-endif # ifneq ($(KBUILD_OUTPUT),)
+-endif # ifeq ($(KBUILD_SRC),)
+-
+-# We process the rest of the Makefile if this is the final invocation of make
+-ifeq ($(skip-makefile),)
+-
+-# If building an external module we do not care about the all: rule
+-# but instead _all depend on modules
+-PHONY += all
+-ifeq ($(KBUILD_EXTMOD),)
+-_all: all
+-else
+-_all: modules
+-endif
+-
+-srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR))
+-TOPDIR := $(srctree)
+-# FIXME - TOPDIR is obsolete, use srctree/objtree
+-objtree := $(CURDIR)
+-src := $(srctree)
+-obj := $(objtree)
+-
+-VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
+-
+-export srctree objtree VPATH TOPDIR
+-
+-
+-# SUBARCH tells the usermode build what the underlying arch is. That is set
+-# first, and if a usermode build is happening, the "ARCH=um" on the command
+-# line overrides the setting of ARCH below. If a native build is happening,
+-# then ARCH is assigned, getting whatever value it gets normally, and
+-# SUBARCH is subsequently ignored.
+-
+-SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
+- -e s/arm.*/arm/ -e s/sa110/arm/ \
+- -e s/s390x/s390/ -e s/parisc64/parisc/ \
+- -e s/ppc.*/powerpc/ -e s/mips.*/mips/ )
+-
+-# Cross compiling and selecting different set of gcc/bin-utils
+-# ---------------------------------------------------------------------------
+-#
+-# When performing cross compilation for other architectures ARCH shall be set
+-# to the target architecture. (See arch/* for the possibilities).
+-# ARCH can be set during invocation of make:
+-# make ARCH=ia64
+-# Another way is to have ARCH set in the environment.
+-# The default ARCH is the host where make is executed.
+-
+-# CROSS_COMPILE specify the prefix used for all executables used
+-# during compilation. Only gcc and related bin-utils executables
+-# are prefixed with $(CROSS_COMPILE).
+-# CROSS_COMPILE can be set on the command line
+-# make CROSS_COMPILE=ia64-linux-
+-# Alternatively CROSS_COMPILE can be set in the environment.
+-# Default value for CROSS_COMPILE is not to prefix executables
+-# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
+-
+-ARCH ?= $(SUBARCH)
+-CROSS_COMPILE ?=
+-
+-# Architecture as present in compile.h
+-UTS_MACHINE := $(ARCH)
+-
+-KCONFIG_CONFIG ?= .config
+-
+-# SHELL used by kbuild
+-CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+- else if [ -x /bin/bash ]; then echo /bin/bash; \
+- else echo sh; fi ; fi)
+-
+-HOSTCC = gcc
+-HOSTCXX = g++
+-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+-HOSTCXXFLAGS = -O2
+-
+-# Decide whether to build built-in, modular, or both.
+-# Normally, just do built-in.
+-
+-KBUILD_MODULES :=
+-KBUILD_BUILTIN := 1
+-
+-# If we have only "make modules", don't compile built-in objects.
+-# When we're building modules with modversions, we need to consider
+-# the built-in objects during the descend as well, in order to
+-# make sure the checksums are up to date before we record them.
+-
+-ifeq ($(MAKECMDGOALS),modules)
+- KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
+-endif
+-
+-# If we have "make <whatever> modules", compile modules
+-# in addition to whatever we do anyway.
+-# Just "make" or "make all" shall build modules as well
+-
+-ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
+- KBUILD_MODULES := 1
+-endif
+-
+-ifeq ($(MAKECMDGOALS),)
+- KBUILD_MODULES := 1
+-endif
+-
+-export KBUILD_MODULES KBUILD_BUILTIN
+-export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD
+-
+-# Beautify output
+-# ---------------------------------------------------------------------------
+-#
+-# Normally, we echo the whole command before executing it. By making
+-# that echo $($(quiet)$(cmd)), we now have the possibility to set
+-# $(quiet) to choose other forms of output instead, e.g.
+-#
+-# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@
+-# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+-#
+-# If $(quiet) is empty, the whole command will be printed.
+-# If it is set to "quiet_", only the short version will be printed.
+-# If it is set to "silent_", nothing will be printed at all, since
+-# the variable $(silent_cmd_cc_o_c) doesn't exist.
+-#
+-# A simple variant is to prefix commands with $(Q) - that's useful
+-# for commands that shall be hidden in non-verbose mode.
+-#
+-# $(Q)ln $@ :<
+-#
+-# If KBUILD_VERBOSE equals 0 then the above command will be hidden.
+-# If KBUILD_VERBOSE equals 1 then the above command is displayed.
+-
+-ifeq ($(KBUILD_VERBOSE),1)
+- quiet =
+- Q =
+-else
+- quiet=quiet_
+- Q = @
+-endif
+-
+-# If the user is running make -s (silent mode), suppress echoing of
+-# commands
+-
+-ifneq ($(findstring s,$(MAKEFLAGS)),)
+- quiet=silent_
+-endif
+-
+-export quiet Q KBUILD_VERBOSE
+-
+-
+-# Look for make include files relative to root of kernel src
+-MAKEFLAGS += --include-dir=$(srctree)
+-
+-# We need some generic definitions.
+-include $(srctree)/scripts/Kbuild.include
+-
+-# Make variables (CC, etc...)
+-
+-AS = $(CROSS_COMPILE)as
+-LD = $(CROSS_COMPILE)ld
+-CC = $(CROSS_COMPILE)gcc
+-CPP = $(CC) -E
+-AR = $(CROSS_COMPILE)ar
+-NM = $(CROSS_COMPILE)nm
+-STRIP = $(CROSS_COMPILE)strip
+-OBJCOPY = $(CROSS_COMPILE)objcopy
+-OBJDUMP = $(CROSS_COMPILE)objdump
+-AWK = awk
+-GENKSYMS = scripts/genksyms/genksyms
+-DEPMOD = /sbin/depmod
+-KALLSYMS = scripts/kallsyms
+-PERL = perl
+-CHECK = sparse
+-
+-CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
+-MODFLAGS = -DMODULE
+-CFLAGS_MODULE = $(MODFLAGS)
+-AFLAGS_MODULE = $(MODFLAGS)
+-LDFLAGS_MODULE = -r
+-CFLAGS_KERNEL =
+-AFLAGS_KERNEL =
+-
+-
+-# Use LINUXINCLUDE when you must reference the include/ directory.
+-# Needed to be compatible with the O= option
+-LINUXINCLUDE := -Iinclude \
+- $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
+- -include include/linux/autoconf.h
+-
+-CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE)
+-
+-CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+- -fno-strict-aliasing -fno-common
+-AFLAGS := -D__ASSEMBLY__
+-
+-# Read KERNELRELEASE from include/config/kernel.release (if it exists)
+-KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
+-KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
+-
+-export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
+-export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
+-export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
+-export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
+-
+-export CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
+-export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
+-export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
+-
+-# When compiling out-of-tree modules, put MODVERDIR in the module
+-# tree rather than in the kernel tree. The kernel tree might
+-# even be read-only.
+-export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions
+-
+-# Files to ignore in find ... statements
+-
+-RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o
+-export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exclude CVS --exclude .pc --exclude .hg --exclude .git
+-
+-# ===========================================================================
+-# Rules shared between *config targets and build targets
+-
+-# Basic helpers built in scripts/
+-PHONY += scripts_basic
+-scripts_basic:
+- $(Q)$(MAKE) $(build)=scripts/basic
+-
+-# To avoid any implicit rule to kick in, define an empty command.
+-scripts/basic/%: scripts_basic ;
+-
+-PHONY += outputmakefile
+-# outputmakefile generates a Makefile in the output directory, if using a
+-# separate output directory. This allows convenient use of make in the
+-# output directory.
+-outputmakefile:
+-ifneq ($(KBUILD_SRC),)
+- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \
+- $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL)
+-endif
+-
+-# To make sure we do not include .config for any of the *config targets
+-# catch them early, and hand them over to scripts/kconfig/Makefile
+-# It is allowed to specify more targets when calling make, including
+-# mixing *config targets and build targets.
+-# For example 'make oldconfig all'.
+-# Detect when mixed targets is specified, and make a second invocation
+-# of make so .config is not included in this case either (for *config).
+-
+-no-dot-config-targets := clean mrproper distclean \
+- cscope TAGS tags help %docs check% \
+- include/linux/version.h headers_% \
+- kernelrelease kernelversion
+-
+-config-targets := 0
+-mixed-targets := 0
+-dot-config := 1
+-
+-ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),)
+- ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),)
+- dot-config := 0
+- endif
+-endif
+-
+-ifeq ($(KBUILD_EXTMOD),)
+- ifneq ($(filter config %config,$(MAKECMDGOALS)),)
+- config-targets := 1
+- ifneq ($(filter-out config %config,$(MAKECMDGOALS)),)
+- mixed-targets := 1
+- endif
+- endif
+-endif
+-
+-ifeq ($(mixed-targets),1)
+-# ===========================================================================
+-# We're called with mixed targets (*config and build targets).
+-# Handle them one by one.
+-
+-%:: FORCE
+- $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@
+-
+-else
+-ifeq ($(config-targets),1)
+-# ===========================================================================
+-# *config targets only - make sure prerequisites are updated, and descend
+-# in scripts/kconfig to make the *config target
+-
+-# Read arch specific Makefile to set KBUILD_DEFCONFIG as needed.
+-# KBUILD_DEFCONFIG may point out an alternative default configuration
+-# used for 'make defconfig'
+-include $(srctree)/arch/$(ARCH)/Makefile
+-export KBUILD_DEFCONFIG
+-
+-config %config: scripts_basic outputmakefile FORCE
+- $(Q)mkdir -p include/linux include/config
+- $(Q)$(MAKE) $(build)=scripts/kconfig $@
+-
+-else
+-# ===========================================================================
+-# Build targets only - this includes vmlinux, arch specific targets, clean
+-# targets and others. In general all targets except *config targets.
+-
+-ifeq ($(KBUILD_EXTMOD),)
+-# Additional helpers built in scripts/
+-# Carefully list dependencies so we do not try to build scripts twice
+-# in parallel
+-PHONY += scripts
+-scripts: scripts_basic include/config/auto.conf
+- $(Q)$(MAKE) $(build)=$(@)
+-
+-# Objects we will link into vmlinux / subdirs we need to visit
+-init-y := init/
+-drivers-y := drivers/ sound/
+-net-y := net/
+-libs-y := lib/
+-core-y := usr/
+-endif # KBUILD_EXTMOD
+-
+-ifeq ($(dot-config),1)
+-# Read in config
+--include include/config/auto.conf
+-
+-ifeq ($(KBUILD_EXTMOD),)
+-# Read in dependencies to all Kconfig* files, make sure to run
+-# oldconfig if changes are detected.
+--include include/config/auto.conf.cmd
+-
+-# To prevent any implicit rule from kicking in, define an empty command
+-$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ;
+-
+-# If .config is newer than include/config/auto.conf, someone tinkered
+-# with it and forgot to run make oldconfig.
+-# If auto.conf.cmd is missing, then we are probably in a cleaned tree, so
+-# we execute the config step to be sure to catch updated Kconfig files.
+-include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd
+- $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig
+-else
+-# external modules need include/linux/autoconf.h and include/config/auto.conf
+-# but do not care if they are up-to-date. Use auto.conf to trigger the test
+-PHONY += include/config/auto.conf
+-
+-include/config/auto.conf:
+- $(Q)test -e include/linux/autoconf.h -a -e $@ || ( \
+- echo; \
+- echo " ERROR: Kernel configuration is invalid."; \
+- echo " include/linux/autoconf.h or $@ are missing."; \
+- echo " Run 'make oldconfig && make prepare' on kernel src to fix it."; \
+- echo; \
+- /bin/false)
+-
+-endif # KBUILD_EXTMOD
+-
+-else
+-# Dummy target needed, because used as prerequisite
+-include/config/auto.conf: ;
+-endif # $(dot-config)
+-
+-# The all: target is the default when no target is given on the
+-# command line.
+-# This allows a user to issue only 'make' to build a kernel including modules.
+-# Defaults to vmlinux, but it is usually overridden in the arch makefile.
+-all: vmlinux
+-
+-ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+-CFLAGS += -Os
+-else
+-CFLAGS += -O2
+-endif
+-
+-include $(srctree)/arch/$(ARCH)/Makefile
+-
+-ifdef CONFIG_FRAME_POINTER
+-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+-else
+-CFLAGS += -fomit-frame-pointer
+-endif
+-
+-ifdef CONFIG_DEBUG_INFO
+-CFLAGS += -g
+-endif
+-
+-# Force gcc to behave correctly even for buggy distributions
+-CFLAGS += $(call cc-option, -fno-stack-protector)
+-
+-# arch Makefile may override CC so keep this after arch Makefile is included
+-NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
+-CHECKFLAGS += $(NOSTDINC_FLAGS)
+-
+-# warn about C99 declaration after statement
+-CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
+-
+-# disable pointer signed / unsigned warnings in gcc 4.0
+-CFLAGS += $(call cc-option,-Wno-pointer-sign,)
+-
+-# Default kernel image to build when no specific target is given.
+-# KBUILD_IMAGE may be overruled on the command line or
+-# set in the environment
+-# Also any assignments in arch/$(ARCH)/Makefile take precedence over
+-# this default value
+-export KBUILD_IMAGE ?= vmlinux
+-
+-#
+-# INSTALL_PATH specifies where to place the updated kernel and system map
+-# images. Default is /boot, but you can set it to other values
+-export INSTALL_PATH ?= /boot
+-
+-#
+-# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+-# relocations required by build roots. This is not defined in the
+-# makefile but the argument can be passed to make if needed.
+-#
+-
+-MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+-export MODLIB
+-
+-#
+-# INSTALL_MOD_STRIP, if defined, will cause modules to be
+-# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then
+-# the default option --strip-debug will be used. Otherwise,
+-# INSTALL_MOD_STRIP will be used as the options to the strip command;
+-# e.g. 'make INSTALL_MOD_STRIP=1 modules_install' strips debug info
+-# from every installed module.
+-
+-ifdef INSTALL_MOD_STRIP
+-ifeq ($(INSTALL_MOD_STRIP),1)
+-mod_strip_cmd = $(STRIP) --strip-debug
+-else
+-mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP)
+-endif # INSTALL_MOD_STRIP=1
+-else
+-mod_strip_cmd = true
+-endif # INSTALL_MOD_STRIP
+-export mod_strip_cmd
+-
+-
+-ifeq ($(KBUILD_EXTMOD),)
+-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+-
+-vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
+- $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
+- $(net-y) $(net-m) $(libs-y) $(libs-m)))
+-
+-vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
+- $(init-n) $(init-) \
+- $(core-n) $(core-) $(drivers-n) $(drivers-) \
+- $(net-n) $(net-) $(libs-n) $(libs-))))
+-
+-init-y := $(patsubst %/, %/built-in.o, $(init-y))
+-core-y := $(patsubst %/, %/built-in.o, $(core-y))
+-drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y))
+-net-y := $(patsubst %/, %/built-in.o, $(net-y))
+-libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
+-libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
+-libs-y := $(libs-y1) $(libs-y2)
+-
+-# Build vmlinux
+-# ---------------------------------------------------------------------------
+-# vmlinux is built from the objects selected by $(vmlinux-init) and
+-# $(vmlinux-main). Most are built-in.o files from top-level directories
+-# in the kernel tree, others are specified in arch/$(ARCH)/Makefile.
+-# Ordering when linking is important, and $(vmlinux-init) must be first.
+-#
+-# vmlinux
+-# ^
+-# |
+-# +-< $(vmlinux-init)
+-# | +--< init/version.o + more
+-# |
+-# +--< $(vmlinux-main)
+-# | +--< driver/built-in.o mm/built-in.o + more
+-# |
+-# +-< kallsyms.o (see description in CONFIG_KALLSYMS section)
+-#
+-# vmlinux version (uname -v) cannot be updated during the normal
+-# descending-into-subdirs phase since we do not yet know whether we need
+-# to update vmlinux.
+-# Therefore this step is delayed until just before final link of vmlinux -
+-# except in the kallsyms case where it is done just before adding the
+-# symbols to the kernel.
+-#
+-# System.map is generated to document addresses of all kernel symbols
+-
+-vmlinux-init := $(head-y) $(init-y)
+-vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+-vmlinux-all := $(vmlinux-init) $(vmlinux-main)
+-vmlinux-lds := arch/$(ARCH)/kernel/vmlinux.lds
+-export KBUILD_VMLINUX_OBJS := $(vmlinux-all)
+-
+-# Rule to link vmlinux - also used during CONFIG_KALLSYMS
+-# May be overridden by arch/$(ARCH)/Makefile
+-quiet_cmd_vmlinux__ ?= LD $@
+- cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \
+- -T $(vmlinux-lds) $(vmlinux-init) \
+- --start-group $(vmlinux-main) --end-group \
+- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^)
+-
+-# Generate new vmlinux version
+-quiet_cmd_vmlinux_version = GEN .version
+- cmd_vmlinux_version = set -e; \
+- if [ ! -r .version ]; then \
+- rm -f .version; \
+- echo 1 >.version; \
+- else \
+- mv .version .old_version; \
+- expr 0$$(cat .old_version) + 1 >.version; \
+- fi; \
+- $(MAKE) $(build)=init
+-
+-# Generate System.map
+-quiet_cmd_sysmap = SYSMAP
+- cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap
+-
+-# Link of vmlinux
+-# If CONFIG_KALLSYMS is set .version is already updated
+-# Generate System.map and verify that the content is consistent
+-# Use + in front of the vmlinux_version rule to silence the warning with make -j2
+-# First command is ':' to allow us to use + in front of the rule
+-define rule_vmlinux__
+- :
+- $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version))
+-
+- $(call cmd,vmlinux__)
+- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
+-
+- $(Q)$(if $($(quiet)cmd_sysmap), \
+- echo ' $($(quiet)cmd_sysmap) System.map' &&) \
+- $(cmd_sysmap) $@ System.map; \
+- if [ $$? -ne 0 ]; then \
+- rm -f $@; \
+- /bin/false; \
+- fi;
+- $(verify_kallsyms)
+-endef
+-
+-
+-ifdef CONFIG_KALLSYMS
+-# Generate section listing all symbols and add it into vmlinux $(kallsyms.o)
+-# It's a three-stage process, plus verification and an optional extra pass:
+-# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is
+-# empty
+-# Running kallsyms on that gives us .tmp_kallsyms1.o with
+-# the right size - vmlinux version (uname -v) is updated during this step
+-# o .tmp_vmlinux2 now has a __kallsyms section of the right size,
+-# but due to the added section, some addresses have shifted.
+-# From here, we generate a correct .tmp_kallsyms2.o
+-# o The correct .tmp_kallsyms2.o is linked into the final vmlinux.
+-# o Verify that the System.map from vmlinux matches the map from
+-# .tmp_vmlinux2, just in case we did not generate kallsyms correctly.
+-# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using
+-# .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a
+-# temporary bypass to allow the kernel to be built while the
+-# maintainers work out what went wrong with kallsyms.
+-
+-ifdef CONFIG_KALLSYMS_EXTRA_PASS
+-last_kallsyms := 3
+-else
+-last_kallsyms := 2
+-endif
+-
+-kallsyms.o := .tmp_kallsyms$(last_kallsyms).o
+-
+-define verify_kallsyms
+- $(Q)$(if $($(quiet)cmd_sysmap), \
+- echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \
+- $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map
+- $(Q)cmp -s System.map .tmp_System.map || \
+- (echo Inconsistent kallsyms data; \
+- echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \
+- rm .tmp_kallsyms* ; /bin/false )
+-endef
+-
+-# Update vmlinux version before link
+-# Use + in front of this rule to silence the warning about make -j1
+-# First command is ':' to allow us to use + in front of this rule
+-cmd_ksym_ld = $(cmd_vmlinux__)
+-define rule_ksym_ld
+- :
+- +$(call cmd,vmlinux_version)
+- $(call cmd,vmlinux__)
+- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
+-endef
+-
+-# Generate .S file with all kernel symbols
+-quiet_cmd_kallsyms = KSYM $@
+- cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \
+- $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@
+-
+-.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE
+- $(call if_changed_dep,as_o_S)
+-
+-.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS)
+- $(call cmd,kallsyms)
+-
+-# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version
+-.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE
+- $(call if_changed_rule,ksym_ld)
+-
+-.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE
+- $(call if_changed,vmlinux__)
+-
+-.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE
+- $(call if_changed,vmlinux__)
+-
+-# Needs to visit scripts/ before $(KALLSYMS) can be used.
+-$(KALLSYMS): scripts ;
+-
+-# Generate some data for debugging strange kallsyms problems
+-debug_kallsyms: .tmp_map$(last_kallsyms)
+-
+-.tmp_map%: .tmp_vmlinux% FORCE
+- ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@
+-
+-.tmp_map3: .tmp_map2
+-
+-.tmp_map2: .tmp_map1
+-
+-endif # ifdef CONFIG_KALLSYMS
+-
+-# vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
+-ifdef CONFIG_HEADERS_CHECK
+- $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
+-endif
+- $(call if_changed_rule,vmlinux__)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
+- $(Q)rm -f .old_version
+-
+-# The actual objects are generated when descending,
+-# make sure no implicit rule kicks in
+-$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+-
+-# Handle descending into subdirectories listed in $(vmlinux-dirs)
+-# Preset locale variables to speed up the build process. Limit locale
+-# tweaks to this spot to avoid wrong language settings when running
+-# make menuconfig etc.
+-# Error messages still appear in the original language
+-
+-PHONY += $(vmlinux-dirs)
+-$(vmlinux-dirs): prepare scripts
+- $(Q)$(MAKE) $(build)=$@
+-
+-# Build the kernel release string
+-#
+-# The KERNELRELEASE value built here is stored in the file
+-# include/config/kernel.release, and is used when executing several
+-# make targets, such as "make install" or "make modules_install."
+-#
+-# The eventual kernel release string consists of the following fields,
+-# shown in a hierarchical format to show how smaller parts are concatenated
+-# to form the larger and final value, with values coming from places like
+-# the Makefile, kernel config options, make command line options and/or
+-# SCM tag information.
+-#
+-# $(KERNELVERSION)
+-# $(VERSION) eg, 2
+-# $(PATCHLEVEL) eg, 6
+-# $(SUBLEVEL) eg, 18
+-# $(EXTRAVERSION) eg, -rc6
+-# $(localver-full)
+-# $(localver)
+-# localversion* (all localversion* files; backups containing '~' are skipped)
+-# $(CONFIG_LOCALVERSION) (from kernel config setting)
+-# $(localver-auto) (only if CONFIG_LOCALVERSION_AUTO is set)
+-# ./scripts/setlocalversion (SCM tag, if one exists)
+-# $(LOCALVERSION) (from make command line if provided)
+-#
+-# Note how the final $(localver-auto) string is included *only* if the
+-# kernel config option CONFIG_LOCALVERSION_AUTO is selected. Also, at the
+-# moment only git is supported; support for other SCMs can be added by
+-# editing scripts/setlocalversion with the appropriate checks as needed.
+-
+-pattern = ".*/localversion[^~]*"
+-string = $(shell cat /dev/null \
+- `find $(objtree) $(srctree) -maxdepth 1 -regex $(pattern) | sort -u`)
+-
+-localver = $(subst $(space),, $(string) \
+- $(patsubst "%",%,$(CONFIG_LOCALVERSION)))
+-
+-# If CONFIG_LOCALVERSION_AUTO is set, scripts/setlocalversion is called,
+-# and if the SCM is known, a tag from the SCM is appended.
+-# The appended tag is determined by the SCM used.
+-#
+-# Currently, only git is supported.
+-# Support for other SCMs can be added by editing scripts/setlocalversion
+-# to include the appropriate checks as needed.
+-ifdef CONFIG_LOCALVERSION_AUTO
+- _localver-auto = $(shell $(CONFIG_SHELL) \
+- $(srctree)/scripts/setlocalversion $(srctree))
+- localver-auto = $(LOCALVERSION)$(_localver-auto)
+-endif
+-
+-localver-full = $(localver)$(localver-auto)
+-
+-# Store the (new) KERNELRELEASE string in include/config/kernel.release
+-kernelrelease = $(KERNELVERSION)$(localver-full)
+-include/config/kernel.release: include/config/auto.conf FORCE
+- $(Q)rm -f $@
+- $(Q)echo $(kernelrelease) > $@
+-
+-
+-# Things we need to do before we recursively start building the kernel
+-# or the modules are listed in "prepare".
+-# A multi-level approach is used: prepareN is processed before prepareN-1.
+-# archprepare is used in arch Makefiles; by the time it has been processed,
+-# the asm symlink, version.h and scripts_basic have been created.
+-
+-# Listed in dependency order
+-PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
+-
+-# prepare3 is used to check if we are building in a separate output directory,
+-# and if so do:
+-# 1) Check that make has not been executed in the kernel src $(srctree)
+-# 2) Create the include2 directory, used for the second asm symlink
+-prepare3: include/config/kernel.release
+-ifneq ($(KBUILD_SRC),)
+- @echo ' Using $(srctree) as source for kernel'
+- $(Q)if [ -f $(srctree)/.config -o -d $(srctree)/include/config ]; then \
+- echo " $(srctree) is not clean, please run 'make mrproper'";\
+- echo " in the '$(srctree)' directory.";\
+- /bin/false; \
+- fi;
+- $(Q)if [ ! -d include2 ]; then mkdir -p include2; fi;
+- $(Q)ln -fsn $(srctree)/include/asm-$(ARCH) include2/asm
+-endif
+-
+-# prepare2 creates a makefile if using a separate output directory
+-prepare2: prepare3 outputmakefile
+-
+-prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
+- include/asm include/config/auto.conf
+-ifneq ($(KBUILD_MODULES),)
+- $(Q)mkdir -p $(MODVERDIR)
+- $(Q)rm -f $(MODVERDIR)/*
+-endif
+-
+-archprepare: prepare1 scripts_basic
+-
+-prepare0: archprepare FORCE
+- $(Q)$(MAKE) $(build)=.
+- $(Q)$(MAKE) $(build)=. missing-syscalls
+-
+-# All the preparing..
+-prepare: prepare0
+-
+-# Leave this as default for preprocessing vmlinux.lds.S, which is now
+-# done in arch/$(ARCH)/kernel/Makefile
+-
+-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
+-
+-# FIXME: The asm symlink changes when $(ARCH) changes. That's
+-# hard to detect, but I suppose "make mrproper" is a good idea
+-# before switching between archs anyway.
+-
+-include/asm:
+- @echo ' SYMLINK $@ -> include/asm-$(ARCH)'
+- $(Q)if [ ! -d include ]; then mkdir -p include; fi;
+- @ln -fsn asm-$(ARCH) $@
+-
+-# Generate some files
+-# ---------------------------------------------------------------------------
+-
+-# KERNELRELEASE can change from a few different places, meaning version.h
+-# needs to be updated, so this check is forced on all builds
+-
+-uts_len := 64
+-define filechk_utsrelease.h
+- if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \
+- echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \
+- exit 1; \
+- fi; \
+- (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";)
+-endef
+-
+-define filechk_version.h
+- (echo \#define LINUX_VERSION_CODE $(shell \
+- expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
+- echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';)
+-endef
+-
+-include/linux/version.h: $(srctree)/Makefile FORCE
+- $(call filechk,version.h)
+-
+-include/linux/utsrelease.h: include/config/kernel.release FORCE
+- $(call filechk,utsrelease.h)
+-
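For orientation, downstream C code consumes the two generated headers as
follows; a minimal sketch (the version numbers shown are examples only):

    #include <linux/version.h>      /* LINUX_VERSION_CODE, KERNEL_VERSION() */
    #include <linux/utsrelease.h>   /* UTS_RELEASE, e.g. "2.6.22-prep" */

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
    /* code path for 2.6.22 and later kernels */
    #endif

    static const char banner[] = "built for " UTS_RELEASE;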
+-# ---------------------------------------------------------------------------
+-
+-PHONY += depend dep
+-depend dep:
+- @echo '*** Warning: make $@ is unnecessary now.'
+-
+-# ---------------------------------------------------------------------------
+-# Kernel headers
+-INSTALL_HDR_PATH=$(objtree)/usr
+-export INSTALL_HDR_PATH
+-
+-HDRARCHES=$(filter-out generic,$(patsubst $(srctree)/include/asm-%/Kbuild,%,$(wildcard $(srctree)/include/asm-*/Kbuild)))
+-
+-PHONY += headers_install_all
+-headers_install_all: include/linux/version.h scripts_basic FORCE
+- $(Q)$(MAKE) $(build)=scripts scripts/unifdef
+- $(Q)for arch in $(HDRARCHES); do \
+- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch ;\
+- done
+-
+-PHONY += headers_install
+-headers_install: include/linux/version.h scripts_basic FORCE
+- @if [ ! -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
+- echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \
+- exit 1 ; fi
+- $(Q)$(MAKE) $(build)=scripts scripts/unifdef
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include
+-
+-PHONY += headers_check_all
+-headers_check_all: headers_install_all
+- $(Q)for arch in $(HDRARCHES); do \
+- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch HDRCHECK=1 ;\
+- done
+-
+-PHONY += headers_check
+-headers_check: headers_install
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1
+-
+-# ---------------------------------------------------------------------------
+-# Modules
+-
+-ifdef CONFIG_MODULES
+-
+-# By default, build modules as well
+-
+-all: modules
+-
+-# Build modules
+-
+-PHONY += modules
+-modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
+- @echo ' Building modules, stage 2.';
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-
+-# Target to prepare building external modules
+-PHONY += modules_prepare
+-modules_prepare: prepare scripts
+-
+-# Target to install modules
+-PHONY += modules_install
+-modules_install: _modinst_ _modinst_post
+-
+-PHONY += _modinst_
+-_modinst_:
+- @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \
+- echo "Warning: you may need to install module-init-tools"; \
+- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\
+- sleep 1; \
+- fi
+- @rm -rf $(MODLIB)/kernel
+- @rm -f $(MODLIB)/source
+- @mkdir -p $(MODLIB)/kernel
+- @ln -s $(srctree) $(MODLIB)/source
+- @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \
+- rm -f $(MODLIB)/build ; \
+- ln -s $(objtree) $(MODLIB)/build ; \
+- fi
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
+-
+-# If System.map exists, run depmod. This deliberately does not have a
+-# dependency on System.map since that would run the dependency tree on
+-# vmlinux. This depmod is only for convenience to give the initial
+-# boot a modules.dep even before / is mounted read-write. However the
+-# boot script depmod is the master version.
+-ifeq "$(strip $(INSTALL_MOD_PATH))" ""
+-depmod_opts :=
+-else
+-depmod_opts := -b $(INSTALL_MOD_PATH) -r
+-endif
+-PHONY += _modinst_post
+-_modinst_post: _modinst_
+- if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+-
+-else # CONFIG_MODULES
+-
+-# Modules not configured
+-# ---------------------------------------------------------------------------
+-
+-modules modules_install: FORCE
+- @echo
+- @echo "The present kernel configuration has modules disabled."
+- @echo "Type 'make config' and enable loadable module support."
+- @echo "Then build a kernel with module support enabled."
+- @echo
+- @exit 1
+-
+-endif # CONFIG_MODULES
+-
+-###
+-# Cleaning is done on three levels.
+-# make clean Delete most generated files
+-# Leave enough to build external modules
+-# make mrproper Delete the current configuration, and all generated files
+-# make distclean Remove editor backup files, patch leftover files and the like
+-
+-# Directories & files removed with 'make clean'
+-CLEAN_DIRS += $(MODVERDIR)
+-CLEAN_FILES += vmlinux System.map \
+- .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map
+-
+-# Directories & files removed with 'make mrproper'
+-MRPROPER_DIRS += include/config include2 usr/include
+-MRPROPER_FILES += .config .config.old include/asm .version .old_version \
+- include/linux/autoconf.h include/linux/version.h \
+- include/linux/utsrelease.h \
+- Module.symvers tags TAGS cscope*
+-
+-# clean - Delete most, but leave enough to build external modules
+-#
+-clean: rm-dirs := $(CLEAN_DIRS)
+-clean: rm-files := $(CLEAN_FILES)
+-clean-dirs := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs))
+-
+-PHONY += $(clean-dirs) clean archclean
+-$(clean-dirs):
+- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
+-
+-clean: archclean $(clean-dirs)
+- $(call cmd,rmdirs)
+- $(call cmd,rmfiles)
+- @find . $(RCS_FIND_IGNORE) \
+- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
+- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
+- -o -name '*.symtypes' \) \
+- -type f -print | xargs rm -f
+-
+-# mrproper - Delete all generated files, including .config
+-#
+-mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS))
+-mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
+-mrproper-dirs := $(addprefix _mrproper_,Documentation/DocBook scripts)
+-
+-PHONY += $(mrproper-dirs) mrproper archmrproper
+-$(mrproper-dirs):
+- $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
+-
+-mrproper: clean archmrproper $(mrproper-dirs)
+- $(call cmd,rmdirs)
+- $(call cmd,rmfiles)
+-
+-# distclean
+-#
+-PHONY += distclean
+-
+-distclean: mrproper
+- @find $(srctree) $(RCS_FIND_IGNORE) \
+- \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+- -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+- -o -name '.*.rej' -o -size 0 \
+- -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \
+- -type f -print | xargs rm -f
+-
+-
+-# Packaging of the kernel to various formats
+-# ---------------------------------------------------------------------------
+-# rpm target kept for backward compatibility
+-package-dir := $(srctree)/scripts/package
+-
+-%pkg: include/config/kernel.release FORCE
+- $(Q)$(MAKE) $(build)=$(package-dir) $@
+-rpm: include/config/kernel.release FORCE
+- $(Q)$(MAKE) $(build)=$(package-dir) $@
+-
+-
+-# Brief documentation of the typical targets used
+-# ---------------------------------------------------------------------------
+-
+-boards := $(wildcard $(srctree)/arch/$(ARCH)/configs/*_defconfig)
+-boards := $(notdir $(boards))
+-
+-help:
+- @echo 'Cleaning targets:'
+- @echo ' clean - Remove most generated files but keep the config and'
+- @echo ' enough build support to build external modules'
+- @echo ' mrproper - Remove all generated files + config + various backup files'
+- @echo ' distclean - mrproper + remove editor backup and patch files'
+- @echo ''
+- @echo 'Configuration targets:'
+- @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help
+- @echo ''
+- @echo 'Other generic targets:'
+- @echo ' all - Build all targets marked with [*]'
+- @echo '* vmlinux - Build the bare kernel'
+- @echo '* modules - Build all modules'
+- @echo ' modules_install - Install all modules to INSTALL_MOD_PATH (default: /)'
+- @echo ' dir/ - Build all files in dir and below'
+- @echo ' dir/file.[ois] - Build specified target only'
+- @echo ' dir/file.ko - Build module including final link'
+- @echo ' rpm - Build a kernel as an RPM package'
+- @echo ' tags/TAGS - Generate tags file for editors'
+- @echo ' cscope - Generate cscope index'
+- @echo ' kernelrelease - Output the release version string'
+- @echo ' kernelversion - Output the version stored in Makefile'
+- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
+- echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \
+- echo ' (default: $(INSTALL_HDR_PATH))'; \
+- fi
+- @echo ''
+- @echo 'Static analysers:'
+- @echo ' checkstack - Generate a list of stack hogs'
+- @echo ' namespacecheck - Name space analysis on compiled kernel'
+- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \
+- echo ' headers_check - Sanity check on exported headers'; \
+- fi
+- @echo ''
+- @echo 'Kernel packaging:'
+- @$(MAKE) $(build)=$(package-dir) help
+- @echo ''
+- @echo 'Documentation targets:'
+- @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp
+- @echo ''
+- @echo 'Architecture specific targets ($(ARCH)):'
+- @$(if $(archhelp),$(archhelp),\
+- echo ' No architecture specific help defined for $(ARCH)')
+- @echo ''
+- @$(if $(boards), \
+- $(foreach b, $(boards), \
+- printf " %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \
+- echo '')
+-
+- @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
+- @echo ' make V=2 [targets] 2 => give reason for rebuild of target'
+- @echo ' make O=dir [targets] Locate all output files in "dir", including .config'
+- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
+- @echo ' make C=2 [targets] Force check of all c source with $$CHECK'
+- @echo ''
+- @echo 'Execute "make" or "make all" to build all targets marked with [*] '
+- @echo 'For further info see the ./README file'
+-
+-
+-# Documentation targets
+-# ---------------------------------------------------------------------------
+-%docs: scripts_basic FORCE
+- $(Q)$(MAKE) $(build)=Documentation/DocBook $@
+-
+-else # KBUILD_EXTMOD
+-
+-###
+-# External module support.
+-# When building external modules, the kernel used as the basis is
+-# considered read-only: no consistency checks are made, and the make
+-# system is not used on the basis kernel. If updates are required
+-# in the basis kernel, ordinary make commands (without M=...) must
+-# be used.
+-#
+-# The following are the only valid targets when building external
+-# modules.
+-# make M=dir clean Delete all automatically generated files
+-# make M=dir modules Make all modules in specified dir
+-# make M=dir Same as 'make M=dir modules'
+-# make M=dir modules_install
+-# Install the modules built in the module directory
+-# Assumes install directory is already created
+-
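As a concrete illustration of what these targets operate on, a minimal
external module directory could hold a one-line Kbuild file ('obj-m :=
hello.o') plus a trivial source file; a hypothetical sketch, not part of
this patch:

    /* hello.c - minimal external module (illustrative only) */
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/init.h>

    static int __init hello_init(void)
    {
            printk(KERN_INFO "hello: loaded\n");
            return 0;
    }

    static void __exit hello_exit(void)
    {
            printk(KERN_INFO "hello: unloaded\n");
    }

    module_init(hello_init);
    module_exit(hello_exit);
    MODULE_LICENSE("GPL");

Built with 'make -C path/to/kernel/src M=$PWD modules', matching the target
list above.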
+-# We are always building modules
+-KBUILD_MODULES := 1
+-PHONY += crmodverdir
+-crmodverdir:
+- $(Q)mkdir -p $(MODVERDIR)
+- $(Q)rm -f $(MODVERDIR)/*
+-
+-PHONY += $(objtree)/Module.symvers
+-$(objtree)/Module.symvers:
+- @test -e $(objtree)/Module.symvers || ( \
+- echo; \
+- echo " WARNING: Symbol version dump $(objtree)/Module.symvers"; \
+- echo " is missing; modules will have no dependencies and modversions."; \
+- echo )
+-
+-module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
+-PHONY += $(module-dirs) modules
+-$(module-dirs): crmodverdir $(objtree)/Module.symvers
+- $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
+-
+-modules: $(module-dirs)
+- @echo ' Building modules, stage 2.';
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-PHONY += modules_install
+-modules_install: _emodinst_ _emodinst_post
+-
+-install-dir := $(if $(INSTALL_MOD_DIR),$(INSTALL_MOD_DIR),extra)
+-PHONY += _emodinst_
+-_emodinst_:
+- $(Q)mkdir -p $(MODLIB)/$(install-dir)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
+-
+-# Run depmod only if we have System.map and depmod is executable
+-quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
+- cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \
+- $(DEPMOD) -ae -F System.map \
+- $(if $(strip $(INSTALL_MOD_PATH)), \
+- -b $(INSTALL_MOD_PATH) -r) \
+- $(KERNELRELEASE); \
+- fi
+-
+-PHONY += _emodinst_post
+-_emodinst_post: _emodinst_
+- $(call cmd,depmod)
+-
+-clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD))
+-
+-PHONY += $(clean-dirs) clean
+-$(clean-dirs):
+- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
+-
+-clean: rm-dirs := $(MODVERDIR)
+-clean: $(clean-dirs)
+- $(call cmd,rmdirs)
+- @find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \
+- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
+- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \
+- -type f -print | xargs rm -f
+-
+-help:
+- @echo ' Building external modules.'
+- @echo ' Syntax: make -C path/to/kernel/src M=$$PWD target'
+- @echo ''
+- @echo ' modules - default target, build the module(s)'
+- @echo ' modules_install - install the module'
+- @echo ' clean - remove generated files in module directory only'
+- @echo ''
+-
+-# Dummies...
+-PHONY += prepare scripts
+-prepare: ;
+-scripts: ;
+-endif # KBUILD_EXTMOD
+-
+-# Generate tags for editors
+-# ---------------------------------------------------------------------------
+-
+-#We want __srctree to totally vanish when KBUILD_OUTPUT is not set
+-#(which is the most common case IMHO) to avoid unneeded clutter in the big tags file.
+-#Adding $(srctree) adds about 20M on i386 to the size of the output file!
+-
+-ifeq ($(src),$(obj))
+-__srctree =
+-else
+-__srctree = $(srctree)/
+-endif
+-
+-ifeq ($(ALLSOURCE_ARCHS),)
+-ifeq ($(ARCH),um)
+-ALLINCLUDE_ARCHS := $(ARCH) $(SUBARCH)
+-else
+-ALLINCLUDE_ARCHS := $(ARCH)
+-endif
+-else
+-#Allow user to specify only ALLSOURCE_ARCHS on the command line, keeping existing behaviour.
+-ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS)
+-endif
+-
+-ALLSOURCE_ARCHS := $(ARCH)
+-
+-define find-sources
+- ( for ARCH in $(ALLSOURCE_ARCHS) ; do \
+- find $(__srctree)arch/$${ARCH} $(RCS_FIND_IGNORE) \
+- -name $1 -print; \
+- done ; \
+- find $(__srctree)security/selinux/include $(RCS_FIND_IGNORE) \
+- -name $1 -print; \
+- find $(__srctree)include $(RCS_FIND_IGNORE) \
+- \( -name config -o -name 'asm-*' \) -prune \
+- -o -name $1 -print; \
+- for ARCH in $(ALLINCLUDE_ARCHS) ; do \
+- find $(__srctree)include/asm-$${ARCH} $(RCS_FIND_IGNORE) \
+- -name $1 -print; \
+- done ; \
+- find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \
+- -name $1 -print; \
+- find $(__srctree) $(RCS_FIND_IGNORE) \
+- \( -name include -o -name arch \) -prune -o \
+- -name $1 -print; \
+- )
+-endef
+-
+-define all-sources
+- $(call find-sources,'*.[chS]')
+-endef
+-define all-kconfigs
+- $(call find-sources,'Kconfig*')
+-endef
+-define all-defconfigs
+- $(call find-sources,'defconfig')
+-endef
+-
+-define xtags
+- if $1 --version 2>&1 | grep -iq exuberant; then \
+- $(all-sources) | xargs $1 -a \
+- -I __initdata,__exitdata,__acquires,__releases \
+- -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
+- --extra=+f --c-kinds=+px \
+- --regex-asm='/ENTRY\(([^)]*)\).*/\1/'; \
+- $(all-kconfigs) | xargs $1 -a \
+- --langdef=kconfig \
+- --language-force=kconfig \
+- --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \
+- $(all-defconfigs) | xargs -r $1 -a \
+- --langdef=dotconfig \
+- --language-force=dotconfig \
+- --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'; \
+- elif $1 --version 2>&1 | grep -iq emacs; then \
+- $(all-sources) | xargs $1 -a; \
+- $(all-kconfigs) | xargs $1 -a \
+- --regex='/^[ \t]*config[ \t]+\([a-zA-Z0-9_]+\)/\1/'; \
+- $(all-defconfigs) | xargs -r $1 -a \
+- --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \
+- else \
+- $(all-sources) | xargs $1 -a; \
+- fi
+-endef
+-
+-quiet_cmd_cscope-file = FILELST cscope.files
+- cmd_cscope-file = (echo \-k; echo \-q; $(all-sources)) > cscope.files
+-
+-quiet_cmd_cscope = MAKE cscope.out
+- cmd_cscope = cscope -b
+-
+-cscope: FORCE
+- $(call cmd,cscope-file)
+- $(call cmd,cscope)
+-
+-quiet_cmd_TAGS = MAKE $@
+-define cmd_TAGS
+- rm -f $@; \
+- $(call xtags,etags)
+-endef
+-
+-TAGS: FORCE
+- $(call cmd,TAGS)
+-
+-quiet_cmd_tags = MAKE $@
+-define cmd_tags
+- rm -f $@; \
+- $(call xtags,ctags)
+-endef
+-
+-tags: FORCE
+- $(call cmd,tags)
+-
+-
+-# Scripts to check various things for consistency
+-# ---------------------------------------------------------------------------
+-
+-includecheck:
+- find * $(RCS_FIND_IGNORE) \
+- -name '*.[hcS]' -type f -print | sort \
+- | xargs $(PERL) -w scripts/checkincludes.pl
+-
+-versioncheck:
+- find * $(RCS_FIND_IGNORE) \
+- -name '*.[hcS]' -type f -print | sort \
+- | xargs $(PERL) -w scripts/checkversion.pl
+-
+-namespacecheck:
+- $(PERL) $(srctree)/scripts/namespace.pl
+-
+-endif #ifeq ($(config-targets),1)
+-endif #ifeq ($(mixed-targets),1)
+-
+-PHONY += checkstack kernelrelease kernelversion
+-
+-# UML needs a little special treatment here. It wants to use the host
+-# toolchain, so needs $(SUBARCH) passed to checkstack.pl. Everyone
+-# else wants $(ARCH), including people doing cross-builds, which means
+-# that $(SUBARCH) doesn't work here.
+-ifeq ($(ARCH), um)
+-CHECKSTACK_ARCH := $(SUBARCH)
+-else
+-CHECKSTACK_ARCH := $(ARCH)
+-endif
+-checkstack:
+- $(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
+- $(PERL) $(src)/scripts/checkstack.pl $(CHECKSTACK_ARCH)
+-
+-kernelrelease:
+- $(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \
+- $(error kernelrelease not valid - run 'make prepare' to update it))
+-kernelversion:
+- @echo $(KERNELVERSION)
+-
+-# Single targets
+-# ---------------------------------------------------------------------------
+-# Single targets are compatible with:
+-# - build with mixed source and output
+-# - build with separate output dir 'make O=...'
+-# - external modules
+-#
+-# target-dir => where to store outputfile
+-# build-dir => directory in kernel source tree to use
+-
+-ifeq ($(KBUILD_EXTMOD),)
+- build-dir = $(patsubst %/,%,$(dir $@))
+- target-dir = $(dir $@)
+-else
+- zap-slash=$(filter-out .,$(patsubst %/,%,$(dir $@)))
+- build-dir = $(KBUILD_EXTMOD)$(if $(zap-slash),/$(zap-slash))
+- target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@))
+-endif
+-
+-%.s: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.i: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.o: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.lst: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.s: %.S prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.o: %.S prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.symtypes: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
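For instance, assuming the source files exist, typical single-target
invocations look like this (paths are illustrative):

    make fs/ext2/super.o      # compile a single object file
    make fs/ext2/super.i      # emit preprocessed source
    make fs/ext2/super.s      # emit generated assembly
    make fs/ext2/super.lst    # emit a mixed C/assembly listing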
+-
+-# Modules
+-/ %/: prepare scripts FORCE
+- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
+- $(build)=$(build-dir)
+-%.ko: prepare scripts FORCE
+- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
+- $(build)=$(build-dir) $(@:.ko=.o)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-# FIXME Should go into a make.lib or something
+-# ===========================================================================
+-
+-quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs)))
+- cmd_rmdirs = rm -rf $(rm-dirs)
+-
+-quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)))
+- cmd_rmfiles = rm -f $(rm-files)
+-
+-
+-a_flags = -Wp,-MD,$(depfile) $(AFLAGS) $(AFLAGS_KERNEL) \
+- $(NOSTDINC_FLAGS) $(CPPFLAGS) \
+- $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o)
+-
+-quiet_cmd_as_o_S = AS $@
+-cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+-
+-# read all saved command lines
+-
+-targets := $(wildcard $(sort $(targets)))
+-cmd_files := $(wildcard .*.cmd $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+-
+-ifneq ($(cmd_files),)
+- $(cmd_files): ; # Do not try to update included dependency files
+- include $(cmd_files)
+-endif
+-
+-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
+-# Usage:
+-# $(Q)$(MAKE) $(clean)=dir
+-clean := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.clean obj
+-
+-endif # skip-makefile
+-
+-PHONY += FORCE
+-FORCE:
+-
+-# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes.
+-Makefile: ;
+-
+-# Declare the contents of the .PHONY variable as phony. We keep that
+-# information in a variable so we can use it in if_changed and friends.
+-.PHONY: $(PHONY)
+diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-591/arch/arm/Kconfig
+--- linux-2.6.22-570/arch/arm/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/arm/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -1034,6 +1034,8 @@
+
+ source "drivers/rtc/Kconfig"
+
++source "drivers/dma/Kconfig"
++
+ endmenu
+
+ source "fs/Kconfig"
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/Makefile linux-2.6.22-591/arch/arm/kernel/Makefile
+--- linux-2.6.22-570/arch/arm/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -20,6 +20,7 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+ obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+
+ obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o
+ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,32 @@
++/*
++ * arch/arm/kernel/kgdb-jmp.S
++ *
++ * Trivial setjmp and longjmp procedures to support bus error recovery
++ * which may occur during kgdb memory read/write operations.
++ *
++ * Author: MontaVista Software, Inc. <source@mvista.com>
++ * source@mvista.com
++ *
++ * 2002-2005 (c) MontaVista Software, Inc. This file is licensed under the
++ * terms of the GNU General Public License version 2. This program as licensed
++ * "as is" without any warranty of any kind, whether express or implied.
++ */
++#include <linux/linkage.h>
++
++ENTRY (kgdb_fault_setjmp)
++ /* Save registers */
++ stmia r0, {r0-r14}
++ str lr,[r0, #60]
++ mrs r1,cpsr
++ str r1,[r0,#64]
++ ldr r1,[r0,#4]
++ mov r0, #0
++ mov pc,lr
++
++ENTRY (kgdb_fault_longjmp)
++ /* Restore registers */
++ mov r1,#1
++ str r1,[r0]
++ ldr r1,[r0, #64]
++ msr spsr,r1
++ ldmia r0,{r0-pc}^
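A hedged sketch of the pattern these two helpers enable: the kgdb core arms
the jump buffer before touching possibly-unmapped memory, and the fault
handler longjmps back into the saved context. The accessor below is
illustrative, not the patchset's exact core code:

    #include <linux/errno.h>

    extern int kgdb_fault_setjmp(unsigned long *jmp_buf);
    extern void kgdb_fault_longjmp(unsigned long *jmp_buf);

    static unsigned long fault_jmp_buf[32];  /* r0-r14, lr, cpsr fit here */

    static int kgdb_read_mem(const char *addr, char *buf, int len)
    {
            if (kgdb_fault_setjmp(fault_jmp_buf) != 0)
                    return -EINVAL;   /* a bus error longjmp'ed us back */
            while (len--)
                    *buf++ = *addr++;
            return 0;
    }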
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb.c linux-2.6.22-591/arch/arm/kernel/kgdb.c
+--- linux-2.6.22-570/arch/arm/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,202 @@
++/*
++ * arch/arm/kernel/kgdb.c
++ *
++ * ARM KGDB support
++ *
++ * Copyright (c) 2002-2004 MontaVista Software, Inc
++ *
++ * Authors: George Davis <davis_g@mvista.com>
++ * Deepak Saxena <dsaxena@plexity.net>
++ */
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/signal.h>
++#include <linux/sched.h>
++#include <linux/mm.h>
++#include <linux/spinlock.h>
++#include <linux/personality.h>
++#include <linux/ptrace.h>
++#include <linux/elf.h>
++#include <linux/interrupt.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/atomic.h>
++#include <asm/io.h>
++#include <asm/pgtable.h>
++#include <asm/system.h>
++#include <asm/uaccess.h>
++#include <asm/unistd.h>
++#include <asm/ptrace.h>
++#include <asm/traps.h>
++
++/* Make a local copy of the registers passed into the handler (bletch) */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
++{
++ int regno;
++
++ /* Initialize all to zero (??) */
++ for (regno = 0; regno < GDB_MAX_REGS; regno++)
++ gdb_regs[regno] = 0;
++
++ gdb_regs[_R0] = kernel_regs->ARM_r0;
++ gdb_regs[_R1] = kernel_regs->ARM_r1;
++ gdb_regs[_R2] = kernel_regs->ARM_r2;
++ gdb_regs[_R3] = kernel_regs->ARM_r3;
++ gdb_regs[_R4] = kernel_regs->ARM_r4;
++ gdb_regs[_R5] = kernel_regs->ARM_r5;
++ gdb_regs[_R6] = kernel_regs->ARM_r6;
++ gdb_regs[_R7] = kernel_regs->ARM_r7;
++ gdb_regs[_R8] = kernel_regs->ARM_r8;
++ gdb_regs[_R9] = kernel_regs->ARM_r9;
++ gdb_regs[_R10] = kernel_regs->ARM_r10;
++ gdb_regs[_FP] = kernel_regs->ARM_fp;
++ gdb_regs[_IP] = kernel_regs->ARM_ip;
++ gdb_regs[_SP] = kernel_regs->ARM_sp;
++ gdb_regs[_LR] = kernel_regs->ARM_lr;
++ gdb_regs[_PC] = kernel_regs->ARM_pc;
++ gdb_regs[_CPSR] = kernel_regs->ARM_cpsr;
++}
++
++/* Copy local gdb registers back to kgdb regs, for later copy to kernel */
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
++{
++ kernel_regs->ARM_r0 = gdb_regs[_R0];
++ kernel_regs->ARM_r1 = gdb_regs[_R1];
++ kernel_regs->ARM_r2 = gdb_regs[_R2];
++ kernel_regs->ARM_r3 = gdb_regs[_R3];
++ kernel_regs->ARM_r4 = gdb_regs[_R4];
++ kernel_regs->ARM_r5 = gdb_regs[_R5];
++ kernel_regs->ARM_r6 = gdb_regs[_R6];
++ kernel_regs->ARM_r7 = gdb_regs[_R7];
++ kernel_regs->ARM_r8 = gdb_regs[_R8];
++ kernel_regs->ARM_r9 = gdb_regs[_R9];
++ kernel_regs->ARM_r10 = gdb_regs[_R10];
++ kernel_regs->ARM_fp = gdb_regs[_FP];
++ kernel_regs->ARM_ip = gdb_regs[_IP];
++ kernel_regs->ARM_sp = gdb_regs[_SP];
++ kernel_regs->ARM_lr = gdb_regs[_LR];
++ kernel_regs->ARM_pc = gdb_regs[_PC];
++ kernel_regs->ARM_cpsr = gdb_regs[_CPSR];
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
++ struct task_struct *task)
++{
++ int regno;
++ struct pt_regs *thread_regs;
++
++ /* Just making sure... */
++ if (task == NULL)
++ return;
++
++ /* Initialize to zero */
++ for (regno = 0; regno < GDB_MAX_REGS; regno++)
++ gdb_regs[regno] = 0;
++
++ /* Otherwise, we have only some registers from switch_to() */
++ thread_regs = task_pt_regs(task);
++ gdb_regs[_R0] = thread_regs->ARM_r0; /* Not really valid? */
++ gdb_regs[_R1] = thread_regs->ARM_r1; /* " " */
++ gdb_regs[_R2] = thread_regs->ARM_r2; /* " " */
++ gdb_regs[_R3] = thread_regs->ARM_r3; /* " " */
++ gdb_regs[_R4] = thread_regs->ARM_r4;
++ gdb_regs[_R5] = thread_regs->ARM_r5;
++ gdb_regs[_R6] = thread_regs->ARM_r6;
++ gdb_regs[_R7] = thread_regs->ARM_r7;
++ gdb_regs[_R8] = thread_regs->ARM_r8;
++ gdb_regs[_R9] = thread_regs->ARM_r9;
++ gdb_regs[_R10] = thread_regs->ARM_r10;
++ gdb_regs[_FP] = thread_regs->ARM_fp;
++ gdb_regs[_IP] = thread_regs->ARM_ip;
++ gdb_regs[_SP] = thread_regs->ARM_sp;
++ gdb_regs[_LR] = thread_regs->ARM_lr;
++ gdb_regs[_PC] = thread_regs->ARM_pc;
++ gdb_regs[_CPSR] = thread_regs->ARM_cpsr;
++}
++
++static int compiled_break;
++
++int kgdb_arch_handle_exception(int exception_vector, int signo,
++ int err_code, char *remcom_in_buffer,
++ char *remcom_out_buffer,
++ struct pt_regs *linux_regs)
++{
++ long addr;
++ char *ptr;
++
++ switch (remcom_in_buffer[0]) {
++ case 'D':
++ case 'k':
++ case 'c':
++ kgdb_contthread = NULL;
++
++ /*
++ * Try to read optional parameter, pc unchanged if no parm.
++ * If this was a compiled breakpoint, we need to move
++ * to the next instruction or we will just breakpoint
++ * over and over again.
++ */
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &addr)) {
++ linux_regs->ARM_pc = addr;
++ } else if (compiled_break == 1) {
++ linux_regs->ARM_pc += 4;
++ }
++
++ compiled_break = 0;
++
++ return 0;
++ }
++
++ return -1;
++}
++
++static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++ kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++ return 0;
++}
++
++static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++ compiled_break = 1;
++ kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++ return 0;
++}
++
++static struct undef_hook kgdb_brkpt_hook = {
++ .instr_mask = 0xffffffff,
++ .instr_val = KGDB_BREAKINST,
++ .fn = kgdb_brk_fn
++};
++
++static struct undef_hook kgdb_compiled_brkpt_hook = {
++ .instr_mask = 0xffffffff,
++ .instr_val = KGDB_COMPILED_BREAK,
++ .fn = kgdb_compiled_brk_fn
++};
++
++/*
++ * Register our undef instruction hooks with ARM undef core.
++ * We register a hook specifically looking for the KGDB break instruction,
++ * and we handle the normal undef case within the do_undefinstr
++ * handler.
++ */
++int kgdb_arch_init(void)
++{
++ register_undef_hook(&kgdb_brkpt_hook);
++ register_undef_hook(&kgdb_compiled_brkpt_hook);
++
++ return 0;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifndef __ARMEB__
++ .gdb_bpt_instr = {0xfe, 0xde, 0xff, 0xe7}
++#else
++ .gdb_bpt_instr = {0xe7, 0xff, 0xde, 0xfe}
++#endif
++};
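For context on gdb_bpt_instr: the generic kgdb core plants a software
breakpoint by saving the original instruction word and copying these four
bytes over it. A simplified, hypothetical sketch of that core-side logic
(the real code lives in the generic kgdb patch, not in this file):

    #include <linux/string.h>
    #include <linux/kgdb.h>         /* arch_kgdb_ops */
    #include <asm/cacheflush.h>

    static int set_swbreak(unsigned long addr, unsigned char *saved)
    {
            memcpy(saved, (void *)addr, 4);     /* save original instruction */
            memcpy((void *)addr, arch_kgdb_ops.gdb_bpt_instr, 4);
            flush_icache_range(addr, addr + 4); /* keep the I-cache coherent */
            return 0;
    }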
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/setup.c linux-2.6.22-591/arch/arm/kernel/setup.c
+--- linux-2.6.22-570/arch/arm/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -832,6 +832,11 @@
+ conswitchp = &dummy_con;
+ #endif
+ #endif
++
++#if defined(CONFIG_KGDB)
++ extern void __init early_trap_init(void);
++ early_trap_init();
++#endif
+ }
+
+
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/traps.c linux-2.6.22-591/arch/arm/kernel/traps.c
+--- linux-2.6.22-570/arch/arm/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -301,6 +301,7 @@
+ unsigned int instr;
+ struct undef_hook *hook;
+ siginfo_t info;
++ mm_segment_t fs;
+ void __user *pc;
+ unsigned long flags;
+
+@@ -311,6 +312,8 @@
+ */
+ regs->ARM_pc -= correction;
+
++ fs = get_fs();
++ set_fs(KERNEL_DS);
+ pc = (void __user *)instruction_pointer(regs);
+
+ if (processor_mode(regs) == SVC_MODE) {
+@@ -320,6 +323,7 @@
+ } else {
+ get_user(instr, (u32 __user *)pc);
+ }
++ set_fs(fs);
+
+ spin_lock_irqsave(&undef_lock, flags);
+ list_for_each_entry(hook, &undef_hook, node) {
+@@ -707,6 +711,13 @@
+
+ void __init trap_init(void)
+ {
++#if defined(CONFIG_KGDB)
++ return;
++}
++
++void __init early_trap_init(void)
++{
++#endif
+ unsigned long vectors = CONFIG_VECTORS_BASE;
+ extern char __stubs_start[], __stubs_end[];
+ extern char __vectors_start[], __vectors_end[];
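The trap_init() hunk above is a preprocessor splice: with CONFIG_KGDB
enabled, the function expands to roughly the following shape (illustrative
expansion, not code from the patch):

    void __init trap_init(void)
    {
            return;   /* vectors were already installed by early_trap_init() */
    }

    void __init early_trap_init(void)
    {
            unsigned long vectors = CONFIG_VECTORS_BASE;
            /* ... the original body of trap_init() follows ... */
    }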
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c
+--- linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <asm/hardware.h>
+ #include <asm/irq.h>
+ #include <asm/io.h>
++#include <asm/hardware/iop_adma.h>
+
+ #define IOP13XX_UART_XTAL 33334000
+ #define IOP13XX_SETUP_DEBUG 0
+@@ -236,19 +237,143 @@
+ }
+ #endif
+
++/* ADMA Channels */
++static struct resource iop13xx_adma_0_resources[] = {
++ [0] = {
++ .start = IOP13XX_ADMA_PHYS_BASE(0),
++ .end = IOP13XX_ADMA_UPPER_PA(0),
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_IOP13XX_ADMA0_EOT,
++ .end = IRQ_IOP13XX_ADMA0_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_IOP13XX_ADMA0_EOC,
++ .end = IRQ_IOP13XX_ADMA0_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_IOP13XX_ADMA0_ERR,
++ .end = IRQ_IOP13XX_ADMA0_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++static struct resource iop13xx_adma_1_resources[] = {
++ [0] = {
++ .start = IOP13XX_ADMA_PHYS_BASE(1),
++ .end = IOP13XX_ADMA_UPPER_PA(1),
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_IOP13XX_ADMA1_EOT,
++ .end = IRQ_IOP13XX_ADMA1_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_IOP13XX_ADMA1_EOC,
++ .end = IRQ_IOP13XX_ADMA1_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_IOP13XX_ADMA1_ERR,
++ .end = IRQ_IOP13XX_ADMA1_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++static struct resource iop13xx_adma_2_resources[] = {
++ [0] = {
++ .start = IOP13XX_ADMA_PHYS_BASE(2),
++ .end = IOP13XX_ADMA_UPPER_PA(2),
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_IOP13XX_ADMA2_EOT,
++ .end = IRQ_IOP13XX_ADMA2_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_IOP13XX_ADMA2_EOC,
++ .end = IRQ_IOP13XX_ADMA2_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_IOP13XX_ADMA2_ERR,
++ .end = IRQ_IOP13XX_ADMA2_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK;
++static struct iop_adma_platform_data iop13xx_adma_0_data = {
++ .hw_id = 0,
++ .pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop13xx_adma_1_data = {
++ .hw_id = 1,
++ .pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop13xx_adma_2_data = {
++ .hw_id = 2,
++ .pool_size = PAGE_SIZE,
++};
++
++/* The ids are fixed up later in iop13xx_platform_init */
++static struct platform_device iop13xx_adma_0_channel = {
++ .name = "iop-adma",
++ .id = 0,
++ .num_resources = 4,
++ .resource = iop13xx_adma_0_resources,
++ .dev = {
++ .dma_mask = &iop13xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop13xx_adma_0_data,
++ },
++};
++
++static struct platform_device iop13xx_adma_1_channel = {
++ .name = "iop-adma",
++ .id = 0,
++ .num_resources = 4,
++ .resource = iop13xx_adma_1_resources,
++ .dev = {
++ .dma_mask = &iop13xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop13xx_adma_1_data,
++ },
++};
++
++static struct platform_device iop13xx_adma_2_channel = {
++ .name = "iop-adma",
++ .id = 0,
++ .num_resources = 4,
++ .resource = iop13xx_adma_2_resources,
++ .dev = {
++ .dma_mask = &iop13xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop13xx_adma_2_data,
++ },
++};
++
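These channel devices are consumed by the iop-adma driver's probe; a hedged
sketch of how a probe might pull out the pieces registered above (the
function name and error handling are illustrative):

    #include <linux/errno.h>
    #include <linux/platform_device.h>
    #include <asm/hardware/iop_adma.h>

    static int iop_adma_probe(struct platform_device *pdev)
    {
            struct resource *res =
                    platform_get_resource(pdev, IORESOURCE_MEM, 0);
            int irq_eot = platform_get_irq(pdev, 0);  /* first IRQ: EOT */
            struct iop_adma_platform_data *pdata = pdev->dev.platform_data;

            if (!res || irq_eot < 0 || !pdata)
                    return -ENODEV;
            /* ioremap res, request the three IRQs, size the descriptor
             * pool from pdata->pool_size ... */
            return 0;
    }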
+ void __init iop13xx_map_io(void)
+ {
+ /* Initialize the Static Page Table maps */
+ iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc));
+ }
+
+-static int init_uart = 0;
+-static int init_i2c = 0;
++static int init_uart;
++static int init_i2c;
++static int init_adma;
+
+ void __init iop13xx_platform_init(void)
+ {
+ int i;
+- u32 uart_idx, i2c_idx, plat_idx;
++ u32 uart_idx, i2c_idx, adma_idx, plat_idx;
+ struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES];
+
+ /* set the bases so we can read the device id */
+@@ -294,6 +419,12 @@
+ }
+ }
+
++ if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) {
++ init_adma |= IOP13XX_INIT_ADMA_0;
++ init_adma |= IOP13XX_INIT_ADMA_1;
++ init_adma |= IOP13XX_INIT_ADMA_2;
++ }
++
+ plat_idx = 0;
+ uart_idx = 0;
+ i2c_idx = 0;
+@@ -332,6 +463,56 @@
+ }
+ }
+
++ /* initialize adma channel ids and capabilities */
++ adma_idx = 0;
++ for (i = 0; i < IQ81340_NUM_ADMA; i++) {
++ struct iop_adma_platform_data *plat_data;
++ if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG)
++ printk(KERN_INFO
++ "Adding adma%d to platform device list\n", i);
++ switch (init_adma & (1 << i)) {
++ case IOP13XX_INIT_ADMA_0:
++ iop13xx_adma_0_channel.id = adma_idx++;
++ iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel;
++ plat_data = &iop13xx_adma_0_data;
++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++ dma_cap_set(DMA_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++ break;
++ case IOP13XX_INIT_ADMA_1:
++ iop13xx_adma_1_channel.id = adma_idx++;
++ iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel;
++ plat_data = &iop13xx_adma_1_data;
++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++ dma_cap_set(DMA_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++ break;
++ case IOP13XX_INIT_ADMA_2:
++ iop13xx_adma_2_channel.id = adma_idx++;
++ iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel;
++ plat_data = &iop13xx_adma_2_data;
++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++ dma_cap_set(DMA_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++ dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
++ dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
++ dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
++ break;
++ }
++ }
++
+ #ifdef CONFIG_MTD_PHYSMAP
+ iq8134x_flash_resource.end = iq8134x_flash_resource.start +
+ iq8134x_probe_flash_size() - 1;
+@@ -399,5 +580,35 @@
+ return 1;
+ }
+
++static int __init iop13xx_init_adma_setup(char *str)
++{
++ if (str) {
++ while (*str != '\0') {
++ switch (*str) {
++ case '0':
++ init_adma |= IOP13XX_INIT_ADMA_0;
++ break;
++ case '1':
++ init_adma |= IOP13XX_INIT_ADMA_1;
++ break;
++ case '2':
++ init_adma |= IOP13XX_INIT_ADMA_2;
++ break;
++ case ',':
++ case '=':
++ break;
++ default:
++ PRINTK("\"iop13xx_init_adma\" malformed"
++ " at character: \'%c\'", *str);
++ *(str + 1) = '\0';
++ init_adma = IOP13XX_INIT_ADMA_DEFAULT;
++ }
++ str++;
++ }
++ }
++ return 1;
++}
++
++__setup("iop13xx_init_adma", iop13xx_init_adma_setup);
+ __setup("iop13xx_init_uart", iop13xx_init_uart_setup);
+ __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup);
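Given the parser above, the active channels can be chosen from the kernel
command line; a hypothetical boot argument enabling only channels 0 and 2:

    console=ttyS0,115200 iop13xx_init_adma=0,2

Any malformed character falls back to IOP13XX_INIT_ADMA_DEFAULT, which
enables all three channels.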
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c 2007-12-21 15:36:11.000000000 -0500
+@@ -180,6 +180,8 @@
+ platform_device_register(&iop3xx_i2c1_device);
+ platform_device_register(&glantank_flash_device);
+ platform_device_register(&glantank_serial_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
+
+ pm_power_off = glantank_power_off;
+ }
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c 2007-12-21 15:36:11.000000000 -0500
+@@ -298,9 +298,14 @@
+ platform_device_register(&iop3xx_i2c1_device);
+ platform_device_register(&iq31244_flash_device);
+ platform_device_register(&iq31244_serial_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
+
+ if (is_ep80219())
+ pm_power_off = ep80219_power_off;
++
++ if (!is_80219())
++ platform_device_register(&iop3xx_aau_channel);
+ }
+
+ static int __init force_ep80219_setup(char *str)
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c 2007-12-21 15:36:11.000000000 -0500
+@@ -181,6 +181,9 @@
+ platform_device_register(&iop3xx_i2c1_device);
+ platform_device_register(&iq80321_flash_device);
+ platform_device_register(&iq80321_serial_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
++ platform_device_register(&iop3xx_aau_channel);
+ }
+
+ MACHINE_START(IQ80321, "Intel IQ80321")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c 2007-12-21 15:36:11.000000000 -0500
+@@ -245,6 +245,8 @@
+ platform_device_register(&iop3xx_i2c0_device);
+ platform_device_register(&n2100_flash_device);
+ platform_device_register(&n2100_serial_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
+
+ pm_power_off = n2100_power_off;
+
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c
+--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c 2007-12-21 15:36:11.000000000 -0500
+@@ -136,6 +136,9 @@
+ platform_device_register(&iop33x_uart0_device);
+ platform_device_register(&iop33x_uart1_device);
+ platform_device_register(&iq80331_flash_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
++ platform_device_register(&iop3xx_aau_channel);
+ }
+
+ MACHINE_START(IQ80331, "Intel IQ80331")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c
+--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c 2007-12-21 15:36:11.000000000 -0500
+@@ -136,6 +136,9 @@
+ platform_device_register(&iop33x_uart0_device);
+ platform_device_register(&iop33x_uart1_device);
+ platform_device_register(&iq80332_flash_device);
++ platform_device_register(&iop3xx_dma_0_channel);
++ platform_device_register(&iop3xx_dma_1_channel);
++ platform_device_register(&iop3xx_aau_channel);
+ }
+
+ MACHINE_START(IQ80332, "Intel IQ80332")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/core.c linux-2.6.22-591/arch/arm/mach-ixp2000/core.c
+--- linux-2.6.22-570/arch/arm/mach-ixp2000/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-ixp2000/core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,6 +34,7 @@
+ #include <asm/system.h>
+ #include <asm/tlbflush.h>
+ #include <asm/pgtable.h>
++#include <asm/kgdb.h>
+
+ #include <asm/mach/map.h>
+ #include <asm/mach/time.h>
+@@ -184,6 +185,9 @@
+ void __init ixp2000_uart_init(void)
+ {
+ platform_device_register(&ixp2000_serial_device);
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(0, &ixp2000_serial_port);
++#endif
+ }
+
+
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c
+--- linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c 2007-12-21 15:36:11.000000000 -0500
+@@ -38,6 +38,7 @@
+ #include <asm/system.h>
+ #include <asm/hardware.h>
+ #include <asm/mach-types.h>
++#include <asm/kgdb.h>
+
+ #include <asm/mach/pci.h>
+ #include <asm/mach/map.h>
+@@ -413,6 +414,11 @@
+ platform_add_devices(ixdp2x01_devices, ARRAY_SIZE(ixdp2x01_devices));
+ ixp2000_uart_init();
+ ixdp2x01_uart_init();
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(0, ixdp2x01_serial_port1);
++ kgdb8250_add_platform_port(1, ixdp2x01_serial_port2);
++#endif
+ }
+
+
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c
+--- linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -96,6 +96,10 @@
+ }
+
+ platform_add_devices(coyote_devices, ARRAY_SIZE(coyote_devices));
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(0, &coyote_uart_data);
++#endif
+ }
+
+ #ifdef CONFIG_ARCH_ADI_COYOTE
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c
+--- linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -76,7 +76,8 @@
+ .mapbase = IXP4XX_UART1_BASE_PHYS,
+ .membase = (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET,
+ .irq = IRQ_IXP4XX_UART1,
+- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
++ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
++ UPF_SHARE_IRQ,
+ .iotype = UPIO_MEM,
+ .regshift = 2,
+ .uartclk = IXP4XX_UART_XTAL,
+@@ -85,7 +86,8 @@
+ .mapbase = IXP4XX_UART2_BASE_PHYS,
+ .membase = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
+ .irq = IRQ_IXP4XX_UART2,
+- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
++ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
++ UPF_SHARE_IRQ,
+ .iotype = UPIO_MEM,
+ .regshift = 2,
+ .uartclk = IXP4XX_UART_XTAL,
+@@ -123,12 +125,22 @@
+ platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices));
+ }
+
++static void __init ixdp425_map_io(void)
++{
++ ixp4xx_map_io();
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(0, &ixdp425_uart_data[0]);
++ kgdb8250_add_platform_port(1, &ixdp425_uart_data[1]);
++#endif
++}
++
+ #ifdef CONFIG_ARCH_IXDP425
+ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
+ /* Maintainer: MontaVista Software, Inc. */
+ .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS,
+ .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc,
+- .map_io = ixp4xx_map_io,
++ .map_io = ixdp425_map_io,
+ .init_irq = ixp4xx_init_irq,
+ .timer = &ixp4xx_timer,
+ .boot_params = 0x0100,
+@@ -141,7 +153,7 @@
+ /* Maintainer: MontaVista Software, Inc. */
+ .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS,
+ .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc,
+- .map_io = ixp4xx_map_io,
++ .map_io = ixdp425_map_io,
+ .init_irq = ixp4xx_init_irq,
+ .timer = &ixp4xx_timer,
+ .boot_params = 0x0100,
+diff -Nurb linux-2.6.22-570/arch/arm/mach-omap1/serial.c linux-2.6.22-591/arch/arm/mach-omap1/serial.c
+--- linux-2.6.22-570/arch/arm/mach-omap1/serial.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-omap1/serial.c 2007-12-21 15:36:11.000000000 -0500
+@@ -15,6 +15,7 @@
+ #include <linux/delay.h>
+ #include <linux/serial.h>
+ #include <linux/tty.h>
++#include <linux/kgdb.h>
+ #include <linux/serial_8250.h>
+ #include <linux/serial_reg.h>
+ #include <linux/clk.h>
+@@ -199,6 +200,9 @@
+ break;
+ }
+ omap_serial_reset(&serial_platform_data[i]);
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(i, &serial_platform_data[i]);
++#endif
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pnx4008/core.c linux-2.6.22-591/arch/arm/mach-pnx4008/core.c
+--- linux-2.6.22-570/arch/arm/mach-pnx4008/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-pnx4008/core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -224,6 +224,10 @@
+ spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+ /* Switch on the UART clocks */
+ pnx4008_uart_init();
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(0, &platform_serial_ports[0]);
++ kgdb8250_add_platform_port(1, &platform_serial_ports[1]);
++#endif
+ }
+
+ static struct map_desc pnx4008_io_desc[] __initdata = {
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/Makefile linux-2.6.22-591/arch/arm/mach-pxa/Makefile
+--- linux-2.6.22-570/arch/arm/mach-pxa/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-pxa/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -31,6 +31,7 @@
+ # Misc features
+ obj-$(CONFIG_PM) += pm.o sleep.o
+ obj-$(CONFIG_PXA_SSP) += ssp.o
++obj-$(CONFIG_KGDB_PXA_SERIAL) += kgdb-serial.o
+
+ ifeq ($(CONFIG_PXA27x),y)
+ obj-$(CONFIG_PM) += standby.o
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c
+--- linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,97 @@
++/*
++ * linux/arch/arm/mach-pxa/kgdb-serial.c
++ *
++ * Provides low level kgdb serial support hooks for PXA2xx boards
++ *
++ * Author: Nicolas Pitre
++ * Copyright: (C) 2002-2005 MontaVista Software Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/serial_reg.h>
++#include <linux/kgdb.h>
++#include <asm/processor.h>
++#include <asm/hardware.h>
++#include <asm/arch/pxa-regs.h>
++
++#if defined(CONFIG_KGDB_PXA_FFUART)
++
++#define UART FFUART
++#define CKEN_UART CKEN6_FFUART
++#define GPIO_RX_MD GPIO34_FFRXD_MD
++#define GPIO_TX_MD GPIO39_FFTXD_MD
++
++#elif defined(CONFIG_KGDB_PXA_BTUART)
++
++#define UART BTUART
++#define CKEN_UART CKEN7_BTUART
++#define GPIO_RX_MD GPIO42_BTRXD_MD
++#define GPIO_TX_MD GPIO43_BTTXD_MD
++
++#elif defined(CONFIG_KGDB_PXA_STUART)
++
++#define UART STUART
++#define CKEN_UART CKEN5_STUART
++#define GPIO_RX_MD GPIO46_STRXD_MD
++#define GPIO_TX_MD GPIO47_STTXD_MD
++
++#endif
++
++#define UART_BAUDRATE (CONFIG_KGDB_BAUDRATE)
++
++static volatile unsigned long *port = (unsigned long *)&UART;
++
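++/* Program the selected UART for polled KGDB I/O at CONFIG_KGDB_BAUDRATE,
++ * 8 data bits, no parity. The 921600 below is the PXA UART input clock
++ * (14.7456 MHz) divided by 16, from which the divisor latch is derived. */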
++static int kgdb_serial_init(void)
++{
++ pxa_set_cken(CKEN_UART, 1);
++ pxa_gpio_mode(GPIO_RX_MD);
++ pxa_gpio_mode(GPIO_TX_MD);
++
++ port[UART_IER] = 0;
++ port[UART_LCR] = LCR_DLAB;
++ port[UART_DLL] = ((921600 / UART_BAUDRATE) & 0xff);
++ port[UART_DLM] = ((921600 / UART_BAUDRATE) >> 8);
++ port[UART_LCR] = LCR_WLS1 | LCR_WLS0;
++ port[UART_MCR] = 0;
++ port[UART_IER] = IER_UUE;
++ port[UART_FCR] = FCR_ITL_16;
++
++ return 0;
++}
++
++static void kgdb_serial_putchar(u8 c)
++{
++ if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE)
++ kgdb_serial_init();
++ while (!(port[UART_LSR] & LSR_TDRQ))
++ cpu_relax();
++ port[UART_TX] = c;
++}
++
++static void kgdb_serial_flush(void)
++{
++ if ((CKEN & CKEN_UART) && (port[UART_IER] & IER_UUE))
++ while (!(port[UART_LSR] & LSR_TEMT))
++ cpu_relax();
++}
++
++static int kgdb_serial_getchar(void)
++{
++ unsigned char c;
++ if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE)
++ kgdb_serial_init();
++ while (!(port[UART_LSR] & UART_LSR_DR))
++ cpu_relax();
++ c = port[UART_RX];
++ return c;
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .init = kgdb_serial_init,
++ .write_char = kgdb_serial_putchar,
++ .flush = kgdb_serial_flush,
++ .read_char = kgdb_serial_getchar,
++};
+diff -Nurb linux-2.6.22-570/arch/arm/mach-versatile/core.c linux-2.6.22-591/arch/arm/mach-versatile/core.c
+--- linux-2.6.22-570/arch/arm/mach-versatile/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-versatile/core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -184,6 +184,14 @@
+ .type = MT_DEVICE
+ },
+ #endif
++#ifdef CONFIG_KGDB_AMBA_PL011
++ {
++ .virtual = IO_ADDRESS(CONFIG_KGDB_AMBA_BASE),
++ .pfn = __phys_to_pfn(CONFIG_KGDB_AMBA_BASE),
++ .length = SZ_4K,
++ .type = MT_DEVICE
++ },
++#endif
+ #ifdef CONFIG_PCI
+ {
+ .virtual = IO_ADDRESS(VERSATILE_PCI_CORE_BASE),
+diff -Nurb linux-2.6.22-570/arch/arm/mm/extable.c linux-2.6.22-591/arch/arm/mm/extable.c
+--- linux-2.6.22-570/arch/arm/mm/extable.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mm/extable.c 2007-12-21 15:36:11.000000000 -0500
+@@ -2,6 +2,7 @@
+ * linux/arch/arm/mm/extable.c
+ */
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/uaccess.h>
+
+ int fixup_exception(struct pt_regs *regs)
+@@ -11,6 +12,12 @@
+ fixup = search_exception_tables(instruction_pointer(regs));
+ if (fixup)
+ regs->ARM_pc = fixup->fixup;
++#ifdef CONFIG_KGDB
++ if (atomic_read(&debugger_active) && kgdb_may_fault)
++ /* Restore our previous state. */
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Not reached. */
++#endif
+
+ return fixup != NULL;
+ }
+diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/Makefile linux-2.6.22-591/arch/arm/plat-iop/Makefile
+--- linux-2.6.22-570/arch/arm/plat-iop/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/plat-iop/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -12,6 +12,7 @@
+ obj-$(CONFIG_ARCH_IOP32X) += time.o
+ obj-$(CONFIG_ARCH_IOP32X) += io.o
+ obj-$(CONFIG_ARCH_IOP32X) += cp6.o
++obj-$(CONFIG_ARCH_IOP32X) += adma.o
+
+ # IOP33X
+ obj-$(CONFIG_ARCH_IOP33X) += gpio.o
+@@ -21,6 +22,7 @@
+ obj-$(CONFIG_ARCH_IOP33X) += time.o
+ obj-$(CONFIG_ARCH_IOP33X) += io.o
+ obj-$(CONFIG_ARCH_IOP33X) += cp6.o
++obj-$(CONFIG_ARCH_IOP33X) += adma.o
+
+ # IOP13XX
+ obj-$(CONFIG_ARCH_IOP13XX) += cp6.o
+diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/adma.c linux-2.6.22-591/arch/arm/plat-iop/adma.c
+--- linux-2.6.22-570/arch/arm/plat-iop/adma.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/plat-iop/adma.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,209 @@
++/*
++ * platform device definitions for the iop3xx dma/xor engines
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/platform_device.h>
++#include <asm/hardware/iop3xx.h>
++#include <linux/dma-mapping.h>
++#include <asm/arch/adma.h>
++#include <asm/hardware/iop_adma.h>
++
++#ifdef CONFIG_ARCH_IOP32X
++#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT
++#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC
++#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR
++
++#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT
++#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC
++#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR
++
++#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT
++#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC
++#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR
++#endif
++#ifdef CONFIG_ARCH_IOP33X
++#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT
++#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC
++#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR
++
++#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT
++#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC
++#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR
++
++#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT
++#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC
++#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR
++#endif
++/* AAU and DMA Channels */
++static struct resource iop3xx_dma_0_resources[] = {
++ [0] = {
++ .start = IOP3XX_DMA_PHYS_BASE(0),
++ .end = IOP3XX_DMA_UPPER_PA(0),
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_DMA0_EOT,
++ .end = IRQ_DMA0_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_DMA0_EOC,
++ .end = IRQ_DMA0_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_DMA0_ERR,
++ .end = IRQ_DMA0_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++static struct resource iop3xx_dma_1_resources[] = {
++ [0] = {
++ .start = IOP3XX_DMA_PHYS_BASE(1),
++ .end = IOP3XX_DMA_UPPER_PA(1),
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_DMA1_EOT,
++ .end = IRQ_DMA1_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_DMA1_EOC,
++ .end = IRQ_DMA1_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_DMA1_ERR,
++ .end = IRQ_DMA1_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++
++static struct resource iop3xx_aau_resources[] = {
++ [0] = {
++ .start = IOP3XX_AAU_PHYS_BASE,
++ .end = IOP3XX_AAU_UPPER_PA,
++ .flags = IORESOURCE_MEM,
++ },
++ [1] = {
++ .start = IRQ_AA_EOT,
++ .end = IRQ_AA_EOT,
++ .flags = IORESOURCE_IRQ
++ },
++ [2] = {
++ .start = IRQ_AA_EOC,
++ .end = IRQ_AA_EOC,
++ .flags = IORESOURCE_IRQ
++ },
++ [3] = {
++ .start = IRQ_AA_ERR,
++ .end = IRQ_AA_ERR,
++ .flags = IORESOURCE_IRQ
++ }
++};
++
++static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK;
++
++static struct iop_adma_platform_data iop3xx_dma_0_data = {
++ .hw_id = DMA0_ID,
++ .pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop3xx_dma_1_data = {
++ .hw_id = DMA1_ID,
++ .pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop3xx_aau_data = {
++ .hw_id = AAU_ID,
++ .pool_size = 3 * PAGE_SIZE,
++};
++
++struct platform_device iop3xx_dma_0_channel = {
++ .name = "iop-adma",
++ .id = 0,
++ .num_resources = 4,
++ .resource = iop3xx_dma_0_resources,
++ .dev = {
++ .dma_mask = &iop3xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop3xx_dma_0_data,
++ },
++};
++
++struct platform_device iop3xx_dma_1_channel = {
++ .name = "iop-adma",
++ .id = 1,
++ .num_resources = 4,
++ .resource = iop3xx_dma_1_resources,
++ .dev = {
++ .dma_mask = &iop3xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop3xx_dma_1_data,
++ },
++};
++
++struct platform_device iop3xx_aau_channel = {
++ .name = "iop-adma",
++ .id = 2,
++ .num_resources = 4,
++ .resource = iop3xx_aau_resources,
++ .dev = {
++ .dma_mask = &iop3xx_adma_dmamask,
++ .coherent_dma_mask = DMA_64BIT_MASK,
++ .platform_data = (void *) &iop3xx_aau_data,
++ },
++};
++
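++/* Advertise each channel's capabilities to the dmaengine core; as the
++ * comments below note, the iop32x DMA engines cannot offload CRC32C and
++ * its AAU cannot perform zero-sum. */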
++static int __init iop3xx_adma_cap_init(void)
++{
++ #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
++ #else
++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
++ dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
++ #endif
++
++ #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
++ #else
++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
++ dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
++ #endif
++
++ #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */
++ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
++ dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
++ #else
++ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
++ dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
++ dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
++ dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
++ #endif
++
++ return 0;
++}
++
++arch_initcall(iop3xx_adma_cap_init);
+diff -Nurb linux-2.6.22-570/arch/i386/Kconfig linux-2.6.22-591/arch/i386/Kconfig
+--- linux-2.6.22-570/arch/i386/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -1053,6 +1053,8 @@
+
+ source "arch/i386/kernel/cpu/cpufreq/Kconfig"
+
++source "drivers/cpuidle/Kconfig"
++
+ endmenu
+
+ menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/Makefile linux-2.6.22-591/arch/i386/kernel/Makefile
+--- linux-2.6.22-570/arch/i386/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -39,6 +39,7 @@
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+ obj-$(CONFIG_HPET_TIMER) += hpet.o
+ obj-$(CONFIG_K8_NB) += k8.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+
+ obj-$(CONFIG_VMI) += vmi.o vmiclock.o
+ obj-$(CONFIG_PARAVIRT) += paravirt.o
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/acpi/boot.c linux-2.6.22-591/arch/i386/kernel/acpi/boot.c
+--- linux-2.6.22-570/arch/i386/kernel/acpi/boot.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/kernel/acpi/boot.c 2007-12-21 15:36:11.000000000 -0500
+@@ -950,14 +950,6 @@
+ },
+ {
+ .callback = force_acpi_ht,
+- .ident = "DELL GX240",
+- .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+- DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+- },
+- },
+- {
+- .callback = force_acpi_ht,
+ .ident = "HP VISUALIZE NT Workstation",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/apm.c linux-2.6.22-591/arch/i386/kernel/apm.c
+--- linux-2.6.22-570/arch/i386/kernel/apm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/kernel/apm.c 2007-12-21 15:36:11.000000000 -0500
+@@ -222,6 +222,7 @@
+ #include <linux/capability.h>
+ #include <linux/device.h>
+ #include <linux/kernel.h>
++#include <linux/freezer.h>
+ #include <linux/smp.h>
+ #include <linux/dmi.h>
+ #include <linux/suspend.h>
+@@ -2311,7 +2312,6 @@
+ remove_proc_entry("apm", NULL);
+ return err;
+ }
+- kapmd_task->flags |= PF_NOFREEZE;
+ wake_up_process(kapmd_task);
+
+ if (num_online_cpus() > 1 && !smp ) {
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/io_apic.c linux-2.6.22-591/arch/i386/kernel/io_apic.c
+--- linux-2.6.22-570/arch/i386/kernel/io_apic.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/io_apic.c 2007-12-21 15:36:11.000000000 -0500
+@@ -667,6 +667,7 @@
+ set_pending_irq(i, cpumask_of_cpu(0));
+ }
+
++ set_freezable();
+ for ( ; ; ) {
+ time_remaining = schedule_timeout_interruptible(time_remaining);
+ try_to_freeze();
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,74 @@
++/*
++ * arch/i386/kernel/kgdb-jmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: George Anzinger <george@mvista.com>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 1996, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
++ * Copyright (C) 2005 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/linkage.h>
++
++#define PCOFF 0
++#define LINKAGE 4 /* just the return address */
++#define PTR_SIZE 4
++#define PARMS LINKAGE /* no space for saved regs */
++#define JMPBUF PARMS
++#define VAL JMPBUF+PTR_SIZE
++
++#define JB_BX 0
++#define JB_SI 1
++#define JB_DI 2
++#define JB_BP 3
++#define JB_SP 4
++#define JB_PC 5
++
++/* This must be called prior to kgdb_fault_longjmp and
++ * kgdb_fault_longjmp must not be called outside of the context of the
++ * last call to kgdb_fault_setjmp.
++ * kgdb_fault_setjmp(int *jmp_buf[6])
++ */
++ENTRY(kgdb_fault_setjmp)
++ movl JMPBUF(%esp), %eax
++
++ /* Save registers. */
++ movl %ebx, (JB_BX*4)(%eax)
++ movl %esi, (JB_SI*4)(%eax)
++ movl %edi, (JB_DI*4)(%eax)
++ /* Save SP as it will be after we return. */
++ leal JMPBUF(%esp), %ecx
++ movl %ecx, (JB_SP*4)(%eax)
++ movl PCOFF(%esp), %ecx /* Save PC we are returning to now. */
++ movl %ecx, (JB_PC*4)(%eax)
++ movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */
++
++ /* Restore state so we can now try the access. */
++ movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */
++ /* Save the return address now. */
++ movl (JB_PC*4)(%ecx), %edx
++ /* Restore registers. */
++ movl $0, %eax
++ movl (JB_SP*4)(%ecx), %esp
++ jmp *%edx /* Jump to saved PC. */
++
++/* kgdb_fault_longjmp(int *jmp_buf[6]) */
++ENTRY(kgdb_fault_longjmp)
++ movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */
++ /* Save the return address now. */
++ movl (JB_PC*4)(%ecx), %edx
++ /* Restore registers. */
++ movl (JB_BX*4)(%ecx), %ebx
++ movl (JB_SI*4)(%ecx), %esi
++ movl (JB_DI*4)(%ecx), %edi
++ movl (JB_BP*4)(%ecx), %ebp
++ movl $1, %eax
++ movl (JB_SP*4)(%ecx), %esp
++ jmp *%edx /* Jump to saved PC. */
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb.c linux-2.6.22-591/arch/i386/kernel/kgdb.c
+--- linux-2.6.22-570/arch/i386/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,388 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2007 Wind River Systems, Inc.
++ */
++/*
++ * Contributor: Lake Stevens Instrument Division$
++ * Written by: Glenn Engel $
++ * Updated by: Amit Kale<akale@veritas.com>
++ * Updated by: Tom Rini <trini@kernel.crashing.org>
++ * Updated by: Jason Wessel <jason.wessel@windriver.com>
++ * Modified for 386 by Jim Kingdon, Cygnus Support.
++ * Original kgdb, compatibility with 2.1.xx kernel by
++ * David Grothe <dave@gcom.com>
++ * Additional support from Tigran Aivazian <tigran@sco.com>
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/vm86.h>
++#include <asm/system.h>
++#include <asm/ptrace.h> /* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/apicdef.h>
++#include <asm/desc.h>
++#include <asm/kdebug.h>
++
++#include "mach_ipi.h"
++
++/* Put the error code here just in case the user cares. */
++int gdb_i386errcode;
++/* Likewise, the vector number here (since GDB only gets the signal
++ number through the usual means, and that's not very specific). */
++int gdb_i386vector = -1;
++
++extern atomic_t cpu_doing_single_step;
++
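++/* Translate an exception-frame pt_regs into the register layout gdb
++ * expects for i386. */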
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ gdb_regs[_EAX] = regs->eax;
++ gdb_regs[_EBX] = regs->ebx;
++ gdb_regs[_ECX] = regs->ecx;
++ gdb_regs[_EDX] = regs->edx;
++ gdb_regs[_ESI] = regs->esi;
++ gdb_regs[_EDI] = regs->edi;
++ gdb_regs[_EBP] = regs->ebp;
++ gdb_regs[_DS] = regs->xds;
++ gdb_regs[_ES] = regs->xes;
++ gdb_regs[_PS] = regs->eflags;
++ gdb_regs[_CS] = regs->xcs;
++ gdb_regs[_PC] = regs->eip;
++ gdb_regs[_ESP] = (int)(&regs->esp);
++ gdb_regs[_SS] = __KERNEL_DS;
++ gdb_regs[_FS] = 0xFFFF;
++ gdb_regs[_GS] = 0xFFFF;
++}
++
++/*
++ * Extracts ebp, esp and eip values understandable by gdb from the values
++ * saved by switch_to.
++ * thread.esp points to ebp. flags and ebp are pushed in switch_to, hence esp
++ * prior to entering switch_to is 8 greater than the value that is saved.
++ * If switch_to changes, change the following code appropriately.
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ gdb_regs[_EAX] = 0;
++ gdb_regs[_EBX] = 0;
++ gdb_regs[_ECX] = 0;
++ gdb_regs[_EDX] = 0;
++ gdb_regs[_ESI] = 0;
++ gdb_regs[_EDI] = 0;
++ gdb_regs[_EBP] = *(unsigned long *)p->thread.esp;
++ gdb_regs[_DS] = __KERNEL_DS;
++ gdb_regs[_ES] = __KERNEL_DS;
++ gdb_regs[_PS] = 0;
++ gdb_regs[_CS] = __KERNEL_CS;
++ gdb_regs[_PC] = p->thread.eip;
++ gdb_regs[_ESP] = p->thread.esp;
++ gdb_regs[_SS] = __KERNEL_DS;
++ gdb_regs[_FS] = 0xFFFF;
++ gdb_regs[_GS] = 0xFFFF;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ regs->eax = gdb_regs[_EAX];
++ regs->ebx = gdb_regs[_EBX];
++ regs->ecx = gdb_regs[_ECX];
++ regs->edx = gdb_regs[_EDX];
++ regs->esi = gdb_regs[_ESI];
++ regs->edi = gdb_regs[_EDI];
++ regs->ebp = gdb_regs[_EBP];
++ regs->xds = gdb_regs[_DS];
++ regs->xes = gdb_regs[_ES];
++ regs->eflags = gdb_regs[_PS];
++ regs->xcs = gdb_regs[_CS];
++ regs->eip = gdb_regs[_PC];
++}
++
++static struct hw_breakpoint {
++ unsigned enabled;
++ unsigned type;
++ unsigned len;
++ unsigned addr;
++} breakinfo[4] = {
++ { .enabled = 0 },
++ { .enabled = 0 },
++ { .enabled = 0 },
++ { .enabled = 0 },
++};
++
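++/* Reconcile the breakinfo[] table with the debug registers: load DR0-DR3
++ * with breakpoint addresses and update the matching enable/type/length
++ * fields in DR7, writing DR7 back only if something actually changed. */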
++static void kgdb_correct_hw_break(void)
++{
++ int breakno;
++ int correctit;
++ int breakbit;
++ unsigned long dr7;
++
++ get_debugreg(dr7, 7);
++ correctit = 0;
++ for (breakno = 0; breakno < 4; breakno++) {
++ breakbit = 2 << (breakno << 1);
++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
++ correctit = 1;
++ dr7 |= breakbit;
++ dr7 &= ~(0xf0000 << (breakno << 2));
++ dr7 |= (((breakinfo[breakno].len << 2) |
++ breakinfo[breakno].type) << 16) <<
++ (breakno << 2);
++ switch (breakno) {
++ case 0:
++ set_debugreg(breakinfo[breakno].addr, 0);
++ break;
++
++ case 1:
++ set_debugreg(breakinfo[breakno].addr, 1);
++ break;
++
++ case 2:
++ set_debugreg(breakinfo[breakno].addr, 2);
++ break;
++
++ case 3:
++ set_debugreg(breakinfo[breakno].addr, 3);
++ break;
++ }
++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
++ correctit = 1;
++ dr7 &= ~breakbit;
++ dr7 &= ~(0xf0000 << (breakno << 2));
++ }
++ }
++ if (correctit)
++ set_debugreg(dr7, 7);
++}
++
++static int kgdb_remove_hw_break(unsigned long addr, int len,
++ enum kgdb_bptype bptype)
++{
++ int i, idx = -1;
++ for (i = 0; i < 4; i++) {
++ if (breakinfo[i].addr == addr && breakinfo[i].enabled) {
++ idx = i;
++ break;
++ }
++ }
++ if (idx == -1)
++ return -1;
++
++ breakinfo[idx].enabled = 0;
++ return 0;
++}
++
++static void kgdb_remove_all_hw_break(void)
++{
++ int i;
++
++ for (i = 0; i < 4; i++) {
++ memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint));
++ }
++}
++
++static int kgdb_set_hw_break(unsigned long addr, int len,
++ enum kgdb_bptype bptype)
++{
++ int i, idx = -1;
++ for (i = 0; i < 4; i++) {
++ if (!breakinfo[i].enabled) {
++ idx = i;
++ break;
++ }
++ }
++ if (idx == -1)
++ return -1;
++ if (bptype == bp_hardware_breakpoint) {
++ breakinfo[idx].type = 0;
++ breakinfo[idx].len = 0;
++ } else if (bptype == bp_write_watchpoint) {
++ breakinfo[idx].type = 1;
++ if (len == 1 || len == 2 || len == 4)
++ breakinfo[idx].len = len - 1;
++ else
++ return -1;
++ } else if (bptype == bp_access_watchpoint) {
++ breakinfo[idx].type = 3;
++ if (len == 1 || len == 2 || len == 4)
++ breakinfo[idx].len = len - 1;
++ else
++ return -1;
++ } else
++ return -1;
++ breakinfo[idx].enabled = 1;
++ breakinfo[idx].addr = addr;
++ return 0;
++}
++
++void kgdb_disable_hw_debug(struct pt_regs *regs)
++{
++ /* Disable hardware debugging while we are in kgdb */
++ set_debugreg(0, 7);
++}
++
++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code)
++{
++ /* Master processor is completely in the debugger */
++ gdb_i386vector = e_vector;
++ gdb_i386errcode = err_code;
++}
++
++#ifdef CONFIG_SMP
++void kgdb_roundup_cpus(unsigned long flags)
++{
++ send_IPI_allbutself(APIC_DM_NMI);
++}
++#endif
++
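++/* Arch-specific side of the gdb 'c' (continue) and 's' (step) packets:
++ * optionally resume at a caller-supplied PC, manage the TF single-step
++ * flag, and re-arm any hardware breakpoints before resuming. */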
++int kgdb_arch_handle_exception(int e_vector, int signo,
++ int err_code, char *remcom_in_buffer,
++ char *remcom_out_buffer,
++ struct pt_regs *linux_regs)
++{
++ long addr;
++ char *ptr;
++ int newPC, dr6;
++
++ switch (remcom_in_buffer[0]) {
++ case 'c':
++ case 's':
++ /* try to read optional parameter, pc unchanged if no parm */
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &addr))
++ linux_regs->eip = addr;
++ newPC = linux_regs->eip;
++
++ /* clear the trace bit */
++ linux_regs->eflags &= ~TF_MASK;
++ atomic_set(&cpu_doing_single_step, -1);
++
++ /* set the trace bit if we're stepping */
++ if (remcom_in_buffer[0] == 's') {
++ linux_regs->eflags |= TF_MASK;
++ debugger_step = 1;
++ atomic_set(&cpu_doing_single_step,raw_smp_processor_id());
++ }
++
++ get_debugreg(dr6, 6);
++ if (!(dr6 & 0x4000)) {
++ long breakno;
++ for (breakno = 0; breakno < 4; ++breakno) {
++ if (dr6 & (1 << breakno) &&
++ breakinfo[breakno].type == 0) {
++ /* Set restore flag */
++ linux_regs->eflags |= X86_EFLAGS_RF;
++ break;
++ }
++ }
++ }
++ set_debugreg(0, 6);
++ kgdb_correct_hw_break();
++
++ return (0);
++ } /* switch */
++ /* this means that we do not want to exit from the handler */
++ return -1;
++}
++
++/* Register KGDB with the i386 die_chain so that we hook into all of the right
++ * spots. */
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ struct die_args *args = ptr;
++ struct pt_regs *regs = args->regs;
++
++ /* Bad memory access? */
++ if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active)
++ && kgdb_may_fault) {
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ return NOTIFY_STOP;
++ } else if (cmd == DIE_PAGE_FAULT)
++ /* A normal page fault, ignore. */
++ return NOTIFY_DONE;
++ else if ((cmd == DIE_NMI || cmd == DIE_NMI_IPI ||
++ cmd == DIE_NMIWATCHDOG) && atomic_read(&debugger_active)) {
++ /* CPU roundup */
++ kgdb_nmihook(raw_smp_processor_id(), regs);
++ return NOTIFY_STOP;
++ } else if (cmd == DIE_DEBUG
++ && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id()
++ && user_mode(regs)) {
++ /* single step exception from kernel space to user space so
++ * eat the exception and continue the process
++ */
++ printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n");
++ kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs);
++ return NOTIFY_STOP;
++ } else if (cmd == DIE_NMI_IPI || cmd == DIE_NMI || user_mode(regs) ||
++ (cmd == DIE_DEBUG && atomic_read(&debugger_active)))
++ /* Normal watchdog event or userspace debugging, or spurious
++ * debug exception, ignore. */
++ return NOTIFY_DONE;
++
++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++
++ return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++ .notifier_call = kgdb_notify,
++};
++
++int kgdb_arch_init(void)
++{
++ register_die_notifier(&kgdb_notifier);
++ return 0;
++}
++
++/*
++ * Skip an int3 exception when it occurs after a breakpoint has been
++ * removed. Backtrack eip by 1 since the int3 would have caused it to
++ * increment by 1.
++ */
++
++int kgdb_skipexception(int exception, struct pt_regs *regs)
++{
++ if (exception == 3 && kgdb_isremovedbreak(regs->eip - 1)) {
++ regs->eip -= 1;
++ return 1;
++ }
++ return 0;
++}
++
++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++ if (exception == 3) {
++ return instruction_pointer(regs) - 1;
++ }
++ return instruction_pointer(regs);
++}
++
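++/* 0xcc is the int3 software breakpoint instruction; hardware breakpoints
++ * are handled via the debug-register helpers above. */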
++struct kgdb_arch arch_kgdb_ops = {
++ .gdb_bpt_instr = {0xcc},
++ .flags = KGDB_HW_BREAKPOINT,
++ .set_hw_breakpoint = kgdb_set_hw_break,
++ .remove_hw_breakpoint = kgdb_remove_hw_break,
++ .remove_all_hw_break = kgdb_remove_all_hw_break,
++ .correct_hw_break = kgdb_correct_hw_break,
++};
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/process.c linux-2.6.22-591/arch/i386/kernel/process.c
+--- linux-2.6.22-570/arch/i386/kernel/process.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/process.c 2007-12-21 15:36:11.000000000 -0500
+@@ -179,13 +179,13 @@
+
+ /* endless idle loop with no priority at all */
+ while (1) {
+- tick_nohz_stop_sched_tick();
+ while (!need_resched()) {
+ void (*idle)(void);
+
+ if (__get_cpu_var(cpu_idle_state))
+ __get_cpu_var(cpu_idle_state) = 0;
+
++ tick_nohz_stop_sched_tick();
+ check_pgt_cache();
+ rmb();
+ idle = pm_idle;
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/setup.c linux-2.6.22-591/arch/i386/kernel/setup.c
+--- linux-2.6.22-570/arch/i386/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -124,6 +124,7 @@
+ #endif
+
+ extern void early_cpu_init(void);
++extern void early_trap_init(void);
+ extern int root_mountflags;
+
+ unsigned long saved_videomode;
+@@ -514,6 +515,7 @@
+ memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+ pre_setup_arch_hook();
+ early_cpu_init();
++ early_trap_init();
+
+ /*
+ * FIXME: This isn't an official loader_type right
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/signal.c linux-2.6.22-591/arch/i386/kernel/signal.c
+--- linux-2.6.22-570/arch/i386/kernel/signal.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500
+@@ -199,6 +199,13 @@
+ return eax;
+
+ badframe:
++ if (show_unhandled_signals && printk_ratelimit())
++ printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
++ " esp:%lx oeax:%lx\n",
++ current->pid > 1 ? KERN_INFO : KERN_EMERG,
++ current->comm, current->pid, frame, regs->eip,
++ regs->esp, regs->orig_eax);
++
+ force_sig(SIGSEGV, current);
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/syscall_table.S linux-2.6.22-591/arch/i386/kernel/syscall_table.S
+--- linux-2.6.22-570/arch/i386/kernel/syscall_table.S 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/syscall_table.S 2007-12-21 15:36:11.000000000 -0500
+@@ -323,3 +323,6 @@
+ .long sys_signalfd
+ .long sys_timerfd
+ .long sys_eventfd
++ .long sys_revokeat
++ .long sys_frevoke /* 325 */
++ .long sys_fallocate
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/traps.c linux-2.6.22-591/arch/i386/kernel/traps.c
+--- linux-2.6.22-570/arch/i386/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -97,6 +97,11 @@
+
+ int kstack_depth_to_print = 24;
+ static unsigned int code_bytes = 64;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+
+ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+@@ -136,6 +141,34 @@
+ return ebp;
+ }
+
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
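++/* Walk the stack with the DWARF2 unwinder, feeding each kernel PC to the
++ * stacktrace callbacks; stop on user-mode frames or when the stack
++ * pointer stops making forward progress within the current page. */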
++static asmlinkage int
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
++{
++ struct ops_and_data *oad = (struct ops_and_data *)data;
++ int n = 0;
++ unsigned long sp = UNW_SP(info);
++
++ if (arch_unw_user_mode(info))
++ return -1;
++ while (unwind(info) == 0 && UNW_PC(info)) {
++ n++;
++ oad->ops->address(oad->data, UNW_PC(info));
++ if (arch_unw_user_mode(info))
++ break;
++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++ && sp > UNW_SP(info))
++ break;
++ sp = UNW_SP(info);
++ touch_nmi_watchdog();
++ }
++ return n;
++}
++
+ #define MSG(msg) ops->warning(data, msg)
+
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+@@ -147,6 +180,41 @@
+ if (!task)
+ task = current;
+
++ if (call_trace >= 0) {
++ int unw_ret = 0;
++ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
++
++ if (regs) {
++ if (unwind_init_frame_info(&info, task, regs) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ } else if (task == current)
++ unw_ret = unwind_init_running(&info, dump_trace_unwind,
++ &oad);
++ else {
++ if (unwind_init_blocked(&info, task) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ }
++ if (unw_ret > 0) {
++ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++ ops->warning_symbol(data,
++ "DWARF2 unwinder stuck at %s",
++ UNW_PC(&info));
++ if (UNW_SP(&info) >= PAGE_OFFSET) {
++ MSG("Leftover inexact backtrace:");
++ stack = (void *)UNW_SP(&info);
++ if (!stack)
++ return;
++ ebp = UNW_FP(&info);
++ } else
++ MSG("Full inexact backtrace again:");
++ } else if (call_trace >= 1)
++ return;
++ else
++ MSG("Full inexact backtrace again:");
++ } else
++ MSG("Inexact backtrace:");
++ }
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+@@ -614,6 +682,13 @@
+
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
++ if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
++ printk_ratelimit())
++ printk(KERN_INFO
++ "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
++ current->comm, current->pid,
++ regs->eip, regs->esp, error_code);
++
+ force_sig(SIGSEGV, current);
+ return;
+
+@@ -854,6 +929,7 @@
+ */
+ clear_dr7:
+ set_debugreg(0, 7);
++ notify_die(DIE_DEBUG, "debug2", regs, condition, error_code, SIGTRAP);
+ return;
+
+ debug_vm86:
+@@ -1118,6 +1194,12 @@
+ _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
+ }
+
++/* Some traps need to be set early. */
++void __init early_trap_init(void)
++{
++ set_intr_gate(1,&debug);
++ set_system_intr_gate(3, &int3); /* int3 can be called from all */
++ set_intr_gate(14,&page_fault);
++}
+
+ void __init trap_init(void)
+ {
+@@ -1134,10 +1216,8 @@
+ #endif
+
+ set_trap_gate(0,&divide_error);
+- set_intr_gate(1,&debug);
+ set_intr_gate(2,&nmi);
+- set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
+- set_system_gate(4,&overflow);
++ set_system_gate(4,&overflow); /* int4/5 can be called from all */
+ set_trap_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+@@ -1147,7 +1227,6 @@
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+- set_intr_gate(14,&page_fault);
+ set_trap_gate(15,&spurious_interrupt_bug);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+@@ -1204,3 +1283,19 @@
+ return 1;
+ }
+ __setup("code_bytes=", code_bytes_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 1;
++}
++__setup("call_trace=", call_trace_setup);
++#endif
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/unwind.S linux-2.6.22-591/arch/i386/kernel/unwind.S
+--- linux-2.6.22-570/arch/i386/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,36 @@
++/* Assembler support code for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/ptrace-abi.h>
++#include <asm/segment.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++ CFI_STARTPROC
++ movl 4(%esp), %edx
++ movl (%esp), %ecx
++ leal 4(%esp), %eax
++ movl %ebx, PT_EBX(%edx)
++ xorl %ebx, %ebx
++ movl %ebx, PT_ECX(%edx)
++ movl %ebx, PT_EDX(%edx)
++ movl %esi, PT_ESI(%edx)
++ movl %edi, PT_EDI(%edx)
++ movl %ebp, PT_EBP(%edx)
++ movl %ebx, PT_EAX(%edx)
++ movl $__USER_DS, PT_DS(%edx)
++ movl $__USER_DS, PT_ES(%edx)
++ movl $0, PT_FS(%edx)
++ movl %ebx, PT_ORIG_EAX(%edx)
++ movl %ecx, PT_EIP(%edx)
++ movl 12(%esp), %ecx
++ movl $__KERNEL_CS, PT_CS(%edx)
++ movl %ebx, PT_EFLAGS(%edx)
++ movl %eax, PT_OLDESP(%edx)
++ movl 8(%esp), %eax
++ movl %ecx, 8(%esp)
++ movl PT_EBX(%edx), %ebx
++ movl $__KERNEL_DS, PT_OLDSS(%edx)
++ jmpl *%eax
++ CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
+diff -Nurb linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c
+--- linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c 2007-12-21 15:36:11.000000000 -0500
+@@ -52,7 +52,7 @@
+ NULL,
+ };
+
+- if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
++ if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
+ printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
+ string, ret);
+ }
+diff -Nurb linux-2.6.22-570/arch/i386/mm/fault.c linux-2.6.22-591/arch/i386/mm/fault.c
+--- linux-2.6.22-570/arch/i386/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -284,6 +284,8 @@
+ return 0;
+ }
+
++int show_unhandled_signals = 1;
++
+ /*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+@@ -471,6 +473,14 @@
+ if (is_prefetch(regs, address, error_code))
+ return;
+
++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
++ printk_ratelimit()) {
++ printk("%s%s[%d]: segfault at %08lx eip %08lx "
++ "esp %08lx error %lx\n",
++ tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
++ tsk->comm, tsk->pid, address, regs->eip,
++ regs->esp, error_code);
++ }
+ tsk->thread.cr2 = address;
+ /* Kernel addresses are always protection faults */
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+@@ -508,6 +518,10 @@
+ if (is_prefetch(regs, address, error_code))
+ return;
+
++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++ error_code, 14, SIGSEGV) == NOTIFY_STOP)
++ return;
++
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+diff -Nurb linux-2.6.22-570/arch/ia64/hp/sim/simeth.c linux-2.6.22-591/arch/ia64/hp/sim/simeth.c
+--- linux-2.6.22-570/arch/ia64/hp/sim/simeth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ia64/hp/sim/simeth.c 2007-12-21 15:36:14.000000000 -0500
+@@ -300,6 +300,9 @@
+ return NOTIFY_DONE;
+ }
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
+
+ /*
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/Makefile linux-2.6.22-591/arch/ia64/kernel/Makefile
+--- linux-2.6.22-570/arch/ia64/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ia64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -35,6 +35,7 @@
+ obj-$(CONFIG_PCI_MSI) += msi_ia64.o
+ mca_recovery-y += mca_drv.o mca_drv_asm.o
+ obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+
+ obj-$(CONFIG_IA64_ESI) += esi.o
+ ifneq ($(CONFIG_IA64_ESI),)
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,238 @@
++/* setjmp() and longjmp() assembler support for kgdb on ia64.
++
++ This code was copied from glibc CVS as of 2001-06-27 and modified where
++ necessary to fit the kernel.
++ Keith Owens <kaos@melbourne.sgi.com> 2001-06-27
++ */
++
++/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
++ Contributed by David Mosberger-Tang <davidm@hpl.hp.com>.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Library General Public License as
++ published by the Free Software Foundation; either version 2 of the
++ License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Library General Public License for more details.
++
++ You should have received a copy of the GNU Library General Public
++ License along with the GNU C Library; see the file COPYING.LIB. If
++ not, write to the Free Software Foundation, Inc.,
++ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++*/
++
++#include <asm/asmmacro.h>
++GLOBAL_ENTRY(kgdb_fault_setjmp)
++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
++ alloc loc1=ar.pfs,2,2,2,0
++ mov r16=ar.unat
++ ;;
++ mov r17=ar.fpsr
++ mov r2=in0
++ add r3=8,in0
++ ;;
++.mem.offset 0,0;
++ st8.spill.nta [r2]=sp,16 // r12 (sp)
++.mem.offset 8,0;
++ st8.spill.nta [r3]=gp,16 // r1 (gp)
++ ;;
++ st8.nta [r2]=r16,16 // save caller's unat
++ st8.nta [r3]=r17,16 // save fpsr
++ add r8=0xa0,in0
++ ;;
++.mem.offset 160,0;
++ st8.spill.nta [r2]=r4,16 // r4
++.mem.offset 168,0;
++ st8.spill.nta [r3]=r5,16 // r5
++ add r9=0xb0,in0
++ ;;
++ stf.spill.nta [r8]=f2,32
++ stf.spill.nta [r9]=f3,32
++ mov loc0=rp
++ .body
++ ;;
++ stf.spill.nta [r8]=f4,32
++ stf.spill.nta [r9]=f5,32
++ mov r17=b1
++ ;;
++ stf.spill.nta [r8]=f16,32
++ stf.spill.nta [r9]=f17,32
++ mov r18=b2
++ ;;
++ stf.spill.nta [r8]=f18,32
++ stf.spill.nta [r9]=f19,32
++ mov r19=b3
++ ;;
++ stf.spill.nta [r8]=f20,32
++ stf.spill.nta [r9]=f21,32
++ mov r20=b4
++ ;;
++ stf.spill.nta [r8]=f22,32
++ stf.spill.nta [r9]=f23,32
++ mov r21=b5
++ ;;
++ stf.spill.nta [r8]=f24,32
++ stf.spill.nta [r9]=f25,32
++ mov r22=ar.lc
++ ;;
++ stf.spill.nta [r8]=f26,32
++ stf.spill.nta [r9]=f27,32
++ mov r24=pr
++ ;;
++ stf.spill.nta [r8]=f28,32
++ stf.spill.nta [r9]=f29,32
++ ;;
++ stf.spill.nta [r8]=f30
++ stf.spill.nta [r9]=f31
++
++.mem.offset 0,0;
++ st8.spill.nta [r2]=r6,16 // r6
++.mem.offset 8,0;
++ st8.spill.nta [r3]=r7,16 // r7
++ ;;
++ mov r23=ar.bsp
++ mov r25=ar.unat
++ st8.nta [r2]=loc0,16 // b0
++ st8.nta [r3]=r17,16 // b1
++ ;;
++ st8.nta [r2]=r18,16 // b2
++ st8.nta [r3]=r19,16 // b3
++ ;;
++ st8.nta [r2]=r20,16 // b4
++ st8.nta [r3]=r21,16 // b5
++ ;;
++ st8.nta [r2]=loc1,16 // ar.pfs
++ st8.nta [r3]=r22,16 // ar.lc
++ ;;
++ st8.nta [r2]=r24,16 // pr
++ st8.nta [r3]=r23,16 // ar.bsp
++ ;;
++ st8.nta [r2]=r25 // ar.unat
++ st8.nta [r3]=in0 // &__jmp_buf
++ mov r8=0
++ mov rp=loc0
++ mov ar.pfs=loc1
++ br.ret.sptk.few rp
++END(kgdb_fault_setjmp)
++#define pPos p6 /* is rotate count positive? */
++#define pNeg p7 /* is rotate count negative? */
++GLOBAL_ENTRY(kgdb_fault_longjmp)
++ alloc r8=ar.pfs,2,1,0,0
++ mov r27=ar.rsc
++ add r2=0x98,in0 // r2 <- &jmpbuf.orig_jmp_buf_addr
++ ;;
++ ld8 r8=[r2],-16 // r8 <- orig_jmp_buf_addr
++ mov r10=ar.bsp
++ and r11=~0x3,r27 // clear ar.rsc.mode
++ ;;
++ flushrs // flush dirty regs to backing store (must be first in insn grp)
++ ld8 r23=[r2],8 // r23 <- jmpbuf.ar_bsp
++ sub r8=r8,in0 // r8 <- &orig_jmpbuf - &jmpbuf
++ ;;
++ ld8 r25=[r2] // r25 <- jmpbuf.ar_unat
++ extr.u r8=r8,3,6 // r8 <- (&orig_jmpbuf - &jmpbuf)/8 & 0x3f
++ ;;
++ cmp.lt pNeg,pPos=r8,r0
++ mov r2=in0
++ ;;
++(pPos) mov r16=r8
++(pNeg) add r16=64,r8
++(pPos) sub r17=64,r8
++(pNeg) sub r17=r0,r8
++ ;;
++ mov ar.rsc=r11 // put RSE in enforced lazy mode
++ shr.u r8=r25,r16
++ add r3=8,in0 // r3 <- &jmpbuf.r1
++ shl r9=r25,r17
++ ;;
++ or r25=r8,r9
++ ;;
++ mov r26=ar.rnat
++ mov ar.unat=r25 // setup ar.unat (NaT bits for r1, r4-r7, and r12)
++ ;;
++ ld8.fill.nta sp=[r2],16 // r12 (sp)
++ ld8.fill.nta gp=[r3],16 // r1 (gp)
++ dep r11=-1,r23,3,6 // r11 <- ia64_rse_rnat_addr(jmpbuf.ar_bsp)
++ ;;
++ ld8.nta r16=[r2],16 // caller's unat
++ ld8.nta r17=[r3],16 // fpsr
++ ;;
++ ld8.fill.nta r4=[r2],16 // r4
++ ld8.fill.nta r5=[r3],16 // r5 (gp)
++ cmp.geu p8,p0=r10,r11 // p8 <- (ar.bsp >= jmpbuf.ar_bsp)
++ ;;
++ ld8.fill.nta r6=[r2],16 // r6
++ ld8.fill.nta r7=[r3],16 // r7
++ ;;
++ mov ar.unat=r16 // restore caller's unat
++ mov ar.fpsr=r17 // restore fpsr
++ ;;
++ ld8.nta r16=[r2],16 // b0
++ ld8.nta r17=[r3],16 // b1
++ ;;
++(p8) ld8 r26=[r11] // r26 <- *ia64_rse_rnat_addr(jmpbuf.ar_bsp)
++ mov ar.bspstore=r23 // restore ar.bspstore
++ ;;
++ ld8.nta r18=[r2],16 // b2
++ ld8.nta r19=[r3],16 // b3
++ ;;
++ ld8.nta r20=[r2],16 // b4
++ ld8.nta r21=[r3],16 // b5
++ ;;
++ ld8.nta r11=[r2],16 // ar.pfs
++ ld8.nta r22=[r3],56 // ar.lc
++ ;;
++ ld8.nta r24=[r2],32 // pr
++ mov b0=r16
++ ;;
++ ldf.fill.nta f2=[r2],32
++ ldf.fill.nta f3=[r3],32
++ mov b1=r17
++ ;;
++ ldf.fill.nta f4=[r2],32
++ ldf.fill.nta f5=[r3],32
++ mov b2=r18
++ ;;
++ ldf.fill.nta f16=[r2],32
++ ldf.fill.nta f17=[r3],32
++ mov b3=r19
++ ;;
++ ldf.fill.nta f18=[r2],32
++ ldf.fill.nta f19=[r3],32
++ mov b4=r20
++ ;;
++ ldf.fill.nta f20=[r2],32
++ ldf.fill.nta f21=[r3],32
++ mov b5=r21
++ ;;
++ ldf.fill.nta f22=[r2],32
++ ldf.fill.nta f23=[r3],32
++ mov ar.lc=r22
++ ;;
++ ldf.fill.nta f24=[r2],32
++ ldf.fill.nta f25=[r3],32
++ cmp.eq p8,p9=0,in1
++ ;;
++ ldf.fill.nta f26=[r2],32
++ ldf.fill.nta f27=[r3],32
++ mov ar.pfs=r11
++ ;;
++ ldf.fill.nta f28=[r2],32
++ ldf.fill.nta f29=[r3],32
++ ;;
++ ldf.fill.nta f30=[r2]
++ ldf.fill.nta f31=[r3]
++(p8) mov r8=1
++
++ mov ar.rnat=r26 // restore ar.rnat
++ ;;
++ mov ar.rsc=r27 // restore ar.rsc
++(p9) mov r8=in1
++
++ invala // virt. -> phys. regnum mapping may change
++ mov pr=r24,-1
++ br.ret.sptk.few rp
++END(kgdb_fault_longjmp)
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb.c linux-2.6.22-591/arch/ia64/kernel/kgdb.c
+--- linux-2.6.22-570/arch/ia64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/ia64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,944 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * (c) Copyright 2005 Hewlett-Packard Development Company, L.P.
++ * Bob Picco <bob.picco@hp.com>
++ */
++/*
++ * Contributor: Lake Stevens Instrument Division$
++ * Written by: Glenn Engel $
++ * Updated by: Amit Kale<akale@veritas.com>
++ * Modified for 386 by Jim Kingdon, Cygnus Support.
++ * Original kgdb, compatibility with 2.1.xx kernel by David Grothe <dave@gcom.com>
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h> /* for linux pt_regs struct */
++#include <asm/unwind.h>
++#include <asm/rse.h>
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <asm/cacheflush.h>
++#include <asm/kdebug.h>
++
++#define NUM_REGS 590
++#define REGISTER_BYTES (NUM_REGS*8+128*8)
++#define REGISTER_BYTE(N) (((N) * 8) \
++ + ((N) <= IA64_FR0_REGNUM ? \
++ 0 : 8 * (((N) > IA64_FR127_REGNUM) ? 128 : (N) - IA64_FR0_REGNUM)))
++#define REGISTER_SIZE(N) \
++ (((N) >= IA64_FR0_REGNUM && (N) <= IA64_FR127_REGNUM) ? 16 : 8)
++#define IA64_GR0_REGNUM 0
++#define IA64_FR0_REGNUM 128
++#define IA64_FR127_REGNUM (IA64_FR0_REGNUM+127)
++#define IA64_PR0_REGNUM 256
++#define IA64_BR0_REGNUM 320
++#define IA64_VFP_REGNUM 328
++#define IA64_PR_REGNUM 330
++#define IA64_IP_REGNUM 331
++#define IA64_PSR_REGNUM 332
++#define IA64_CFM_REGNUM 333
++#define IA64_AR0_REGNUM 334
++#define IA64_NAT0_REGNUM 462
++#define IA64_NAT31_REGNUM (IA64_NAT0_REGNUM+31)
++#define IA64_NAT32_REGNUM (IA64_NAT0_REGNUM+32)
++#define IA64_RSC_REGNUM (IA64_AR0_REGNUM+16)
++#define IA64_BSP_REGNUM (IA64_AR0_REGNUM+17)
++#define IA64_BSPSTORE_REGNUM (IA64_AR0_REGNUM+18)
++#define IA64_RNAT_REGNUM (IA64_AR0_REGNUM+19)
++#define IA64_FCR_REGNUM (IA64_AR0_REGNUM+21)
++#define IA64_EFLAG_REGNUM (IA64_AR0_REGNUM+24)
++#define IA64_CSD_REGNUM (IA64_AR0_REGNUM+25)
++#define IA64_SSD_REGNUM (IA64_AR0_REGNUM+26)
++#define IA64_CFLG_REGNUM (IA64_AR0_REGNUM+27)
++#define IA64_FSR_REGNUM (IA64_AR0_REGNUM+28)
++#define IA64_FIR_REGNUM (IA64_AR0_REGNUM+29)
++#define IA64_FDR_REGNUM (IA64_AR0_REGNUM+30)
++#define IA64_CCV_REGNUM (IA64_AR0_REGNUM+32)
++#define IA64_UNAT_REGNUM (IA64_AR0_REGNUM+36)
++#define IA64_FPSR_REGNUM (IA64_AR0_REGNUM+40)
++#define IA64_ITC_REGNUM (IA64_AR0_REGNUM+44)
++#define IA64_PFS_REGNUM (IA64_AR0_REGNUM+64)
++#define IA64_LC_REGNUM (IA64_AR0_REGNUM+65)
++#define IA64_EC_REGNUM (IA64_AR0_REGNUM+66)
++
++#define REGISTER_INDEX(N) (REGISTER_BYTE(N) / sizeof (unsigned long))
++#define BREAK_INSTR_ALIGN (~0xfULL)
++
++#define ptoff(V) ((unsigned int) &((struct pt_regs *)0x0)->V)
++struct reg_to_ptreg_index {
++ unsigned int reg;
++ unsigned int ptregoff;
++};
++
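++/* These tables map gdb register numbers to pt_regs offsets for registers
++ * held in the exception frame; registers not listed here are accessed
++ * through the unwinder in the helpers further below. */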
++static struct reg_to_ptreg_index gr_reg_to_ptreg_index[] = {
++ {IA64_GR0_REGNUM + 1, ptoff(r1)},
++ {IA64_GR0_REGNUM + 2, ptoff(r2)},
++ {IA64_GR0_REGNUM + 3, ptoff(r3)},
++ {IA64_GR0_REGNUM + 8, ptoff(r8)},
++ {IA64_GR0_REGNUM + 9, ptoff(r9)},
++ {IA64_GR0_REGNUM + 10, ptoff(r10)},
++ {IA64_GR0_REGNUM + 11, ptoff(r11)},
++ {IA64_GR0_REGNUM + 12, ptoff(r12)},
++ {IA64_GR0_REGNUM + 13, ptoff(r13)},
++ {IA64_GR0_REGNUM + 14, ptoff(r14)},
++ {IA64_GR0_REGNUM + 15, ptoff(r15)},
++ {IA64_GR0_REGNUM + 16, ptoff(r16)},
++ {IA64_GR0_REGNUM + 17, ptoff(r17)},
++ {IA64_GR0_REGNUM + 18, ptoff(r18)},
++ {IA64_GR0_REGNUM + 19, ptoff(r19)},
++ {IA64_GR0_REGNUM + 20, ptoff(r20)},
++ {IA64_GR0_REGNUM + 21, ptoff(r21)},
++ {IA64_GR0_REGNUM + 22, ptoff(r22)},
++ {IA64_GR0_REGNUM + 23, ptoff(r23)},
++ {IA64_GR0_REGNUM + 24, ptoff(r24)},
++ {IA64_GR0_REGNUM + 25, ptoff(r25)},
++ {IA64_GR0_REGNUM + 26, ptoff(r26)},
++ {IA64_GR0_REGNUM + 27, ptoff(r27)},
++ {IA64_GR0_REGNUM + 28, ptoff(r28)},
++ {IA64_GR0_REGNUM + 29, ptoff(r29)},
++ {IA64_GR0_REGNUM + 30, ptoff(r30)},
++ {IA64_GR0_REGNUM + 31, ptoff(r31)},
++};
++
++static struct reg_to_ptreg_index br_reg_to_ptreg_index[] = {
++ {IA64_BR0_REGNUM, ptoff(b0)},
++ {IA64_BR0_REGNUM + 6, ptoff(b6)},
++ {IA64_BR0_REGNUM + 7, ptoff(b7)},
++};
++
++static struct reg_to_ptreg_index ar_reg_to_ptreg_index[] = {
++ {IA64_PFS_REGNUM, ptoff(ar_pfs)},
++ {IA64_UNAT_REGNUM, ptoff(ar_unat)},
++ {IA64_RNAT_REGNUM, ptoff(ar_rnat)},
++ {IA64_BSPSTORE_REGNUM, ptoff(ar_bspstore)},
++ {IA64_RSC_REGNUM, ptoff(ar_rsc)},
++ {IA64_CSD_REGNUM, ptoff(ar_csd)},
++ {IA64_SSD_REGNUM, ptoff(ar_ssd)},
++ {IA64_FPSR_REGNUM, ptoff(ar_fpsr)},
++ {IA64_CCV_REGNUM, ptoff(ar_ccv)},
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int kgdb_gr_reg(int regnum, struct unw_frame_info *info,
++ unsigned long *reg, int rw)
++{
++ char nat;
++
++ if ((regnum >= IA64_GR0_REGNUM && regnum <= (IA64_GR0_REGNUM + 1)) ||
++ (regnum >= (IA64_GR0_REGNUM + 4) &&
++ regnum <= (IA64_GR0_REGNUM + 7)))
++ return !unw_access_gr(info, regnum - IA64_GR0_REGNUM,
++ reg, &nat, rw);
++ else
++ return 0;
++}
++static int kgdb_gr_ptreg(int regnum, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++ int i, result = 1;
++ char nat;
++
++ if (!((regnum >= (IA64_GR0_REGNUM + 2) &&
++ regnum <= (IA64_GR0_REGNUM + 3)) ||
++ (regnum >= (IA64_GR0_REGNUM + 8) &&
++ regnum <= (IA64_GR0_REGNUM + 15)) ||
++ (regnum >= (IA64_GR0_REGNUM + 16) &&
++ regnum <= (IA64_GR0_REGNUM + 31))))
++ return 0;
++ else if (rw && ptregs) {
++ for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++)
++ if (gr_reg_to_ptreg_index[i].reg == regnum) {
++ *((unsigned long *)(((void *)ptregs) +
++ gr_reg_to_ptreg_index[i].ptregoff)) = *reg;
++ break;
++ }
++ } else if (!rw && ptregs) {
++ for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++)
++ if (gr_reg_to_ptreg_index[i].reg == regnum) {
++ *reg = *((unsigned long *)
++ (((void *)ptregs) +
++ gr_reg_to_ptreg_index[i].ptregoff));
++ break;
++ }
++ } else
++ result = !unw_access_gr(info, regnum - IA64_GR0_REGNUM,
++ reg, &nat, rw);
++ return result;
++}
++
++static int kgdb_br_reg(int regnum, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++ int i, result = 1;
++
++ if (!(regnum >= IA64_BR0_REGNUM && regnum <= (IA64_BR0_REGNUM + 7)))
++ return 0;
++
++ switch (regnum) {
++ case IA64_BR0_REGNUM:
++ case IA64_BR0_REGNUM + 6:
++ case IA64_BR0_REGNUM + 7:
++ if (rw) {
++ for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++)
++ if (br_reg_to_ptreg_index[i].reg == regnum) {
++ *((unsigned long *)
++ (((void *)ptregs) +
++ br_reg_to_ptreg_index[i].ptregoff)) =
++ *reg;
++ break;
++ }
++ } else
++ for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++)
++ if (br_reg_to_ptreg_index[i].reg == regnum) {
++ *reg = *((unsigned long *)
++ (((void *)ptregs) +
++ br_reg_to_ptreg_index[i].
++ ptregoff));
++ break;
++ }
++ break;
++ case IA64_BR0_REGNUM + 1:
++ case IA64_BR0_REGNUM + 2:
++ case IA64_BR0_REGNUM + 3:
++ case IA64_BR0_REGNUM + 4:
++ case IA64_BR0_REGNUM + 5:
++ result = !unw_access_br(info, regnum - IA64_BR0_REGNUM,
++ reg, rw);
++ break;
++ }
++
++ return result;
++}
++
++static int kgdb_fr_reg(int regnum, char *inbuffer, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg,
++ struct ia64_fpreg *freg, int rw)
++{
++ int result = 1;
++
++ if (!(regnum >= IA64_FR0_REGNUM && regnum <= (IA64_FR0_REGNUM + 127)))
++ return 0;
++
++ switch (regnum) {
++ case IA64_FR0_REGNUM + 6:
++ case IA64_FR0_REGNUM + 7:
++ case IA64_FR0_REGNUM + 8:
++ case IA64_FR0_REGNUM + 9:
++ case IA64_FR0_REGNUM + 10:
++ case IA64_FR0_REGNUM + 11:
++ case IA64_FR0_REGNUM + 12:
++ if (rw) {
++ char *ptr = inbuffer;
++
++ freg->u.bits[0] = *reg;
++ kgdb_hex2long(&ptr, &freg->u.bits[1]);
++ *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6))) =
++ *freg;
++ break;
++ } else if (!ptregs)
++ result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++ freg, rw);
++ else
++ *freg =
++ *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6)));
++ break;
++ default:
++ if (!rw)
++ result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++ freg, rw);
++ else
++ result = 0;
++ break;
++ }
++
++ return result;
++}
++
++static int kgdb_ar_reg(int regnum, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++ int result = 0, i;
++
++ if (!(regnum >= IA64_AR0_REGNUM && regnum <= IA64_EC_REGNUM))
++ return 0;
++
++ if (rw && ptregs) {
++ for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++ if (ar_reg_to_ptreg_index[i].reg == regnum) {
++ *((unsigned long *) (((void *)ptregs) +
++ ar_reg_to_ptreg_index[i].ptregoff)) =
++ *reg;
++ result = 1;
++ break;
++ }
++ } else if (ptregs) {
++ for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++ if (ar_reg_to_ptreg_index[i].reg == regnum) {
++ *reg = *((unsigned long *) (((void *)ptregs) +
++ ar_reg_to_ptreg_index[i].ptregoff));
++ result = 1;
++ break;
++ }
++ }
++
++ if (result)
++ return result;
++
++ result = 1;
++
++ switch (regnum) {
++ case IA64_CSD_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_CSD, reg, rw);
++ break;
++ case IA64_SSD_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_SSD, reg, rw);
++ break;
++ case IA64_UNAT_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_UNAT, reg, rw);
++ break;
++ case IA64_RNAT_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_RNAT, reg, rw);
++ break;
++ case IA64_BSPSTORE_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_BSPSTORE, reg, rw);
++ break;
++ case IA64_PFS_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_PFS, reg, rw);
++ break;
++ case IA64_LC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_LC, reg, rw);
++ break;
++ case IA64_EC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_EC, reg, rw);
++ break;
++ case IA64_FPSR_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_FPSR, reg, rw);
++ break;
++ case IA64_RSC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_RSC, reg, rw);
++ break;
++ case IA64_CCV_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_CCV, reg, rw);
++ break;
++ default:
++ result = 0;
++ }
++
++ return result;
++}
++
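++/*
++ * Fetch one register for a GDB 'p' request and hex-encode it into
++ * outbuffer; registers that cannot be read reply "E0".
++ */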
++void kgdb_get_reg(char *outbuffer, int regnum, struct unw_frame_info *info,
++ struct pt_regs *ptregs)
++{
++ unsigned long reg, size = 0, *mem = &reg;
++ struct ia64_fpreg freg;
++
++ if (kgdb_gr_reg(regnum, info, &reg, 0) ||
++ kgdb_gr_ptreg(regnum, ptregs, info, &reg, 0) ||
++ kgdb_br_reg(regnum, ptregs, info, &reg, 0) ||
++ kgdb_ar_reg(regnum, ptregs, info, &reg, 0))
++ size = sizeof(reg);
++ else if (kgdb_fr_reg(regnum, NULL, ptregs, info, &reg, &freg, 0)) {
++ size = sizeof(freg);
++ mem = (unsigned long *)&freg;
++ } else if (regnum == IA64_IP_REGNUM) {
++ if (!ptregs) {
++ unw_get_ip(info, &reg);
++ size = sizeof(reg);
++ } else {
++ reg = ptregs->cr_iip;
++ size = sizeof(reg);
++ }
++ } else if (regnum == IA64_CFM_REGNUM) {
++ if (!ptregs)
++ unw_get_cfm(info, &reg);
++ else
++ reg = ptregs->cr_ifs;
++ size = sizeof(reg);
++ } else if (regnum == IA64_PSR_REGNUM) {
++ if (!ptregs && kgdb_usethread)
++ ptregs = (struct pt_regs *)
++ ((unsigned long)kgdb_usethread +
++ IA64_STK_OFFSET) - 1;
++ if (ptregs)
++ reg = ptregs->cr_ipsr;
++ size = sizeof(reg);
++ } else if (regnum == IA64_PR_REGNUM) {
++ if (ptregs)
++ reg = ptregs->pr;
++ else
++ unw_access_pr(info, &reg, 0);
++ size = sizeof(reg);
++ } else if (regnum == IA64_BSP_REGNUM) {
++ unw_get_bsp(info, &reg);
++ size = sizeof(reg);
++ }
++
++ if (size) {
++ kgdb_mem2hex((char *) mem, outbuffer, size);
++ outbuffer[size*2] = 0;
++ } else
++ strcpy(outbuffer, "E0");
++
++ return;
++}
++
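++/*
++ * Decode a GDB 'P' payload and write it to the named register; the
++ * reply is "OK" on success and "E01" for an unwritable register.
++ */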
++void kgdb_put_reg(char *inbuffer, char *outbuffer, int regnum,
++ struct unw_frame_info *info, struct pt_regs *ptregs)
++{
++ unsigned long reg;
++ struct ia64_fpreg freg;
++ char *ptr = inbuffer;
++
++ kgdb_hex2long(&ptr, &reg);
++ strcpy(outbuffer, "OK");
++
++ if (kgdb_gr_reg(regnum, info, &reg, 1) ||
++ kgdb_gr_ptreg(regnum, ptregs, info, &reg, 1) ||
++ kgdb_br_reg(regnum, ptregs, info, &reg, 1) ||
++ kgdb_fr_reg(regnum, inbuffer, ptregs, info, &reg, &freg, 1) ||
++ kgdb_ar_reg(regnum, ptregs, info, &reg, 1)) ;
++ else if (regnum == IA64_IP_REGNUM)
++ ptregs->cr_iip = reg;
++ else if (regnum == IA64_CFM_REGNUM)
++ ptregs->cr_ifs = reg;
++ else if (regnum == IA64_PSR_REGNUM)
++ ptregs->cr_ipsr = reg;
++ else if (regnum == IA64_PR_REGNUM)
++ ptregs->pr = reg;
++ else
++ strcpy(outbuffer, "E01");
++ return;
++}
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++
++}
++
++#define MAX_HW_BREAKPOINT (20)
++long hw_break_total_dbr, hw_break_total_ibr;
++#define HW_BREAKPOINT (hw_break_total_dbr + hw_break_total_ibr)
++#define WATCH_INSTRUCTION 0x0
++#define WATCH_WRITE 0x1
++#define WATCH_READ 0x2
++#define WATCH_ACCESS 0x3
++
++#define HWCAP_DBR ((1 << WATCH_WRITE) | (1 << WATCH_READ))
++#define HWCAP_IBR (1 << WATCH_INSTRUCTION)
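++/*
++ * HWCAP_DBR marks the watch types a data debug register can service
++ * (read/write watchpoints); HWCAP_IBR those an instruction debug
++ * register can (execution breakpoints).
++ */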
++struct hw_breakpoint {
++ unsigned enabled;
++ unsigned long capable;
++ unsigned long type;
++ unsigned long mask;
++ unsigned long addr;
++} *breakinfo;
++
++static struct hw_breakpoint hwbreaks[MAX_HW_BREAKPOINT];
++
++enum instruction_type { A, I, M, F, B, L, X, u };
++
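++/*
++ * An IA-64 bundle is 128 bits: a 5-bit template followed by three
++ * 41-bit instruction slots.  This table decodes the template into the
++ * execution-unit type of each slot, which tells us where a break
++ * instruction may be planted.
++ */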
++static enum instruction_type bundle_encoding[32][3] = {
++ {M, I, I}, /* 00 */
++ {M, I, I}, /* 01 */
++ {M, I, I}, /* 02 */
++ {M, I, I}, /* 03 */
++ {M, L, X}, /* 04 */
++ {M, L, X}, /* 05 */
++ {u, u, u}, /* 06 */
++ {u, u, u}, /* 07 */
++ {M, M, I}, /* 08 */
++ {M, M, I}, /* 09 */
++ {M, M, I}, /* 0A */
++ {M, M, I}, /* 0B */
++ {M, F, I}, /* 0C */
++ {M, F, I}, /* 0D */
++ {M, M, F}, /* 0E */
++ {M, M, F}, /* 0F */
++ {M, I, B}, /* 10 */
++ {M, I, B}, /* 11 */
++ {M, B, B}, /* 12 */
++ {M, B, B}, /* 13 */
++ {u, u, u}, /* 14 */
++ {u, u, u}, /* 15 */
++ {B, B, B}, /* 16 */
++ {B, B, B}, /* 17 */
++ {M, M, B}, /* 18 */
++ {M, M, B}, /* 19 */
++ {u, u, u}, /* 1A */
++ {u, u, u}, /* 1B */
++ {M, F, B}, /* 1C */
++ {M, F, B}, /* 1D */
++ {u, u, u}, /* 1E */
++ {u, u, u}, /* 1F */
++};
++
++int kgdb_validate_break_address(unsigned long addr)
++{
++ int error;
++ char tmp_variable[BREAK_INSTR_SIZE];
++ error = kgdb_get_mem((char *)(addr & BREAK_INSTR_ALIGN), tmp_variable,
++ BREAK_INSTR_SIZE);
++ return error;
++}
++
++int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
++{
++ extern unsigned long _start[];
++ unsigned long slot = addr & ~BREAK_INSTR_ALIGN, bundle_addr;
++ unsigned long template;
++ struct bundle {
++ struct {
++ unsigned long long template:5;
++ unsigned long long slot0:41;
++ unsigned long long slot1_p0:64 - 46;
++ } quad0;
++ struct {
++ unsigned long long slot1_p1:41 - (64 - 46);
++ unsigned long long slot2:41;
++ } quad1;
++ } bundle;
++ int ret;
++
++ bundle_addr = addr & ~0xFULL;
++
++ if (bundle_addr == (unsigned long)_start)
++ return 0;
++
++ ret = kgdb_get_mem((char *)bundle_addr, (char *)&bundle,
++ BREAK_INSTR_SIZE);
++ if (ret < 0)
++ return ret;
++
++ if (slot > 2)
++ slot = 0;
++
++ memcpy(saved_instr, &bundle, BREAK_INSTR_SIZE);
++ template = bundle.quad0.template;
++
++ if (slot == 1 && bundle_encoding[template][1] == L)
++ slot = 2;
++
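++ /*
++ * Plant the break opcode in the chosen 41-bit slot.  Slot 1 straddles
++ * the two 64-bit words of the bundle and is written in two pieces; an
++ * L-unit in slot 1 means a long-format (MLX) bundle, whose break must
++ * go in slot 2.
++ */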
++ switch (slot) {
++ case 0:
++ bundle.quad0.slot0 = BREAKNUM;
++ break;
++ case 1:
++ bundle.quad0.slot1_p0 = BREAKNUM;
++ bundle.quad1.slot1_p1 = (BREAKNUM >> (64 - 46));
++ break;
++ case 2:
++ bundle.quad1.slot2 = BREAKNUM;
++ break;
++ }
++
++ return kgdb_set_mem((char *)bundle_addr, (char *)&bundle,
++ BREAK_INSTR_SIZE);
++}
++
++int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
++{
++ extern unsigned long _start[];
++
++ addr = addr & BREAK_INSTR_ALIGN;
++ if (addr == (unsigned long)_start)
++ return 0;
++ return kgdb_set_mem((char *)addr, (char *)bundle, BREAK_INSTR_SIZE);
++}
++
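++/*
++ * Per-CPU unwind state captured while the CPUs are rounded up.  The
++ * special value (struct unw_frame_info *)1 marks a CPU with no usable
++ * unwind info (stopped in user mode, or the blocked-task unwind
++ * failed).
++ */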
++static volatile struct smp_unw {
++ struct unw_frame_info *unw;
++ struct task_struct *task;
++} smp_unw[NR_CPUS];
++
++static inline int kgdb_get_blocked_state(struct task_struct *p,
++ struct unw_frame_info *unw)
++{
++ unsigned long ip;
++ int count = 0;
++
++ unw_init_from_blocked_task(unw, p);
++ ip = 0UL;
++ do {
++ if (unw_unwind(unw) < 0)
++ return -1;
++ unw_get_ip(unw, &ip);
++ if (!in_sched_functions(ip))
++ break;
++ } while (count++ < 16);
++
++ if (!ip)
++ return -1;
++ else
++ return 0;
++}
++
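++/*
++ * Park this CPU in the debugger.  psr.db is cleared on the way in so
++ * the stub cannot trip a hardware breakpoint itself, and restored on
++ * the way out.
++ */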
++static inline void kgdb_wait(struct pt_regs *regs)
++{
++ unsigned long hw_breakpoint_status = ia64_getreg(_IA64_REG_PSR);
++ if (hw_breakpoint_status & IA64_PSR_DB)
++ ia64_setreg(_IA64_REG_PSR_L,
++ hw_breakpoint_status ^ IA64_PSR_DB);
++ kgdb_nmihook(smp_processor_id(), regs);
++ if (hw_breakpoint_status & IA64_PSR_DB)
++ ia64_setreg(_IA64_REG_PSR_L, hw_breakpoint_status);
++
++ return;
++}
++
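++/*
++ * Unwind a running task until the frame that owns the exception
++ * pt_regs is reached, so register accesses start from the trap point.
++ */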
++static inline void normalize(struct unw_frame_info *running,
++ struct pt_regs *regs)
++{
++ unsigned long sp;
++
++ do {
++ unw_get_sp(running, &sp);
++ if ((sp + 0x10) >= (unsigned long)regs)
++ break;
++ } while (unw_unwind(running) >= 0);
++
++ return;
++}
++
++static void kgdb_init_running(struct unw_frame_info *unw, void *data)
++{
++ struct pt_regs *regs;
++
++ regs = data;
++ normalize(unw, regs);
++ smp_unw[smp_processor_id()].unw = unw;
++ kgdb_wait(regs);
++}
++
++void kgdb_wait_ipi(struct pt_regs *regs)
++{
++ struct unw_frame_info unw;
++
++ smp_unw[smp_processor_id()].task = current;
++
++ if (user_mode(regs)) {
++ smp_unw[smp_processor_id()].unw = (struct unw_frame_info *)1;
++ kgdb_wait(regs);
++ } else {
++ if (current->state == TASK_RUNNING)
++ unw_init_running(kgdb_init_running, regs);
++ else {
++ if (kgdb_get_blocked_state(current, &unw))
++ smp_unw[smp_processor_id()].unw =
++ (struct unw_frame_info *)1;
++ else
++ smp_unw[smp_processor_id()].unw = &unw;
++ kgdb_wait(regs);
++ }
++ }
++
++ smp_unw[smp_processor_id()].unw = NULL;
++ return;
++}
++
++void kgdb_roundup_cpus(unsigned long flags)
++{
++ if (num_online_cpus() > 1)
++ smp_send_nmi_allbutself();
++}
++
++static volatile int kgdb_hwbreak_sstep[NR_CPUS];
++
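++/*
++ * Die-notifier entry point: filter out events kgdb does not own
++ * (user-mode traps and breaks it did not plant) and hand the rest to
++ * the generic kgdb exception handler.
++ */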
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ struct die_args *args = ptr;
++ struct pt_regs *regs = args->regs;
++ unsigned long err = args->err;
++
++ switch (cmd) {
++ default:
++ return NOTIFY_DONE;
++ case DIE_PAGE_FAULT_NO_CONTEXT:
++ if (atomic_read(&debugger_active) && kgdb_may_fault) {
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ return NOTIFY_STOP;
++ }
++ break;
++ case DIE_BREAK:
++ if (user_mode(regs) || err == 0x80001)
++ return NOTIFY_DONE;
++ break;
++ case DIE_FAULT:
++ if (user_mode(regs))
++ return NOTIFY_DONE;
++ else if (err == 36 && kgdb_hwbreak_sstep[smp_processor_id()]) {
++ kgdb_hwbreak_sstep[smp_processor_id()] = 0;
++ regs->cr_ipsr &= ~IA64_PSR_SS;
++ return NOTIFY_STOP;
++ }
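++ /* else fall through and let kgdb_handle_exception() take over */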
++ case DIE_MCA_MONARCH_PROCESS:
++ case DIE_INIT_MONARCH_PROCESS:
++ break;
++ }
++
++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++ return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++ .notifier_call = kgdb_notify,
++};
++
++int kgdb_arch_init(void)
++{
++ atomic_notifier_chain_register(&ia64die_chain, &kgdb_notifier);
++ return 0;
++}
++
++static void do_kgdb_handle_exception(struct unw_frame_info *, void *data);
++
++struct kgdb_state {
++ int e_vector;
++ int signo;
++ unsigned long err_code;
++ struct pt_regs *regs;
++ struct unw_frame_info *unw;
++ char *inbuf;
++ char *outbuf;
++ int unwind;
++ int ret;
++};
++
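++/* Split a GDB pc value into bundle address (cr_iip) and slot (psr.ri). */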
++static inline void kgdb_pc(struct pt_regs *regs, unsigned long pc)
++{
++ regs->cr_iip = pc & ~0xf;
++ ia64_psr(regs)->ri = pc & 0x3;
++ return;
++}
++
++int kgdb_arch_handle_exception(int e_vector, int signo,
++ int err_code, char *remcom_in_buffer,
++ char *remcom_out_buffer,
++ struct pt_regs *linux_regs)
++{
++ struct kgdb_state info;
++
++ info.e_vector = e_vector;
++ info.signo = signo;
++ info.err_code = err_code;
++ info.unw = (void *)0;
++ info.inbuf = remcom_in_buffer;
++ info.outbuf = remcom_out_buffer;
++ info.unwind = 0;
++ info.ret = -1;
++
++ if (remcom_in_buffer[0] == 'c' || remcom_in_buffer[0] == 's') {
++ info.regs = linux_regs;
++ do_kgdb_handle_exception(NULL, &info);
++ } else if (kgdb_usethread == current) {
++ info.regs = linux_regs;
++ info.unwind = 1;
++ unw_init_running(do_kgdb_handle_exception, &info);
++ } else if (kgdb_usethread->state != TASK_RUNNING) {
++ struct unw_frame_info unw_info;
++
++ if (kgdb_get_blocked_state(kgdb_usethread, &unw_info)) {
++ info.ret = 1;
++ goto bad;
++ }
++ info.regs = NULL;
++ do_kgdb_handle_exception(&unw_info, &info);
++ } else {
++ int i;
++
++ for (i = 0; i < NR_CPUS; i++)
++ if (smp_unw[i].task == kgdb_usethread && smp_unw[i].unw
++ && smp_unw[i].unw != (struct unw_frame_info *)1) {
++ info.regs = NULL;
++ do_kgdb_handle_exception(smp_unw[i].unw, &info);
++ break;
++ } else {
++ info.ret = 1;
++ goto bad;
++ }
++ }
++
++ bad:
++ if (info.ret != -1 && remcom_in_buffer[0] == 'p') {
++ unsigned long bad = 0xbad4badbadbadbadUL;
++
++ printk("kgdb_arch_handle_exception: p packet bad (%s)\n",
++ remcom_in_buffer);
++ kgdb_mem2hex((char *)&bad, remcom_out_buffer, sizeof(bad));
++ remcom_out_buffer[sizeof(bad) * 2] = 0;
++ info.ret = -1;
++ }
++ return info.ret;
++}
++
++/*
++ * This is done because I evidently made an incorrect 'p' encoding
++ * when my patch for gdb was committed. It was later corrected. This
++ * check supports both my wrong encoding of the register number and
++ * the correct encoding. Eventually this should be eliminated and
++ * kgdb_hex2long should be demarshalling the regnum.
++ */
++static inline int check_packet(unsigned int regnum, char *packet)
++{
++ static int check_done, swap;
++ unsigned long reglong;
++
++ if (likely(check_done)) {
++ if (swap) {
++ kgdb_hex2long(&packet, &reglong);
++ regnum = (int) reglong;
++ }
++
++ } else {
++ if (regnum > NUM_REGS) {
++ kgdb_hex2long(&packet, &reglong);
++ regnum = (int) reglong;
++ swap = 1;
++ }
++ check_done = 1;
++ }
++ return regnum;
++}
++
++static void do_kgdb_handle_exception(struct unw_frame_info *unw_info,
++ void *data)
++{
++ long addr;
++ char *ptr;
++ unsigned long newPC;
++ int e_vector, signo;
++ unsigned long err_code;
++ struct pt_regs *linux_regs;
++ struct kgdb_state *info;
++ char *remcom_in_buffer, *remcom_out_buffer;
++
++ info = data;
++ info->unw = unw_info;
++ e_vector = info->e_vector;
++ signo = info->signo;
++ err_code = info->err_code;
++ remcom_in_buffer = info->inbuf;
++ remcom_out_buffer = info->outbuf;
++ linux_regs = info->regs;
++
++ if (info->unwind)
++ normalize(unw_info, linux_regs);
++
++ switch (remcom_in_buffer[0]) {
++ case 'p':
++ {
++ unsigned int regnum;
++
++ kgdb_hex2mem(&remcom_in_buffer[1], (char *)&regnum,
++ sizeof(regnum));
++ regnum = check_packet(regnum, &remcom_in_buffer[1]);
++ if (regnum >= NUM_REGS) {
++ remcom_out_buffer[0] = 'E';
++ remcom_out_buffer[1] = 0;
++ } else
++ kgdb_get_reg(remcom_out_buffer, regnum,
++ unw_info, linux_regs);
++ break;
++ }
++ case 'P':
++ {
++ unsigned int regno;
++ long v;
++ char *ptr;
++
++ ptr = &remcom_in_buffer[1];
++ if ((!kgdb_usethread || kgdb_usethread == current) &&
++ kgdb_hex2long(&ptr, &v) &&
++ *ptr++ == '=' && (v >= 0)) {
++ regno = (unsigned int)v;
++ regno = (regno >= NUM_REGS ? 0 : regno);
++ kgdb_put_reg(ptr, remcom_out_buffer, regno,
++ unw_info, linux_regs);
++ } else
++ strcpy(remcom_out_buffer, "E01");
++ break;
++ }
++ case 'c':
++ case 's':
++ if (e_vector == TRAP_BRKPT && err_code == KGDBBREAKNUM) {
++ if (ia64_psr(linux_regs)->ri < 2)
++ kgdb_pc(linux_regs, linux_regs->cr_iip +
++ ia64_psr(linux_regs)->ri + 1);
++ else
++ kgdb_pc(linux_regs, linux_regs->cr_iip + 16);
++ }
++
++ /* try to read optional parameter, pc unchanged if no parm */
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &addr)) {
++ linux_regs->cr_iip = addr;
++ }
++ newPC = linux_regs->cr_iip;
++
++ /* clear the trace bit */
++ linux_regs->cr_ipsr &= ~IA64_PSR_SS;
++
++ atomic_set(&cpu_doing_single_step, -1);
++
++ /* set the trace bit if we're stepping or took a hardware break */
++ if (remcom_in_buffer[0] == 's' || e_vector == TRAP_HWBKPT) {
++ linux_regs->cr_ipsr |= IA64_PSR_SS;
++ debugger_step = 1;
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ smp_processor_id());
++ }
++
++ kgdb_correct_hw_break();
++
++ /* if this was not a hardware breakpoint, re-enable hardware breakpoints */
++ if (e_vector != TRAP_HWBKPT)
++ linux_regs->cr_ipsr |= IA64_PSR_DB;
++ else {
++ kgdb_hwbreak_sstep[smp_processor_id()] = 1;
++ linux_regs->cr_ipsr &= ~IA64_PSR_DB;
++ }
++
++ info->ret = 0;
++ break;
++ default:
++ break;
++ }
++
++ return;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++ .gdb_bpt_instr = {0xcc},
++};
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/smp.c linux-2.6.22-591/arch/ia64/kernel/smp.c
+--- linux-2.6.22-570/arch/ia64/kernel/smp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ia64/kernel/smp.c 2007-12-21 15:36:11.000000000 -0500
+@@ -48,6 +48,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/unistd.h>
+ #include <asm/mca.h>
++#include <linux/kgdb.h>
+
+ /*
+ * Note: alignment of 4 entries/cacheline was empirically determined
+@@ -79,6 +80,9 @@
+
+ #define IPI_CALL_FUNC 0
+ #define IPI_CPU_STOP 1
++#ifdef CONFIG_KGDB
++#define IPI_KGDB_INTERRUPT 2
++#endif
+ #define IPI_KDUMP_CPU_STOP 3
+
+ /* This needs to be cacheline aligned because it is written to by *other* CPUs. */
+@@ -169,6 +173,11 @@
+ case IPI_CPU_STOP:
+ stop_this_cpu();
+ break;
++#ifdef CONFIG_KGDB
++ case IPI_KGDB_INTERRUPT:
++ kgdb_wait_ipi(get_irq_regs());
++ break;
++#endif
+ #ifdef CONFIG_KEXEC
+ case IPI_KDUMP_CPU_STOP:
+ unw_init_running(kdump_cpu_freeze, NULL);
+@@ -399,6 +408,14 @@
+ }
+ EXPORT_SYMBOL(smp_call_function_single);
+
++#ifdef CONFIG_KGDB
++void
++smp_send_nmi_allbutself(void)
++{
++ send_IPI_allbutself(IPI_KGDB_INTERRUPT);
++}
++#endif
++
+ /*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/traps.c linux-2.6.22-591/arch/ia64/kernel/traps.c
+--- linux-2.6.22-570/arch/ia64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/ia64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -155,8 +155,12 @@
+ break;
+
+ default:
+- if (break_num < 0x40000 || break_num > 0x100000)
++ if (break_num < 0x40000 || break_num > 0x100000) {
++ if (notify_die(DIE_BREAK, "bad break", regs,
++ break_num, TRAP_BRKPT, SIGTRAP) == NOTIFY_STOP)
++ return;
+ die_if_kernel("Bad break", regs, break_num);
++ }
+
+ if (break_num < 0x80000) {
+ sig = SIGILL; code = __ILL_BREAK;
+diff -Nurb linux-2.6.22-570/arch/ia64/mm/extable.c linux-2.6.22-591/arch/ia64/mm/extable.c
+--- linux-2.6.22-570/arch/ia64/mm/extable.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ia64/mm/extable.c 2007-12-21 15:36:11.000000000 -0500
+@@ -6,6 +6,7 @@
+ */
+
+ #include <linux/sort.h>
++#include <linux/kgdb.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/module.h>
+@@ -73,6 +74,11 @@
+ else
+ last = mid - 1;
+ }
++#ifdef CONFIG_KGDB
++ if (atomic_read(&debugger_active) && kgdb_may_fault)
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Not reached. */
++#endif
+ return NULL;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/ia64/mm/fault.c linux-2.6.22-591/arch/ia64/mm/fault.c
+--- linux-2.6.22-570/arch/ia64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/ia64/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -255,6 +255,10 @@
+ */
+ bust_spinlocks(1);
+
++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++ isr, 14, SIGSEGV) == NOTIFY_STOP)
++ return;
++
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
+ else
+diff -Nurb linux-2.6.22-570/arch/mips/Kconfig linux-2.6.22-591/arch/mips/Kconfig
+--- linux-2.6.22-570/arch/mips/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/mips/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -30,7 +30,6 @@
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ help
+ The eXcite is a smart camera platform manufactured by
+ Basler Vision Technologies AG.
+@@ -98,7 +97,6 @@
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ help
+ This is an evaluation board based on the Galileo GT-64120
+ single-chip system controller that contains a MIPS R5000 compatible
+@@ -269,7 +267,6 @@
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ help
+ The Ocelot is a MIPS-based Single Board Computer (SBC) made by
+ Momentum Computer <http://www.momenco.com/>.
+@@ -331,8 +328,6 @@
+ select SYS_HAS_CPU_R5432
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL
+- select SYS_SUPPORTS_KGDB
+- select SYS_SUPPORTS_KGDB
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ help
+ This enables support for the R5432-based NEC DDB Vrc-5477,
+@@ -360,7 +355,6 @@
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_SUPPORTS_HIGHMEM
+- select SYS_SUPPORTS_KGDB
+ select SYS_SUPPORTS_SMP
+ help
+ Yosemite is an evaluation board for the RM9000x2 processor
+@@ -440,7 +434,6 @@
+ select SYS_HAS_CPU_R10000
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ select SYS_SUPPORTS_NUMA
+ select SYS_SUPPORTS_SMP
+ select GENERIC_HARDIRQS_NO__DO_IRQ
+@@ -490,7 +483,6 @@
+ select SYS_HAS_CPU_SB1
+ select SYS_SUPPORTS_BIG_ENDIAN
+ select SYS_SUPPORTS_HIGHMEM
+- select SYS_SUPPORTS_KGDB
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+
+ config SIBYTE_SENTOSA
+@@ -631,7 +623,6 @@
+ select SYS_SUPPORTS_64BIT_KERNEL
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ select GENERIC_HARDIRQS_NO__DO_IRQ
+ help
+ This Toshiba board is based on the TX4927 processor. Say Y here to
+@@ -650,7 +641,6 @@
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select SYS_SUPPORTS_BIG_ENDIAN
+- select SYS_SUPPORTS_KGDB
+ select GENERIC_HARDIRQS_NO__DO_IRQ
+ help
+ This Toshiba board is based on the TX4938 processor. Say Y here to
+@@ -826,7 +816,6 @@
+
+ config DDB5XXX_COMMON
+ bool
+- select SYS_SUPPORTS_KGDB
+
+ config MIPS_BOARDS_GEN
+ bool
+@@ -862,7 +851,6 @@
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select GENERIC_HARDIRQS_NO__DO_IRQ
+- select SYS_SUPPORTS_KGDB
+
+ config SWAP_IO_SPACE
+ bool
+diff -Nurb linux-2.6.22-570/arch/mips/Kconfig.debug linux-2.6.22-591/arch/mips/Kconfig.debug
+--- linux-2.6.22-570/arch/mips/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -46,28 +46,6 @@
+ arch/mips/kernel/smtc.c. This debugging option result in significant
+ overhead so should be disabled in production kernels.
+
+-config KGDB
+- bool "Remote GDB kernel debugging"
+- depends on DEBUG_KERNEL && SYS_SUPPORTS_KGDB
+- select DEBUG_INFO
+- help
+- If you say Y here, it will be possible to remotely debug the MIPS
+- kernel using gdb. This enlarges your kernel image disk size by
+- several megabytes and requires a machine with more than 16 MB,
+- better 32 MB RAM to avoid excessive linking time. This is only
+- useful for kernel hackers. If unsure, say N.
+-
+-config SYS_SUPPORTS_KGDB
+- bool
+-
+-config GDB_CONSOLE
+- bool "Console output to GDB"
+- depends on KGDB
+- help
+- If you are using GDB for remote debugging over a serial port and
+- would like kernel messages to be formatted into GDB $O packets so
+- that GDB prints them as program output, say 'Y'.
+-
+ config SB1XXX_CORELIS
+ bool "Corelis Debugger"
+ depends on SIBYTE_SB1xxx_SOC
+diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/Makefile linux-2.6.22-591/arch/mips/au1000/common/Makefile
+--- linux-2.6.22-570/arch/mips/au1000/common/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/au1000/common/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -10,5 +10,4 @@
+ au1xxx_irqmap.o clocks.o platform.o power.o setup.o \
+ sleeper.o cputable.o dma.o dbdma.o gpio.o
+
+-obj-$(CONFIG_KGDB) += dbg_io.o
+ obj-$(CONFIG_PCI) += pci.o
+diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c
+--- linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/io.h>
+-#include <asm/mach-au1x00/au1000.h>
+-
+-#ifdef CONFIG_KGDB
+-
+-/*
+- * FIXME the user should be able to select the
+- * uart to be used for debugging.
+- */
+-#define DEBUG_BASE UART_DEBUG_BASE
+-/**/
+-
+-/* we need uint32 uint8 */
+-/* #include "types.h" */
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-#define UART16550_BAUD_2400 2400
+-#define UART16550_BAUD_4800 4800
+-#define UART16550_BAUD_9600 9600
+-#define UART16550_BAUD_19200 19200
+-#define UART16550_BAUD_38400 38400
+-#define UART16550_BAUD_57600 57600
+-#define UART16550_BAUD_115200 115200
+-
+-#define UART16550_PARITY_NONE 0
+-#define UART16550_PARITY_ODD 0x08
+-#define UART16550_PARITY_EVEN 0x18
+-#define UART16550_PARITY_MARK 0x28
+-#define UART16550_PARITY_SPACE 0x38
+-
+-#define UART16550_DATA_5BIT 0x0
+-#define UART16550_DATA_6BIT 0x1
+-#define UART16550_DATA_7BIT 0x2
+-#define UART16550_DATA_8BIT 0x3
+-
+-#define UART16550_STOP_1BIT 0x0
+-#define UART16550_STOP_2BIT 0x4
+-
+-
+-#define UART_RX 0 /* Receive buffer */
+-#define UART_TX 4 /* Transmit buffer */
+-#define UART_IER 8 /* Interrupt Enable Register */
+-#define UART_IIR 0xC /* Interrupt ID Register */
+-#define UART_FCR 0x10 /* FIFO Control Register */
+-#define UART_LCR 0x14 /* Line Control Register */
+-#define UART_MCR 0x18 /* Modem Control Register */
+-#define UART_LSR 0x1C /* Line Status Register */
+-#define UART_MSR 0x20 /* Modem Status Register */
+-#define UART_CLK 0x28 /* Baud Rat4e Clock Divider */
+-#define UART_MOD_CNTRL 0x100 /* Module Control */
+-
+-/* memory-mapped read/write of the port */
+-#define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff)
+-#define UART16550_WRITE(y,z) (au_writel(z&0xff, DEBUG_BASE + y))
+-
+-extern unsigned long get_au1x00_uart_baud_base(void);
+-extern unsigned long cal_r4koff(void);
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+-
+- if (UART16550_READ(UART_MOD_CNTRL) != 0x3) {
+- UART16550_WRITE(UART_MOD_CNTRL, 3);
+- }
+- cal_r4koff();
+-
+- /* disable interrupts */
+- UART16550_WRITE(UART_IER, 0);
+-
+- /* set up baud rate */
+- {
+- uint32 divisor;
+-
+- /* set divisor */
+- divisor = get_au1x00_uart_baud_base() / baud;
+- UART16550_WRITE(UART_CLK, divisor & 0xffff);
+- }
+-
+- /* set data format */
+- UART16550_WRITE(UART_LCR, (data | parity | stop));
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_115200,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE,
+- UART16550_STOP_1BIT);
+- }
+-
+- while((UART16550_READ(UART_LSR) & 0x1) == 0);
+- return UART16550_READ(UART_RX);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+-// int i;
+-
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_115200,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE,
+- UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(UART_LSR)&0x40) == 0);
+- UART16550_WRITE(UART_TX, byte);
+- //for (i=0;i<0xfff;i++);
+-
+- return 1;
+-}
+-
+-#endif
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/Makefile linux-2.6.22-591/arch/mips/basler/excite/Makefile
+--- linux-2.6.22-570/arch/mips/basler/excite/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/basler/excite/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -5,5 +5,4 @@
+ obj-$(CONFIG_BASLER_EXCITE) += excite_irq.o excite_prom.o excite_setup.o \
+ excite_device.o excite_procfs.o
+
+-obj-$(CONFIG_KGDB) += excite_dbg_io.o
+ obj-m += excite_iodev.o
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-/*
+- * Copyright (C) 2004 by Basler Vision Technologies AG
+- * Author: Thomas Koeller <thomas.koeller@baslerweb.com>
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2 of the License, or
+- * (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+- * GNU General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/init.h>
+-#include <linux/kernel.h>
+-#include <asm/gdb-stub.h>
+-#include <asm/rm9k-ocd.h>
+-#include <excite.h>
+-
+-#if defined(CONFIG_SERIAL_8250) && CONFIG_SERIAL_8250_NR_UARTS > 1
+-#error Debug port used by serial driver
+-#endif
+-
+-#define UART_CLK 25000000
+-#define BASE_BAUD (UART_CLK / 16)
+-#define REGISTER_BASE_0 0x0208UL
+-#define REGISTER_BASE_1 0x0238UL
+-
+-#define REGISTER_BASE_DBG REGISTER_BASE_1
+-
+-#define CPRR 0x0004
+-#define UACFG 0x0200
+-#define UAINTS 0x0204
+-#define UARBR (REGISTER_BASE_DBG + 0x0000)
+-#define UATHR (REGISTER_BASE_DBG + 0x0004)
+-#define UADLL (REGISTER_BASE_DBG + 0x0008)
+-#define UAIER (REGISTER_BASE_DBG + 0x000c)
+-#define UADLH (REGISTER_BASE_DBG + 0x0010)
+-#define UAIIR (REGISTER_BASE_DBG + 0x0014)
+-#define UAFCR (REGISTER_BASE_DBG + 0x0018)
+-#define UALCR (REGISTER_BASE_DBG + 0x001c)
+-#define UAMCR (REGISTER_BASE_DBG + 0x0020)
+-#define UALSR (REGISTER_BASE_DBG + 0x0024)
+-#define UAMSR (REGISTER_BASE_DBG + 0x0028)
+-#define UASCR (REGISTER_BASE_DBG + 0x002c)
+-
+-#define PARITY_NONE 0
+-#define PARITY_ODD 0x08
+-#define PARITY_EVEN 0x18
+-#define PARITY_MARK 0x28
+-#define PARITY_SPACE 0x38
+-
+-#define DATA_5BIT 0x0
+-#define DATA_6BIT 0x1
+-#define DATA_7BIT 0x2
+-#define DATA_8BIT 0x3
+-
+-#define STOP_1BIT 0x0
+-#define STOP_2BIT 0x4
+-
+-#define BAUD_DBG 57600
+-#define PARITY_DBG PARITY_NONE
+-#define DATA_DBG DATA_8BIT
+-#define STOP_DBG STOP_1BIT
+-
+-/* Initialize the serial port for KGDB debugging */
+-void __init excite_kgdb_init(void)
+-{
+- const u32 divisor = BASE_BAUD / BAUD_DBG;
+-
+- /* Take the UART out of reset */
+- titan_writel(0x00ff1cff, CPRR);
+- titan_writel(0x00000000, UACFG);
+- titan_writel(0x00000002, UACFG);
+-
+- titan_writel(0x0, UALCR);
+- titan_writel(0x0, UAIER);
+-
+- /* Disable FIFOs */
+- titan_writel(0x00, UAFCR);
+-
+- titan_writel(0x80, UALCR);
+- titan_writel(divisor & 0xff, UADLL);
+- titan_writel((divisor & 0xff00) >> 8, UADLH);
+- titan_writel(0x0, UALCR);
+-
+- titan_writel(DATA_DBG | PARITY_DBG | STOP_DBG, UALCR);
+-
+- /* Enable receiver interrupt */
+- titan_readl(UARBR);
+- titan_writel(0x1, UAIER);
+-}
+-
+-int getDebugChar(void)
+-{
+- while (!(titan_readl(UALSR) & 0x1));
+- return titan_readl(UARBR);
+-}
+-
+-int putDebugChar(int data)
+-{
+- while (!(titan_readl(UALSR) & 0x20));
+- titan_writel(data, UATHR);
+- return 1;
+-}
+-
+-/* KGDB interrupt handler */
+-asmlinkage void excite_kgdb_inthdl(void)
+-{
+- if (unlikely(
+- ((titan_readl(UAIIR) & 0x7) == 4)
+- && ((titan_readl(UARBR) & 0xff) == 0x3)))
+- set_async_breakpoint(&regs->cp0_epc);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -50,10 +50,6 @@
+ mips_cpu_irq_init();
+ rm7k_cpu_irq_init();
+ rm9k_cpu_irq_init();
+-
+-#ifdef CONFIG_KGDB
+- excite_kgdb_init();
+-#endif
+ }
+
+ asmlinkage void plat_irq_dispatch(void)
+@@ -90,9 +86,6 @@
+ msgint = msgintflags & msgintmask & (0x1 << (TITAN_MSGINT % 0x20));
+ if ((pending & (1 << TITAN_IRQ)) && msgint) {
+ ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10));
+-#if defined(CONFIG_KGDB)
+- excite_kgdb_inthdl();
+-#endif
+ do_IRQ(TITAN_IRQ);
+ return;
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -95,13 +95,13 @@
+ /* Take the DUART out of reset */
+ titan_writel(0x00ff1cff, CPRR);
+
+-#if defined(CONFIG_KGDB) || (CONFIG_SERIAL_8250_NR_UARTS > 1)
++#if (CONFIG_SERIAL_8250_NR_UARTS > 1)
+ /* Enable both ports */
+ titan_writel(MASK_SER0 | MASK_SER1, UACFG);
+ #else
+ /* Enable port #0 only */
+ titan_writel(MASK_SER0, UACFG);
+-#endif /* defined(CONFIG_KGDB) */
++#endif
+
+ /*
+ * Set up serial port #0. Do not use autodetection; the result is
+diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile
+--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -5,4 +5,3 @@
+ obj-y += irq.o irq_5477.o setup.o lcd44780.o
+
+ obj-$(CONFIG_RUNTIME_DEBUG) += debug.o
+-obj-$(CONFIG_KGDB) += kgdb_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c
+--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,136 +0,0 @@
+-/*
+- * kgdb io functions for DDB5477. We use the second serial port (upper one).
+- *
+- * Copyright (C) 2001 MontaVista Software Inc.
+- * Author: jsun@mvista.com or jsun@junsun.net
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- */
+-
+-/* ======================= CONFIG ======================== */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define BASE 0xbfa04240
+-#define MAX_BAUD 115200
+-
+-/* distance in bytes between two serial registers */
+-#define REG_OFFSET 8
+-
+-/*
+- * 0 - kgdb does serial init
+- * 1 - kgdb skip serial init
+- */
+-static int remoteDebugInitialized = 0;
+-
+-/*
+- * the default baud rate *if* kgdb does serial init
+- */
+-#define BAUD_DEFAULT UART16550_BAUD_38400
+-
+-/* ======================= END OF CONFIG ======================== */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-#define UART16550_BAUD_2400 2400
+-#define UART16550_BAUD_4800 4800
+-#define UART16550_BAUD_9600 9600
+-#define UART16550_BAUD_19200 19200
+-#define UART16550_BAUD_38400 38400
+-#define UART16550_BAUD_57600 57600
+-#define UART16550_BAUD_115200 115200
+-
+-#define UART16550_PARITY_NONE 0
+-#define UART16550_PARITY_ODD 0x08
+-#define UART16550_PARITY_EVEN 0x18
+-#define UART16550_PARITY_MARK 0x28
+-#define UART16550_PARITY_SPACE 0x38
+-
+-#define UART16550_DATA_5BIT 0x0
+-#define UART16550_DATA_6BIT 0x1
+-#define UART16550_DATA_7BIT 0x2
+-#define UART16550_DATA_8BIT 0x3
+-
+-#define UART16550_STOP_1BIT 0x0
+-#define UART16550_STOP_2BIT 0x4
+-
+-/* register offset */
+-#define OFS_RCV_BUFFER 0
+-#define OFS_TRANS_HOLD 0
+-#define OFS_SEND_BUFFER 0
+-#define OFS_INTR_ENABLE (1*REG_OFFSET)
+-#define OFS_INTR_ID (2*REG_OFFSET)
+-#define OFS_DATA_FORMAT (3*REG_OFFSET)
+-#define OFS_LINE_CONTROL (3*REG_OFFSET)
+-#define OFS_MODEM_CONTROL (4*REG_OFFSET)
+-#define OFS_RS232_OUTPUT (4*REG_OFFSET)
+-#define OFS_LINE_STATUS (5*REG_OFFSET)
+-#define OFS_MODEM_STATUS (6*REG_OFFSET)
+-#define OFS_RS232_INPUT (6*REG_OFFSET)
+-#define OFS_SCRATCH_PAD (7*REG_OFFSET)
+-
+-#define OFS_DIVISOR_LSB (0*REG_OFFSET)
+-#define OFS_DIVISOR_MSB (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y)))
+-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+- /* disable interrupts */
+- UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+- /* set up baud rate */
+- {
+- uint32 divisor;
+-
+- /* set DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+- /* set divisor */
+- divisor = MAX_BAUD / baud;
+- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+- /* clear DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+- }
+-
+- /* set data format */
+- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-
+-uint8 getDebugChar(void)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(BAUD_DEFAULT,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+- return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(BAUD_DEFAULT,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+- UART16550_WRITE(OFS_SEND_BUFFER, byte);
+- return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile
+--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -3,5 +3,3 @@
+ #
+
+ obj-y += irq.o prom.o reset.o setup.o
+-
+-obj-$(CONFIG_KGDB) += dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c
+--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/serial.h> /* For the serial port location and base baud */
+-
+-/* --- CONFIG --- */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-/* --- END OF CONFIG --- */
+-
+-#define UART16550_BAUD_2400 2400
+-#define UART16550_BAUD_4800 4800
+-#define UART16550_BAUD_9600 9600
+-#define UART16550_BAUD_19200 19200
+-#define UART16550_BAUD_38400 38400
+-#define UART16550_BAUD_57600 57600
+-#define UART16550_BAUD_115200 115200
+-
+-#define UART16550_PARITY_NONE 0
+-#define UART16550_PARITY_ODD 0x08
+-#define UART16550_PARITY_EVEN 0x18
+-#define UART16550_PARITY_MARK 0x28
+-#define UART16550_PARITY_SPACE 0x38
+-
+-#define UART16550_DATA_5BIT 0x0
+-#define UART16550_DATA_6BIT 0x1
+-#define UART16550_DATA_7BIT 0x2
+-#define UART16550_DATA_8BIT 0x3
+-
+-#define UART16550_STOP_1BIT 0x0
+-#define UART16550_STOP_2BIT 0x4
+-
+-/* ----------------------------------------------------- */
+-
+-/* === CONFIG === */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define BASE OCELOT_SERIAL1_BASE
+-#define MAX_BAUD OCELOT_BASE_BAUD
+-
+-/* === END OF CONFIG === */
+-
+-#define REG_OFFSET 4
+-
+-/* register offset */
+-#define OFS_RCV_BUFFER 0
+-#define OFS_TRANS_HOLD 0
+-#define OFS_SEND_BUFFER 0
+-#define OFS_INTR_ENABLE (1*REG_OFFSET)
+-#define OFS_INTR_ID (2*REG_OFFSET)
+-#define OFS_DATA_FORMAT (3*REG_OFFSET)
+-#define OFS_LINE_CONTROL (3*REG_OFFSET)
+-#define OFS_MODEM_CONTROL (4*REG_OFFSET)
+-#define OFS_RS232_OUTPUT (4*REG_OFFSET)
+-#define OFS_LINE_STATUS (5*REG_OFFSET)
+-#define OFS_MODEM_STATUS (6*REG_OFFSET)
+-#define OFS_RS232_INPUT (6*REG_OFFSET)
+-#define OFS_SCRATCH_PAD (7*REG_OFFSET)
+-
+-#define OFS_DIVISOR_LSB (0*REG_OFFSET)
+-#define OFS_DIVISOR_MSB (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y)))
+-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+- /* disable interrupts */
+- UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+- /* set up baud rate */
+- {
+- uint32 divisor;
+-
+- /* set DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+- /* set divisor */
+- divisor = MAX_BAUD / baud;
+- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+- /* clear DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+- }
+-
+- /* set data format */
+- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_38400,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+- return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_38400,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+- UART16550_WRITE(OFS_SEND_BUFFER, byte);
+- return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile
+--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -3,4 +3,3 @@
+ #
+
+ obj-y += init.o irq.o setup.o
+-obj-$(CONFIG_KGDB) += kgdb_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c
+--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,105 +0,0 @@
+-/*
+- * BRIEF MODULE DESCRIPTION
+- * Low level uart routines to directly access a TX[34]927 SIO.
+- *
+- * Copyright 2001 MontaVista Software Inc.
+- * Author: MontaVista Software, Inc.
+- * ahennessy@mvista.com or source@mvista.com
+- *
+- * Based on arch/mips/ddb5xxx/ddb5477/kgdb_io.c
+- *
+- * Copyright (C) 2000-2001 Toshiba Corporation
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#include <asm/jmr3927/jmr3927.h>
+-
+-#define TIMEOUT 0xffffff
+-
+-static int remoteDebugInitialized = 0;
+-static void debugInit(int baud);
+-
+-int putDebugChar(unsigned char c)
+-{
+- int i = 0;
+-
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(38400);
+- }
+-
+- do {
+- slow_down();
+- i++;
+- if (i>TIMEOUT) {
+- break;
+- }
+- } while (!(tx3927_sioptr(0)->cisr & TXx927_SICISR_TXALS));
+- tx3927_sioptr(0)->tfifo = c;
+-
+- return 1;
+-}
+-
+-unsigned char getDebugChar(void)
+-{
+- int i = 0;
+- int dicr;
+- char c;
+-
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(38400);
+- }
+-
+- /* diable RX int. */
+- dicr = tx3927_sioptr(0)->dicr;
+- tx3927_sioptr(0)->dicr = 0;
+-
+- do {
+- slow_down();
+- i++;
+- if (i>TIMEOUT) {
+- break;
+- }
+- } while (tx3927_sioptr(0)->disr & TXx927_SIDISR_UVALID)
+- ;
+- c = tx3927_sioptr(0)->rfifo;
+-
+- /* clear RX int. status */
+- tx3927_sioptr(0)->disr &= ~TXx927_SIDISR_RDIS;
+- /* enable RX int. */
+- tx3927_sioptr(0)->dicr = dicr;
+-
+- return c;
+-}
+-
+-static void debugInit(int baud)
+-{
+- tx3927_sioptr(0)->lcr = 0x020;
+- tx3927_sioptr(0)->dicr = 0;
+- tx3927_sioptr(0)->disr = 0x4100;
+- tx3927_sioptr(0)->cisr = 0x014;
+- tx3927_sioptr(0)->fcr = 0;
+- tx3927_sioptr(0)->flcr = 0x02;
+- tx3927_sioptr(0)->bgr = ((JMR3927_BASE_BAUD + baud / 2) / baud) |
+- TXx927_SIBGR_BCLK_T0;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/Makefile linux-2.6.22-591/arch/mips/kernel/Makefile
+--- linux-2.6.22-570/arch/mips/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -57,7 +57,8 @@
+ obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o
+ obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o
+
+-obj-$(CONFIG_KGDB) += gdb-low.o gdb-stub.o
++obj-$(CONFIG_KGDB) += kgdb_handler.o kgdb.o kgdb-jmp.o \
++ kgdb-setjmp.o
+ obj-$(CONFIG_PROC_FS) += proc.o
+
+ obj-$(CONFIG_64BIT) += cpu-bugs64.o
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/cpu-probe.c linux-2.6.22-591/arch/mips/kernel/cpu-probe.c
+--- linux-2.6.22-570/arch/mips/kernel/cpu-probe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/kernel/cpu-probe.c 2007-12-21 15:36:11.000000000 -0500
+@@ -177,6 +177,17 @@
+
+ cpu_wait = r4k_wait;
+ break;
++ case CPU_20KC:
++ /*
++ * WAIT on Rev1.0 has E1, E2, E3 and E16.
++ * WAIT on Rev2.0 and Rev3.0 has E16.
++ * Rev3.1 WAIT is nop, why bother
++ */
++ if ((c->processor_id & 0xff) <= 0x64)
++ break;
++
++ cpu_wait = r4k_wait;
++ break;
+ case CPU_RM9000:
+ if ((c->processor_id & 0x00ff) >= 0x40)
+ cpu_wait = r4k_wait;
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-low.S linux-2.6.22-591/arch/mips/kernel/gdb-low.S
+--- linux-2.6.22-570/arch/mips/kernel/gdb-low.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/kernel/gdb-low.S 1969-12-31 19:00:00.000000000 -0500
+@@ -1,394 +0,0 @@
+-/*
+- * gdb-low.S contains the low-level trap handler for the GDB stub.
+- *
+- * Copyright (C) 1995 Andreas Busse
+- */
+-#include <linux/sys.h>
+-
+-#include <asm/asm.h>
+-#include <asm/errno.h>
+-#include <asm/irqflags.h>
+-#include <asm/mipsregs.h>
+-#include <asm/regdef.h>
+-#include <asm/stackframe.h>
+-#include <asm/gdb-stub.h>
+-
+-#ifdef CONFIG_32BIT
+-#define DMFC0 mfc0
+-#define DMTC0 mtc0
+-#define LDC1 lwc1
+-#define SDC1 lwc1
+-#endif
+-#ifdef CONFIG_64BIT
+-#define DMFC0 dmfc0
+-#define DMTC0 dmtc0
+-#define LDC1 ldc1
+-#define SDC1 ldc1
+-#endif
+-
+-/*
+- * [jsun] We reserves about 2x GDB_FR_SIZE in stack. The lower (addressed)
+- * part is used to store registers and passed to exception handler.
+- * The upper part is reserved for "call func" feature where gdb client
+- * saves some of the regs, setups call frame and passes args.
+- *
+- * A trace shows about 200 bytes are used to store about half of all regs.
+- * The rest should be big enough for frame setup and passing args.
+- */
+-
+-/*
+- * The low level trap handler
+- */
+- .align 5
+- NESTED(trap_low, GDB_FR_SIZE, sp)
+- .set noat
+- .set noreorder
+-
+- mfc0 k0, CP0_STATUS
+- sll k0, 3 /* extract cu0 bit */
+- bltz k0, 1f
+- move k1, sp
+-
+- /*
+- * Called from user mode, go somewhere else.
+- */
+- mfc0 k0, CP0_CAUSE
+- andi k0, k0, 0x7c
+-#ifdef CONFIG_64BIT
+- dsll k0, k0, 1
+-#endif
+- PTR_L k1, saved_vectors(k0)
+- jr k1
+- nop
+-1:
+- move k0, sp
+- PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above
+- LONG_S k0, GDB_FR_REG29(sp)
+- LONG_S $2, GDB_FR_REG2(sp)
+-
+-/*
+- * First save the CP0 and special registers
+- */
+-
+- mfc0 v0, CP0_STATUS
+- LONG_S v0, GDB_FR_STATUS(sp)
+- mfc0 v0, CP0_CAUSE
+- LONG_S v0, GDB_FR_CAUSE(sp)
+- DMFC0 v0, CP0_EPC
+- LONG_S v0, GDB_FR_EPC(sp)
+- DMFC0 v0, CP0_BADVADDR
+- LONG_S v0, GDB_FR_BADVADDR(sp)
+- mfhi v0
+- LONG_S v0, GDB_FR_HI(sp)
+- mflo v0
+- LONG_S v0, GDB_FR_LO(sp)
+-
+-/*
+- * Now the integer registers
+- */
+-
+- LONG_S zero, GDB_FR_REG0(sp) /* I know... */
+- LONG_S $1, GDB_FR_REG1(sp)
+- /* v0 already saved */
+- LONG_S $3, GDB_FR_REG3(sp)
+- LONG_S $4, GDB_FR_REG4(sp)
+- LONG_S $5, GDB_FR_REG5(sp)
+- LONG_S $6, GDB_FR_REG6(sp)
+- LONG_S $7, GDB_FR_REG7(sp)
+- LONG_S $8, GDB_FR_REG8(sp)
+- LONG_S $9, GDB_FR_REG9(sp)
+- LONG_S $10, GDB_FR_REG10(sp)
+- LONG_S $11, GDB_FR_REG11(sp)
+- LONG_S $12, GDB_FR_REG12(sp)
+- LONG_S $13, GDB_FR_REG13(sp)
+- LONG_S $14, GDB_FR_REG14(sp)
+- LONG_S $15, GDB_FR_REG15(sp)
+- LONG_S $16, GDB_FR_REG16(sp)
+- LONG_S $17, GDB_FR_REG17(sp)
+- LONG_S $18, GDB_FR_REG18(sp)
+- LONG_S $19, GDB_FR_REG19(sp)
+- LONG_S $20, GDB_FR_REG20(sp)
+- LONG_S $21, GDB_FR_REG21(sp)
+- LONG_S $22, GDB_FR_REG22(sp)
+- LONG_S $23, GDB_FR_REG23(sp)
+- LONG_S $24, GDB_FR_REG24(sp)
+- LONG_S $25, GDB_FR_REG25(sp)
+- LONG_S $26, GDB_FR_REG26(sp)
+- LONG_S $27, GDB_FR_REG27(sp)
+- LONG_S $28, GDB_FR_REG28(sp)
+- /* sp already saved */
+- LONG_S $30, GDB_FR_REG30(sp)
+- LONG_S $31, GDB_FR_REG31(sp)
+-
+- CLI /* disable interrupts */
+- TRACE_IRQS_OFF
+-
+-/*
+- * Followed by the floating point registers
+- */
+- mfc0 v0, CP0_STATUS /* FPU enabled? */
+- srl v0, v0, 16
+- andi v0, v0, (ST0_CU1 >> 16)
+-
+- beqz v0,2f /* disabled, skip */
+- nop
+-
+- SDC1 $0, GDB_FR_FPR0(sp)
+- SDC1 $1, GDB_FR_FPR1(sp)
+- SDC1 $2, GDB_FR_FPR2(sp)
+- SDC1 $3, GDB_FR_FPR3(sp)
+- SDC1 $4, GDB_FR_FPR4(sp)
+- SDC1 $5, GDB_FR_FPR5(sp)
+- SDC1 $6, GDB_FR_FPR6(sp)
+- SDC1 $7, GDB_FR_FPR7(sp)
+- SDC1 $8, GDB_FR_FPR8(sp)
+- SDC1 $9, GDB_FR_FPR9(sp)
+- SDC1 $10, GDB_FR_FPR10(sp)
+- SDC1 $11, GDB_FR_FPR11(sp)
+- SDC1 $12, GDB_FR_FPR12(sp)
+- SDC1 $13, GDB_FR_FPR13(sp)
+- SDC1 $14, GDB_FR_FPR14(sp)
+- SDC1 $15, GDB_FR_FPR15(sp)
+- SDC1 $16, GDB_FR_FPR16(sp)
+- SDC1 $17, GDB_FR_FPR17(sp)
+- SDC1 $18, GDB_FR_FPR18(sp)
+- SDC1 $19, GDB_FR_FPR19(sp)
+- SDC1 $20, GDB_FR_FPR20(sp)
+- SDC1 $21, GDB_FR_FPR21(sp)
+- SDC1 $22, GDB_FR_FPR22(sp)
+- SDC1 $23, GDB_FR_FPR23(sp)
+- SDC1 $24, GDB_FR_FPR24(sp)
+- SDC1 $25, GDB_FR_FPR25(sp)
+- SDC1 $26, GDB_FR_FPR26(sp)
+- SDC1 $27, GDB_FR_FPR27(sp)
+- SDC1 $28, GDB_FR_FPR28(sp)
+- SDC1 $29, GDB_FR_FPR29(sp)
+- SDC1 $30, GDB_FR_FPR30(sp)
+- SDC1 $31, GDB_FR_FPR31(sp)
+-
+-/*
+- * FPU control registers
+- */
+-
+- cfc1 v0, CP1_STATUS
+- LONG_S v0, GDB_FR_FSR(sp)
+- cfc1 v0, CP1_REVISION
+- LONG_S v0, GDB_FR_FIR(sp)
+-
+-/*
+- * Current stack frame ptr
+- */
+-
+-2:
+- LONG_S sp, GDB_FR_FRP(sp)
+-
+-/*
+- * CP0 registers (R4000/R4400 unused registers skipped)
+- */
+-
+- mfc0 v0, CP0_INDEX
+- LONG_S v0, GDB_FR_CP0_INDEX(sp)
+- mfc0 v0, CP0_RANDOM
+- LONG_S v0, GDB_FR_CP0_RANDOM(sp)
+- DMFC0 v0, CP0_ENTRYLO0
+- LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp)
+- DMFC0 v0, CP0_ENTRYLO1
+- LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp)
+- DMFC0 v0, CP0_CONTEXT
+- LONG_S v0, GDB_FR_CP0_CONTEXT(sp)
+- mfc0 v0, CP0_PAGEMASK
+- LONG_S v0, GDB_FR_CP0_PAGEMASK(sp)
+- mfc0 v0, CP0_WIRED
+- LONG_S v0, GDB_FR_CP0_WIRED(sp)
+- DMFC0 v0, CP0_ENTRYHI
+- LONG_S v0, GDB_FR_CP0_ENTRYHI(sp)
+- mfc0 v0, CP0_PRID
+- LONG_S v0, GDB_FR_CP0_PRID(sp)
+-
+- .set at
+-
+-/*
+- * Continue with the higher level handler
+- */
+-
+- move a0,sp
+-
+- jal handle_exception
+- nop
+-
+-/*
+- * Restore all writable registers, in reverse order
+- */
+-
+- .set noat
+-
+- LONG_L v0, GDB_FR_CP0_ENTRYHI(sp)
+- LONG_L v1, GDB_FR_CP0_WIRED(sp)
+- DMTC0 v0, CP0_ENTRYHI
+- mtc0 v1, CP0_WIRED
+- LONG_L v0, GDB_FR_CP0_PAGEMASK(sp)
+- LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp)
+- mtc0 v0, CP0_PAGEMASK
+- DMTC0 v1, CP0_ENTRYLO1
+- LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp)
+- LONG_L v1, GDB_FR_CP0_INDEX(sp)
+- DMTC0 v0, CP0_ENTRYLO0
+- LONG_L v0, GDB_FR_CP0_CONTEXT(sp)
+- mtc0 v1, CP0_INDEX
+- DMTC0 v0, CP0_CONTEXT
+-
+-
+-/*
+- * Next, the floating point registers
+- */
+- mfc0 v0, CP0_STATUS /* check if the FPU is enabled */
+- srl v0, v0, 16
+- andi v0, v0, (ST0_CU1 >> 16)
+-
+- beqz v0, 3f /* disabled, skip */
+- nop
+-
+- LDC1 $31, GDB_FR_FPR31(sp)
+- LDC1 $30, GDB_FR_FPR30(sp)
+- LDC1 $29, GDB_FR_FPR29(sp)
+- LDC1 $28, GDB_FR_FPR28(sp)
+- LDC1 $27, GDB_FR_FPR27(sp)
+- LDC1 $26, GDB_FR_FPR26(sp)
+- LDC1 $25, GDB_FR_FPR25(sp)
+- LDC1 $24, GDB_FR_FPR24(sp)
+- LDC1 $23, GDB_FR_FPR23(sp)
+- LDC1 $22, GDB_FR_FPR22(sp)
+- LDC1 $21, GDB_FR_FPR21(sp)
+- LDC1 $20, GDB_FR_FPR20(sp)
+- LDC1 $19, GDB_FR_FPR19(sp)
+- LDC1 $18, GDB_FR_FPR18(sp)
+- LDC1 $17, GDB_FR_FPR17(sp)
+- LDC1 $16, GDB_FR_FPR16(sp)
+- LDC1 $15, GDB_FR_FPR15(sp)
+- LDC1 $14, GDB_FR_FPR14(sp)
+- LDC1 $13, GDB_FR_FPR13(sp)
+- LDC1 $12, GDB_FR_FPR12(sp)
+- LDC1 $11, GDB_FR_FPR11(sp)
+- LDC1 $10, GDB_FR_FPR10(sp)
+- LDC1 $9, GDB_FR_FPR9(sp)
+- LDC1 $8, GDB_FR_FPR8(sp)
+- LDC1 $7, GDB_FR_FPR7(sp)
+- LDC1 $6, GDB_FR_FPR6(sp)
+- LDC1 $5, GDB_FR_FPR5(sp)
+- LDC1 $4, GDB_FR_FPR4(sp)
+- LDC1 $3, GDB_FR_FPR3(sp)
+- LDC1 $2, GDB_FR_FPR2(sp)
+- LDC1 $1, GDB_FR_FPR1(sp)
+- LDC1 $0, GDB_FR_FPR0(sp)
+-
+-/*
+- * Now the CP0 and integer registers
+- */
+-
+-3:
+-#ifdef CONFIG_MIPS_MT_SMTC
+-	/* Read-modify-write of Status must be atomic */
+- mfc0 t2, CP0_TCSTATUS
+- ori t1, t2, TCSTATUS_IXMT
+- mtc0 t1, CP0_TCSTATUS
+- andi t2, t2, TCSTATUS_IXMT
+- _ehb
+- DMT 9 # dmt t1
+- jal mips_ihb
+- nop
+-#endif /* CONFIG_MIPS_MT_SMTC */
+- mfc0 t0, CP0_STATUS
+- ori t0, 0x1f
+- xori t0, 0x1f
+- mtc0 t0, CP0_STATUS
+-#ifdef CONFIG_MIPS_MT_SMTC
+- andi t1, t1, VPECONTROL_TE
+- beqz t1, 9f
+- nop
+- EMT # emt
+-9:
+- mfc0 t1, CP0_TCSTATUS
+- xori t1, t1, TCSTATUS_IXMT
+- or t1, t1, t2
+- mtc0 t1, CP0_TCSTATUS
+- _ehb
+-#endif /* CONFIG_MIPS_MT_SMTC */
+- LONG_L v0, GDB_FR_STATUS(sp)
+- LONG_L v1, GDB_FR_EPC(sp)
+- mtc0 v0, CP0_STATUS
+- DMTC0 v1, CP0_EPC
+- LONG_L v0, GDB_FR_HI(sp)
+- LONG_L v1, GDB_FR_LO(sp)
+- mthi v0
+- mtlo v1
+- LONG_L $31, GDB_FR_REG31(sp)
+- LONG_L $30, GDB_FR_REG30(sp)
+- LONG_L $28, GDB_FR_REG28(sp)
+- LONG_L $27, GDB_FR_REG27(sp)
+- LONG_L $26, GDB_FR_REG26(sp)
+- LONG_L $25, GDB_FR_REG25(sp)
+- LONG_L $24, GDB_FR_REG24(sp)
+- LONG_L $23, GDB_FR_REG23(sp)
+- LONG_L $22, GDB_FR_REG22(sp)
+- LONG_L $21, GDB_FR_REG21(sp)
+- LONG_L $20, GDB_FR_REG20(sp)
+- LONG_L $19, GDB_FR_REG19(sp)
+- LONG_L $18, GDB_FR_REG18(sp)
+- LONG_L $17, GDB_FR_REG17(sp)
+- LONG_L $16, GDB_FR_REG16(sp)
+- LONG_L $15, GDB_FR_REG15(sp)
+- LONG_L $14, GDB_FR_REG14(sp)
+- LONG_L $13, GDB_FR_REG13(sp)
+- LONG_L $12, GDB_FR_REG12(sp)
+- LONG_L $11, GDB_FR_REG11(sp)
+- LONG_L $10, GDB_FR_REG10(sp)
+- LONG_L $9, GDB_FR_REG9(sp)
+- LONG_L $8, GDB_FR_REG8(sp)
+- LONG_L $7, GDB_FR_REG7(sp)
+- LONG_L $6, GDB_FR_REG6(sp)
+- LONG_L $5, GDB_FR_REG5(sp)
+- LONG_L $4, GDB_FR_REG4(sp)
+- LONG_L $3, GDB_FR_REG3(sp)
+- LONG_L $2, GDB_FR_REG2(sp)
+- LONG_L $1, GDB_FR_REG1(sp)
+-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+- LONG_L k0, GDB_FR_EPC(sp)
+- LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */
+- jr k0
+- rfe
+-#else
+- LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */
+-
+- .set mips3
+- eret
+- .set mips0
+-#endif
+- .set at
+- .set reorder
+- END(trap_low)
+-
+-LEAF(kgdb_read_byte)
+-4: lb t0, (a0)
+- sb t0, (a1)
+- li v0, 0
+- jr ra
+- .section __ex_table,"a"
+- PTR 4b, kgdbfault
+- .previous
+- END(kgdb_read_byte)
+-
+-LEAF(kgdb_write_byte)
+-5: sb a0, (a1)
+- li v0, 0
+- jr ra
+- .section __ex_table,"a"
+- PTR 5b, kgdbfault
+- .previous
+- END(kgdb_write_byte)
+-
+- .type kgdbfault@function
+- .ent kgdbfault
+-
+-kgdbfault: li v0, -EFAULT
+- jr ra
+- .end kgdbfault
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-stub.c linux-2.6.22-591/arch/mips/kernel/gdb-stub.c
+--- linux-2.6.22-570/arch/mips/kernel/gdb-stub.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/kernel/gdb-stub.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,1154 +0,0 @@
+-/*
+- * arch/mips/kernel/gdb-stub.c
+- *
+- * Originally written by Glenn Engel, Lake Stevens Instrument Division
+- *
+- * Contributed by HP Systems
+- *
+- * Modified for SPARC by Stu Grossman, Cygnus Support.
+- *
+- * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
+- * Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
+- *
+- * Copyright (C) 1995 Andreas Busse
+- *
+- * Copyright (C) 2003 MontaVista Software Inc.
+- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
+- */
+-
+-/*
+- * To enable debugger support, two things need to happen. One, a
+- * call to set_debug_traps() is necessary in order to allow any breakpoints
+- * or error conditions to be properly intercepted and reported to gdb.
+- * Two, a breakpoint needs to be generated to begin communication. This
+- * is most easily accomplished by a call to breakpoint(). Breakpoint()
+- * simulates a breakpoint by executing a BREAK instruction.
+- *
+- *
+- * The following gdb commands are supported:
+- *
+- * command function Return value
+- *
+- * g return the value of the CPU registers hex data or ENN
+- * G set the value of the CPU registers OK or ENN
+- *
+- * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN
+- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN
+- *
+- * c Resume at current address SNN ( signal NN)
+- * cAA..AA Continue at address AA..AA SNN
+- *
+- * s Step one instruction SNN
+- * sAA..AA Step one instruction from AA..AA SNN
+- *
+- * k kill
+- *
+- * ? What was the last sigval ? SNN (signal NN)
+- *
+- * bBB..BB Set baud rate to BB..BB OK or BNN, then sets
+- * baud rate
+- *
+- * All commands and responses are sent with a packet which includes a
+- * checksum. A packet consists of
+- *
+- * $<packet info>#<checksum>.
+- *
+- * where
+- * <packet info> :: <characters representing the command or response>
+- * <checksum> :: < two hex digits computed as modulo 256 sum of <packetinfo>>
+- *
+- * When a packet is received, it is first acknowledged with either '+' or '-'.
+- * '+' indicates a successful transfer. '-' indicates a failed transfer.
+- *
+- * Example:
+- *
+- * Host: Reply:
+- * $m0,10#2a +$00010203040506070809101112131415#42
+- *
+- *
+- * ==============
+- * MORE EXAMPLES:
+- * ==============
+- *
+- * For reference -- the following are the steps that one
+- * company took (RidgeRun Inc) to get remote gdb debugging
+- * going. In this scenario the host machine was a PC and the
+- * target platform was a Galileo EVB64120A MIPS evaluation
+- * board.
+- *
+- * Step 1:
+- * First download gdb-5.0.tar.gz from the internet,
+- * and then build/install the package.
+- *
+- * Example:
+- * $ tar zxf gdb-5.0.tar.gz
+- * $ cd gdb-5.0
+- * $ ./configure --target=mips-linux-elf
+- * $ make
+- * $ make install
+- * $ which mips-linux-elf-gdb
+- * /usr/local/bin/mips-linux-elf-gdb
+- *
+- * Step 2:
+- * Configure linux for remote debugging and build it.
+- *
+- * Example:
+- * $ cd ~/linux
+- * $ make menuconfig <go to "Kernel Hacking" and turn on remote debugging>
+- * $ make
+- *
+- * Step 3:
+- * Download the kernel to the remote target and start
+- * the kernel running. It will promptly halt and wait
+- * for the host gdb session to connect. It does this
+- * since the "Kernel Hacking" option has defined
+- * CONFIG_KGDB which in turn enables your calls
+- * to:
+- * set_debug_traps();
+- * breakpoint();
+- *
+- * Step 4:
+- * Start the gdb session on the host.
+- *
+- * Example:
+- * $ mips-linux-elf-gdb vmlinux
+- * (gdb) set remotebaud 115200
+- * (gdb) target remote /dev/ttyS1
+- * ...at this point you are connected to
+- * the remote target and can use gdb
+- * in the normal fashion. Setting
+- * breakpoints, single stepping,
+- * printing variables, etc.
+- */
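
The packet format described in the comment above is easy to pin down in code. The sketch below is plain user-space C, not part of the stub: it frames a payload as $<data>#<checksum>, with the checksum computed as the modulo-256 sum of the payload bytes, and reproduces the $m0,10#2a example shown earlier.

	#include <stdio.h>

	/* Frame a GDB remote-protocol packet as $<payload>#<checksum>.
	 * The checksum is the modulo-256 sum of the payload bytes,
	 * emitted as two lowercase hex digits. */
	static void frame_packet(const char *payload, char *out, size_t outlen)
	{
		unsigned char sum = 0;
		const char *p;

		for (p = payload; *p; p++)
			sum += (unsigned char)*p;	/* unsigned char wraps mod 256 */

		snprintf(out, outlen, "$%s#%02x", payload, sum);
	}

	int main(void)
	{
		char buf[64];

		frame_packet("m0,10", buf, sizeof(buf));
		printf("%s\n", buf);	/* prints $m0,10#2a, matching the example */
		return 0;
	}
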
+-#include <linux/string.h>
+-#include <linux/kernel.h>
+-#include <linux/signal.h>
+-#include <linux/sched.h>
+-#include <linux/mm.h>
+-#include <linux/console.h>
+-#include <linux/init.h>
+-#include <linux/smp.h>
+-#include <linux/spinlock.h>
+-#include <linux/slab.h>
+-#include <linux/reboot.h>
+-
+-#include <asm/asm.h>
+-#include <asm/cacheflush.h>
+-#include <asm/mipsregs.h>
+-#include <asm/pgtable.h>
+-#include <asm/system.h>
+-#include <asm/gdb-stub.h>
+-#include <asm/inst.h>
+-#include <asm/smp.h>
+-
+-/*
+- * external low-level support routines
+- */
+-
+-extern int putDebugChar(char c); /* write a single character */
+-extern char getDebugChar(void); /* read and return a single char */
+-extern void trap_low(void);
+-
+-/*
+- * breakpoint and test functions
+- */
+-extern void breakpoint(void);
+-extern void breakinst(void);
+-extern void async_breakpoint(void);
+-extern void async_breakinst(void);
+-extern void adel(void);
+-
+-/*
+- * local prototypes
+- */
+-
+-static void getpacket(char *buffer);
+-static void putpacket(char *buffer);
+-static int computeSignal(int tt);
+-static int hex(unsigned char ch);
+-static int hexToInt(char **ptr, int *intValue);
+-static int hexToLong(char **ptr, long *longValue);
+-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault);
+-void handle_exception(struct gdb_regs *regs);
+-
+-int kgdb_enabled;
+-
+-/*
+- * spin locks for smp case
+- */
+-static DEFINE_SPINLOCK(kgdb_lock);
+-static raw_spinlock_t kgdb_cpulock[NR_CPUS] = {
+- [0 ... NR_CPUS-1] = __RAW_SPIN_LOCK_UNLOCKED,
+-};
+-
+-/*
+- * BUFMAX defines the maximum number of characters in inbound/outbound buffers
+- * at least NUMREGBYTES*2 are needed for register packets
+- */
+-#define BUFMAX 2048
+-
+-static char input_buffer[BUFMAX];
+-static char output_buffer[BUFMAX];
+-static int initialized; /* !0 means we've been initialized */
+-static int kgdb_started;
+-static const char hexchars[]="0123456789abcdef";
+-
+-/* Used to prevent crashes in memory access. Note that they'll crash anyway if
+- we haven't set up fault handlers yet... */
+-int kgdb_read_byte(unsigned char *address, unsigned char *dest);
+-int kgdb_write_byte(unsigned char val, unsigned char *dest);
+-
+-/*
+- * Convert ch from a hex digit to an int
+- */
+-static int hex(unsigned char ch)
+-{
+- if (ch >= 'a' && ch <= 'f')
+- return ch-'a'+10;
+- if (ch >= '0' && ch <= '9')
+- return ch-'0';
+- if (ch >= 'A' && ch <= 'F')
+- return ch-'A'+10;
+- return -1;
+-}
+-
+-/*
+- * scan for the sequence $<data>#<checksum>
+- */
+-static void getpacket(char *buffer)
+-{
+- unsigned char checksum;
+- unsigned char xmitcsum;
+- int i;
+- int count;
+- unsigned char ch;
+-
+- do {
+- /*
+- * wait around for the start character,
+- * ignore all other characters
+- */
+- while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+-
+- checksum = 0;
+- xmitcsum = -1;
+- count = 0;
+-
+- /*
+- * now, read until a # or end of buffer is found
+- */
+- while (count < BUFMAX) {
+- ch = getDebugChar();
+- if (ch == '#')
+- break;
+- checksum = checksum + ch;
+- buffer[count] = ch;
+- count = count + 1;
+- }
+-
+- if (count >= BUFMAX)
+- continue;
+-
+- buffer[count] = 0;
+-
+- if (ch == '#') {
+- xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+- xmitcsum |= hex(getDebugChar() & 0x7f);
+-
+- if (checksum != xmitcsum)
+- putDebugChar('-'); /* failed checksum */
+- else {
+- putDebugChar('+'); /* successful transfer */
+-
+- /*
+- * if a sequence char is present,
+- * reply the sequence ID
+- */
+- if (buffer[2] == ':') {
+- putDebugChar(buffer[0]);
+- putDebugChar(buffer[1]);
+-
+- /*
+- * remove sequence chars from buffer
+- */
+- count = strlen(buffer);
+- for (i=3; i <= count; i++)
+- buffer[i-3] = buffer[i];
+- }
+- }
+- }
+- }
+- while (checksum != xmitcsum);
+-}
+-
+-/*
+- * send the packet in buffer.
+- */
+-static void putpacket(char *buffer)
+-{
+- unsigned char checksum;
+- int count;
+- unsigned char ch;
+-
+- /*
+- * $<packet info>#<checksum>.
+- */
+-
+- do {
+- putDebugChar('$');
+- checksum = 0;
+- count = 0;
+-
+- while ((ch = buffer[count]) != 0) {
+- if (!(putDebugChar(ch)))
+- return;
+- checksum += ch;
+- count += 1;
+- }
+-
+- putDebugChar('#');
+- putDebugChar(hexchars[checksum >> 4]);
+- putDebugChar(hexchars[checksum & 0xf]);
+-
+- }
+- while ((getDebugChar() & 0x7f) != '+');
+-}
+-
+-
+-/*
+- * Convert the memory pointed to by mem into hex, placing result in buf.
+- * Return a pointer to the last char put in buf (null), in case of mem fault,
+- * return 0.
+- * may_fault is non-zero if we are reading from arbitrary memory, but is currently
+- * not used.
+- */
+-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault)
+-{
+- unsigned char ch;
+-
+- while (count-- > 0) {
+- if (kgdb_read_byte(mem++, &ch) != 0)
+- return 0;
+- *buf++ = hexchars[ch >> 4];
+- *buf++ = hexchars[ch & 0xf];
+- }
+-
+- *buf = 0;
+-
+- return buf;
+-}
+-
+-/*
+- * convert the hex array pointed to by buf into binary to be placed in mem
+- * return a pointer to the character AFTER the last byte written
+- * may_fault is non-zero if we are writing to arbitrary memory, but is currently
+- * not used.
+- */
+-static char *hex2mem(char *buf, char *mem, int count, int binary, int may_fault)
+-{
+- int i;
+- unsigned char ch;
+-
+- for (i=0; i<count; i++)
+- {
+- if (binary) {
+- ch = *buf++;
+- if (ch == 0x7d)
+- ch = 0x20 ^ *buf++;
+- }
+- else {
+- ch = hex(*buf++) << 4;
+- ch |= hex(*buf++);
+- }
+- if (kgdb_write_byte(ch, mem++) != 0)
+- return 0;
+- }
+-
+- return mem;
+-}
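
As a stand-alone illustration of the mem2hex()/hex2mem() contract, here is a minimal user-space sketch, under the assumption that plain memory accesses replace the faulting kgdb_read_byte()/kgdb_write_byte() wrappers: a buffer is hexified and decoded back, round-tripping exactly.

	#include <stdio.h>
	#include <string.h>

	static const char hexchars[] = "0123456789abcdef";

	static int hexval(unsigned char ch)	/* same mapping as hex() above */
	{
		if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
		if (ch >= '0' && ch <= '9') return ch - '0';
		return -1;
	}

	static void buf2hex(const unsigned char *mem, char *buf, int count)
	{
		while (count-- > 0) {
			unsigned char ch = *mem++;
			*buf++ = hexchars[ch >> 4];
			*buf++ = hexchars[ch & 0xf];
		}
		*buf = 0;
	}

	static void hex2buf(const char *buf, unsigned char *mem, int count)
	{
		while (count-- > 0) {
			unsigned char ch = hexval(*buf++) << 4;
			ch |= hexval(*buf++);
			*mem++ = ch;
		}
	}

	int main(void)
	{
		unsigned char data[4] = { 0x00, 0x01, 0x02, 0x03 }, back[4];
		char hex[9];

		buf2hex(data, hex, 4);		/* "00010203" */
		hex2buf(hex, back, 4);
		printf("%s %s\n", hex,
		       memcmp(data, back, 4) ? "FAIL" : "round-trips");
		return 0;
	}
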
+-
+-/*
+- * This table contains the mapping between MIPS hardware trap types and
+- * signals, which are primarily what GDB understands. It also indicates
+- * which hardware traps we need to commandeer when initializing the stub.
+- */
+-static struct hard_trap_info {
+- unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */
+- unsigned char signo; /* Signal that we map this trap into */
+-} hard_trap_info[] = {
+- { 6, SIGBUS }, /* instruction bus error */
+- { 7, SIGBUS }, /* data bus error */
+- { 9, SIGTRAP }, /* break */
+- { 10, SIGILL }, /* reserved instruction */
+-/* { 11, SIGILL }, */ /* CPU unusable */
+- { 12, SIGFPE }, /* overflow */
+- { 13, SIGTRAP }, /* trap */
+- { 14, SIGSEGV }, /* virtual instruction cache coherency */
+- { 15, SIGFPE }, /* floating point exception */
+- { 23, SIGSEGV }, /* watch */
+- { 31, SIGSEGV }, /* virtual data cache coherency */
+- { 0, 0} /* Must be last */
+-};
+-
+-/* Save the normal trap handlers for user-mode traps. */
+-void *saved_vectors[32];
+-
+-/*
+- * Set up exception handlers for tracing and breakpoints
+- */
+-void set_debug_traps(void)
+-{
+- struct hard_trap_info *ht;
+- unsigned long flags;
+- unsigned char c;
+-
+- local_irq_save(flags);
+- for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+- saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low);
+-
+- putDebugChar('+'); /* 'hello world' */
+- /*
+- * In case GDB is started before us, ack any packets
+- * (presumably "$?#xx") sitting there.
+- */
+- while((c = getDebugChar()) != '$');
+- while((c = getDebugChar()) != '#');
+- c = getDebugChar(); /* eat first csum byte */
+- c = getDebugChar(); /* eat second csum byte */
+- putDebugChar('+'); /* ack it */
+-
+- initialized = 1;
+- local_irq_restore(flags);
+-}
+-
+-void restore_debug_traps(void)
+-{
+- struct hard_trap_info *ht;
+- unsigned long flags;
+-
+- local_irq_save(flags);
+- for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+- set_except_vector(ht->tt, saved_vectors[ht->tt]);
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Convert the MIPS hardware trap type code to a Unix signal number.
+- */
+-static int computeSignal(int tt)
+-{
+- struct hard_trap_info *ht;
+-
+- for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+- if (ht->tt == tt)
+- return ht->signo;
+-
+- return SIGHUP; /* default for things we don't know about */
+-}
+-
+-/*
+- * While we find nice hex chars, build an int.
+- * Return number of chars processed.
+- */
+-static int hexToInt(char **ptr, int *intValue)
+-{
+- int numChars = 0;
+- int hexValue;
+-
+- *intValue = 0;
+-
+- while (**ptr) {
+- hexValue = hex(**ptr);
+- if (hexValue < 0)
+- break;
+-
+- *intValue = (*intValue << 4) | hexValue;
+- numChars ++;
+-
+- (*ptr)++;
+- }
+-
+- return (numChars);
+-}
+-
+-static int hexToLong(char **ptr, long *longValue)
+-{
+- int numChars = 0;
+- int hexValue;
+-
+- *longValue = 0;
+-
+- while (**ptr) {
+- hexValue = hex(**ptr);
+- if (hexValue < 0)
+- break;
+-
+- *longValue = (*longValue << 4) | hexValue;
+- numChars ++;
+-
+- (*ptr)++;
+- }
+-
+- return numChars;
+-}
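
The hexToInt()/hexToLong() contract above — consume leading hex digits, advance the caller's cursor, return the digit count — is what lets the command handlers parse "addr,length" pairs in place. A stand-alone model, parsing "0,10" the way the 'm' (memory read) handler below does:

	#include <stdio.h>

	static int hexval(unsigned char ch)
	{
		if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
		if (ch >= '0' && ch <= '9') return ch - '0';
		if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
		return -1;
	}

	static int hex_to_long(char **ptr, long *val)
	{
		int n = 0, d;

		*val = 0;
		while ((d = hexval((unsigned char)**ptr)) >= 0) {
			*val = (*val << 4) | d;
			n++;
			(*ptr)++;
		}
		return n;
	}

	int main(void)
	{
		char buf[] = "0,10";
		char *p = buf;
		long addr = 0, len = 0;

		if (hex_to_long(&p, &addr) && *p++ == ',' && hex_to_long(&p, &len))
			printf("addr=%#lx len=%#lx\n", addr, len);	/* len = 0x10 */
		return 0;
	}
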
+-
+-
+-#if 0
+-/*
+- * Print registers (on target console)
+- * Used only to debug the stub...
+- */
+-void show_gdbregs(struct gdb_regs * regs)
+-{
+- /*
+- * Saved main processor registers
+- */
+- printk("$0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+- regs->reg0, regs->reg1, regs->reg2, regs->reg3,
+- regs->reg4, regs->reg5, regs->reg6, regs->reg7);
+- printk("$8 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+- regs->reg8, regs->reg9, regs->reg10, regs->reg11,
+- regs->reg12, regs->reg13, regs->reg14, regs->reg15);
+- printk("$16: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+- regs->reg16, regs->reg17, regs->reg18, regs->reg19,
+- regs->reg20, regs->reg21, regs->reg22, regs->reg23);
+- printk("$24: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+- regs->reg24, regs->reg25, regs->reg26, regs->reg27,
+- regs->reg28, regs->reg29, regs->reg30, regs->reg31);
+-
+- /*
+- * Saved cp0 registers
+- */
+- printk("epc : %08lx\nStatus: %08lx\nCause : %08lx\n",
+- regs->cp0_epc, regs->cp0_status, regs->cp0_cause);
+-}
+-#endif /* dead code */
+-
+-/*
+- * We single-step by setting breakpoints. When an exception
+- * is handled, we need to restore the instructions hoisted
+- * when the breakpoints were set.
+- *
+- * This is where we save the original instructions.
+- */
+-static struct gdb_bp_save {
+- unsigned long addr;
+- unsigned int val;
+-} step_bp[2];
+-
+-#define BP 0x0000000d /* break opcode */
+-
+-/*
+- * Set breakpoint instructions for single stepping.
+- */
+-static void single_step(struct gdb_regs *regs)
+-{
+- union mips_instruction insn;
+- unsigned long targ;
+- int is_branch, is_cond, i;
+-
+- targ = regs->cp0_epc;
+- insn.word = *(unsigned int *)targ;
+- is_branch = is_cond = 0;
+-
+- switch (insn.i_format.opcode) {
+- /*
+- * jr and jalr are in r_format format.
+- */
+- case spec_op:
+- switch (insn.r_format.func) {
+- case jalr_op:
+- case jr_op:
+-			targ = *(&regs->reg0 + insn.r_format.rs);
+- is_branch = 1;
+- break;
+- }
+- break;
+-
+- /*
+- * This group contains:
+- * bltz_op, bgez_op, bltzl_op, bgezl_op,
+- * bltzal_op, bgezal_op, bltzall_op, bgezall_op.
+- */
+- case bcond_op:
+- is_branch = is_cond = 1;
+- targ += 4 + (insn.i_format.simmediate << 2);
+- break;
+-
+- /*
+- * These are unconditional and in j_format.
+- */
+- case jal_op:
+- case j_op:
+- is_branch = 1;
+- targ += 4;
+- targ >>= 28;
+- targ <<= 28;
+- targ |= (insn.j_format.target << 2);
+- break;
+-
+- /*
+- * These are conditional.
+- */
+- case beq_op:
+- case beql_op:
+- case bne_op:
+- case bnel_op:
+- case blez_op:
+- case blezl_op:
+- case bgtz_op:
+- case bgtzl_op:
+- case cop0_op:
+- case cop1_op:
+- case cop2_op:
+- case cop1x_op:
+- is_branch = is_cond = 1;
+- targ += 4 + (insn.i_format.simmediate << 2);
+- break;
+- }
+-
+- if (is_branch) {
+- i = 0;
+- if (is_cond && targ != (regs->cp0_epc + 8)) {
+- step_bp[i].addr = regs->cp0_epc + 8;
+- step_bp[i++].val = *(unsigned *)(regs->cp0_epc + 8);
+- *(unsigned *)(regs->cp0_epc + 8) = BP;
+- }
+- step_bp[i].addr = targ;
+- step_bp[i].val = *(unsigned *)targ;
+- *(unsigned *)targ = BP;
+- } else {
+- step_bp[0].addr = regs->cp0_epc + 4;
+- step_bp[0].val = *(unsigned *)(regs->cp0_epc + 4);
+- *(unsigned *)(regs->cp0_epc + 4) = BP;
+- }
+-}
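
Since MIPS has no hardware single-step, single_step() plants a BREAK opcode at every possible successor of the current instruction: the branch target, plus the fall-through slot (epc + 8) when the branch is conditional, or simply epc + 4 for a non-branch. A distilled user-space model of the plant/restore bookkeeping follows; instruction memory is simulated by an array and the opcodes are illustrative, while the real stub additionally needs cache flushes after patching text.

	#include <stdio.h>

	#define BP 0x0000000d	/* MIPS break opcode, as in the stub */

	static struct { unsigned int *addr; unsigned int val; } step_bp[2];

	static void plant(unsigned int *insn, int slot)
	{
		step_bp[slot].addr = insn;
		step_bp[slot].val = *insn;	/* hoist the original opcode */
		*insn = BP;
	}

	static void restore_all(void)
	{
		int i;

		for (i = 0; i < 2; i++)
			if (step_bp[i].addr) {
				*step_bp[i].addr = step_bp[i].val;
				step_bp[i].addr = 0;
			}
	}

	int main(void)
	{
		unsigned int text[4] = { 0x10400002, 0, 0x2402002a, 0x03e00008 };

		/* Conditional branch at text[0]: break both successors. */
		plant(&text[2], 0);	/* fall-through past the delay slot */
		plant(&text[3], 1);	/* branch target (illustrative) */
		printf("planted:  %08x %08x\n", text[2], text[3]);

		restore_all();
		printf("restored: %08x %08x\n", text[2], text[3]);
		return 0;
	}
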
+-
+-/*
+- * If asynchronously interrupted by gdb, then we need to set a breakpoint
+- * at the interrupted instruction so that we wind up stopped with a
+- * reasonable stack frame.
+- */
+-static struct gdb_bp_save async_bp;
+-
+-/*
+- * Swap the interrupted EPC with our asynchronous breakpoint routine.
+- * This is safer than stuffing the breakpoint in-place, since no cache
+- * flushes (or resulting smp_call_functions) are required. The
+- * assumption is that only one CPU will be handling asynchronous bp's,
+- * and only one can be active at a time.
+- */
+-extern spinlock_t smp_call_lock;
+-
+-void set_async_breakpoint(unsigned long *epc)
+-{
+- /* skip breaking into userland */
+- if ((*epc & 0x80000000) == 0)
+- return;
+-
+-#ifdef CONFIG_SMP
+-	/* avoid deadlock if someone is making an IPC call */
+- if (spin_is_locked(&smp_call_lock))
+- return;
+-#endif
+-
+- async_bp.addr = *epc;
+- *epc = (unsigned long)async_breakpoint;
+-}
+-
+-static void kgdb_wait(void *arg)
+-{
+- unsigned flags;
+- int cpu = smp_processor_id();
+-
+- local_irq_save(flags);
+-
+- __raw_spin_lock(&kgdb_cpulock[cpu]);
+- __raw_spin_unlock(&kgdb_cpulock[cpu]);
+-
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * GDB stub needs to call kgdb_wait on all processors with interrupts
+- * disabled, so it uses its own special variant.
+- */
+-static int kgdb_smp_call_kgdb_wait(void)
+-{
+-#ifdef CONFIG_SMP
+- struct call_data_struct data;
+- int i, cpus = num_online_cpus() - 1;
+- int cpu = smp_processor_id();
+-
+- /*
+- * Can die spectacularly if this CPU isn't yet marked online
+- */
+- BUG_ON(!cpu_online(cpu));
+-
+- if (!cpus)
+- return 0;
+-
+- if (spin_is_locked(&smp_call_lock)) {
+- /*
+- * Some other processor is trying to make us do something
+- * but we're not going to respond... give up
+- */
+- return -1;
+- }
+-
+- /*
+- * We will continue here, accepting the fact that
+- * the kernel may deadlock if another CPU attempts
+- * to call smp_call_function now...
+- */
+-
+- data.func = kgdb_wait;
+- data.info = NULL;
+- atomic_set(&data.started, 0);
+- data.wait = 0;
+-
+- spin_lock(&smp_call_lock);
+- call_data = &data;
+- mb();
+-
+- /* Send a message to all other CPUs and wait for them to respond */
+- for (i = 0; i < NR_CPUS; i++)
+- if (cpu_online(i) && i != cpu)
+- core_send_ipi(i, SMP_CALL_FUNCTION);
+-
+- /* Wait for response */
+- /* FIXME: lock-up detection, backtrace on lock-up */
+- while (atomic_read(&data.started) != cpus)
+- barrier();
+-
+- call_data = NULL;
+- spin_unlock(&smp_call_lock);
+-#endif
+-
+- return 0;
+-}
+-
+-/*
+- * This function does all command processing for interfacing to gdb. It
+- * returns 1 if you should skip the instruction at the trap address, 0
+- * otherwise.
+- */
+-void handle_exception (struct gdb_regs *regs)
+-{
+- int trap; /* Trap type */
+- int sigval;
+- long addr;
+- int length;
+- char *ptr;
+- unsigned long *stack;
+- int i;
+- int bflag = 0;
+-
+- kgdb_started = 1;
+-
+- /*
+- * acquire the big kgdb spinlock
+- */
+- if (!spin_trylock(&kgdb_lock)) {
+- /*
+- * some other CPU has the lock, we should go back to
+- * receive the gdb_wait IPC
+- */
+- return;
+- }
+-
+- /*
+- * If we're in async_breakpoint(), restore the real EPC from
+- * the breakpoint.
+- */
+- if (regs->cp0_epc == (unsigned long)async_breakinst) {
+- regs->cp0_epc = async_bp.addr;
+- async_bp.addr = 0;
+- }
+-
+- /*
+- * acquire the CPU spinlocks
+- */
+- for (i = num_online_cpus()-1; i >= 0; i--)
+- if (__raw_spin_trylock(&kgdb_cpulock[i]) == 0)
+- panic("kgdb: couldn't get cpulock %d\n", i);
+-
+- /*
+- * force other cpus to enter kgdb
+- */
+- kgdb_smp_call_kgdb_wait();
+-
+- /*
+- * If we're in breakpoint() increment the PC
+- */
+- trap = (regs->cp0_cause & 0x7c) >> 2;
+- if (trap == 9 && regs->cp0_epc == (unsigned long)breakinst)
+- regs->cp0_epc += 4;
+-
+- /*
+- * If we were single_stepping, restore the opcodes hoisted
+- * for the breakpoint[s].
+- */
+- if (step_bp[0].addr) {
+- *(unsigned *)step_bp[0].addr = step_bp[0].val;
+- step_bp[0].addr = 0;
+-
+- if (step_bp[1].addr) {
+- *(unsigned *)step_bp[1].addr = step_bp[1].val;
+- step_bp[1].addr = 0;
+- }
+- }
+-
+- stack = (long *)regs->reg29; /* stack ptr */
+- sigval = computeSignal(trap);
+-
+- /*
+- * reply to host that an exception has occurred
+- */
+- ptr = output_buffer;
+-
+- /*
+- * Send trap type (converted to signal)
+- */
+- *ptr++ = 'T';
+- *ptr++ = hexchars[sigval >> 4];
+- *ptr++ = hexchars[sigval & 0xf];
+-
+- /*
+- * Send Error PC
+- */
+- *ptr++ = hexchars[REG_EPC >> 4];
+- *ptr++ = hexchars[REG_EPC & 0xf];
+- *ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->cp0_epc, ptr, sizeof(long), 0);
+- *ptr++ = ';';
+-
+- /*
+- * Send frame pointer
+- */
+- *ptr++ = hexchars[REG_FP >> 4];
+- *ptr++ = hexchars[REG_FP & 0xf];
+- *ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->reg30, ptr, sizeof(long), 0);
+- *ptr++ = ';';
+-
+- /*
+- * Send stack pointer
+- */
+- *ptr++ = hexchars[REG_SP >> 4];
+- *ptr++ = hexchars[REG_SP & 0xf];
+- *ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->reg29, ptr, sizeof(long), 0);
+- *ptr++ = ';';
+-
+- *ptr++ = 0;
+- putpacket(output_buffer); /* send it off... */
+-
+- /*
+- * Wait for input from remote GDB
+- */
+- while (1) {
+- output_buffer[0] = 0;
+- getpacket(input_buffer);
+-
+- switch (input_buffer[0])
+- {
+- case '?':
+- output_buffer[0] = 'S';
+- output_buffer[1] = hexchars[sigval >> 4];
+- output_buffer[2] = hexchars[sigval & 0xf];
+- output_buffer[3] = 0;
+- break;
+-
+- /*
+- * Detach debugger; let CPU run
+- */
+- case 'D':
+- putpacket(output_buffer);
+- goto finish_kgdb;
+- break;
+-
+- case 'd':
+- /* toggle debug flag */
+- break;
+-
+- /*
+- * Return the value of the CPU registers
+- */
+- case 'g':
+- ptr = output_buffer;
+-			ptr = mem2hex((char *)&regs->reg0, ptr, 32*sizeof(long), 0); /* r0...r31 */
+-			ptr = mem2hex((char *)&regs->cp0_status, ptr, 6*sizeof(long), 0); /* cp0 */
+-			ptr = mem2hex((char *)&regs->fpr0, ptr, 32*sizeof(long), 0); /* f0...31 */
+-			ptr = mem2hex((char *)&regs->cp1_fsr, ptr, 2*sizeof(long), 0); /* cp1 */
+-			ptr = mem2hex((char *)&regs->frame_ptr, ptr, 2*sizeof(long), 0); /* frp */
+-			ptr = mem2hex((char *)&regs->cp0_index, ptr, 16*sizeof(long), 0); /* cp0 */
+- break;
+-
+- /*
+- * set the value of the CPU registers - return OK
+- */
+- case 'G':
+- {
+- ptr = &input_buffer[1];
+-			hex2mem(ptr, (char *)&regs->reg0, 32*sizeof(long), 0, 0);
+-			ptr += 32*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp0_status, 6*sizeof(long), 0, 0);
+-			ptr += 6*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->fpr0, 32*sizeof(long), 0, 0);
+-			ptr += 32*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp1_fsr, 2*sizeof(long), 0, 0);
+-			ptr += 2*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->frame_ptr, 2*sizeof(long), 0, 0);
+-			ptr += 2*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp0_index, 16*sizeof(long), 0, 0);
+- strcpy(output_buffer,"OK");
+- }
+- break;
+-
+- /*
+- * mAA..AA,LLLL Read LLLL bytes at address AA..AA
+- */
+- case 'm':
+- ptr = &input_buffer[1];
+-
+- if (hexToLong(&ptr, &addr)
+- && *ptr++ == ','
+- && hexToInt(&ptr, &length)) {
+- if (mem2hex((char *)addr, output_buffer, length, 1))
+- break;
+- strcpy (output_buffer, "E03");
+- } else
+- strcpy(output_buffer,"E01");
+- break;
+-
+- /*
+- * XAA..AA,LLLL: Write LLLL escaped binary bytes at address AA.AA
+- */
+- case 'X':
+- bflag = 1;
+- /* fall through */
+-
+- /*
+- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK
+- */
+- case 'M':
+- ptr = &input_buffer[1];
+-
+- if (hexToLong(&ptr, &addr)
+- && *ptr++ == ','
+- && hexToInt(&ptr, &length)
+- && *ptr++ == ':') {
+- if (hex2mem(ptr, (char *)addr, length, bflag, 1))
+- strcpy(output_buffer, "OK");
+- else
+- strcpy(output_buffer, "E03");
+- }
+- else
+- strcpy(output_buffer, "E02");
+- break;
+-
+- /*
+- * cAA..AA Continue at address AA..AA(optional)
+- */
+- case 'c':
+- /* try to read optional parameter, pc unchanged if no parm */
+-
+- ptr = &input_buffer[1];
+- if (hexToLong(&ptr, &addr))
+- regs->cp0_epc = addr;
+-
+- goto exit_kgdb_exception;
+- break;
+-
+- /*
+- * kill the program; let us try to restart the machine
+- * Reset the whole machine.
+- */
+- case 'k':
+- case 'r':
+- machine_restart("kgdb restarts machine");
+- break;
+-
+- /*
+- * Step to next instruction
+- */
+- case 's':
+- /*
+- * There is no single step insn in the MIPS ISA, so we
+- * use breakpoints and continue, instead.
+- */
+- single_step(regs);
+- goto exit_kgdb_exception;
+- /* NOTREACHED */
+- break;
+-
+- /*
+- * Set baud rate (bBB)
+- * FIXME: Needs to be written
+- */
+- case 'b':
+- {
+-#if 0
+- int baudrate;
+- extern void set_timer_3();
+-
+- ptr = &input_buffer[1];
+- if (!hexToInt(&ptr, &baudrate))
+- {
+- strcpy(output_buffer,"B01");
+- break;
+- }
+-
+- /* Convert baud rate to uart clock divider */
+-
+- switch (baudrate)
+- {
+- case 38400:
+- baudrate = 16;
+- break;
+- case 19200:
+- baudrate = 33;
+- break;
+- case 9600:
+- baudrate = 65;
+- break;
+- default:
+- baudrate = 0;
+- strcpy(output_buffer,"B02");
+- goto x1;
+- }
+-
+- if (baudrate) {
+- putpacket("OK"); /* Ack before changing speed */
+- set_timer_3(baudrate); /* Set it */
+- }
+-#endif
+- }
+- break;
+-
+- } /* switch */
+-
+- /*
+- * reply to the request
+- */
+-
+- putpacket(output_buffer);
+-
+- } /* while */
+-
+- return;
+-
+-finish_kgdb:
+- restore_debug_traps();
+-
+-exit_kgdb_exception:
+- /* release locks so other CPUs can go */
+- for (i = num_online_cpus()-1; i >= 0; i--)
+- __raw_spin_unlock(&kgdb_cpulock[i]);
+- spin_unlock(&kgdb_lock);
+-
+- __flush_cache_all();
+- return;
+-}
+-
+-/*
+- * This function will generate a breakpoint exception. It is used at the
+- * beginning of a program to sync up with a debugger and can be used
+- * otherwise as a quick means to stop program execution and "break" into
+- * the debugger.
+- */
+-void breakpoint(void)
+-{
+- if (!initialized)
+- return;
+-
+- __asm__ __volatile__(
+- ".globl breakinst\n\t"
+- ".set\tnoreorder\n\t"
+- "nop\n"
+- "breakinst:\tbreak\n\t"
+- "nop\n\t"
+- ".set\treorder"
+- );
+-}
+-
+-/* Nothing but the break; don't pollute any registers */
+-void async_breakpoint(void)
+-{
+- __asm__ __volatile__(
+- ".globl async_breakinst\n\t"
+- ".set\tnoreorder\n\t"
+- "nop\n"
+- "async_breakinst:\tbreak\n\t"
+- "nop\n\t"
+- ".set\treorder"
+- );
+-}
+-
+-void adel(void)
+-{
+- __asm__ __volatile__(
+- ".globl\tadel\n\t"
+- "lui\t$8,0x8000\n\t"
+- "lw\t$9,1($8)\n\t"
+- );
+-}
+-
+-/*
+- * malloc is needed by the gdb client for "call func()"; even a private one
+- * will make gdb happy
+- */
+-static void * __attribute_used__ malloc(size_t size)
+-{
+- return kmalloc(size, GFP_ATOMIC);
+-}
+-
+-static void __attribute_used__ free (void *where)
+-{
+- kfree(where);
+-}
+-
+-#ifdef CONFIG_GDB_CONSOLE
+-
+-void gdb_putsn(const char *str, int l)
+-{
+- char outbuf[18];
+-
+- if (!kgdb_started)
+- return;
+-
+- outbuf[0]='O';
+-
+- while(l) {
+- int i = (l>8)?8:l;
+- mem2hex((char *)str, &outbuf[1], i, 0);
+- outbuf[(i*2)+1]=0;
+- putpacket(outbuf);
+- str += i;
+- l -= i;
+- }
+-}
+-
+-static void gdb_console_write(struct console *con, const char *s, unsigned n)
+-{
+- gdb_putsn(s, n);
+-}
+-
+-static struct console gdb_console = {
+- .name = "gdb",
+- .write = gdb_console_write,
+- .flags = CON_PRINTBUFFER,
+- .index = -1
+-};
+-
+-static int __init register_gdb_console(void)
+-{
+- register_console(&gdb_console);
+-
+- return 0;
+-}
+-
+-console_initcall(register_gdb_console);
+-
+-#endif
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/irq.c linux-2.6.22-591/arch/mips/kernel/irq.c
+--- linux-2.6.22-570/arch/mips/kernel/irq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,6 +25,10 @@
+ #include <asm/atomic.h>
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <asm/kgdb.h>
++
++/* Keep track of whether we've already done certain initialization. */
++int kgdb_early_setup;
+
+ static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+
+@@ -138,28 +142,23 @@
+ atomic_inc(&irq_err_count);
+ }
+
+-#ifdef CONFIG_KGDB
+-extern void breakpoint(void);
+-extern void set_debug_traps(void);
+-
+-static int kgdb_flag = 1;
+-static int __init nokgdb(char *str)
++void __init init_IRQ(void)
+ {
+- kgdb_flag = 0;
+- return 1;
+-}
+-__setup("nokgdb", nokgdb);
++
++#ifdef CONFIG_KGDB
++ if (kgdb_early_setup)
++ return;
+ #endif
+
+-void __init init_IRQ(void)
+-{
+ arch_init_irq();
+
++
+ #ifdef CONFIG_KGDB
+- if (kgdb_flag) {
+- printk("Wait for gdb client connection ...\n");
+- set_debug_traps();
+- breakpoint();
+- }
++ /*
++	 * We have been called before kgdb_arch_init(). Hence,
++	 * we don't want the traps to be reinitialized.
++ */
++ if (kgdb_early_setup == 0)
++ kgdb_early_setup = 1;
+ #endif
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c
+--- linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,110 @@
++/*
++ * arch/mips/kernel/kgdb-jmp.c
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ * Author: Manish Lachwani <mlachwani@mvista.com>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 1996, 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
++ * Copyright (C) 2005-2006 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/kgdb.h>
++
++#ifdef CONFIG_64BIT
++/*
++ * MIPS 64-bit
++ */
++
++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp)
++{
++ __asm__ __volatile__ ("sd $gp, %0" : : "m" (curr_context[0]));
++ __asm__ __volatile__ ("sd $16, %0" : : "m" (curr_context[1]));
++ __asm__ __volatile__ ("sd $17, %0" : : "m" (curr_context[2]));
++ __asm__ __volatile__ ("sd $18, %0" : : "m" (curr_context[3]));
++ __asm__ __volatile__ ("sd $19, %0" : : "m" (curr_context[4]));
++ __asm__ __volatile__ ("sd $20, %0" : : "m" (curr_context[5]));
++ __asm__ __volatile__ ("sd $21, %0" : : "m" (curr_context[6]));
++ __asm__ __volatile__ ("sd $22, %0" : : "m" (curr_context[7]));
++ __asm__ __volatile__ ("sd $23, %0" : : "m" (curr_context[8]));
++ __asm__ __volatile__ ("sd $31, %0" : : "m" (curr_context[9]));
++ curr_context[10] = sp;
++ curr_context[11] = fp;
++
++ return 0;
++}
++
++void kgdb_fault_longjmp(unsigned long *curr_context)
++{
++ __asm__ __volatile__ ("ld $gp, %0" : : "m" (curr_context[0]));
++ __asm__ __volatile__ ("ld $16, %0" : : "m" (curr_context[1]));
++ __asm__ __volatile__ ("ld $17, %0" : : "m" (curr_context[2]));
++ __asm__ __volatile__ ("ld $18, %0" : : "m" (curr_context[3]));
++ __asm__ __volatile__ ("ld $19, %0" : : "m" (curr_context[4]));
++ __asm__ __volatile__ ("ld $20, %0" : : "m" (curr_context[5]));
++ __asm__ __volatile__ ("ld $21, %0" : : "m" (curr_context[6]));
++ __asm__ __volatile__ ("ld $22, %0" : : "m" (curr_context[7]));
++ __asm__ __volatile__ ("ld $23, %0" : : "m" (curr_context[8]));
++ __asm__ __volatile__ ("ld $25, %0" : : "m" (curr_context[9]));
++ __asm__ __volatile__ ("ld $29, %0\n\t"
++ "ld $30, %1\n\t" : :
++ "m" (curr_context[10]), "m" (curr_context[11]));
++
++ __asm__ __volatile__ ("dli $2, 1");
++ __asm__ __volatile__ ("j $25");
++
++ for (;;);
++}
++#else
++/*
++ * MIPS 32-bit
++ */
++
++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp)
++{
++ __asm__ __volatile__("sw $gp, %0" : : "m" (curr_context[0]));
++ __asm__ __volatile__("sw $16, %0" : : "m" (curr_context[1]));
++ __asm__ __volatile__("sw $17, %0" : : "m" (curr_context[2]));
++ __asm__ __volatile__("sw $18, %0" : : "m" (curr_context[3]));
++ __asm__ __volatile__("sw $19, %0" : : "m" (curr_context[4]));
++ __asm__ __volatile__("sw $20, %0" : : "m" (curr_context[5]));
++ __asm__ __volatile__("sw $21, %0" : : "m" (curr_context[6]));
++ __asm__ __volatile__("sw $22, %0" : : "m" (curr_context[7]));
++ __asm__ __volatile__("sw $23, %0" : : "m" (curr_context[8]));
++ __asm__ __volatile__("sw $31, %0" : : "m" (curr_context[9]));
++ curr_context[10] = sp;
++ curr_context[11] = fp;
++
++ return 0;
++}
++
++void kgdb_fault_longjmp(unsigned long *curr_context)
++{
++ __asm__ __volatile__("lw $gp, %0" : : "m" (curr_context[0]));
++ __asm__ __volatile__("lw $16, %0" : : "m" (curr_context[1]));
++ __asm__ __volatile__("lw $17, %0" : : "m" (curr_context[2]));
++ __asm__ __volatile__("lw $18, %0" : : "m" (curr_context[3]));
++ __asm__ __volatile__("lw $19, %0" : : "m" (curr_context[4]));
++ __asm__ __volatile__("lw $20, %0" : : "m" (curr_context[5]));
++ __asm__ __volatile__("lw $21, %0" : : "m" (curr_context[6]));
++ __asm__ __volatile__("lw $22, %0" : : "m" (curr_context[7]));
++ __asm__ __volatile__("lw $23, %0" : : "m" (curr_context[8]));
++ __asm__ __volatile__("lw $25, %0" : : "m" (curr_context[9]));
++
++ __asm__ __volatile__("lw $29, %0\n\t"
++ "lw $30, %1\n\t" : :
++ "m" (curr_context[10]), "m" (curr_context[11]));
++
++ __asm__ __volatile__("li $2, 1");
++ __asm__ __volatile__("jr $25");
++
++ for (;;);
++}
++#endif
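
The intended usage is setjmp-like: kgdb_fault_setjmp() returns 0 on the direct call, and a fault handler that invokes kgdb_fault_longjmp() makes it appear to return 1, since the longjmp path loads 1 into v0 ($2) and jumps back through the saved return address. Below is a hypothetical caller sketching that pattern; the kgdb_fault_jmp_regs symbol and the exact entry points are assumptions about the KGDB core this patch targets, not spelled out in this file.

	#include <linux/kgdb.h>
	#include <linux/errno.h>

	/* Hypothetical probe built on the save/restore pair above. */
	static int kgdb_probe_byte(unsigned char *addr, unsigned char *dest)
	{
		if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) != 0)
			return -EFAULT;	/* arrived here via kgdb_fault_longjmp() */

		*dest = *addr;		/* may fault; the handler jumps back */
		return 0;
	}
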
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S
+--- linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,28 @@
++/*
++ * arch/mips/kernel/kgdb-setjmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Copyright (C) 2005 by MontaVista Software.
++ * Author: Manish Lachwani (mlachwani@mvista.com)
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <asm/asm.h>
++#include <asm/mipsregs.h>
++#include <asm/regdef.h>
++#include <asm/stackframe.h>
++
++ .ent kgdb_fault_setjmp,0
++ENTRY (kgdb_fault_setjmp)
++ move a1, sp
++ move a2, fp
++#ifdef CONFIG_64BIT
++ nop
++#endif
++ j kgdb_fault_setjmp_aux
++ .end kgdb_fault_setjmp
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb.c linux-2.6.22-591/arch/mips/kernel/kgdb.c
+--- linux-2.6.22-570/arch/mips/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/mips/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,299 @@
++/*
++ * arch/mips/kernel/kgdb.c
++ *
++ * Originally written by Glenn Engel, Lake Stevens Instrument Division
++ *
++ * Contributed by HP Systems
++ *
++ * Modified for SPARC by Stu Grossman, Cygnus Support.
++ *
++ * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
++ * Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
++ *
++ * Copyright (C) 1995 Andreas Busse
++ *
++ * Copyright (C) 2003 MontaVista Software Inc.
++ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
++ *
++ * Copyright (C) 2004-2005 MontaVista Software Inc.
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h> /* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/inst.h>
++#include <asm/gdb-stub.h>
++#include <asm/cacheflush.h>
++#include <asm/kdebug.h>
++
++static struct hard_trap_info {
++ unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */
++ unsigned char signo; /* Signal that we map this trap into */
++} hard_trap_info[] = {
++ { 6, SIGBUS }, /* instruction bus error */
++ { 7, SIGBUS }, /* data bus error */
++ { 9, SIGTRAP }, /* break */
++/* { 11, SIGILL }, */ /* CPU unusable */
++ { 12, SIGFPE }, /* overflow */
++ { 13, SIGTRAP }, /* trap */
++ { 14, SIGSEGV }, /* virtual instruction cache coherency */
++ { 15, SIGFPE }, /* floating point exception */
++ { 23, SIGSEGV }, /* watch */
++ { 31, SIGSEGV }, /* virtual data cache coherency */
++ { 0, 0} /* Must be last */
++};
++
++/* Save the normal trap handlers for user-mode traps. */
++void *saved_vectors[32];
++
++extern void trap_low(void);
++extern void breakinst(void);
++extern void init_IRQ(void);
++
++void kgdb_call_nmi_hook(void *ignored)
++{
++ kgdb_nmihook(smp_processor_id(), (void *)0);
++}
++
++void kgdb_roundup_cpus(unsigned long flags)
++{
++ local_irq_enable();
++ smp_call_function(kgdb_call_nmi_hook, 0, 0, 0);
++ local_irq_disable();
++}
++
++static int compute_signal(int tt)
++{
++ struct hard_trap_info *ht;
++
++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++ if (ht->tt == tt)
++ return ht->signo;
++
++ return SIGHUP; /* default for things we don't know about */
++}
++
++/*
++ * Entered from trap_low: hand the exception over to the KGDB core
++ */
++void handle_exception(struct pt_regs *regs)
++{
++ int trap = (regs->cp0_cause & 0x7c) >> 2;
++
++ if (fixup_exception(regs)) {
++ return;
++ }
++
++ if (atomic_read(&debugger_active))
++ kgdb_nmihook(smp_processor_id(), regs);
++
++ if (atomic_read(&kgdb_setting_breakpoint))
++ if ((trap == 9) && (regs->cp0_epc == (unsigned long)breakinst))
++ regs->cp0_epc += 4;
++
++ kgdb_handle_exception(0, compute_signal(trap), 0, regs);
++
++ /* In SMP mode, __flush_cache_all does IPI */
++ local_irq_enable();
++ __flush_cache_all();
++}
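
The "(cp0_cause & 0x7c) >> 2" idiom used here (and again in kgdb_mips_notify() below) pulls the ExcCode field out of bits 6..2 of the CP0 Cause register. A quick stand-alone check:

	#include <stdio.h>

	int main(void)
	{
		unsigned long cause = 0x00000024;	/* ExcCode field = 9: breakpoint */

		printf("trap = %lu\n", (cause & 0x7c) >> 2);	/* prints 9 */
		return 0;
	}
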
++
++void set_debug_traps(void)
++{
++ struct hard_trap_info *ht;
++ unsigned long flags;
++
++ local_irq_save(flags);
++
++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++ saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low);
++
++ local_irq_restore(flags);
++}
++
++#if 0
++/* This should be called before we exit kgdb_handle_exception() I believe.
++ * -- Tom
++ */
++void restore_debug_traps(void)
++{
++ struct hard_trap_info *ht;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++ set_except_vector(ht->tt, saved_vectors[ht->tt]);
++ local_irq_restore(flags);
++}
++#endif
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ int reg;
++ gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++ for (reg = 0; reg < 32; reg++)
++ *(ptr++) = regs->regs[reg];
++
++ *(ptr++) = regs->cp0_status;
++ *(ptr++) = regs->lo;
++ *(ptr++) = regs->hi;
++ *(ptr++) = regs->cp0_badvaddr;
++ *(ptr++) = regs->cp0_cause;
++ *(ptr++) = regs->cp0_epc;
++
++ return;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++
++ int reg;
++ const gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++ for (reg = 0; reg < 32; reg++)
++ regs->regs[reg] = *(ptr++);
++
++ regs->cp0_status = *(ptr++);
++ regs->lo = *(ptr++);
++ regs->hi = *(ptr++);
++ regs->cp0_badvaddr = *(ptr++);
++ regs->cp0_cause = *(ptr++);
++ regs->cp0_epc = *(ptr++);
++
++ return;
++}
++
++/*
++ * Similar to regs_to_gdb_regs() except that process is sleeping and so
++ * we may not be able to get all the info.
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ int reg;
++ struct thread_info *ti = task_thread_info(p);
++ unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32;
++ struct pt_regs *regs = (struct pt_regs *)ksp - 1;
++ gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++ for (reg = 0; reg < 16; reg++)
++ *(ptr++) = regs->regs[reg];
++
++ /* S0 - S7 */
++ for (reg = 16; reg < 24; reg++)
++ *(ptr++) = regs->regs[reg];
++
++ for (reg = 24; reg < 28; reg++)
++ *(ptr++) = 0;
++
++ /* GP, SP, FP, RA */
++ for (reg = 28; reg < 32; reg++)
++ *(ptr++) = regs->regs[reg];
++
++ *(ptr++) = regs->cp0_status;
++ *(ptr++) = regs->lo;
++ *(ptr++) = regs->hi;
++ *(ptr++) = regs->cp0_badvaddr;
++ *(ptr++) = regs->cp0_cause;
++ *(ptr++) = regs->cp0_epc;
++
++ return;
++}
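
The ksp arithmetic above locates the pt_regs block pushed at the top of the sleeping task's kernel stack, just below a 32-byte pad. A worked example under assumed values — THREAD_SIZE of 8 KiB, a thread_info at 0x80f06000, and a 256-byte pt_regs; all three numbers are illustrative, not taken from this patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned long ti = 0x80f06000UL;	/* assumed thread_info address */
		unsigned long thread_size = 8192;	/* assumed THREAD_SIZE */
		unsigned long ptregs_size = 256;	/* assumed sizeof(struct pt_regs) */

		unsigned long ksp = ti + thread_size - 32;
		unsigned long regs = ksp - ptregs_size;	/* (struct pt_regs *)ksp - 1 */

		printf("stack top %#lx, pt_regs at %#lx\n", ti + thread_size, regs);
		return 0;
	}
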
++
++/*
++ * Called from the die notifier chain before the kernel dies. If KGDB
++ * is enabled, try to fall into the debugger.
++ */
++static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ struct die_args *args = (struct die_args *)ptr;
++ struct pt_regs *regs = args->regs;
++ int trap = (regs->cp0_cause & 0x7c) >> 2;
++
++ /* See if KGDB is interested. */
++ if (user_mode(regs))
++		/* Userspace events, ignore. */
++ return NOTIFY_DONE;
++
++ kgdb_handle_exception(trap, compute_signal(trap), 0, regs);
++ return NOTIFY_OK;
++}
++
++static struct notifier_block kgdb_notifier = {
++ .notifier_call = kgdb_mips_notify,
++};
++
++/*
++ * Handle the 's' and 'c' commands
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++ char *remcom_in_buffer, char *remcom_out_buffer,
++ struct pt_regs *regs)
++{
++ char *ptr;
++ unsigned long address;
++ int cpu = smp_processor_id();
++
++ switch (remcom_in_buffer[0]) {
++ case 's':
++ case 'c':
++ /* handle the optional parameter */
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &address))
++ regs->cp0_epc = address;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ if (remcom_in_buffer[0] == 's')
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step, cpu);
++
++ return 0;
++ }
++
++ return -1;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifdef CONFIG_CPU_LITTLE_ENDIAN
++ .gdb_bpt_instr = {0xd},
++#else
++ .gdb_bpt_instr = {0x00, 0x00, 0x00, 0x0d},
++#endif
++};
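
The two gdb_bpt_instr initializers spell out the same MIPS break instruction, 0x0000000d, bytewise for each endianness; the little-endian form relies on C's implicit zero-fill of the remaining array elements. A sketch that prints the layout on the host it runs on:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		unsigned int insn = 0x0000000d;	/* MIPS "break" */
		unsigned char b[4];

		memcpy(b, &insn, 4);
		/* little-endian: 0d 00 00 00   big-endian: 00 00 00 0d */
		printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);
		return 0;
	}
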
++
++/*
++ * We use kgdb_early_setup so that functions we need to call now don't
++ * cause trouble when called again later.
++ */
++__init int kgdb_arch_init(void)
++{
++ /* Board-specifics. */
++ /* Force some calls to happen earlier. */
++ if (kgdb_early_setup == 0) {
++ trap_init();
++ init_IRQ();
++ kgdb_early_setup = 1;
++ }
++
++ /* Set our traps. */
++	/* This needs to be done at a finer granularity again, paired
++	 * before/after around kgdb_handle_exception(...) -- Tom */
++ set_debug_traps();
++ register_die_notifier(&kgdb_notifier);
++
++ return 0;
++}
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S
+--- linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,339 @@
++/*
++ * arch/mips/kernel/kgdb_handler.S
++ *
++ * Copyright (C) 2007 Wind River Systems, Inc
++ *
++ * Copyright (C) 2004-2005 MontaVista Software Inc.
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++/*
++ * Trap handler for the new KGDB framework. The main KGDB handler,
++ * handle_exception(), is called from here.
++ *
++ */
++
++#include <linux/sys.h>
++
++#include <asm/asm.h>
++#include <asm/errno.h>
++#include <asm/mipsregs.h>
++#include <asm/regdef.h>
++#include <asm/stackframe.h>
++#include <asm/gdb-stub.h>
++
++#ifdef CONFIG_32BIT
++#define DMFC0 mfc0
++#define DMTC0 mtc0
++#define LDC1 lwc1
++#define SDC1 swc1
++#endif
++#ifdef CONFIG_64BIT
++#define DMFC0 dmfc0
++#define DMTC0 dmtc0
++#define LDC1 ldc1
++#define SDC1 sdc1
++#endif
++
++#include <asm/asmmacro.h>
++
++/*
++ * [jsun] We reserve about 2x GDB_FR_SIZE of stack. The lower (addressed)
++ * part is used to store registers and is passed to the exception handler.
++ * The upper part is reserved for the "call func" feature, where the gdb
++ * client saves some of the regs, sets up the call frame and passes args.
++ *
++ * A trace shows about 200 bytes are used to store about half of all regs.
++ * The rest should be big enough for frame setup and passing args.
++ */
++
++/*
++ * The low level trap handler
++ */
++ .align 5
++ NESTED(trap_low, GDB_FR_SIZE, sp)
++ .set noat
++ .set noreorder
++
++ mfc0 k0, CP0_STATUS
++ sll k0, 3 /* extract cu0 bit */
++ bltz k0, 1f
++ move k1, sp
++
++ /*
++ * Called from user mode, go somewhere else.
++ */
++#if defined(CONFIG_32BIT)
++ lui k1, %hi(saved_vectors)
++ mfc0 k0, CP0_CAUSE
++ andi k0, k0, 0x7c
++ add k1, k1, k0
++ lw k0, %lo(saved_vectors)(k1)
++#elif defined(CONFIG_64BIT) && defined(CONFIG_BUILD_ELF64)
++ DMFC0 k0, CP0_CAUSE
++ lui k1, %highest(saved_vectors)
++ andi k0, k0, 0x7c /* mask exception type */
++ dsll k0, 1 /* turn into byte offset */
++ daddiu k1, %higher(saved_vectors)
++ dsll k1, k1, 16
++ daddiu k1, %hi(saved_vectors)
++ dsll k1, k1, 16
++ daddu k1, k1, k0
++ LONG_L k0, %lo(saved_vectors)(k1)
++#else
++#error "MIPS configuration is unsupported for kgdb!!"
++#endif
++ jr k0
++ nop
++1:
++ move k0, sp
++ PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above
++ LONG_S k0, GDB_FR_REG29(sp)
++ LONG_S $2, GDB_FR_REG2(sp)
++
++/*
++ * First save the CP0 and special registers
++ */
++
++ mfc0 v0, CP0_STATUS
++ LONG_S v0, GDB_FR_STATUS(sp)
++ mfc0 v0, CP0_CAUSE
++ LONG_S v0, GDB_FR_CAUSE(sp)
++ DMFC0 v0, CP0_EPC
++ LONG_S v0, GDB_FR_EPC(sp)
++ DMFC0 v0, CP0_BADVADDR
++ LONG_S v0, GDB_FR_BADVADDR(sp)
++ mfhi v0
++ LONG_S v0, GDB_FR_HI(sp)
++ mflo v0
++ LONG_S v0, GDB_FR_LO(sp)
++
++/*
++ * Now the integer registers
++ */
++
++ LONG_S zero, GDB_FR_REG0(sp) /* I know... */
++ LONG_S $1, GDB_FR_REG1(sp)
++ /* v0 already saved */
++ LONG_S $3, GDB_FR_REG3(sp)
++ LONG_S $4, GDB_FR_REG4(sp)
++ LONG_S $5, GDB_FR_REG5(sp)
++ LONG_S $6, GDB_FR_REG6(sp)
++ LONG_S $7, GDB_FR_REG7(sp)
++ LONG_S $8, GDB_FR_REG8(sp)
++ LONG_S $9, GDB_FR_REG9(sp)
++ LONG_S $10, GDB_FR_REG10(sp)
++ LONG_S $11, GDB_FR_REG11(sp)
++ LONG_S $12, GDB_FR_REG12(sp)
++ LONG_S $13, GDB_FR_REG13(sp)
++ LONG_S $14, GDB_FR_REG14(sp)
++ LONG_S $15, GDB_FR_REG15(sp)
++ LONG_S $16, GDB_FR_REG16(sp)
++ LONG_S $17, GDB_FR_REG17(sp)
++ LONG_S $18, GDB_FR_REG18(sp)
++ LONG_S $19, GDB_FR_REG19(sp)
++ LONG_S $20, GDB_FR_REG20(sp)
++ LONG_S $21, GDB_FR_REG21(sp)
++ LONG_S $22, GDB_FR_REG22(sp)
++ LONG_S $23, GDB_FR_REG23(sp)
++ LONG_S $24, GDB_FR_REG24(sp)
++ LONG_S $25, GDB_FR_REG25(sp)
++ LONG_S $26, GDB_FR_REG26(sp)
++ LONG_S $27, GDB_FR_REG27(sp)
++ LONG_S $28, GDB_FR_REG28(sp)
++ /* sp already saved */
++ LONG_S $30, GDB_FR_REG30(sp)
++ LONG_S $31, GDB_FR_REG31(sp)
++
++ CLI /* disable interrupts */
++
++/*
++ * Followed by the floating point registers
++ */
++ mfc0 v0, CP0_STATUS /* FPU enabled? */
++ srl v0, v0, 16
++ andi v0, v0, (ST0_CU1 >> 16)
++
++ beqz v0,3f /* disabled, skip */
++ nop
++
++ li t0, 0
++#ifdef CONFIG_64BIT
++ mfc0 t0, CP0_STATUS
++#endif
++ fpu_save_double_kgdb sp t0 t1 # clobbers t1
++
++
++/*
++ * Current stack frame ptr
++ */
++
++3:
++ LONG_S sp, GDB_FR_FRP(sp)
++
++/*
++ * CP0 registers (R4000/R4400 unused registers skipped)
++ */
++
++ mfc0 v0, CP0_INDEX
++ LONG_S v0, GDB_FR_CP0_INDEX(sp)
++ mfc0 v0, CP0_RANDOM
++ LONG_S v0, GDB_FR_CP0_RANDOM(sp)
++ DMFC0 v0, CP0_ENTRYLO0
++ LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp)
++ DMFC0 v0, CP0_ENTRYLO1
++ LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp)
++ DMFC0 v0, CP0_CONTEXT
++ LONG_S v0, GDB_FR_CP0_CONTEXT(sp)
++ mfc0 v0, CP0_PAGEMASK
++ LONG_S v0, GDB_FR_CP0_PAGEMASK(sp)
++ mfc0 v0, CP0_WIRED
++ LONG_S v0, GDB_FR_CP0_WIRED(sp)
++ DMFC0 v0, CP0_ENTRYHI
++ LONG_S v0, GDB_FR_CP0_ENTRYHI(sp)
++ mfc0 v0, CP0_PRID
++ LONG_S v0, GDB_FR_CP0_PRID(sp)
++
++ .set at
++
++/*
++ * Continue with the higher level handler
++ */
++
++ move a0,sp
++
++ jal handle_exception
++ nop
++
++/*
++ * Restore all writable registers, in reverse order
++ */
++
++ .set noat
++
++ LONG_L v0, GDB_FR_CP0_ENTRYHI(sp)
++ LONG_L v1, GDB_FR_CP0_WIRED(sp)
++ DMTC0 v0, CP0_ENTRYHI
++ mtc0 v1, CP0_WIRED
++ LONG_L v0, GDB_FR_CP0_PAGEMASK(sp)
++ LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp)
++ mtc0 v0, CP0_PAGEMASK
++ DMTC0 v1, CP0_ENTRYLO1
++ LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp)
++ LONG_L v1, GDB_FR_CP0_INDEX(sp)
++ DMTC0 v0, CP0_ENTRYLO0
++ LONG_L v0, GDB_FR_CP0_CONTEXT(sp)
++ mtc0 v1, CP0_INDEX
++ DMTC0 v0, CP0_CONTEXT
++
++
++/*
++ * Next, the floating point registers
++ */
++ mfc0 v0, CP0_STATUS /* check if the FPU is enabled */
++ srl v0, v0, 16
++ andi v0, v0, (ST0_CU1 >> 16)
++
++ beqz v0, 3f /* disabled, skip */
++ nop
++
++ li t0, 0
++#ifdef CONFIG_64BIT
++ mfc0 t0, CP0_STATUS
++#endif
++ fpu_restore_double_kgdb sp t0 t1 # clobbers t1
++
++
++/*
++ * Now the CP0 and integer registers
++ */
++
++3:
++ mfc0 t0, CP0_STATUS
++ ori t0, 0x1f
++ xori t0, 0x1f
++ mtc0 t0, CP0_STATUS
++
++ LONG_L v0, GDB_FR_STATUS(sp)
++ LONG_L v1, GDB_FR_EPC(sp)
++ mtc0 v0, CP0_STATUS
++ DMTC0 v1, CP0_EPC
++ LONG_L v0, GDB_FR_HI(sp)
++ LONG_L v1, GDB_FR_LO(sp)
++ mthi v0
++ mtlo v1
++ LONG_L $31, GDB_FR_REG31(sp)
++ LONG_L $30, GDB_FR_REG30(sp)
++ LONG_L $28, GDB_FR_REG28(sp)
++ LONG_L $27, GDB_FR_REG27(sp)
++ LONG_L $26, GDB_FR_REG26(sp)
++ LONG_L $25, GDB_FR_REG25(sp)
++ LONG_L $24, GDB_FR_REG24(sp)
++ LONG_L $23, GDB_FR_REG23(sp)
++ LONG_L $22, GDB_FR_REG22(sp)
++ LONG_L $21, GDB_FR_REG21(sp)
++ LONG_L $20, GDB_FR_REG20(sp)
++ LONG_L $19, GDB_FR_REG19(sp)
++ LONG_L $18, GDB_FR_REG18(sp)
++ LONG_L $17, GDB_FR_REG17(sp)
++ LONG_L $16, GDB_FR_REG16(sp)
++ LONG_L $15, GDB_FR_REG15(sp)
++ LONG_L $14, GDB_FR_REG14(sp)
++ LONG_L $13, GDB_FR_REG13(sp)
++ LONG_L $12, GDB_FR_REG12(sp)
++ LONG_L $11, GDB_FR_REG11(sp)
++ LONG_L $10, GDB_FR_REG10(sp)
++ LONG_L $9, GDB_FR_REG9(sp)
++ LONG_L $8, GDB_FR_REG8(sp)
++ LONG_L $7, GDB_FR_REG7(sp)
++ LONG_L $6, GDB_FR_REG6(sp)
++ LONG_L $5, GDB_FR_REG5(sp)
++ LONG_L $4, GDB_FR_REG4(sp)
++ LONG_L $3, GDB_FR_REG3(sp)
++ LONG_L $2, GDB_FR_REG2(sp)
++ LONG_L $1, GDB_FR_REG1(sp)
++#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
++ LONG_L k0, GDB_FR_EPC(sp)
++ LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */
++ jr k0
++ rfe
++#else
++ LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */
++
++ .set mips3
++ eret
++ .set mips0
++#endif
++ .set at
++ .set reorder
++ END(trap_low)
++
++LEAF(kgdb_read_byte)
++4: lb t0, (a0)
++ sb t0, (a1)
++ li v0, 0
++ jr ra
++ .section __ex_table,"a"
++ PTR 4b, kgdbfault
++ .previous
++ END(kgdb_read_byte)
++
++LEAF(kgdb_write_byte)
++5: sb a0, (a1)
++ li v0, 0
++ jr ra
++ .section __ex_table,"a"
++ PTR 5b, kgdbfault
++ .previous
++ END(kgdb_write_byte)
++
++ .type kgdbfault@function
++ .ent kgdbfault
++
++kgdbfault: li v0, -EFAULT
++ jr ra
++ .end kgdbfault
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/traps.c linux-2.6.22-591/arch/mips/kernel/traps.c
+--- linux-2.6.22-570/arch/mips/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/mips/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -10,6 +10,8 @@
+ * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000, 01 MIPS Technologies, Inc.
+ * Copyright (C) 2002, 2003, 2004, 2005 Maciej W. Rozycki
++ *
++ * KGDB specific changes - Manish Lachwani (mlachwani@mvista.com)
+ */
+ #include <linux/bug.h>
+ #include <linux/init.h>
+@@ -21,6 +23,7 @@
+ #include <linux/kallsyms.h>
+ #include <linux/bootmem.h>
+ #include <linux/interrupt.h>
++#include <linux/kgdb.h>
+
+ #include <asm/bootinfo.h>
+ #include <asm/branch.h>
+@@ -42,6 +45,7 @@
+ #include <asm/watch.h>
+ #include <asm/types.h>
+ #include <asm/stacktrace.h>
++#include <asm/kdebug.h>
+
+ extern asmlinkage void handle_int(void);
+ extern asmlinkage void handle_tlbm(void);
+@@ -1445,6 +1449,11 @@
+ extern char except_vec4;
+ unsigned long i;
+
++#if defined(CONFIG_KGDB)
++ if (kgdb_early_setup)
++ return; /* Already done */
++#endif
++
+ if (cpu_has_veic || cpu_has_vint)
+ ebase = (unsigned long) alloc_bootmem_low_pages (0x200 + VECTORSPACING*64);
+ else
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -17,4 +17,3 @@
+ #
+
+ obj-y := atlas_int.o atlas_setup.o
+-obj-$(CONFIG_KGDB) += atlas_gdb.o
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,97 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
+- *
+- * This program is free software; you can distribute it and/or modify it
+- * under the terms of the GNU General Public License (Version 2) as
+- * published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+- * for more details.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * This is the interface to the remote debugger stub.
+- */
+-#include <asm/io.h>
+-#include <asm/mips-boards/atlas.h>
+-#include <asm/mips-boards/saa9730_uart.h>
+-
+-#define INB(a) inb((unsigned long)a)
+-#define OUTB(x,a) outb(x,(unsigned long)a)
+-
+-/*
+- * This is the interface to the remote debugger stub
+- * if the Philips part is used for the debug port,
+- * called from the platform setup code.
+- */
+-void *saa9730_base = (void *)ATLAS_SAA9730_REG;
+-
+-static int saa9730_kgdb_active = 0;
+-
+-#define SAA9730_BAUDCLOCK(baud) (((ATLAS_SAA9730_BAUDCLOCK/(baud))/16)-1)
+-
+-int saa9730_kgdb_hook(int speed)
+-{
+- int baudclock;
+- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+-
+- /*
+- * Clear all interrupts
+- */
+- (void) INB(&kgdb_uart->Lsr);
+- (void) INB(&kgdb_uart->Msr);
+- (void) INB(&kgdb_uart->Thr_Rbr);
+- (void) INB(&kgdb_uart->Iir_Fcr);
+-
+- /*
+- * Now, initialize the UART
+- */
+- /* 8 data bits, one stop bit, no parity */
+- OUTB(SAA9730_LCR_DATA8, &kgdb_uart->Lcr);
+-
+- baudclock = SAA9730_BAUDCLOCK(speed);
+-
+- OUTB((baudclock >> 16) & 0xff, &kgdb_uart->BaudDivMsb);
+- OUTB( baudclock & 0xff, &kgdb_uart->BaudDivLsb);
+-
+- /* Set RTS/DTR active */
+- OUTB(SAA9730_MCR_DTR | SAA9730_MCR_RTS, &kgdb_uart->Mcr);
+- saa9730_kgdb_active = 1;
+-
+- return speed;
+-}
+-
+-int saa9730_putDebugChar(char c)
+-{
+- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+-
+- if (!saa9730_kgdb_active) { /* need to init device first */
+- return 0;
+- }
+-
+- while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_THRE))
+- ;
+- OUTB(c, &kgdb_uart->Thr_Rbr);
+-
+- return 1;
+-}
+-
+-char saa9730_getDebugChar(void)
+-{
+- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+- char c;
+-
+- if (!saa9730_kgdb_active) { /* need to init device first */
+- return 0;
+- }
+- while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_DR))
+- ;
+-
+- c = INB(&kgdb_uart->Thr_Rbr);
+- return(c);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -37,10 +37,6 @@
+ extern void mips_time_init(void);
+ extern unsigned long mips_rtc_get_time(void);
+
+-#ifdef CONFIG_KGDB
+-extern void kgdb_config(void);
+-#endif
+-
+ static void __init serial_init(void);
+
+ const char *get_system_type(void)
+@@ -58,9 +54,6 @@
+
+ serial_init ();
+
+-#ifdef CONFIG_KGDB
+- kgdb_config();
+-#endif
+ mips_reboot_setup();
+
+ board_time_init = mips_time_init;
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c
+--- linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,133 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
+- *
+- * This program is free software; you can distribute it and/or modify it
+- * under the terms of the GNU General Public License (Version 2) as
+- * published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+- * for more details.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * This is the interface to the remote debugger stub.
+- */
+-#include <linux/types.h>
+-#include <linux/serial.h>
+-#include <linux/serialP.h>
+-#include <linux/serial_reg.h>
+-
+-#include <asm/serial.h>
+-#include <asm/io.h>
+-
+-static struct serial_state rs_table[] = {
+- SERIAL_PORT_DFNS /* Defined in serial.h */
+-};
+-
+-static struct async_struct kdb_port_info = {0};
+-
+-int (*generic_putDebugChar)(char);
+-char (*generic_getDebugChar)(void);
+-
+-static __inline__ unsigned int serial_in(struct async_struct *info, int offset)
+-{
+- return inb(info->port + offset);
+-}
+-
+-static __inline__ void serial_out(struct async_struct *info, int offset,
+- int value)
+-{
+- outb(value, info->port+offset);
+-}
+-
+-int rs_kgdb_hook(int tty_no, int speed) {
+- int t;
+- struct serial_state *ser = &rs_table[tty_no];
+-
+- kdb_port_info.state = ser;
+- kdb_port_info.magic = SERIAL_MAGIC;
+- kdb_port_info.port = ser->port;
+- kdb_port_info.flags = ser->flags;
+-
+- /*
+- * Clear all interrupts
+- */
+- serial_in(&kdb_port_info, UART_LSR);
+- serial_in(&kdb_port_info, UART_RX);
+- serial_in(&kdb_port_info, UART_IIR);
+- serial_in(&kdb_port_info, UART_MSR);
+-
+- /*
+- * Now, initialize the UART
+- */
+- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); /* reset DLAB */
+- if (kdb_port_info.flags & ASYNC_FOURPORT) {
+- kdb_port_info.MCR = UART_MCR_DTR | UART_MCR_RTS;
+- t = UART_MCR_DTR | UART_MCR_OUT1;
+- } else {
+- kdb_port_info.MCR
+- = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2;
+- t = UART_MCR_DTR | UART_MCR_RTS;
+- }
+-
+- kdb_port_info.MCR = t; /* no interrupts, please */
+- serial_out(&kdb_port_info, UART_MCR, kdb_port_info.MCR);
+-
+- /*
+- * and set the speed of the serial port
+- */
+- if (speed == 0)
+- speed = 9600;
+-
+- t = kdb_port_info.state->baud_base / speed;
+- /* set DLAB */
+- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB);
+- serial_out(&kdb_port_info, UART_DLL, t & 0xff);/* LS of divisor */
+- serial_out(&kdb_port_info, UART_DLM, t >> 8); /* MS of divisor */
+- /* reset DLAB */
+- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8);
+-
+- return speed;
+-}
+-
+-int putDebugChar(char c)
+-{
+- return generic_putDebugChar(c);
+-}
+-
+-char getDebugChar(void)
+-{
+- return generic_getDebugChar();
+-}
+-
+-int rs_putDebugChar(char c)
+-{
+-
+- if (!kdb_port_info.state) { /* need to init device first */
+- return 0;
+- }
+-
+- while ((serial_in(&kdb_port_info, UART_LSR) & UART_LSR_THRE) == 0)
+- ;
+-
+- serial_out(&kdb_port_info, UART_TX, c);
+-
+- return 1;
+-}
+-
+-char rs_getDebugChar(void)
+-{
+- if (!kdb_port_info.state) { /* need to init device first */
+- return 0;
+- }
+-
+- while (!(serial_in(&kdb_port_info, UART_LSR) & 1))
+- ;
+-
+- return serial_in(&kdb_port_info, UART_RX);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/init.c linux-2.6.22-591/arch/mips/mips-boards/generic/init.c
+--- linux-2.6.22-570/arch/mips/mips-boards/generic/init.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/generic/init.c 2007-12-21 15:36:11.000000000 -0500
+@@ -37,15 +37,6 @@
+
+ #include <asm/mips-boards/malta.h>
+
+-#ifdef CONFIG_KGDB
+-extern int rs_kgdb_hook(int, int);
+-extern int rs_putDebugChar(char);
+-extern char rs_getDebugChar(void);
+-extern int saa9730_kgdb_hook(int);
+-extern int saa9730_putDebugChar(char);
+-extern char saa9730_getDebugChar(void);
+-#endif
+-
+ int prom_argc;
+ int *_prom_argv, *_prom_envp;
+
+@@ -173,59 +164,6 @@
+ }
+ #endif
+
+-#ifdef CONFIG_KGDB
+-void __init kgdb_config (void)
+-{
+- extern int (*generic_putDebugChar)(char);
+- extern char (*generic_getDebugChar)(void);
+- char *argptr;
+- int line, speed;
+-
+- argptr = prom_getcmdline();
+- if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) {
+- argptr += strlen("kgdb=ttyS");
+- if (*argptr != '0' && *argptr != '1')
+- printk("KGDB: Unknown serial line /dev/ttyS%c, "
+- "falling back to /dev/ttyS1\n", *argptr);
+- line = *argptr == '0' ? 0 : 1;
+- printk("KGDB: Using serial line /dev/ttyS%d for session\n", line);
+-
+- speed = 0;
+- if (*++argptr == ',')
+- {
+- int c;
+- while ((c = *++argptr) && ('0' <= c && c <= '9'))
+- speed = speed * 10 + c - '0';
+- }
+-#ifdef CONFIG_MIPS_ATLAS
+- if (line == 1) {
+- speed = saa9730_kgdb_hook(speed);
+- generic_putDebugChar = saa9730_putDebugChar;
+- generic_getDebugChar = saa9730_getDebugChar;
+- }
+- else
+-#endif
+- {
+- speed = rs_kgdb_hook(line, speed);
+- generic_putDebugChar = rs_putDebugChar;
+- generic_getDebugChar = rs_getDebugChar;
+- }
+-
+- pr_info("KGDB: Using serial line /dev/ttyS%d at %d for "
+- "session, please connect your debugger\n",
+- line ? 1 : 0, speed);
+-
+- {
+- char *s;
+- for (s = "Please connect GDB to this port\r\n"; *s; )
+- generic_putDebugChar (*s++);
+- }
+-
+- /* Breakpoint is invoked after interrupts are initialised */
+- }
+-}
+-#endif
+-
+ void __init mips_nmi_setup (void)
+ {
+ void *base;
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c
+--- linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -39,10 +39,6 @@
+ extern void mips_time_init(void);
+ extern unsigned long mips_rtc_get_time(void);
+
+-#ifdef CONFIG_KGDB
+-extern void kgdb_config(void);
+-#endif
+-
+ struct resource standard_io_resources[] = {
+ { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY },
+ { .name = "timer", .start = 0x40, .end = 0x5f, .flags = IORESOURCE_BUSY },
+@@ -99,10 +95,6 @@
+ */
+ enable_dma(4);
+
+-#ifdef CONFIG_KGDB
+- kgdb_config ();
+-#endif
+-
+ if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) {
+ char *argptr;
+
+diff -Nurb linux-2.6.22-570/arch/mips/mm/extable.c linux-2.6.22-591/arch/mips/mm/extable.c
+--- linux-2.6.22-570/arch/mips/mm/extable.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/mm/extable.c 2007-12-21 15:36:11.000000000 -0500
+@@ -3,6 +3,7 @@
+ */
+ #include <linux/module.h>
+ #include <linux/spinlock.h>
++#include <linux/kgdb.h>
+ #include <asm/branch.h>
+ #include <asm/uaccess.h>
+
+@@ -16,6 +17,12 @@
+
+ return 1;
+ }
++#ifdef CONFIG_KGDB
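++	/*
++	 * No fixup entry matched.  If the fault happened while KGDB was
++	 * accessing memory on the debugger's behalf (kgdb_may_fault),
++	 * recover by jumping back to the state saved before the access.
++	 */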
++ if (atomic_read(&debugger_active) && kgdb_may_fault)
++ /* Restore our previous state. */
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Not reached. */
++#endif
+
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile
+--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -4,5 +4,3 @@
+
+ obj-y += cpci-irq.o irq.o platform.o prom.o reset.o \
+ setup.o uart-irq.o
+-
+-obj-$(CONFIG_KGDB) += dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c
+--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/serial.h> /* For the serial port location and base baud */
+-
+-/* --- CONFIG --- */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-/* --- END OF CONFIG --- */
+-
+-#define UART16550_BAUD_2400 2400
+-#define UART16550_BAUD_4800 4800
+-#define UART16550_BAUD_9600 9600
+-#define UART16550_BAUD_19200 19200
+-#define UART16550_BAUD_38400 38400
+-#define UART16550_BAUD_57600 57600
+-#define UART16550_BAUD_115200 115200
+-
+-#define UART16550_PARITY_NONE 0
+-#define UART16550_PARITY_ODD 0x08
+-#define UART16550_PARITY_EVEN 0x18
+-#define UART16550_PARITY_MARK 0x28
+-#define UART16550_PARITY_SPACE 0x38
+-
+-#define UART16550_DATA_5BIT 0x0
+-#define UART16550_DATA_6BIT 0x1
+-#define UART16550_DATA_7BIT 0x2
+-#define UART16550_DATA_8BIT 0x3
+-
+-#define UART16550_STOP_1BIT 0x0
+-#define UART16550_STOP_2BIT 0x4
+-
+-/* ----------------------------------------------------- */
+-
+-/* === CONFIG === */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define BASE OCELOT_SERIAL1_BASE
+-#define MAX_BAUD OCELOT_BASE_BAUD
+-
+-/* === END OF CONFIG === */
+-
+-#define REG_OFFSET 4
+-
+-/* register offset */
+-#define OFS_RCV_BUFFER 0
+-#define OFS_TRANS_HOLD 0
+-#define OFS_SEND_BUFFER 0
+-#define OFS_INTR_ENABLE (1*REG_OFFSET)
+-#define OFS_INTR_ID (2*REG_OFFSET)
+-#define OFS_DATA_FORMAT (3*REG_OFFSET)
+-#define OFS_LINE_CONTROL (3*REG_OFFSET)
+-#define OFS_MODEM_CONTROL (4*REG_OFFSET)
+-#define OFS_RS232_OUTPUT (4*REG_OFFSET)
+-#define OFS_LINE_STATUS (5*REG_OFFSET)
+-#define OFS_MODEM_STATUS (6*REG_OFFSET)
+-#define OFS_RS232_INPUT (6*REG_OFFSET)
+-#define OFS_SCRATCH_PAD (7*REG_OFFSET)
+-
+-#define OFS_DIVISOR_LSB (0*REG_OFFSET)
+-#define OFS_DIVISOR_MSB (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y)))
+-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+- /* disable interrupts */
+- UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+- /* set up baud rate */
+- {
+- uint32 divisor;
+-
+- /* set DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+- /* set divisor */
+- divisor = MAX_BAUD / baud;
+- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+- /* clear DIAB bit */
+- UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+- }
+-
+- /* set data format */
+- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_38400,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+- return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(UART16550_BAUD_38400,
+- UART16550_DATA_8BIT,
+- UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+- }
+-
+- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+- UART16550_WRITE(OFS_SEND_BUFFER, byte);
+- return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/pci/fixup-atlas.c linux-2.6.22-591/arch/mips/pci/fixup-atlas.c
+--- linux-2.6.22-570/arch/mips/pci/fixup-atlas.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/pci/fixup-atlas.c 2007-12-21 15:36:11.000000000 -0500
+@@ -68,24 +68,3 @@
+ {
+ return 0;
+ }
+-
+-#ifdef CONFIG_KGDB
+-/*
+- * The PCI scan may have moved the saa9730 I/O address, so reread
+- * the address here.
+- * This does mean that it's not possible to debug the PCI bus configuration
+- * code, but it is better than nothing...
+- */
+-
+-static void atlas_saa9730_base_fixup (struct pci_dev *pdev)
+-{
+- extern void *saa9730_base;
+- if (pdev->bus == 0 && PCI_SLOT(pdev->devfn) == 19)
+- (void) pci_read_config_dword (pdev, 0x14, (u32 *)&saa9730_base);
+- printk ("saa9730_base = %x\n", saa9730_base);
+-}
+-
+-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PHILIPS, PCI_DEVICE_ID_PHILIPS_SAA9730,
+- atlas_saa9730_base_fixup);
+-
+-#endif
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -24,4 +24,3 @@
+
+ obj-y := setup.o prom.o int.o reset.o time.o proc.o platform.o
+ obj-$(CONFIG_PCI) += pci.o
+-obj-$(CONFIG_KGDB) += gdb_hook.o
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,109 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
+- *
+- * ########################################################################
+- *
+- * This program is free software; you can distribute it and/or modify it
+- * under the terms of the GNU General Public License (Version 2) as
+- * published by the Free Software Foundation.
+- *
+- * This program is distributed in the hope it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+- * for more details.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * ########################################################################
+- *
+- * This is the interface to the remote debugger stub.
+- *
+- */
+-#include <linux/types.h>
+-#include <linux/serial.h>
+-#include <linux/serialP.h>
+-#include <linux/serial_reg.h>
+-#include <linux/serial_ip3106.h>
+-
+-#include <asm/serial.h>
+-#include <asm/io.h>
+-
+-#include <uart.h>
+-
+-static struct serial_state rs_table[IP3106_NR_PORTS] = {
+-};
+-static struct async_struct kdb_port_info = {0};
+-
+-void rs_kgdb_hook(int tty_no)
+-{
+- struct serial_state *ser = &rs_table[tty_no];
+-
+- kdb_port_info.state = ser;
+- kdb_port_info.magic = SERIAL_MAGIC;
+- kdb_port_info.port = tty_no;
+- kdb_port_info.flags = ser->flags;
+-
+- /*
+- * Clear all interrupts
+- */
+- /* Clear all the transmitter FIFO counters (pointer and status) */
+- ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_TX_RST;
+- /* Clear all the receiver FIFO counters (pointer and status) */
+- ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_RX_RST;
+- /* Clear all interrupts */
+- ip3106_iclr(UART_BASE, tty_no) = IP3106_UART_INT_ALLRX |
+- IP3106_UART_INT_ALLTX;
+-
+- /*
+- * Now, initialize the UART
+- */
+- ip3106_lcr(UART_BASE, tty_no) = IP3106_UART_LCR_8BIT;
+- ip3106_baud(UART_BASE, tty_no) = 5; // 38400 Baud
+-}
+-
+-int putDebugChar(char c)
+-{
+- /* Wait until FIFO not full */
+- while (((ip3106_fifo(UART_BASE, kdb_port_info.port) & IP3106_UART_FIFO_TXFIFO) >> 16) >= 16)
+- ;
+- /* Send one char */
+- ip3106_fifo(UART_BASE, kdb_port_info.port) = c;
+-
+- return 1;
+-}
+-
+-char getDebugChar(void)
+-{
+- char ch;
+-
+- /* Wait until there is a char in the FIFO */
+- while (!((ip3106_fifo(UART_BASE, kdb_port_info.port) &
+- IP3106_UART_FIFO_RXFIFO) >> 8))
+- ;
+- /* Read one char */
+- ch = ip3106_fifo(UART_BASE, kdb_port_info.port) &
+- IP3106_UART_FIFO_RBRTHR;
+- /* Advance the RX FIFO read pointer */
+- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_NEXT;
+- return (ch);
+-}
+-
+-void rs_disable_debug_interrupts(void)
+-{
+- ip3106_ien(UART_BASE, kdb_port_info.port) = 0; /* Disable all interrupts */
+-}
+-
+-void rs_enable_debug_interrupts(void)
+-{
+- /* Clear all the transmitter FIFO counters (pointer and status) */
+- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_TX_RST;
+- /* Clear all the receiver FIFO counters (pointer and status) */
+- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_RST;
+- /* Clear all interrupts */
+- ip3106_iclr(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX |
+- IP3106_UART_INT_ALLTX;
+- ip3106_ien(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX; /* Enable RX interrupts */
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -145,16 +145,5 @@
+ ip3106_baud(UART_BASE, pnx8550_console_port) = 5;
+ }
+
+-#ifdef CONFIG_KGDB
+- argptr = prom_getcmdline();
+- if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) {
+- int line;
+- argptr += strlen("kgdb=ttyS");
+- line = *argptr == '0' ? 0 : 1;
+- rs_kgdb_hook(line);
+- pr_info("KGDB: Using ttyS%i for session, "
+- "please connect your debugger\n", line ? 1 : 0);
+- }
+-#endif
+ return;
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -4,5 +4,4 @@
+
+ obj-y += irq.o i2c-yosemite.o prom.o py-console.o setup.o
+
+-obj-$(CONFIG_KGDB) += dbg_io.o
+ obj-$(CONFIG_SMP) += smp.o
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,180 +0,0 @@
+-/*
+- * Copyright 2003 PMC-Sierra
+- * Author: Manish Lachwani (lachwani@pmc-sierra.com)
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-/*
+- * Support for KGDB for the Yosemite board. We make use of single serial
+- * port to be used for KGDB as well as console. The second serial port
+- * seems to be having a problem. Single IRQ is allocated for both the
+- * ports. Hence, the interrupt routing code needs to figure out whether
+- * the interrupt came from channel A or B.
+- */
+-
+-#include <asm/serial.h>
+-
+-/*
+- * Baud rate, Parity, Data and Stop bit settings for the
+- * serial port on the Yosemite. Note that the Early printk
+- * patch has been added. So, we should be all set to go
+- */
+-#define YOSEMITE_BAUD_2400 2400
+-#define YOSEMITE_BAUD_4800 4800
+-#define YOSEMITE_BAUD_9600 9600
+-#define YOSEMITE_BAUD_19200 19200
+-#define YOSEMITE_BAUD_38400 38400
+-#define YOSEMITE_BAUD_57600 57600
+-#define YOSEMITE_BAUD_115200 115200
+-
+-#define YOSEMITE_PARITY_NONE 0
+-#define YOSEMITE_PARITY_ODD 0x08
+-#define YOSEMITE_PARITY_EVEN 0x18
+-#define YOSEMITE_PARITY_MARK 0x28
+-#define YOSEMITE_PARITY_SPACE 0x38
+-
+-#define YOSEMITE_DATA_5BIT 0x0
+-#define YOSEMITE_DATA_6BIT 0x1
+-#define YOSEMITE_DATA_7BIT 0x2
+-#define YOSEMITE_DATA_8BIT 0x3
+-
+-#define YOSEMITE_STOP_1BIT 0x0
+-#define YOSEMITE_STOP_2BIT 0x4
+-
+-/* This is crucial */
+-#define SERIAL_REG_OFS 0x1
+-
+-#define SERIAL_RCV_BUFFER 0x0
+-#define SERIAL_TRANS_HOLD 0x0
+-#define SERIAL_SEND_BUFFER 0x0
+-#define SERIAL_INTR_ENABLE (1 * SERIAL_REG_OFS)
+-#define SERIAL_INTR_ID (2 * SERIAL_REG_OFS)
+-#define SERIAL_DATA_FORMAT (3 * SERIAL_REG_OFS)
+-#define SERIAL_LINE_CONTROL (3 * SERIAL_REG_OFS)
+-#define SERIAL_MODEM_CONTROL (4 * SERIAL_REG_OFS)
+-#define SERIAL_RS232_OUTPUT (4 * SERIAL_REG_OFS)
+-#define SERIAL_LINE_STATUS (5 * SERIAL_REG_OFS)
+-#define SERIAL_MODEM_STATUS (6 * SERIAL_REG_OFS)
+-#define SERIAL_RS232_INPUT (6 * SERIAL_REG_OFS)
+-#define SERIAL_SCRATCH_PAD (7 * SERIAL_REG_OFS)
+-
+-#define SERIAL_DIVISOR_LSB (0 * SERIAL_REG_OFS)
+-#define SERIAL_DIVISOR_MSB (1 * SERIAL_REG_OFS)
+-
+-/*
+- * Functions to READ and WRITE to serial port 0
+- */
+-#define SERIAL_READ(ofs) (*((volatile unsigned char*) \
+- (TITAN_SERIAL_BASE + ofs)))
+-
+-#define SERIAL_WRITE(ofs, val) ((*((volatile unsigned char*) \
+- (TITAN_SERIAL_BASE + ofs))) = val)
+-
+-/*
+- * Functions to READ and WRITE to serial port 1
+- */
+-#define SERIAL_READ_1(ofs) (*((volatile unsigned char*) \
+- (TITAN_SERIAL_BASE_1 + ofs)))
+-
+-#define SERIAL_WRITE_1(ofs, val) ((*((volatile unsigned char*) \
+- (TITAN_SERIAL_BASE_1 + ofs))) = val)
+-
+-/*
+- * Second serial port initialization
+- */
+-void init_second_port(void)
+-{
+- /* Disable Interrupts */
+- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0);
+- SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0x0);
+-
+- {
+- unsigned int divisor;
+-
+- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x80);
+- divisor = TITAN_SERIAL_BASE_BAUD / YOSEMITE_BAUD_115200;
+- SERIAL_WRITE_1(SERIAL_DIVISOR_LSB, divisor & 0xff);
+-
+- SERIAL_WRITE_1(SERIAL_DIVISOR_MSB,
+- (divisor & 0xff00) >> 8);
+- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0);
+- }
+-
+- SERIAL_WRITE_1(SERIAL_DATA_FORMAT, YOSEMITE_DATA_8BIT |
+- YOSEMITE_PARITY_NONE | YOSEMITE_STOP_1BIT);
+-
+- /* Enable Interrupts */
+- SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0xf);
+-}
+-
+-/* Initialize the serial port for KGDB debugging */
+-void debugInit(unsigned int baud, unsigned char data, unsigned char parity,
+- unsigned char stop)
+-{
+- /* Disable Interrupts */
+- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0);
+- SERIAL_WRITE(SERIAL_INTR_ENABLE, 0x0);
+-
+- {
+- unsigned int divisor;
+-
+- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x80);
+-
+- divisor = TITAN_SERIAL_BASE_BAUD / baud;
+- SERIAL_WRITE(SERIAL_DIVISOR_LSB, divisor & 0xff);
+-
+- SERIAL_WRITE(SERIAL_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0);
+- }
+-
+- SERIAL_WRITE(SERIAL_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-unsigned char getDebugChar(void)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(YOSEMITE_BAUD_115200,
+- YOSEMITE_DATA_8BIT,
+- YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT);
+- }
+-
+- while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x1) == 0);
+- return SERIAL_READ(SERIAL_RCV_BUFFER);
+-}
+-
+-int putDebugChar(unsigned char byte)
+-{
+- if (!remoteDebugInitialized) {
+- remoteDebugInitialized = 1;
+- debugInit(YOSEMITE_BAUD_115200,
+- YOSEMITE_DATA_8BIT,
+- YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT);
+- }
+-
+- while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x20) == 0);
+- SERIAL_WRITE(SERIAL_SEND_BUFFER, byte);
+-
+- return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -137,10 +137,6 @@
+ }
+ }
+
+-#ifdef CONFIG_KGDB
+-extern void init_second_port(void);
+-#endif
+-
+ /*
+ * Initialize the next level interrupt handler
+ */
+@@ -152,11 +148,6 @@
+ rm7k_cpu_irq_init();
+ rm9k_cpu_irq_init();
+
+-#ifdef CONFIG_KGDB
+- /* At this point, initialize the second serial port */
+- init_second_port();
+-#endif
+-
+ #ifdef CONFIG_GDB_CONSOLE
+ register_gdb_console();
+ #endif
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c
+--- linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -101,30 +101,6 @@
+ add_preferred_console("arc", 0, NULL);
+ }
+
+-#ifdef CONFIG_KGDB
+- {
+- char *kgdb_ttyd = prom_getcmdline();
+-
+- if ((kgdb_ttyd = strstr(kgdb_ttyd, "kgdb=ttyd")) != NULL) {
+- int line;
+- kgdb_ttyd += strlen("kgdb=ttyd");
+- if (*kgdb_ttyd != '1' && *kgdb_ttyd != '2')
+- printk(KERN_INFO "KGDB: Uknown serial line /dev/ttyd%c"
+- ", falling back to /dev/ttyd1\n", *kgdb_ttyd);
+- line = *kgdb_ttyd == '2' ? 0 : 1;
+- printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for "
+- "session\n", line ? 1 : 2);
+- rs_kgdb_hook(line);
+-
+- printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for "
+- "session, please connect your debugger\n", line ? 1:2);
+-
+- kgdb_enabled = 1;
+- /* Breakpoints and stuff are in sgi_irq_setup() */
+- }
+- }
+-#endif
+-
+ #if defined(CONFIG_VT) && defined(CONFIG_SGI_NEWPORT_CONSOLE)
+ {
+ ULONG *gfxinfo;
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/Makefile linux-2.6.22-591/arch/mips/sgi-ip27/Makefile
+--- linux-2.6.22-570/arch/mips/sgi-ip27/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sgi-ip27/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -7,5 +7,4 @@
+ ip27-xtalk.o
+
+ obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o
+-obj-$(CONFIG_KGDB) += ip27-dbgio.o
+ obj-$(CONFIG_SMP) += ip27-smp.o
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c
+--- linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,60 +0,0 @@
+-/*
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 675 Mass Ave, Cambridge, MA 02139, USA.
+- *
+- * Copyright 2004 Ralf Baechle <ralf@linux-mips.org>
+- */
+-#include <asm/sn/addrs.h>
+-#include <asm/sn/sn0/hub.h>
+-#include <asm/sn/klconfig.h>
+-#include <asm/sn/ioc3.h>
+-#include <asm/sn/sn_private.h>
+-
+-#include <linux/serial.h>
+-#include <linux/serial_core.h>
+-#include <linux/serial_reg.h>
+-
+-#define IOC3_CLK (22000000 / 3)
+-#define IOC3_FLAGS (0)
+-
+-static inline struct ioc3_uartregs *console_uart(void)
+-{
+- struct ioc3 *ioc3;
+-
+- ioc3 = (struct ioc3 *)KL_CONFIG_CH_CONS_INFO(get_nasid())->memory_base;
+-
+- return &ioc3->sregs.uarta;
+-}
+-
+-unsigned char getDebugChar(void)
+-{
+- struct ioc3_uartregs *uart = console_uart();
+-
+- while ((uart->iu_lsr & UART_LSR_DR) == 0);
+- return uart->iu_rbr;
+-}
+-
+-void putDebugChar(unsigned char c)
+-{
+- struct ioc3_uartregs *uart = console_uart();
+-
+- while ((uart->iu_lsr & UART_LSR_THRE) == 0);
+- uart->iu_thr = c;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c
+--- linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -57,30 +57,6 @@
+ extern unsigned long ht_eoi_space;
+ #endif
+
+-#ifdef CONFIG_KGDB
+-#include <asm/gdb-stub.h>
+-extern void breakpoint(void);
+-static int kgdb_irq;
+-#ifdef CONFIG_GDB_CONSOLE
+-extern void register_gdb_console(void);
+-#endif
+-
+-/* kgdb is on when configured. Pass "nokgdb" kernel arg to turn it off */
+-static int kgdb_flag = 1;
+-static int __init nokgdb(char *str)
+-{
+- kgdb_flag = 0;
+- return 1;
+-}
+-__setup("nokgdb", nokgdb);
+-
+-/* Default to UART1 */
+-int kgdb_port = 1;
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-extern char sb1250_duart_present[];
+-#endif
+-#endif
+-
+ static struct irq_chip bcm1480_irq_type = {
+ .name = "BCM1480-IMR",
+ .ack = ack_bcm1480_irq,
+@@ -394,62 +370,11 @@
+ * does its own management of IP7.
+ */
+
+-#ifdef CONFIG_KGDB
+- imask |= STATUSF_IP6;
+-#endif
+ /* Enable necessary IPs, disable the rest */
+ change_c0_status(ST0_IM, imask);
+
+-#ifdef CONFIG_KGDB
+- if (kgdb_flag) {
+- kgdb_irq = K_BCM1480_INT_UART_0 + kgdb_port;
+-
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+- sb1250_duart_present[kgdb_port] = 0;
+-#endif
+- /* Setup uart 1 settings, mapper */
+- /* QQQ FIXME */
+- __raw_writeq(M_DUART_IMR_BRK, IO_SPACE_BASE + A_DUART_IMRREG(kgdb_port));
+-
+- bcm1480_steal_irq(kgdb_irq);
+- __raw_writeq(IMR_IP6_VAL,
+- IO_SPACE_BASE + A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) +
+- (kgdb_irq<<3));
+- bcm1480_unmask_irq(0, kgdb_irq);
+-
+-#ifdef CONFIG_GDB_CONSOLE
+- register_gdb_console();
+-#endif
+- printk("Waiting for GDB on UART port %d\n", kgdb_port);
+- set_debug_traps();
+- breakpoint();
+- }
+-#endif
+ }
+
+-#ifdef CONFIG_KGDB
+-
+-#include <linux/delay.h>
+-
+-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-static void bcm1480_kgdb_interrupt(void)
+-{
+- /*
+- * Clear break-change status (allow some time for the remote
+- * host to stop the break, since we would see another
+- * interrupt on the end-of-break too)
+- */
+- kstat.irqs[smp_processor_id()][kgdb_irq]++;
+- mdelay(500);
+- duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
+- M_DUART_RX_EN | M_DUART_TX_EN);
+- set_async_breakpoint(&get_irq_regs()->cp0_epc);
+-}
+-
+-#endif /* CONFIG_KGDB */
+-
+ extern void bcm1480_timer_interrupt(void);
+ extern void bcm1480_mailbox_interrupt(void);
+
+@@ -478,11 +403,6 @@
+ bcm1480_mailbox_interrupt();
+ #endif
+
+-#ifdef CONFIG_KGDB
+- else if (pending & CAUSEF_IP6)
+- bcm1480_kgdb_interrupt(); /* KGDB (uart 1) */
+-#endif
+-
+ else if (pending & CAUSEF_IP2) {
+ unsigned long long mask_h, mask_l;
+ unsigned long base;
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c
+--- linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -58,10 +58,6 @@
+ extern unsigned long initrd_start, initrd_end;
+ #endif
+
+-#ifdef CONFIG_KGDB
+-extern int kgdb_port;
+-#endif
+-
+ static void ATTRIB_NORET cfe_linux_exit(void *arg)
+ {
+ int warm = *(int *)arg;
+@@ -242,9 +238,6 @@
+ int argc = fw_arg0;
+ char **envp = (char **) fw_arg2;
+ int *prom_vec = (int *) fw_arg3;
+-#ifdef CONFIG_KGDB
+- char *arg;
+-#endif
+
+ _machine_restart = cfe_linux_restart;
+ _machine_halt = cfe_linux_halt;
+@@ -308,13 +301,6 @@
+ }
+ }
+
+-#ifdef CONFIG_KGDB
+- if ((arg = strstr(arcs_cmdline,"kgdb=duart")) != NULL)
+- kgdb_port = (arg[10] == '0') ? 0 : 1;
+- else
+- kgdb_port = 1;
+-#endif
+-
+ #ifdef CONFIG_BLK_DEV_INITRD
+ {
+ char *ptr;
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -3,3 +3,4 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_SIBYTE_STANDALONE) += prom.o
+ obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o
++obj-$(CONFIG_KGDB_SIBYTE) += kgdb_sibyte.o
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <asm/signal.h>
+ #include <asm/system.h>
+ #include <asm/io.h>
++#include <asm/kgdb.h>
+
+ #include <asm/sibyte/sb1250_regs.h>
+ #include <asm/sibyte/sb1250_int.h>
+@@ -56,16 +57,6 @@
+ extern unsigned long ldt_eoi_space;
+ #endif
+
+-#ifdef CONFIG_KGDB
+-static int kgdb_irq;
+-
+-/* Default to UART1 */
+-int kgdb_port = 1;
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-extern char sb1250_duart_present[];
+-#endif
+-#endif
+-
+ static struct irq_chip sb1250_irq_type = {
+ .name = "SB1250-IMR",
+ .ack = ack_sb1250_irq,
+@@ -304,6 +295,11 @@
+ unsigned int imask = STATUSF_IP4 | STATUSF_IP3 | STATUSF_IP2 |
+ STATUSF_IP1 | STATUSF_IP0;
+
++#ifdef CONFIG_KGDB
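++	/* Interrupt setup was already done during KGDB's early setup. */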
++ if (kgdb_early_setup)
++ return;
++#endif
++
+ /* Default everything to IP2 */
+ for (i = 0; i < SB1250_NR_IRQS; i++) { /* was I0 */
+ __raw_writeq(IMR_IP2_VAL,
+@@ -349,58 +345,16 @@
+ * does its own management of IP7.
+ */
+
+-#ifdef CONFIG_KGDB
++#ifdef CONFIG_KGDB_SIBYTE
+ imask |= STATUSF_IP6;
+ #endif
+ /* Enable necessary IPs, disable the rest */
+ change_c0_status(ST0_IM, imask);
+-
+-#ifdef CONFIG_KGDB
+- if (kgdb_flag) {
+- kgdb_irq = K_INT_UART_0 + kgdb_port;
+-
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+- sb1250_duart_present[kgdb_port] = 0;
+-#endif
+- /* Setup uart 1 settings, mapper */
+- __raw_writeq(M_DUART_IMR_BRK,
+- IOADDR(A_DUART_IMRREG(kgdb_port)));
+-
+- sb1250_steal_irq(kgdb_irq);
+- __raw_writeq(IMR_IP6_VAL,
+- IOADDR(A_IMR_REGISTER(0,
+- R_IMR_INTERRUPT_MAP_BASE) +
+- (kgdb_irq << 3)));
+- sb1250_unmask_irq(0, kgdb_irq);
+- }
+-#endif
+ }
+
+-#ifdef CONFIG_KGDB
+-
+-#include <linux/delay.h>
+-
+-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-static void sb1250_kgdb_interrupt(void)
+-{
+- /*
+- * Clear break-change status (allow some time for the remote
+- * host to stop the break, since we would see another
+- * interrupt on the end-of-break too)
+- */
+- kstat_this_cpu.irqs[kgdb_irq]++;
+- mdelay(500);
+- duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
+- M_DUART_RX_EN | M_DUART_TX_EN);
+- set_async_breakpoint(&get_irq_regs()->cp0_epc);
+-}
+-
+-#endif /* CONFIG_KGDB */
+-
+ extern void sb1250_timer_interrupt(void);
+ extern void sb1250_mailbox_interrupt(void);
++extern void sb1250_kgdb_interrupt(void);
+
+ asmlinkage void plat_irq_dispatch(void)
+ {
+@@ -437,7 +391,7 @@
+ sb1250_mailbox_interrupt();
+ #endif
+
+-#ifdef CONFIG_KGDB
++#ifdef CONFIG_KGDB_SIBYTE
+ else if (pending & CAUSEF_IP6) /* KGDB (uart 1) */
+ sb1250_kgdb_interrupt();
+ #endif
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,144 @@
++/*
++ * arch/mips/sibyte/sb1250/kgdb_sibyte.c
++ *
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * 2004 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++/*
++ * Support for KGDB on the Broadcom SiByte. The SWARM board,
++ * for example, does not have an 8250/16550-compatible serial
++ * port, so we need a driver for its serial ports to handle
++ * KGDB. The board needs nothing beyond what the gdb portion
++ * of the stub normally provides.
++ */
++
++#include <linux/delay.h>
++#include <linux/kernel_stat.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/io.h>
++#include <asm/sibyte/sb1250.h>
++#include <asm/sibyte/sb1250_regs.h>
++#include <asm/sibyte/sb1250_uart.h>
++#include <asm/sibyte/sb1250_int.h>
++#include <asm/addrspace.h>
++
++int kgdb_port = 1;
++static int kgdb_irq;
++
++extern char sb1250_duart_present[];
++extern int sb1250_steal_irq(int irq);
++
++/* Forward declarations. */
++static void kgdbsibyte_init_duart(void);
++static int kgdb_init_io(void);
++
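++/*
++ * K_INT_MAP_I4 routes the mapped source to MIPS CPU interrupt line
++ * IP6 (map values I0..I5 correspond to IP2..IP7), matching the
++ * STATUSF_IP6 bit that irq.c enables under CONFIG_KGDB_SIBYTE.
++ */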
++#define IMR_IP6_VAL K_INT_MAP_I4
++#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
++#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
++
++static void kgdbsibyte_write_char(u8 c)
++{
++ while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0);
++ duart_out(R_DUART_TX_HOLD, c);
++}
++
++static int kgdbsibyte_read_char(void)
++{
++ int ret_char;
++ unsigned int status;
++
++ do {
++ status = duart_in(R_DUART_STATUS);
++ } while ((status & M_DUART_RX_RDY) == 0);
++
++ /*
++ * Check for a framing error; reinitialize the DUART and answer
++ * with '-' (the GDB remote-protocol NAK byte) so the exchange
++ * is retried.
++ */
++ if (status & M_DUART_FRM_ERR) {
++ kgdbsibyte_init_duart();
++ kgdbsibyte_write_char('-');
++ return '-';
++ }
++
++ ret_char = duart_in(R_DUART_RX_HOLD);
++
++ return ret_char;
++}
++
++void sb1250_kgdb_interrupt(void)
++{
++ int kgdb_irq = K_INT_UART_0 + kgdb_port;
++
++ /*
++ * Clear break-change status (allow some time for the remote
++ * host to stop the break, since we would see another
++ * interrupt on the end-of-break too)
++ */
++ kstat_this_cpu.irqs[kgdb_irq]++;
++ mdelay(500);
++ duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
++ M_DUART_RX_EN | M_DUART_TX_EN);
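++ /*
++ * Trap into the debugger proper; this replaces the
++ * set_async_breakpoint() call that the old in-tree stub used in
++ * its break-interrupt handler.
++ */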
++ breakpoint();
++}
++
++/*
++ * We use port #1 and set it to 115200 baud, 8N1.
++ */
++static void kgdbsibyte_init_duart(void)
++{
++ /* Set 8n1. */
++ duart_out(R_DUART_MODE_REG_1,
++ V_DUART_BITS_PER_CHAR_8 | V_DUART_PARITY_MODE_NONE);
++ duart_out(R_DUART_MODE_REG_2, M_DUART_STOP_BIT_LEN_1);
++ /* Set baud rate of 115200. */
++ duart_out(R_DUART_CLK_SEL, V_DUART_BAUD_RATE(115200));
++ /* Enable rx and tx */
++ duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN);
++}
++
++static int kgdb_init_io(void)
++{
++#ifdef CONFIG_SIBYTE_SB1250_DUART
++ sb1250_duart_present[kgdb_port] = 0;
++#endif
++
++ kgdbsibyte_init_duart();
++
++ return 0;
++}
++
++/*
++ * Hook up our IRQ line. We will already have been initialized at
++ * this point.
++ */
++static void __init kgdbsibyte_hookup_irq(void)
++{
++ /* Work out which IRQ the KGDB UART uses. */
++ kgdb_irq = K_INT_UART_0 + kgdb_port;
++
++ /* Enable break interrupts on the KGDB UART. */
++ __raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port)));
++
++ sb1250_steal_irq(kgdb_irq);
++
++ __raw_writeq(IMR_IP6_VAL,
++ IOADDR(A_IMR_REGISTER(0, R_IMR_INTERRUPT_MAP_BASE) +
++ (kgdb_irq << 3)));
++
++ sb1250_unmask_irq(0, kgdb_irq);
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .read_char = kgdbsibyte_read_char,
++ .write_char = kgdbsibyte_write_char,
++ .init = kgdb_init_io,
++ .late_init = kgdbsibyte_hookup_irq,
++};
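++
++/*
++ * Sketch of the expected call flow, assuming the KGDB core used by
++ * this patch series (illustration only, not additional code): the
++ * core calls .init early to program the DUART for polled I/O, moves
++ * protocol bytes through .read_char/.write_char while the machine is
++ * stopped, and calls .late_init once interrupts work so that a break
++ * on the UART can re-enter the debugger:
++ *
++ *	kgdb_io_ops.init();            polled-mode DUART setup
++ *	c = kgdb_io_ops.read_char();   blocking, polled receive
++ *	kgdb_io_ops.write_char(c);     blocking, polled transmit
++ *	kgdb_io_ops.late_init();       route the UART break to IP6
++ */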
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile
+--- linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -1,3 +1 @@
+ lib-y = setup.o rtc_xicor1241.o rtc_m41t81.o
+-
+-lib-$(CONFIG_KGDB) += dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c
+--- linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,76 +0,0 @@
+-/*
+- * kgdb debug routines for SiByte boards.
+- *
+- * Copyright (C) 2001 MontaVista Software Inc.
+- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- */
+-
+-/* -------------------- BEGINNING OF CONFIG --------------------- */
+-
+-#include <linux/delay.h>
+-#include <asm/io.h>
+-#include <asm/sibyte/sb1250.h>
+-#include <asm/sibyte/sb1250_regs.h>
+-#include <asm/sibyte/sb1250_uart.h>
+-#include <asm/sibyte/sb1250_int.h>
+-#include <asm/addrspace.h>
+-
+-/*
+- * We use the second serial port for kgdb traffic.
+- * 115200, 8, N, 1.
+- */
+-
+-#define BAUD_RATE 115200
+-#define CLK_DIVISOR V_DUART_BAUD_RATE(BAUD_RATE)
+-#define DATA_BITS V_DUART_BITS_PER_CHAR_8 /* or 7 */
+-#define PARITY V_DUART_PARITY_MODE_NONE /* or even */
+-#define STOP_BITS M_DUART_STOP_BIT_LEN_1 /* or 2 */
+-
+-static int duart_initialized = 0; /* 0: need to be init'ed by kgdb */
+-
+-/* -------------------- END OF CONFIG --------------------- */
+-extern int kgdb_port;
+-
+-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-void putDebugChar(unsigned char c);
+-unsigned char getDebugChar(void);
+-static void
+-duart_init(int clk_divisor, int data, int parity, int stop)
+-{
+- duart_out(R_DUART_MODE_REG_1, data | parity);
+- duart_out(R_DUART_MODE_REG_2, stop);
+- duart_out(R_DUART_CLK_SEL, clk_divisor);
+-
+- duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); /* enable rx and tx */
+-}
+-
+-void
+-putDebugChar(unsigned char c)
+-{
+- if (!duart_initialized) {
+- duart_initialized = 1;
+- duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS);
+- }
+- while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0);
+- duart_out(R_DUART_TX_HOLD, c);
+-}
+-
+-unsigned char
+-getDebugChar(void)
+-{
+- if (!duart_initialized) {
+- duart_initialized = 1;
+- duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS);
+- }
+- while ((duart_in(R_DUART_STATUS) & M_DUART_RX_RDY) == 0) ;
+- return duart_in(R_DUART_RX_HOLD);
+-}
+-
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/Makefile linux-2.6.22-591/arch/mips/tx4927/common/Makefile
+--- linux-2.6.22-570/arch/mips/tx4927/common/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4927/common/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -9,4 +9,3 @@
+ obj-y += tx4927_prom.o tx4927_setup.o tx4927_irq.o
+
+ obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o
+-obj-$(CONFIG_KGDB) += tx4927_dbgio.o
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c
+--- linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,47 +0,0 @@
+-/*
+- * linux/arch/mips/tx4927/common/tx4927_dbgio.c
+- *
+- * kgdb interface for gdb
+- *
+- * Author: MontaVista Software, Inc.
+- * source@mvista.com
+- *
+- * Copyright 2001-2002 MontaVista Software Inc.
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#include <asm/mipsregs.h>
+-#include <asm/system.h>
+-#include <asm/tx4927/tx4927_mips.h>
+-
+-u8 getDebugChar(void)
+-{
+- extern u8 txx9_sio_kdbg_rd(void);
+- return (txx9_sio_kdbg_rd());
+-}
+-
+-
+-int putDebugChar(u8 byte)
+-{
+- extern int txx9_sio_kdbg_wr( u8 ch );
+- return (txx9_sio_kdbg_wr(byte));
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+--- linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -76,7 +76,7 @@
+ #include <linux/hdreg.h>
+ #include <linux/ide.h>
+ #endif
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+@@ -973,9 +973,10 @@
+
+ #endif /* CONFIG_PCI */
+
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ {
+ extern int early_serial_txx9_setup(struct uart_port *port);
++ extern int txx9_kgdb_add_port(int n, struct uart_port *port);
+ int i;
+ struct uart_port req;
+ for(i = 0; i < 2; i++) {
+@@ -987,7 +988,12 @@
+ req.irq = 32 + i;
+ req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+ req.uartclk = 50000000;
++#ifdef CONFIG_SERIAL_TXX9
+ early_serial_txx9_setup(&req);
++#endif
++#ifdef CONFIG_KGDB_TXX9
++ txx9_kgdb_add_port(i, &req);
++#endif
+ }
+ }
+ #ifdef CONFIG_SERIAL_TXX9_CONSOLE
+@@ -996,7 +1002,7 @@
+ strcat(argptr, " console=ttyS0,38400");
+ }
+ #endif
+-#endif
++#endif /* defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) */
+
+ #ifdef CONFIG_ROOT_NFS
+ argptr = prom_getcmdline();
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/Makefile linux-2.6.22-591/arch/mips/tx4938/common/Makefile
+--- linux-2.6.22-570/arch/mips/tx4938/common/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4938/common/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -7,5 +7,4 @@
+ #
+
+ obj-y += prom.o setup.o irq.o rtc_rx5c348.o
+-obj-$(CONFIG_KGDB) += dbgio.o
+
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c
+--- linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,50 +0,0 @@
+-/*
+- * linux/arch/mips/tx4938/common/dbgio.c
+- *
+- * kgdb interface for gdb
+- *
+- * Author: MontaVista Software, Inc.
+- * source@mvista.com
+- *
+- * Copyright 2005 MontaVista Software Inc.
+- *
+- * This program is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License as published by the
+- * Free Software Foundation; either version 2 of the License, or (at your
+- * option) any later version.
+- *
+- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You should have received a copy of the GNU General Public License along
+- * with this program; if not, write to the Free Software Foundation, Inc.,
+- * 675 Mass Ave, Cambridge, MA 02139, USA.
+- *
+- * Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp>
+- */
+-
+-#include <asm/mipsregs.h>
+-#include <asm/system.h>
+-#include <asm/tx4938/tx4938_mips.h>
+-
+-extern u8 txx9_sio_kdbg_rd(void);
+-extern int txx9_sio_kdbg_wr( u8 ch );
+-
+-u8 getDebugChar(void)
+-{
+- return (txx9_sio_kdbg_rd());
+-}
+-
+-int putDebugChar(u8 byte)
+-{
+- return (txx9_sio_kdbg_wr(byte));
+-}
+-
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c
+--- linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,7 +30,7 @@
+ #include <asm/io.h>
+ #include <asm/bootinfo.h>
+ #include <asm/tx4938/rbtx4938.h>
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+@@ -924,9 +924,10 @@
+ set_io_port_base(RBTX4938_ETHER_BASE);
+ #endif
+
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ {
+ extern int early_serial_txx9_setup(struct uart_port *port);
++ extern int txx9_kgdb_add_port(int n, struct uart_port *port);
+ int i;
+ struct uart_port req;
+ for(i = 0; i < 2; i++) {
+@@ -938,7 +939,12 @@
+ req.irq = 32 + i;
+ req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+ req.uartclk = 50000000;
++#ifdef CONFIG_SERIAL_TXX9
+ early_serial_txx9_setup(&req);
++#endif
++#ifdef CONFIG_KGDB_TXX9
++ txx9_kgdb_add_port(i, &req);
++#endif
+ }
+ }
+ #ifdef CONFIG_SERIAL_TXX9_CONSOLE
+diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig linux-2.6.22-591/arch/powerpc/Kconfig
+--- linux-2.6.22-570/arch/powerpc/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -4,12 +4,7 @@
+
+ mainmenu "Linux/PowerPC Kernel Configuration"
+
+-config PPC64
+- bool "64-bit kernel"
+- default n
+- help
+- This option selects whether a 32-bit or a 64-bit kernel
+- will be built.
++source "arch/powerpc/platforms/Kconfig.cputype"
+
+ config PPC_PM_NEEDS_RTC_LIB
+ bool
+@@ -132,123 +127,6 @@
+ depends on PPC64 && (BROKEN || (PPC_PMAC64 && EXPERIMENTAL))
+ default y
+
+-menu "Processor support"
+-choice
+- prompt "Processor Type"
+- depends on PPC32
+- default 6xx
+-
+-config CLASSIC32
+- bool "52xx/6xx/7xx/74xx"
+- select PPC_FPU
+- select 6xx
+- help
+- There are four families of PowerPC chips supported. The more common
+- types (601, 603, 604, 740, 750, 7400), the Motorola embedded
+- versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
+- embedded versions (403 and 405) and the high end 64 bit Power
+- processors (POWER 3, POWER4, and IBM PPC970 also known as G5).
+-
+- This option is the catch-all for 6xx types, including some of the
+- embedded versions. Unless there is see an option for the specific
+- chip family you are using, you want this option.
+-
+- You do not want this if you are building a kernel for a 64 bit
+- IBM RS/6000 or an Apple G5, choose 6xx.
+-
+- If unsure, select this option
+-
+- Note that the kernel runs in 32-bit mode even on 64-bit chips.
+-
+-config PPC_82xx
+- bool "Freescale 82xx"
+- select 6xx
+- select PPC_FPU
+-
+-config PPC_83xx
+- bool "Freescale 83xx"
+- select 6xx
+- select FSL_SOC
+- select 83xx
+- select PPC_FPU
+- select WANT_DEVICE_TREE
+-
+-config PPC_85xx
+- bool "Freescale 85xx"
+- select E500
+- select FSL_SOC
+- select 85xx
+- select WANT_DEVICE_TREE
+-
+-config PPC_86xx
+- bool "Freescale 86xx"
+- select 6xx
+- select FSL_SOC
+- select FSL_PCIE
+- select PPC_FPU
+- select ALTIVEC
+- help
+- The Freescale E600 SoCs have 74xx cores.
+-
+-config PPC_8xx
+- bool "Freescale 8xx"
+- select FSL_SOC
+- select 8xx
+-
+-config 40x
+- bool "AMCC 40x"
+- select PPC_DCR_NATIVE
+-
+-config 44x
+- bool "AMCC 44x"
+- select PPC_DCR_NATIVE
+- select WANT_DEVICE_TREE
+-
+-config E200
+- bool "Freescale e200"
+-
+-endchoice
+-
+-config POWER4_ONLY
+- bool "Optimize for POWER4"
+- depends on PPC64
+- default n
+- ---help---
+- Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
+- The resulting binary will not work on POWER3 or RS64 processors
+- when compiled with binutils 2.15 or later.
+-
+-config POWER3
+- bool
+- depends on PPC64
+- default y if !POWER4_ONLY
+-
+-config POWER4
+- depends on PPC64
+- def_bool y
+-
+-config 6xx
+- bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 8xx
+- bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 83xx
+- bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 85xx
+- bool
+-
+-config E500
+- bool
+-
+-config PPC_FPU
+- bool
+- default y if PPC64
+-
+ config PPC_DCR_NATIVE
+ bool
+ default n
+@@ -267,134 +145,6 @@
+ depends on PPC64 # not supported on 32 bits yet
+ default n
+
+-config 4xx
+- bool
+- depends on 40x || 44x
+- default y
+-
+-config BOOKE
+- bool
+- depends on E200 || E500 || 44x
+- default y
+-
+-config FSL_BOOKE
+- bool
+- depends on E200 || E500
+- default y
+-
+-config PTE_64BIT
+- bool
+- depends on 44x || E500
+- default y if 44x
+- default y if E500 && PHYS_64BIT
+-
+-config PHYS_64BIT
+- bool 'Large physical address support' if E500
+- depends on 44x || E500
+- select RESOURCES_64BIT
+- default y if 44x
+- ---help---
+- This option enables kernel support for larger than 32-bit physical
+- addresses. This features is not be available on all e500 cores.
+-
+- If in doubt, say N here.
+-
+-config ALTIVEC
+- bool "AltiVec Support"
+- depends on CLASSIC32 || POWER4
+- ---help---
+- This option enables kernel support for the Altivec extensions to the
+- PowerPC processor. The kernel currently supports saving and restoring
+- altivec registers, and turning on the 'altivec enable' bit so user
+- processes can execute altivec instructions.
+-
+- This option is only usefully if you have a processor that supports
+- altivec (G4, otherwise known as 74xx series), but does not have
+- any affect on a non-altivec cpu (it does, however add code to the
+- kernel).
+-
+- If in doubt, say Y here.
+-
+-config SPE
+- bool "SPE Support"
+- depends on E200 || E500
+- default y
+- ---help---
+- This option enables kernel support for the Signal Processing
+- Extensions (SPE) to the PowerPC processor. The kernel currently
+- supports saving and restoring SPE registers, and turning on the
+- 'spe enable' bit so user processes can execute SPE instructions.
+-
+- This option is only useful if you have a processor that supports
+- SPE (e500, otherwise known as 85xx series), but does not have any
+- effect on a non-spe cpu (it does, however add code to the kernel).
+-
+- If in doubt, say Y here.
+-
+-config PPC_STD_MMU
+- bool
+- depends on 6xx || POWER3 || POWER4 || PPC64
+- default y
+-
+-config PPC_STD_MMU_32
+- def_bool y
+- depends on PPC_STD_MMU && PPC32
+-
+-config PPC_MM_SLICES
+- bool
+- default y if HUGETLB_PAGE
+- default n
+-
+-config VIRT_CPU_ACCOUNTING
+- bool "Deterministic task and CPU time accounting"
+- depends on PPC64
+- default y
+- help
+- Select this option to enable more accurate task and CPU time
+- accounting. This is done by reading a CPU counter on each
+- kernel entry and exit and on transitions within the kernel
+- between system, softirq and hardirq state, so there is a
+- small performance impact. This also enables accounting of
+- stolen time on logically-partitioned systems running on
+- IBM POWER5-based machines.
+-
+- If in doubt, say Y here.
+-
+-config SMP
+- depends on PPC_STD_MMU
+- bool "Symmetric multi-processing support"
+- ---help---
+- This enables support for systems with more than one CPU. If you have
+- a system with only one CPU, say N. If you have a system with more
+- than one CPU, say Y. Note that the kernel does not currently
+- support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+- since they have inadequate hardware support for multiprocessor
+- operation.
+-
+- If you say N here, the kernel will run on single and multiprocessor
+- machines, but will use only one CPU of a multiprocessor machine. If
+- you say Y here, the kernel will run on single-processor machines.
+- On a single-processor machine, the kernel will run faster if you say
+- N here.
+-
+- If you don't know what to do here, say N.
+-
+-config NR_CPUS
+- int "Maximum number of CPUs (2-128)"
+- range 2 128
+- depends on SMP
+- default "32" if PPC64
+- default "4"
+-
+-config NOT_COHERENT_CACHE
+- bool
+- depends on 4xx || 8xx || E200
+- default y
+-
+-config CONFIG_CHECK_CACHE_COHERENCY
+- bool
+-endmenu
+-
+ source "init/Kconfig"
+
+ source "arch/powerpc/platforms/Kconfig"
+@@ -686,9 +436,9 @@
+ bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_86xx \
+ || PPC_MPC52xx || (EMBEDDED && (PPC_PSERIES || PPC_ISERIES)) \
+ || MPC7448HPC2 || PPC_PS3 || PPC_HOLLY
+- default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx \
++ default y if !40x && !CPM2 && !8xx && !PPC_83xx \
+ && !PPC_85xx && !PPC_86xx
+- default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
++ default PCI_PERMEDIA if !4xx && !CPM2 && !8xx
+ default PCI_QSPAN if !4xx && !CPM2 && 8xx
+ select ARCH_SUPPORTS_MSI
+ help
+diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig.debug linux-2.6.22-591/arch/powerpc/Kconfig.debug
+--- linux-2.6.22-570/arch/powerpc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -41,52 +41,9 @@
+ This option will add a small amount of overhead to all hypervisor
+ calls.
+
+-config DEBUGGER
+- bool "Enable debugger hooks"
+- depends on DEBUG_KERNEL
+- help
+- Include in-kernel hooks for kernel debuggers. Unless you are
+- intending to debug the kernel, say N here.
+-
+-config KGDB
+- bool "Include kgdb kernel debugger"
+- depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx)
+- select DEBUG_INFO
+- help
+- Include in-kernel hooks for kgdb, the Linux kernel source level
+- debugger. See <http://kgdb.sourceforge.net/> for more information.
+- Unless you are intending to debug the kernel, say N here.
+-
+-choice
+- prompt "Serial Port"
+- depends on KGDB
+- default KGDB_TTYS1
+-
+-config KGDB_TTYS0
+- bool "ttyS0"
+-
+-config KGDB_TTYS1
+- bool "ttyS1"
+-
+-config KGDB_TTYS2
+- bool "ttyS2"
+-
+-config KGDB_TTYS3
+- bool "ttyS3"
+-
+-endchoice
+-
+-config KGDB_CONSOLE
+- bool "Enable serial console thru kgdb port"
+- depends on KGDB && 8xx || CPM2
+- help
+- If you enable this, all serial console messages will be sent
+- over the gdb stub.
+- If unsure, say N.
+-
+ config XMON
+ bool "Include xmon kernel debugger"
+- depends on DEBUGGER
++ depends on DEBUG_KERNEL
+ help
+ Include in-kernel hooks for the xmon kernel monitor/debugger.
+ Unless you are intending to debug the kernel, say N here.
+@@ -116,6 +73,11 @@
+ to say Y here, unless you're building for a memory-constrained
+ system.
+
++config DEBUGGER
++ bool
++ depends on KGDB || XMON
++ default y
++
+ config IRQSTACKS
+ bool "Use separate kernel stacks when processing interrupts"
+ depends on PPC64
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.c linux-2.6.22-591/arch/powerpc/boot/44x.c
+--- linux-2.6.22-570/arch/powerpc/boot/44x.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/44x.c 2007-12-21 15:36:11.000000000 -0500
+@@ -38,3 +38,48 @@
+
+ dt_fixup_memory(0, memsize);
+ }
++
++#define SPRN_DBCR0 0x134
++#define DBCR0_RST_SYSTEM 0x30000000
++
++void ibm44x_dbcr_reset(void)
++{
++ unsigned long tmp;
++
++ asm volatile (
++ "mfspr %0,%1\n"
++ "oris %0,%0,%2@h\n"
++ "mtspr %1,%0"
++ : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
++ );
++}
++
++/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
++ * banks into the OPB address space */
++void ibm4xx_fixup_ebc_ranges(const char *ebc)
++{
++ void *devp;
++ u32 bxcr;
++ u32 ranges[EBC_NUM_BANKS*4];
++ u32 *p = ranges;
++ int i;
++
++ for (i = 0; i < EBC_NUM_BANKS; i++) {
++ mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i));
++ bxcr = mfdcr(DCRN_EBC0_CFGDATA);
++
++ if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) {
++ *p++ = i;
++ *p++ = 0;
++ *p++ = bxcr & EBC_BXCR_BAS;
++ *p++ = EBC_BXCR_BANK_SIZE(bxcr);
++ }
++ }
++
++ devp = finddevice(ebc);
++ if (!devp)
++ fatal("Couldn't locate EBC node %s\n\r", ebc);
++
++ setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32));
++}
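
The loop above emits one <bank 0 base size> quad into the "ranges" property for every chip-select bank whose enable field is not BU_OFF, using the EBC_BXCR_* accessors this patch adds to dcr.h further down. A small host-compilable sketch of how one quad is derived; the BXCR value 0xfff18000 is a hypothetical example chosen to match Ebony's 1MB boot-flash bank at 0xfff00000:

    #include <stdio.h>

    /* Copied from the dcr.h hunk below in this patch */
    #define EBC_BXCR_BAS            0xfff00000
    #define EBC_BXCR_BS             0x000e0000
    #define EBC_BXCR_BANK_SIZE(reg) (0x100000 << (((reg) & EBC_BXCR_BS) >> 17))
    #define EBC_BXCR_BU             0x00018000
    #define EBC_BXCR_BU_OFF         0x00000000

    int main(void)
    {
            unsigned int bxcr = 0xfff18000; /* hypothetical: RW bank, 1MB */
            int bank = 0;

            if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF)
                    /* prints "0 0 fff00000 100000": one <bank 0 base size> quad */
                    printf("%x 0 %08x %x\n", bank,
                           bxcr & EBC_BXCR_BAS, EBC_BXCR_BANK_SIZE(bxcr));
            return 0;
    }
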
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.h linux-2.6.22-591/arch/powerpc/boot/44x.h
+--- linux-2.6.22-570/arch/powerpc/boot/44x.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/44x.h 2007-12-21 15:36:11.000000000 -0500
+@@ -11,6 +11,9 @@
+ #define _PPC_BOOT_44X_H_
+
+ void ibm44x_fixup_memsize(void);
++void ibm4xx_fixup_ebc_ranges(const char *ebc);
++
++void ibm44x_dbcr_reset(void);
+ void ebony_init(void *mac0, void *mac1);
+
+ #endif /* _PPC_BOOT_44X_H_ */
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/Makefile linux-2.6.22-591/arch/powerpc/boot/Makefile
+--- linux-2.6.22-570/arch/powerpc/boot/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -43,8 +43,8 @@
+
+ src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \
+ ns16550.c serial.c simple_alloc.c div64.S util.S \
+- gunzip_util.c elf_util.c $(zlib) devtree.c \
+- 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c
++ gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
++ 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c
+ src-plat := of.c cuboot-83xx.c cuboot-85xx.c holly.c \
+ cuboot-ebony.c treeboot-ebony.c prpmc2800.c
+ src-boot := $(src-wlib) $(src-plat) empty.c
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c 2007-12-21 15:36:11.000000000 -0500
+@@ -12,12 +12,12 @@
+
+ #include "ops.h"
+ #include "stdio.h"
++#include "cuboot.h"
+
+ #define TARGET_83xx
+ #include "ppcboot.h"
+
+ static bd_t bd;
+-extern char _end[];
+ extern char _dtb_start[], _dtb_end[];
+
+ static void platform_fixups(void)
+@@ -52,16 +52,7 @@
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+ {
+- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+- unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+- memcpy(&bd, (bd_t *)r3, sizeof(bd));
+- loader_info.initrd_addr = r4;
+- loader_info.initrd_size = r4 ? r5 - r4 : 0;
+- loader_info.cmdline = (char *)r6;
+- loader_info.cmdline_len = r7 - r6;
+-
+- simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++ CUBOOT_INIT();
+ ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
+ serial_console_init();
+ platform_ops.fixups = platform_fixups;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c 2007-12-21 15:36:11.000000000 -0500
+@@ -12,12 +12,12 @@
+
+ #include "ops.h"
+ #include "stdio.h"
++#include "cuboot.h"
+
+ #define TARGET_85xx
+ #include "ppcboot.h"
+
+ static bd_t bd;
+-extern char _end[];
+ extern char _dtb_start[], _dtb_end[];
+
+ static void platform_fixups(void)
+@@ -53,16 +53,7 @@
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+ {
+- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+- unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+- memcpy(&bd, (bd_t *)r3, sizeof(bd));
+- loader_info.initrd_addr = r4;
+- loader_info.initrd_size = r4 ? r5 - r4 : 0;
+- loader_info.cmdline = (char *)r6;
+- loader_info.cmdline_len = r7 - r6;
+-
+- simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++ CUBOOT_INIT();
+ ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
+ serial_console_init();
+ platform_ops.fixups = platform_fixups;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c 2007-12-21 15:36:11.000000000 -0500
+@@ -15,28 +15,16 @@
+ #include "ops.h"
+ #include "stdio.h"
+ #include "44x.h"
++#include "cuboot.h"
+
+ #define TARGET_44x
+ #include "ppcboot.h"
+
+ static bd_t bd;
+-extern char _end[];
+-
+-BSS_STACK(4096);
+
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+ {
+- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+- unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+- memcpy(&bd, (bd_t *)r3, sizeof(bd));
+- loader_info.initrd_addr = r4;
+- loader_info.initrd_size = r4 ? r5 : 0;
+- loader_info.cmdline = (char *)r6;
+- loader_info.cmdline_len = r7 - r6;
+-
+- simple_alloc_init(_end, avail_ram, 32, 64);
+-
++ CUBOOT_INIT();
+ ebony_init(&bd.bi_enetaddr, &bd.bi_enet1addr);
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.c linux-2.6.22-591/arch/powerpc/boot/cuboot.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/boot/cuboot.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,35 @@
++/*
++ * Compatibility for old (not device tree aware) U-Boot versions
++ *
++ * Author: Scott Wood <scottwood@freescale.com>
++ * Consolidated using macros by David Gibson <david@gibson.dropbear.id.au>
++ *
++ * Copyright 2007 David Gibson, IBM Corporation.
++ * Copyright (c) 2007 Freescale Semiconductor, Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ */
++
++#include "ops.h"
++#include "stdio.h"
++
++#include "ppcboot.h"
++
++extern char _end[];
++extern char _dtb_start[], _dtb_end[];
++
++void cuboot_init(unsigned long r4, unsigned long r5,
++ unsigned long r6, unsigned long r7,
++ unsigned long end_of_ram)
++{
++ unsigned long avail_ram = end_of_ram - (unsigned long)_end;
++
++ loader_info.initrd_addr = r4;
++ loader_info.initrd_size = r4 ? r5 - r4 : 0;
++ loader_info.cmdline = (char *)r6;
++ loader_info.cmdline_len = r7 - r6;
++
++ simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++}
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.h linux-2.6.22-591/arch/powerpc/boot/cuboot.h
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/boot/cuboot.h 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,14 @@
++#ifndef _PPC_BOOT_CUBOOT_H_
++#define _PPC_BOOT_CUBOOT_H_
++
++void cuboot_init(unsigned long r4, unsigned long r5,
++ unsigned long r6, unsigned long r7,
++ unsigned long end_of_ram);
++
++#define CUBOOT_INIT() \
++ do { \
++ memcpy(&bd, (bd_t *)r3, sizeof(bd)); \
++ cuboot_init(r4, r5, r6, r7, bd.bi_memstart + bd.bi_memsize); \
++ } while (0)
++
++#endif /* _PPC_BOOT_CUBOOT_H_ */
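
CUBOOT_INIT() stays a macro on purpose: it assumes the enclosing platform_init() receives the registers under the names r3 through r7 and that a file-scope bd_t bd exists, copies U-Boot's board-info block out of r3, then lets cuboot_init() derive loader_info and the heap from bd.bi_memstart and bd.bi_memsize. A sketch of a converted platform file under those assumptions (cuboot-foo.c is hypothetical and mirrors the cuboot-83xx.c conversion above):

    /* Hypothetical cuboot-foo.c, following the conversions above */
    #include "ops.h"
    #include "stdio.h"
    #include "cuboot.h"

    #define TARGET_83xx
    #include "ppcboot.h"

    static bd_t bd;                      /* CUBOOT_INIT() fills this from r3 */
    extern char _dtb_start[], _dtb_end[];

    void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
                       unsigned long r6, unsigned long r7)
    {
            CUBOOT_INIT();               /* board info, loader_info, heap */
            ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
            serial_console_init();
            /* platform_ops.fixups would be wired up here as in cuboot-83xx.c */
    }
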
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dcr.h linux-2.6.22-591/arch/powerpc/boot/dcr.h
+--- linux-2.6.22-570/arch/powerpc/boot/dcr.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/dcr.h 2007-12-21 15:36:11.000000000 -0500
+@@ -26,6 +26,43 @@
+ #define SDRAM_CONFIG_BANK_SIZE(reg) \
+ (0x00400000 << ((reg & SDRAM_CONFIG_SIZE_MASK) >> 17))
+
++/* 440GP External Bus Controller (EBC) */
++#define DCRN_EBC0_CFGADDR 0x012
++#define DCRN_EBC0_CFGDATA 0x013
++#define EBC_NUM_BANKS 8
++#define EBC_B0CR 0x00
++#define EBC_B1CR 0x01
++#define EBC_B2CR 0x02
++#define EBC_B3CR 0x03
++#define EBC_B4CR 0x04
++#define EBC_B5CR 0x05
++#define EBC_B6CR 0x06
++#define EBC_B7CR 0x07
++#define EBC_BXCR(n) (n)
++#define EBC_BXCR_BAS 0xfff00000
++#define EBC_BXCR_BS 0x000e0000
++#define EBC_BXCR_BANK_SIZE(reg) \
++ (0x100000 << (((reg) & EBC_BXCR_BS) >> 17))
++#define EBC_BXCR_BU 0x00018000
++#define EBC_BXCR_BU_OFF 0x00000000
++#define EBC_BXCR_BU_RO 0x00008000
++#define EBC_BXCR_BU_WO 0x00010000
++#define EBC_BXCR_BU_RW 0x00018000
++#define EBC_BXCR_BW 0x00006000
++#define EBC_B0AP 0x10
++#define EBC_B1AP 0x11
++#define EBC_B2AP 0x12
++#define EBC_B3AP 0x13
++#define EBC_B4AP 0x14
++#define EBC_B5AP 0x15
++#define EBC_B6AP 0x16
++#define EBC_B7AP 0x17
++#define EBC_BXAP(n) (0x10+(n))
++#define EBC_BEAR 0x20
++#define EBC_BESR 0x21
++#define EBC_CFG 0x23
++#define EBC_CID 0x24
++
+ /* 440GP Clock, PM, chip control */
+ #define DCRN_CPC0_SR 0x0b0
+ #define DCRN_CPC0_ER 0x0b1
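
The EBC is a DCR-mapped peripheral: its registers are reached indirectly by writing an index into EBC0_CFGADDR and then accessing EBC0_CFGDATA, which is why the fixup loop in 44x.c always pairs mtdcr() with mfdcr(). A minimal sketch of that access pattern, assuming the mtdcr/mfdcr accessors and the u32 type the boot wrapper already provides:

    #include "types.h"
    #include "dcr.h"

    /* Select bank-control register n, then read it back; the same
     * pairing ibm4xx_fixup_ebc_ranges() uses above. */
    static u32 ebc_read_bxcr(int n)
    {
            mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(n));
            return mfdcr(DCRN_EBC0_CFGDATA);
    }
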
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts 2007-12-21 15:36:11.000000000 -0500
+@@ -135,11 +135,9 @@
+ #address-cells = <2>;
+ #size-cells = <1>;
+ clock-frequency = <0>; // Filled in by zImage
+- ranges = <0 00000000 fff00000 100000
+- 1 00000000 48000000 100000
+- 2 00000000 ff800000 400000
+- 3 00000000 48200000 100000
+- 7 00000000 48300000 100000>;
++ // ranges property is supplied by zImage
++ // based on firmware's configuration of the
++ // EBC bridge
+ interrupts = <5 4>;
+ interrupt-parent = <&UIC1>;
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts 2007-12-21 15:36:11.000000000 -0500
+@@ -46,7 +46,7 @@
+
+ tsi109@c0000000 {
+ device_type = "tsi-bridge";
+- compatible = "tsi-bridge";
++ compatible = "tsi109-bridge", "tsi108-bridge";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <00000000 c0000000 00010000>;
+@@ -54,52 +54,55 @@
+
+ i2c@7000 {
+ device_type = "i2c";
+- compatible = "tsi-i2c";
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ compatible = "tsi109-i2c", "tsi108-i2c";
++ interrupt-parent = <&MPIC>;
+ interrupts = <e 2>;
+ reg = <7000 400>;
+ };
+
+- mdio@6000 {
++ MDIO: mdio@6000 {
+ device_type = "mdio";
+- compatible = "tsi-ethernet";
+-
+- PHY1: ethernet-phy@6000 {
+- device_type = "ethernet-phy";
+- compatible = "bcm54xx";
++ compatible = "tsi109-mdio", "tsi108-mdio";
+ reg = <6000 50>;
+- phy-id = <1>;
++ #address-cells = <1>;
++ #size-cells = <0>;
++
++ PHY1: ethernet-phy@1 {
++ compatible = "bcm5461a";
++ reg = <1>;
++ txc-rxc-delay-disable;
+ };
+
+- PHY2: ethernet-phy@6400 {
+- device_type = "ethernet-phy";
+- compatible = "bcm54xx";
+- reg = <6000 50>;
+- phy-id = <2>;
++ PHY2: ethernet-phy@2 {
++ compatible = "bcm5461a";
++ reg = <2>;
++ txc-rxc-delay-disable;
+ };
+ };
+
+ ethernet@6200 {
+ device_type = "network";
+- compatible = "tsi-ethernet";
++ compatible = "tsi109-ethernet", "tsi108-ethernet";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6000 200>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ interrupts = <10 2>;
++ mdio-handle = <&MDIO>;
+ phy-handle = <&PHY1>;
+ };
+
+ ethernet@6600 {
+ device_type = "network";
+- compatible = "tsi-ethernet";
++ compatible = "tsi109-ethernet", "tsi108-ethernet";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6400 200>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ interrupts = <11 2>;
++ mdio-handle = <&MDIO>;
+ phy-handle = <&PHY2>;
+ };
+
+@@ -110,7 +113,7 @@
+ virtual-reg = <c0007808>;
+ clock-frequency = <3F9C6000>;
+ current-speed = <1c200>;
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ interrupts = <c 2>;
+ };
+
+@@ -121,7 +124,7 @@
+ virtual-reg = <c0007c08>;
+ clock-frequency = <3F9C6000>;
+ current-speed = <1c200>;
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ interrupts = <d 2>;
+ };
+
+@@ -136,7 +139,7 @@
+
+ pci@1000 {
+ device_type = "pci";
+- compatible = "tsi109";
++ compatible = "tsi109-pci", "tsi108-pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+@@ -150,7 +153,7 @@
+ ranges = <02000000 0 40000000 40000000 0 10000000
+ 01000000 0 00000000 7e000000 0 00010000>;
+ clock-frequency = <7f28154>;
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ interrupts = <17 2>;
+ interrupt-map-mask = <f800 0 0 7>;
+ /*----------------------------------------------------+
+@@ -186,13 +189,12 @@
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ interrupts = <17 2>;
+- interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++ interrupt-parent = <&MPIC>;
+ };
+ };
+ };
+
+ chosen {
+ linux,stdout-path = "/tsi109@c0000000/serial@7808";
+- bootargs = "console=ttyS0,115200";
+ };
+ };
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-12-21 15:36:11.000000000 -0500
+@@ -45,7 +45,7 @@
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interrupt-cells = <2>;
+- device_type = "tsi-bridge";
++ device_type = "tsi108-bridge";
+ ranges = <00000000 c0000000 00010000>;
+ reg = <c0000000 00010000>;
+ bus-frequency = <0>;
+@@ -55,27 +55,26 @@
+ interrupts = <E 0>;
+ reg = <7000 400>;
+ device_type = "i2c";
+- compatible = "tsi-i2c";
++ compatible = "tsi108-i2c";
+ };
+
+- mdio@6000 {
++ MDIO: mdio@6000 {
+ device_type = "mdio";
+- compatible = "tsi-ethernet";
++ compatible = "tsi108-mdio";
++ reg = <6000 50>;
++ #address-cells = <1>;
++ #size-cells = <0>;
+
+- phy8: ethernet-phy@6000 {
++ phy8: ethernet-phy@8 {
+ interrupt-parent = <&mpic>;
+ interrupts = <2 1>;
+- reg = <6000 50>;
+- phy-id = <8>;
+- device_type = "ethernet-phy";
++ reg = <8>;
+ };
+
+- phy9: ethernet-phy@6400 {
++ phy9: ethernet-phy@9 {
+ interrupt-parent = <&mpic>;
+ interrupts = <2 1>;
+- reg = <6000 50>;
+- phy-id = <9>;
+- device_type = "ethernet-phy";
++ reg = <9>;
+ };
+
+ };
+@@ -83,12 +82,12 @@
+ ethernet@6200 {
+ #size-cells = <0>;
+ device_type = "network";
+- model = "TSI-ETH";
+- compatible = "tsi-ethernet";
++ compatible = "tsi108-ethernet";
+ reg = <6000 200>;
+ address = [ 00 06 D2 00 00 01 ];
+ interrupts = <10 2>;
+ interrupt-parent = <&mpic>;
++ mdio-handle = <&MDIO>;
+ phy-handle = <&phy8>;
+ };
+
+@@ -96,12 +95,12 @@
+ #address-cells = <1>;
+ #size-cells = <0>;
+ device_type = "network";
+- model = "TSI-ETH";
+- compatible = "tsi-ethernet";
++ compatible = "tsi108-ethernet";
+ reg = <6400 200>;
+ address = [ 00 06 D2 00 00 02 ];
+ interrupts = <11 2>;
+ interrupt-parent = <&mpic>;
++ mdio-handle = <&MDIO>;
+ phy-handle = <&phy9>;
+ };
+
+@@ -135,7 +134,7 @@
+ big-endian;
+ };
+ pci@1000 {
+- compatible = "tsi10x";
++ compatible = "tsi108-pci";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
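
Both board files now describe the PHYs as children of the MDIO node and point at them by phandle (mdio-handle, phy-handle) instead of the ad-hoc phy-id property, so the tsi108 driver is expected to resolve the references at probe time. A hedged sketch of that lookup; resolve_handle() is a hypothetical helper, while of_get_property() and of_find_node_by_phandle() are the stock 2.6.22 OF API:

    #include <asm/prom.h>

    /* Resolve a property such as "phy-handle" or "mdio-handle" to the
     * node it references; the caller must of_node_put() the result. */
    static struct device_node *resolve_handle(struct device_node *np,
                                              const char *name)
    {
            const u32 *ph = of_get_property(np, name, NULL);

            if (ph == NULL)
                    return NULL;
            return of_find_node_by_phandle(*ph);
    }
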
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ebony.c linux-2.6.22-591/arch/powerpc/boot/ebony.c
+--- linux-2.6.22-570/arch/powerpc/boot/ebony.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/ebony.c 2007-12-21 15:36:11.000000000 -0500
+@@ -100,28 +100,13 @@
+ ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
+ ibm44x_fixup_memsize();
+ dt_fixup_mac_addresses(ebony_mac0, ebony_mac1);
+-}
+-
+-#define SPRN_DBCR0 0x134
+-#define DBCR0_RST_SYSTEM 0x30000000
+-
+-static void ebony_exit(void)
+-{
+- unsigned long tmp;
+-
+- asm volatile (
+- "mfspr %0,%1\n"
+- "oris %0,%0,%2@h\n"
+- "mtspr %1,%0"
+- : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
+- );
+-
++ ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+ }
+
+ void ebony_init(void *mac0, void *mac1)
+ {
+ platform_ops.fixups = ebony_fixups;
+- platform_ops.exit = ebony_exit;
++ platform_ops.exit = ibm44x_dbcr_reset;
+ ebony_mac0 = mac0;
+ ebony_mac1 = mac1;
+ ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
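
With the reset hook and EBC fixup factored out of ebony.c, a wrapper for another 44x board only has to wire the shared helpers into platform_ops. A sketch under those assumptions (the board name, fixup list, and entry-point signature are illustrative; real wrappers vary with the boot loader):

    /* Hypothetical wrapper for another 44x board */
    #include "ops.h"
    #include "44x.h"

    extern char _dtb_start[], _dtb_end[];

    static void myboard_fixups(void)
    {
            ibm44x_fixup_memsize();                  /* probe SDRAM size */
            ibm4xx_fixup_ebc_ranges("/plb/opb/ebc"); /* fill in EBC ranges */
    }

    void platform_init(void)
    {
            platform_ops.fixups = myboard_fixups;
            platform_ops.exit = ibm44x_dbcr_reset;   /* DBCR0 system reset */
            ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
    }
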
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.c linux-2.6.22-591/arch/powerpc/boot/of.c
+--- linux-2.6.22-570/arch/powerpc/boot/of.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/boot/of.c 2007-12-21 15:36:11.000000000 -0500
+@@ -15,8 +15,7 @@
+ #include "page.h"
+ #include "ops.h"
+
+-typedef void *ihandle;
+-typedef void *phandle;
++#include "of.h"
+
+ extern char _end[];
+
+@@ -25,154 +24,10 @@
+ #define RAM_END (512<<20) /* Fixme: use OF */
+ #define ONE_MB 0x100000
+
+-int (*prom) (void *);
+
+
+ static unsigned long claim_base;
+
+-static int call_prom(const char *service, int nargs, int nret, ...)
+-{
+- int i;
+- struct prom_args {
+- const char *service;
+- int nargs;
+- int nret;
+- unsigned int args[12];
+- } args;
+- va_list list;
+-
+- args.service = service;
+- args.nargs = nargs;
+- args.nret = nret;
+-
+- va_start(list, nret);
+- for (i = 0; i < nargs; i++)
+- args.args[i] = va_arg(list, unsigned int);
+- va_end(list);
+-
+- for (i = 0; i < nret; i++)
+- args.args[nargs+i] = 0;
+-
+- if (prom(&args) < 0)
+- return -1;
+-
+- return (nret > 0)? args.args[nargs]: 0;
+-}
+-
+-static int call_prom_ret(const char *service, int nargs, int nret,
+- unsigned int *rets, ...)
+-{
+- int i;
+- struct prom_args {
+- const char *service;
+- int nargs;
+- int nret;
+- unsigned int args[12];
+- } args;
+- va_list list;
+-
+- args.service = service;
+- args.nargs = nargs;
+- args.nret = nret;
+-
+- va_start(list, rets);
+- for (i = 0; i < nargs; i++)
+- args.args[i] = va_arg(list, unsigned int);
+- va_end(list);
+-
+- for (i = 0; i < nret; i++)
+- args.args[nargs+i] = 0;
+-
+- if (prom(&args) < 0)
+- return -1;
+-
+- if (rets != (void *) 0)
+- for (i = 1; i < nret; ++i)
+- rets[i-1] = args.args[nargs+i];
+-
+- return (nret > 0)? args.args[nargs]: 0;
+-}
+-
+-/*
+- * Older OF's require that when claiming a specific range of addresses,
+- * we claim the physical space in the /memory node and the virtual
+- * space in the chosen mmu node, and then do a map operation to
+- * map virtual to physical.
+- */
+-static int need_map = -1;
+-static ihandle chosen_mmu;
+-static phandle memory;
+-
+-/* returns true if s2 is a prefix of s1 */
+-static int string_match(const char *s1, const char *s2)
+-{
+- for (; *s2; ++s2)
+- if (*s1++ != *s2)
+- return 0;
+- return 1;
+-}
+-
+-static int check_of_version(void)
+-{
+- phandle oprom, chosen;
+- char version[64];
+-
+- oprom = finddevice("/openprom");
+- if (oprom == (phandle) -1)
+- return 0;
+- if (getprop(oprom, "model", version, sizeof(version)) <= 0)
+- return 0;
+- version[sizeof(version)-1] = 0;
+- printf("OF version = '%s'\r\n", version);
+- if (!string_match(version, "Open Firmware, 1.")
+- && !string_match(version, "FirmWorks,3."))
+- return 0;
+- chosen = finddevice("/chosen");
+- if (chosen == (phandle) -1) {
+- chosen = finddevice("/chosen@0");
+- if (chosen == (phandle) -1) {
+- printf("no chosen\n");
+- return 0;
+- }
+- }
+- if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
+- printf("no mmu\n");
+- return 0;
+- }
+- memory = (ihandle) call_prom("open", 1, 1, "/memory");
+- if (memory == (ihandle) -1) {
+- memory = (ihandle) call_prom("open", 1, 1, "/memory@0");
+- if (memory == (ihandle) -1) {
+- printf("no memory node\n");
+- return 0;
+- }
+- }
+- printf("old OF detected\r\n");
+- return 1;
+-}
+-
+-static void *claim(unsigned long virt, unsigned long size, unsigned long align)
+-{
+- int ret;
+- unsigned int result;
+-
+- if (need_map < 0)
+- need_map = check_of_version();
+- if (align || !need_map)
+- return (void *) call_prom("claim", 3, 1, virt, size, align);
+-
+- ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory,
+- align, size, virt);
+- if (ret != 0 || result == -1)
+- return (void *) -1;
+- ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
+- align, size, virt);
+- /* 0x12 == coherent + read/write */
+- ret = call_prom("call-method", 6, 1, "map", chosen_mmu,
+- 0x12, size, virt, virt);
+- return (void *) virt;
+-}
+-
+ static void *of_try_claim(unsigned long size)
+ {
+ unsigned long addr = 0;
+@@ -184,7 +39,7 @@
+ #ifdef DEBUG
+ printf(" trying: 0x%08lx\n\r", claim_base);
+ #endif
+- addr = (unsigned long)claim(claim_base, size, 0);
++ addr = (unsigned long)of_claim(claim_base, size, 0);
+ if ((void *)addr != (void *)-1)
+ break;
+ }
+@@ -218,52 +73,24 @@
+ return p;
+ }
+
+-static void of_exit(void)
+-{
+- call_prom("exit", 0, 0);
+-}
+-
+ /*
+ * OF device tree routines
+ */
+ static void *of_finddevice(const char *name)
+ {
+- return (phandle) call_prom("finddevice", 1, 1, name);
++ return (phandle) of_call_prom("finddevice", 1, 1, name);
+ }
+
+ static int of_getprop(const void *phandle, const char *name, void *buf,
+ const int buflen)
+ {
+- return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
++ return of_call_prom("getprop", 4, 1, phandle, name, buf, buflen);
+ }
+
+ static int of_setprop(const void *phandle, const char *name, const void *buf,
+ const int buflen)
+ {
+- return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+-}
+-
+-/*
+- * OF console routines
+- */
+-static void *of_stdout_handle;
+-
+-static int of_console_open(void)
+-{
+- void *devp;
+-
+- if (((devp = finddevice("/chosen")) != NULL)
+- && (getprop(devp, "stdout", &of_stdout_handle,
+- sizeof(of_stdout_handle))
+- == sizeof(of_stdout_handle)))
+- return 0;
+-
+- return -1;
+-}
+-
+-static void of_console_write(char *buf, int len)
+-{
+- call_prom("write", 3, 1, of_stdout_handle, buf, len);
++ return of_call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+ }
+
+ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
+@@ -277,10 +104,9 @@
+ dt_ops.getprop = of_getprop;
+ dt_ops.setprop = of_setprop;
+
+- console_ops.open = of_console_open;
+- console_ops.write = of_console_write;
++ of_console_init();
+
+- prom = (int (*)(void *))promptr;
++ of_init(promptr);
+ loader_info.promptr = promptr;
+ if (a1 && a2 && a2 != 0xdeadbeef) {
+ loader_info.initrd_addr = a1;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.h linux-2.6.22-591/arch/powerpc/boot/of.h
+--- linux-2.6.22-570/arch/powerpc/boot/of.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/boot/of.h 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,15 @@
++#ifndef _PPC_BOOT_OF_H_
++#define _PPC_BOOT_OF_H_
++
++typedef void *phandle;
++typedef void *ihandle;
++
++void of_init(void *promptr);
++int of_call_prom(const char *service, int nargs, int nret, ...);
++void *of_claim(unsigned long virt, unsigned long size, unsigned long align);
++void of_exit(void);
++
++/* Console functions */
++void of_console_init(void);
++
++#endif /* _PPC_BOOT_OF_H_ */
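
of_call_prom() marshals a service name, the argument and return counts, and up to 12 cells into a prom_args block and traps into firmware through the entry point saved by of_init(); a return of -1 means the service failed. A sketch of a typical call pair (find_stdout() is a hypothetical name; ofconsole.c below performs this same lookup through the finddevice/getprop wrappers):

    #include "of.h"

    static ihandle find_stdout(void)
    {
            phandle chosen;
            ihandle stdout_ih = NULL;

            chosen = (phandle)of_call_prom("finddevice", 1, 1, "/chosen");
            if (chosen == (phandle)-1)
                    return NULL;
            /* Copy the 4-byte ihandle of the firmware console */
            of_call_prom("getprop", 4, 1, chosen, "stdout",
                         &stdout_ih, sizeof(stdout_ih));
            return stdout_ih;
    }
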
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ofconsole.c linux-2.6.22-591/arch/powerpc/boot/ofconsole.c
+--- linux-2.6.22-570/arch/powerpc/boot/ofconsole.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/boot/ofconsole.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,45 @@
++/*
++ * OF console routines
++ *
++ * Copyright (C) Paul Mackerras 1997.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++#include <stddef.h>
++#include "types.h"
++#include "elf.h"
++#include "string.h"
++#include "stdio.h"
++#include "page.h"
++#include "ops.h"
++
++#include "of.h"
++
++static void *of_stdout_handle;
++
++static int of_console_open(void)
++{
++ void *devp;
++
++ if (((devp = finddevice("/chosen")) != NULL)
++ && (getprop(devp, "stdout", &of_stdout_handle,
++ sizeof(of_stdout_handle))
++ == sizeof(of_stdout_handle)))
++ return 0;
++
++ return -1;
++}
++
++static void of_console_write(char *buf, int len)
++{
++ of_call_prom("write", 3, 1, of_stdout_handle, buf, len);
++}
++
++void of_console_init(void)
++{
++ console_ops.open = of_console_open;
++ console_ops.write = of_console_write;
++}
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/oflib.c linux-2.6.22-591/arch/powerpc/boot/oflib.c
+--- linux-2.6.22-570/arch/powerpc/boot/oflib.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/boot/oflib.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,172 @@
++/*
++ * Copyright (C) Paul Mackerras 1997.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++#include <stddef.h>
++#include "types.h"
++#include "elf.h"
++#include "string.h"
++#include "stdio.h"
++#include "page.h"
++#include "ops.h"
++
++#include "of.h"
++
++static int (*prom) (void *);
++
++void of_init(void *promptr)
++{
++ prom = (int (*)(void *))promptr;
++}
++
++int of_call_prom(const char *service, int nargs, int nret, ...)
++{
++ int i;
++ struct prom_args {
++ const char *service;
++ int nargs;
++ int nret;
++ unsigned int args[12];
++ } args;
++ va_list list;
++
++ args.service = service;
++ args.nargs = nargs;
++ args.nret = nret;
++
++ va_start(list, nret);
++ for (i = 0; i < nargs; i++)
++ args.args[i] = va_arg(list, unsigned int);
++ va_end(list);
++
++ for (i = 0; i < nret; i++)
++ args.args[nargs+i] = 0;
++
++ if (prom(&args) < 0)
++ return -1;
++
++ return (nret > 0)? args.args[nargs]: 0;
++}
++
++static int of_call_prom_ret(const char *service, int nargs, int nret,
++ unsigned int *rets, ...)
++{
++ int i;
++ struct prom_args {
++ const char *service;
++ int nargs;
++ int nret;
++ unsigned int args[12];
++ } args;
++ va_list list;
++
++ args.service = service;
++ args.nargs = nargs;
++ args.nret = nret;
++
++ va_start(list, rets);
++ for (i = 0; i < nargs; i++)
++ args.args[i] = va_arg(list, unsigned int);
++ va_end(list);
++
++ for (i = 0; i < nret; i++)
++ args.args[nargs+i] = 0;
++
++ if (prom(&args) < 0)
++ return -1;
++
++ if (rets != (void *) 0)
++ for (i = 1; i < nret; ++i)
++ rets[i-1] = args.args[nargs+i];
++
++ return (nret > 0)? args.args[nargs]: 0;
++}
++
++/* returns true if s2 is a prefix of s1 */
++static int string_match(const char *s1, const char *s2)
++{
++ for (; *s2; ++s2)
++ if (*s1++ != *s2)
++ return 0;
++ return 1;
++}
++
++/*
++ * Older OF's require that when claiming a specific range of addresses,
++ * we claim the physical space in the /memory node and the virtual
++ * space in the chosen mmu node, and then do a map operation to
++ * map virtual to physical.
++ */
++static int need_map = -1;
++static ihandle chosen_mmu;
++static phandle memory;
++
++static int check_of_version(void)
++{
++ phandle oprom, chosen;
++ char version[64];
++
++ oprom = finddevice("/openprom");
++ if (oprom == (phandle) -1)
++ return 0;
++ if (getprop(oprom, "model", version, sizeof(version)) <= 0)
++ return 0;
++ version[sizeof(version)-1] = 0;
++ printf("OF version = '%s'\r\n", version);
++ if (!string_match(version, "Open Firmware, 1.")
++ && !string_match(version, "FirmWorks,3."))
++ return 0;
++ chosen = finddevice("/chosen");
++ if (chosen == (phandle) -1) {
++ chosen = finddevice("/chosen@0");
++ if (chosen == (phandle) -1) {
++ printf("no chosen\n");
++ return 0;
++ }
++ }
++ if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
++ printf("no mmu\n");
++ return 0;
++ }
++ memory = (ihandle) of_call_prom("open", 1, 1, "/memory");
++ if (memory == (ihandle) -1) {
++ memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0");
++ if (memory == (ihandle) -1) {
++ printf("no memory node\n");
++ return 0;
++ }
++ }
++ printf("old OF detected\r\n");
++ return 1;
++}
++
++void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
++{
++ int ret;
++ unsigned int result;
++
++ if (need_map < 0)
++ need_map = check_of_version();
++ if (align || !need_map)
++ return (void *) of_call_prom("claim", 3, 1, virt, size, align);
++
++ ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
++ align, size, virt);
++ if (ret != 0 || result == -1)
++ return (void *) -1;
++ ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
++ align, size, virt);
++ /* 0x12 == coherent + read/write */
++ ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
++ 0x12, size, virt, virt);
++ return (void *) virt;
++}
++
++void of_exit(void)
++{
++ of_call_prom("exit", 0, 0);
++}
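
of_claim() hides the old-firmware split between claiming physical space, claiming virtual space, and mapping one onto the other; a caller just requests a range and tests for -1, retrying elsewhere on failure. A hypothetical usage sketch in the same spirit as of_try_claim() in of.c above (the base address, limit, and step are illustrative):

    #include "of.h"

    static void *grab_64k(void)
    {
            unsigned long base;

            /* Walk upward in 1MB steps until firmware accepts a range */
            for (base = 32 << 20; base < 512 << 20; base += 1 << 20) {
                    void *p = of_claim(base, 0x10000, 0);
                    if (p != (void *)-1)
                            return p;
            }
            return NULL;
    }
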
+diff -Nurb linux-2.6.22-570/arch/powerpc/configs/holly_defconfig linux-2.6.22-591/arch/powerpc/configs/holly_defconfig
+--- linux-2.6.22-570/arch/powerpc/configs/holly_defconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/configs/holly_defconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -190,7 +190,8 @@
+ # CONFIG_RESOURCES_64BIT is not set
+ CONFIG_ZONE_DMA_FLAG=1
+ CONFIG_PROC_DEVICETREE=y
+-# CONFIG_CMDLINE_BOOL is not set
++CONFIG_CMDLINE_BOOL=y
++CONFIG_CMDLINE="console=ttyS0,115200"
+ # CONFIG_PM is not set
+ # CONFIG_SECCOMP is not set
+ # CONFIG_WANT_DEVICE_TREE is not set
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/Makefile linux-2.6.22-591/arch/powerpc/kernel/Makefile
+--- linux-2.6.22-570/arch/powerpc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -12,7 +12,8 @@
+
+ obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
+ irq.o align.o signal_32.o pmc.o vdso.o \
+- init_task.o process.o systbl.o idle.o
++ init_task.o process.o systbl.o idle.o \
++ signal.o
+ obj-y += vdso32/
+ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
+ signal_64.o ptrace32.o \
+@@ -62,10 +63,16 @@
+ obj-$(CONFIG_KPROBES) += kprobes.o
+ obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
+
++ifeq ($(CONFIG_PPC32),y)
++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o
++else
++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp64.o
++endif
++
+ module-$(CONFIG_PPC64) += module_64.o
+ obj-$(CONFIG_MODULES) += $(module-y)
+
+-pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o
++pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o isa-bridge.o
+ pci32-$(CONFIG_PPC32) := pci_32.o
+ obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y)
+ obj-$(CONFIG_PCI_MSI) += msi.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/head_32.S linux-2.6.22-591/arch/powerpc/kernel/head_32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/head_32.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/head_32.S 2007-12-21 15:36:11.000000000 -0500
+@@ -9,7 +9,6 @@
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This file contains the low-level support and setup for the
+ * PowerPC platform, including trap and interrupt dispatch.
+@@ -32,10 +31,6 @@
+ #include <asm/ppc_asm.h>
+ #include <asm/asm-offsets.h>
+
+-#ifdef CONFIG_APUS
+-#include <asm/amigappc.h>
+-#endif
+-
+ /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+ #define LOAD_BAT(n, reg, RA, RB) \
+ /* see the comment for clear_bats() -- Cort */ \
+@@ -92,11 +87,6 @@
+ * r4: virtual address of boot_infos_t
+ * r5: 0
+ *
+- * APUS
+- * r3: 'APUS'
+- * r4: physical address of memory base
+- * Linux/m68k style BootInfo structure at &_end.
+- *
+ * PREP
+ * This is jumped to on prep systems right after the kernel is relocated
+ * to its proper place in memory by the boot loader. The expected layout
+@@ -150,14 +140,6 @@
+ */
+ bl early_init
+
+-#ifdef CONFIG_APUS
+-/* On APUS the __va/__pa constants need to be set to the correct
+- * values before continuing.
+- */
+- mr r4,r30
+- bl fix_mem_constants
+-#endif /* CONFIG_APUS */
+-
+ /* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
+ * the physical address we are running at, returned by early_init()
+ */
+@@ -167,7 +149,7 @@
+ bl flush_tlbs
+
+ bl initial_bats
+-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
++#if defined(CONFIG_BOOTX_TEXT)
+ bl setup_disp_bat
+ #endif
+
+@@ -183,7 +165,6 @@
+ #endif /* CONFIG_6xx */
+
+
+-#ifndef CONFIG_APUS
+ /*
+ * We need to run with _start at physical address 0.
+ * On CHRP, we are loaded at 0x10000 since OF on CHRP uses
+@@ -196,7 +177,6 @@
+ addis r4,r3,KERNELBASE@h /* current address of _start */
+ cmpwi 0,r4,0 /* are we already running at 0? */
+ bne relocate_kernel
+-#endif /* CONFIG_APUS */
+ /*
+ * we now have the 1st 16M of ram mapped with the bats.
+ * prep needs the mmu to be turned on here, but pmac already has it on.
+@@ -881,85 +861,6 @@
+ addi r6,r6,4
+ blr
+
+-#ifdef CONFIG_APUS
+-/*
+- * On APUS the physical base address of the kernel is not known at compile
+- * time, which means the __pa/__va constants used are incorrect. In the
+- * __init section is recorded the virtual addresses of instructions using
+- * these constants, so all that has to be done is fix these before
+- * continuing the kernel boot.
+- *
+- * r4 = The physical address of the kernel base.
+- */
+-fix_mem_constants:
+- mr r10,r4
+- addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */
+- neg r11,r10 /* phys_to_virt constant */
+-
+- lis r12,__vtop_table_begin@h
+- ori r12,r12,__vtop_table_begin@l
+- add r12,r12,r10 /* table begin phys address */
+- lis r13,__vtop_table_end@h
+- ori r13,r13,__vtop_table_end@l
+- add r13,r13,r10 /* table end phys address */
+- subi r12,r12,4
+- subi r13,r13,4
+-1: lwzu r14,4(r12) /* virt address of instruction */
+- add r14,r14,r10 /* phys address of instruction */
+- lwz r15,0(r14) /* instruction, now insert top */
+- rlwimi r15,r10,16,16,31 /* half of vp const in low half */
+- stw r15,0(r14) /* of instruction and restore. */
+- dcbst r0,r14 /* write it to memory */
+- sync
+- icbi r0,r14 /* flush the icache line */
+- cmpw r12,r13
+- bne 1b
+- sync /* additional sync needed on g4 */
+- isync
+-
+-/*
+- * Map the memory where the exception handlers will
+- * be copied to when hash constants have been patched.
+- */
+-#ifdef CONFIG_APUS_FAST_EXCEPT
+- lis r8,0xfff0
+-#else
+- lis r8,0
+-#endif
+- ori r8,r8,0x2 /* 128KB, supervisor */
+- mtspr SPRN_DBAT3U,r8
+- mtspr SPRN_DBAT3L,r8
+-
+- lis r12,__ptov_table_begin@h
+- ori r12,r12,__ptov_table_begin@l
+- add r12,r12,r10 /* table begin phys address */
+- lis r13,__ptov_table_end@h
+- ori r13,r13,__ptov_table_end@l
+- add r13,r13,r10 /* table end phys address */
+- subi r12,r12,4
+- subi r13,r13,4
+-1: lwzu r14,4(r12) /* virt address of instruction */
+- add r14,r14,r10 /* phys address of instruction */
+- lwz r15,0(r14) /* instruction, now insert top */
+- rlwimi r15,r11,16,16,31 /* half of pv const in low half*/
+- stw r15,0(r14) /* of instruction and restore. */
+- dcbst r0,r14 /* write it to memory */
+- sync
+- icbi r0,r14 /* flush the icache line */
+- cmpw r12,r13
+- bne 1b
+-
+- sync /* additional sync needed on g4 */
+- isync /* No speculative loading until now */
+- blr
+-
+-/***********************************************************************
+- * Please note that on APUS the exception handlers are located at the
+- * physical address 0xfff0000. For this reason, the exception handlers
+- * cannot use relative branches to access the code below.
+- ***********************************************************************/
+-#endif /* CONFIG_APUS */
+-
+ #ifdef CONFIG_SMP
+ #ifdef CONFIG_GEMINI
+ .globl __secondary_start_gemini
+@@ -1135,19 +1036,6 @@
+ bl __save_cpu_setup
+ bl MMU_init
+
+-#ifdef CONFIG_APUS
+- /* Copy exception code to exception vector base on APUS. */
+- lis r4,KERNELBASE@h
+-#ifdef CONFIG_APUS_FAST_EXCEPT
+- lis r3,0xfff0 /* Copy to 0xfff00000 */
+-#else
+- lis r3,0 /* Copy to 0x00000000 */
+-#endif
+- li r5,0x4000 /* # bytes of memory to copy */
+- li r6,0
+- bl copy_and_flush /* copy the first 0x4000 bytes */
+-#endif /* CONFIG_APUS */
+-
+ /*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+@@ -1324,11 +1212,7 @@
+ #else
+ ori r8,r8,2 /* R/W access */
+ #endif /* CONFIG_SMP */
+-#ifdef CONFIG_APUS
+- ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */
+-#else
+ ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
+-#endif /* CONFIG_APUS */
+
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
+@@ -1338,7 +1222,7 @@
+ blr
+
+
+-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
++#ifdef CONFIG_BOOTX_TEXT
+ setup_disp_bat:
+ /*
+ * setup the display bat prepared for us in prom.c
+@@ -1362,7 +1246,7 @@
+ 1: mtspr SPRN_IBAT3L,r8
+ mtspr SPRN_IBAT3U,r11
+ blr
+-#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */
++#endif /* CONFIG_BOOTX_TEXT */
+
+ #ifdef CONFIG_8260
+ /* Jump into the system reset for the rom.
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/irq.c linux-2.6.22-591/arch/powerpc/kernel/irq.c
+--- linux-2.6.22-570/arch/powerpc/kernel/irq.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500
+@@ -7,7 +7,6 @@
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c
+--- linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,271 @@
++/*
++ * Routines for tracking a legacy ISA bridge
++ *
++ * Copyright 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
++ *
++ * Some bits and pieces moved over from pci_64.c
++ *
++ * Copyright 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#define DEBUG
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include <linux/string.h>
++#include <linux/init.h>
++#include <linux/mm.h>
++#include <linux/notifier.h>
++
++#include <asm/processor.h>
++#include <asm/io.h>
++#include <asm/prom.h>
++#include <asm/pci-bridge.h>
++#include <asm/machdep.h>
++#include <asm/ppc-pci.h>
++#include <asm/firmware.h>
++
++unsigned long isa_io_base; /* NULL if no ISA bus */
++EXPORT_SYMBOL(isa_io_base);
++
++/* Cached ISA bridge dev. */
++static struct device_node *isa_bridge_devnode;
++struct pci_dev *isa_bridge_pcidev;
++EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
++
++#define ISA_SPACE_MASK 0x1
++#define ISA_SPACE_IO 0x1
++
++static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
++ unsigned long phb_io_base_phys)
++{
++ /* We should get some saner parsing here and remove these structs */
++ struct pci_address {
++ u32 a_hi;
++ u32 a_mid;
++ u32 a_lo;
++ };
++
++ struct isa_address {
++ u32 a_hi;
++ u32 a_lo;
++ };
++
++ struct isa_range {
++ struct isa_address isa_addr;
++ struct pci_address pci_addr;
++ unsigned int size;
++ };
++
++ const struct isa_range *range;
++ unsigned long pci_addr;
++ unsigned int isa_addr;
++ unsigned int size;
++ int rlen = 0;
++
++ range = of_get_property(isa_node, "ranges", &rlen);
++ if (range == NULL || (rlen < sizeof(struct isa_range)))
++ goto inval_range;
++
++ /* From "ISA Binding to 1275"
++ * The ranges property is laid out as an array of elements,
++ * each of which comprises:
++ * cells 0 - 1: an ISA address
++ * cells 2 - 4: a PCI address
++ * (size depending on dev->n_addr_cells)
++ * cell 5: the size of the range
++ */
++ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
++ range++;
++ rlen -= sizeof(struct isa_range);
++ if (rlen < sizeof(struct isa_range))
++ goto inval_range;
++ }
++ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
++ goto inval_range;
++
++ isa_addr = range->isa_addr.a_lo;
++ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
++ range->pci_addr.a_lo;
++
++ /* Assume these are both zero. Note: We could fix that and
++ * do a proper parsing instead ... oh well, that will do for
++ * now as nobody uses fancy mappings for ISA bridges
++ */
++ if ((pci_addr != 0) || (isa_addr != 0)) {
++ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
++ __FUNCTION__);
++ return;
++ }
++
++ /* Align size and make sure it's cropped to 64K */
++ size = PAGE_ALIGN(range->size);
++ if (size > 0x10000)
++ size = 0x10000;
++
++ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
++ size, _PAGE_NO_CACHE|_PAGE_GUARDED);
++ return;
++
++inval_range:
++ printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
++ "mapping 64k\n");
++ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
++ 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
++}
++
++
++/**
++ * isa_bridge_find_early - Find and map the ISA IO space early before
++ * main PCI discovery. This is optionally called by
++ * the arch code when adding PCI PHBs to get early
++ * access to ISA IO ports
++ */
++void __init isa_bridge_find_early(struct pci_controller *hose)
++{
++ struct device_node *np, *parent = NULL, *tmp;
++
++ /* If we already have an ISA bridge, bail off */
++ if (isa_bridge_devnode != NULL)
++ return;
++
++ /* For each "isa" node in the system, check whether our hose is one
++ * of its parents. Note: we search by type and not by name. It might
++ * be better to search by name, but that's what the code used to do
++ * and I don't want to break too much at once. We can look into
++ * changing that separately.
++ */
++ for_each_node_by_type(np, "isa") {
++ /* Look for our hose being a parent */
++ for (parent = of_get_parent(np); parent;) {
++ if (parent == hose->arch_data) {
++ of_node_put(parent);
++ break;
++ }
++ tmp = parent;
++ parent = of_get_parent(parent);
++ of_node_put(tmp);
++ }
++ if (parent != NULL)
++ break;
++ }
++ if (np == NULL)
++ return;
++ isa_bridge_devnode = np;
++
++ /* Now parse the "ranges" property and setup the ISA mapping */
++ pci_process_ISA_OF_ranges(np, hose->io_base_phys);
++
++ /* Set the global ISA io base to indicate we have an ISA bridge */
++ isa_io_base = ISA_IO_BASE;
++
++ pr_debug("ISA bridge (early) is %s\n", np->full_name);
++}
++
++/**
++ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
++ * a new ISA bridge
++ */
++static void __devinit isa_bridge_find_late(struct pci_dev *pdev,
++ struct device_node *devnode)
++{
++ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
++
++ /* Store ISA device node and PCI device */
++ isa_bridge_devnode = of_node_get(devnode);
++ isa_bridge_pcidev = pdev;
++
++ /* Now parse the "ranges" property and setup the ISA mapping */
++ pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
++
++ /* Set the global ISA io base to indicate we have an ISA bridge */
++ isa_io_base = ISA_IO_BASE;
++
++ pr_debug("ISA bridge (late) is %s on %s\n",
++ devnode->full_name, pci_name(pdev));
++}
++
++/**
++ * isa_bridge_remove - Remove/unmap an ISA bridge
++ */
++static void isa_bridge_remove(void)
++{
++ pr_debug("ISA bridge removed !\n");
++
++ /* Clear the global ISA io base to indicate that we have no more
++ * ISA bridge. Note that drivers don't quite handle that, though
++ * we should probably do something about it. But do we ever really
++ * have ISA bridges being removed on machines using legacy devices ?
++ */
++ isa_io_base = ISA_IO_BASE;
++
++ /* Clear references to the bridge */
++ of_node_put(isa_bridge_devnode);
++ isa_bridge_devnode = NULL;
++ isa_bridge_pcidev = NULL;
++
++ /* Unmap the ISA area */
++ __iounmap_at((void *)ISA_IO_BASE, 0x10000);
++}
++
++/**
++ * isa_bridge_notify - Get notified of PCI devices addition/removal
++ */
++static int __devinit isa_bridge_notify(struct notifier_block *nb,
++ unsigned long action, void *data)
++{
++ struct device *dev = data;
++ struct pci_dev *pdev = to_pci_dev(dev);
++ struct device_node *devnode = pci_device_to_OF_node(pdev);
++
++ switch(action) {
++ case BUS_NOTIFY_ADD_DEVICE:
++ /* Check if we have an early ISA device, without PCI dev */
++ if (isa_bridge_devnode && isa_bridge_devnode == devnode &&
++ !isa_bridge_pcidev) {
++ pr_debug("ISA bridge PCI attached: %s\n",
++ pci_name(pdev));
++ isa_bridge_pcidev = pdev;
++ }
++
++ /* Check if we have no ISA device, and this happens to be one,
++ * register it as such if it has an OF device
++ */
++ if (!isa_bridge_devnode && devnode && devnode->type &&
++ !strcmp(devnode->type, "isa"))
++ isa_bridge_find_late(pdev, devnode);
++
++ return 0;
++ case BUS_NOTIFY_DEL_DEVICE:
++ /* Check if this is our existing ISA device */
++ if (pdev == isa_bridge_pcidev ||
++ (devnode && devnode == isa_bridge_devnode))
++ isa_bridge_remove();
++ return 0;
++ }
++ return 0;
++}
++
++static struct notifier_block isa_bridge_notifier = {
++ .notifier_call = isa_bridge_notify
++};
++
++/**
++ * isa_bridge_init - register to be notified of ISA bridge addition/removal
++ *
++ */
++static int __init isa_bridge_init(void)
++{
++ if (firmware_has_feature(FW_FEATURE_ISERIES))
++ return 0;
++ bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
++ return 0;
++}
++arch_initcall(isa_bridge_init);
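
The late-discovery path rests on an ordinary driver-core bus notifier: isa_bridge_init() hooks pci_bus_type, and the callback then sees every PCI device addition and removal. A minimal self-contained sketch of the same pattern (the demo_* names are hypothetical; bus_register_notifier() and the BUS_NOTIFY_* actions are the stock driver-core API):

    #include <linux/kernel.h>
    #include <linux/init.h>
    #include <linux/device.h>
    #include <linux/notifier.h>
    #include <linux/pci.h>

    static int demo_pci_notify(struct notifier_block *nb,
                               unsigned long action, void *data)
    {
            struct pci_dev *pdev = to_pci_dev((struct device *)data);

            if (action == BUS_NOTIFY_ADD_DEVICE)
                    printk(KERN_DEBUG "PCI device added: %s\n", pci_name(pdev));
            return 0;
    }

    static struct notifier_block demo_pci_nb = {
            .notifier_call = demo_pci_notify,
    };

    static int __init demo_pci_init(void)
    {
            return bus_register_notifier(&pci_bus_type, &demo_pci_nb);
    }
    arch_initcall(demo_pci_init);
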
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb.c linux-2.6.22-591/arch/powerpc/kernel/kgdb.c
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,499 @@
++/*
++ * arch/powerpc/kernel/kgdb.c
++ *
++ * PowerPC backend to the KGDB stub.
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * Copied from arch/ppc/kernel/kgdb.c, updated for ppc64
++ *
++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
++ * Copyright (C) 2003 Timesys Corporation.
++ * Copyright (C) 2004-2006 MontaVista Software, Inc.
++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
++ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
++ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program as licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/smp.h>
++#include <linux/signal.h>
++#include <linux/ptrace.h>
++#include <asm/current.h>
++#include <asm/ptrace.h>
++#include <asm/processor.h>
++#include <asm/machdep.h>
++
++/*
++ * This table contains the mapping between PowerPC hardware trap types, and
++ * signals, which are primarily what GDB understands. GDB and the kernel
++ * don't always agree on values, so we use constants taken from gdb-6.2.
++ */
++static struct hard_trap_info
++{
++ unsigned int tt; /* Trap type code for powerpc */
++ unsigned char signo; /* Signal that we map this trap into */
++} hard_trap_info[] = {
++ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */
++ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */
++ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
++ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */
++ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */
++ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */
++ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
++ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
++ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
++ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
++#if defined(CONFIG_FSL_BOOKE)
++ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
++ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
++ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
++ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */
++ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */
++ { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */
++ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
++ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
++ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
++#else
++ { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */
++ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
++ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
++ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
++ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
++#endif
++#else
++ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
++#if defined(CONFIG_8xx)
++ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
++#else
++ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
++ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
++ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
++#if defined(CONFIG_PPC64)
++ { 0x1200, 0x05 /* SIGILL */ }, /* system error */
++ { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */
++ { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */
++ { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */
++ { 0x1800, 0x04 /* SIGILL */ }, /* thermal */
++#else
++ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */
++ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */
++ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */
++ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */
++#endif
++#endif
++#endif
++ { 0x0000, 0x00 } /* Must be last */
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int computeSignal(unsigned int tt)
++{
++ struct hard_trap_info *ht;
++
++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++ if (ht->tt == tt)
++ return ht->signo;
++
++ return SIGHUP; /* default for things we don't know about */
++}
++
++static int kgdb_call_nmi_hook(struct pt_regs *regs)
++{
++ kgdb_nmihook(smp_processor_id(), regs);
++ return 0;
++}
++
++#ifdef CONFIG_SMP
++void kgdb_roundup_cpus(unsigned long flags)
++{
++ smp_send_debugger_break(MSG_ALL_BUT_SELF);
++}
++#endif
++
++/* KGDB functions to use the existing PowerPC debugger hooks. */
++static int kgdb_debugger(struct pt_regs *regs)
++{
++ return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++}
++
++static int kgdb_breakpoint(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
++ regs->nip += 4;
++
++ return 1;
++}
++
++static int kgdb_singlestep(struct pt_regs *regs)
++{
++ struct thread_info *thread_info, *exception_thread_info;
++
++ if (user_mode(regs))
++ return 0;
++
++ /*
++ * On Book E and perhaps other processors, singlestep is handled on
++ * the critical exception stack.  This causes current_thread_info()
++ * to fail, since it locates the thread_info by masking off
++ * the low bits of the current stack pointer. We work around
++ * this issue by copying the thread_info from the kernel stack
++ * before calling kgdb_handle_exception, and copying it back
++ * afterwards. On most processors the copy is avoided since
++ * exception_thread_info == thread_info.
++ */
++ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
++ exception_thread_info = current_thread_info();
++
++ if (thread_info != exception_thread_info)
++ memcpy(exception_thread_info, thread_info, sizeof *thread_info);
++
++ kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++ if (thread_info != exception_thread_info)
++ memcpy(thread_info, exception_thread_info, sizeof *thread_info);
++
++ return 1;
++}
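++
++/*
++ * For reference (illustrative, not from the patch): current_thread_info()
++ * on 32-bit powerpc is effectively
++ *
++ *	(struct thread_info *)(r1 & ~(THREAD_SIZE - 1))
++ *
++ * so while running on the separate critical-exception stack it resolves
++ * to that stack, not the task's kernel stack -- hence the copy in and
++ * copy back above.
++ */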
++
++int kgdb_iabr_match(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++ return 1;
++}
++
++int kgdb_dabr_match(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++ return 1;
++}
++
++#define PACK64(ptr,src) do { *(ptr++) = (src); } while(0)
++
++#define PACK32(ptr,src) do { \
++ u32 *ptr32; \
++ ptr32 = (u32 *)ptr; \
++ *(ptr32++) = (src); \
++ ptr = (unsigned long *)ptr32; \
++ } while(0)
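++
++/*
++ * Usage sketch (illustrative, not part of the patch): PACK64 consumes
++ * one unsigned long slot; PACK32 consumes exactly four bytes, which on
++ * 64-bit leaves ptr pointing mid-long, so mixed-width fields end up
++ * packed exactly as GDB's register layout expects:
++ *
++ *	unsigned long *ptr = gdb_regs;
++ *	PACK64(ptr, regs->nip);		64-bit slot
++ *	PACK32(ptr, regs->ccr);		32-bit slot
++ */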
++
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ unsigned long *ptr = gdb_regs;
++ int reg;
++
++ memset(gdb_regs, 0, NUMREGBYTES);
++
++ for (reg = 0; reg < 32; reg++)
++ PACK64(ptr, regs->gpr[reg]);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ PACK64(ptr, current->thread.evr[reg]);
++#else
++ ptr += 32;
++#endif
++#else
++ /* fp registers not used by kernel, leave zero */
++ ptr += 32 * 8 / sizeof(long);
++#endif
++
++ PACK64(ptr, regs->nip);
++ PACK64(ptr, regs->msr);
++ PACK32(ptr, regs->ccr);
++ PACK64(ptr, regs->link);
++ PACK64(ptr, regs->ctr);
++ PACK32(ptr, regs->xer);
++
++#if 0
++ Following are in struct thread_struct, not struct pt_regs,
++ ignoring for now since kernel does not use them. Would it
++ make sense to get them from the thread that kgdb is set to?
++
++ If this code is enabled, update the definition of NUMREGBYTES to
++ include the vector registers and vector state registers.
++
++ PACK32(ptr, current->thread->fpscr);
++
++ /* vr registers not used by kernel, leave zero */
++ ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++ PACK32(ptr, current->thread->vscr);
++ PACK32(ptr, current->thread->vrsave);
++#else
++ ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ PACK32(ptr, current->thread.acc >> 32);
++ PACK32(ptr, current->thread.acc & 0xffffffff);
++ PACK64(ptr, current->thread.spefscr);
++#else
++ ptr += 2 + 1;
++#endif
++#else
++ /* fpscr not used by kernel, leave zero */
++ PACK32(ptr, 0);
++#endif
++#endif
++
++ BUG_ON((unsigned long)ptr >
++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
++ STACK_FRAME_OVERHEAD);
++ unsigned long *ptr = gdb_regs;
++ int reg;
++
++ memset(gdb_regs, 0, NUMREGBYTES);
++
++ /* Regs GPR0-2 */
++ for (reg = 0; reg < 3; reg++)
++ PACK64(ptr, regs->gpr[reg]);
++
++ /* Regs GPR3-13 are volatile (caller-saved), not preserved here */
++ ptr += 11;
++
++ /* Regs GPR14-31 */
++ for (reg = 14; reg < 32; reg++)
++ PACK64(ptr, regs->gpr[reg]);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ PACK64(ptr, p->thread.evr[reg]);
++#else
++ ptr += 32;
++#endif
++#else
++ /* fp registers not used by kernel, leave zero */
++ ptr += 32 * 8 / sizeof(long);
++#endif
++
++ PACK64(ptr, regs->nip);
++ PACK64(ptr, regs->msr);
++ PACK32(ptr, regs->ccr);
++ PACK64(ptr, regs->link);
++ PACK64(ptr, regs->ctr);
++ PACK32(ptr, regs->xer);
++
++#if 0
++ Following are in struct thread_struct, not struct pt_regs,
++ ignoring for now since kernel does not use them. Would it
++ make sense to get them from the thread that kgdb is set to?
++
++ If this code is enabled, update the definition of NUMREGBYTES to
++ include the vector registers and vector state registers.
++
++ PACK32(ptr, p->thread->fpscr);
++
++ /* vr registers not used by kernel, leave zero */
++ ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++ PACK32(ptr, p->thread->vscr);
++ PACK32(ptr, p->thread->vrsave);
++#else
++ ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ PACK32(ptr, p->thread.acc >> 32);
++ PACK32(ptr, p->thread.acc & 0xffffffff);
++ PACK64(ptr, p->thread.spefscr);
++#else
++ ptr += 2 + 1;
++#endif
++#else
++ /* fpscr not used by kernel, leave zero */
++ PACK32(ptr, 0);
++#endif
++#endif
++
++ BUG_ON((unsigned long)ptr >
++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++#define UNPACK64(dest,ptr) do { dest = *(ptr++); } while(0)
++
++#define UNPACK32(dest,ptr) do { \
++ u32 *ptr32; \
++ ptr32 = (u32 *)ptr; \
++ dest = *(ptr32++); \
++ ptr = (unsigned long *)ptr32; \
++ } while(0)
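++
++/*
++ * These mirror PACK64/PACK32 above: the same walk over gdb_regs in the
++ * same order, reading instead of writing, so field widths here must
++ * match regs_to_gdb_regs() exactly.
++ */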
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ unsigned long *ptr = gdb_regs;
++ int reg;
++#ifdef CONFIG_SPE
++ union {
++ u32 v32[2];
++ u64 v64;
++ } acc;
++#endif
++
++ for (reg = 0; reg < 32; reg++)
++ UNPACK64(regs->gpr[reg], ptr);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ UNPACK64(current->thread.evr[reg], ptr);
++#else
++ ptr += 32;
++#endif
++#else
++ /* fp registers not used by kernel, leave zero */
++ ptr += 32 * 8 / sizeof(long);
++#endif
++
++ UNPACK64(regs->nip, ptr);
++ UNPACK64(regs->msr, ptr);
++ UNPACK32(regs->ccr, ptr);
++ UNPACK64(regs->link, ptr);
++ UNPACK64(regs->ctr, ptr);
++ UNPACK32(regs->xer, ptr);
++
++#if 0
++ Following are in struct thread_struct, not struct pt_regs,
++ ignoring for now since kernel does not use them. Would it
++ make sense to get them from the thread that kgdb is set to?
++
++ If this code is enabled, update the definition of NUMREGBYTES to
++ include the vector registers and vector state registers.
++
++ /* fpscr, vscr, vrsave not used by kernel, leave unchanged */
++
++ UNPACK32(current->thread->fpscr, ptr);
++
++ /* vr registers not used by kernel, leave zero */
++ ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++ UNPACK32(current->thread->vscr, ptr);
++ UNPACK32(current->thread->vrsave, ptr);
++#else
++ ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ UNPACK32(acc.v32[0], ptr);
++ UNPACK32(acc.v32[1], ptr);
++ current->thread.acc = acc.v64;
++ UNPACK64(current->thread.spefscr, ptr);
++#else
++ ptr += 2 + 1;
++#endif
++#endif
++#endif
++
++ BUG_ON((unsigned long)ptr >
++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++/*
++ * This function does PowerPC-specific processing for interfacing to gdb.
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++ char *remcom_in_buffer, char *remcom_out_buffer,
++ struct pt_regs *linux_regs)
++{
++ char *ptr = &remcom_in_buffer[1];
++ unsigned long addr;
++
++ switch (remcom_in_buffer[0]) {
++ /*
++ * sAA..AA: step one instruction from AA..AA
++ * cAA..AA: continue from AA..AA
++ * (the address parameter is optional in both packets)
++ */
++ case 's':
++ case 'c':
++ /* handle the optional parameter */
++ if (kgdb_hex2long(&ptr, &addr))
++ linux_regs->nip = addr;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ /* set the trace bit if we're stepping */
++ if (remcom_in_buffer[0] == 's') {
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++ mtspr(SPRN_DBCR0,
++ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
++ linux_regs->msr |= MSR_DE;
++#else
++ linux_regs->msr |= MSR_SE;
++#endif
++ debugger_step = 1;
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ smp_processor_id());
++ }
++ return 0;
++ }
++
++ return -1;
++}
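++
++/*
++ * Example exchange (assumed remote-protocol usage, not from the patch):
++ * GDB sends "c" or "s", optionally followed by a hex resume address,
++ * e.g. "c10004000".  kgdb_hex2long() parses the optional address into
++ * nip, and for "s" the trace facility is armed so the CPU re-enters
++ * KGDB after executing a single instruction.
++ */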
++
++/*
++ * Global data
++ */
++struct kgdb_arch arch_kgdb_ops = {
++ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
++};
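++
++/*
++ * Note (illustrative): {0x7d, 0x82, 0x10, 0x08} is the instruction
++ * "twge r2,r2"; r2 is always >= r2, so it traps unconditionally,
++ * giving KGDB a software breakpoint that needs no debug registers.
++ */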
++
++int kgdb_not_implemented(struct pt_regs *regs)
++{
++ return 0;
++}
++
++int kgdb_arch_init(void)
++{
++#ifdef CONFIG_XMON
++#error Both XMON and KGDB selected in .config. Unselect one of them.
++#endif
++
++ __debugger_ipi = kgdb_call_nmi_hook;
++ __debugger = kgdb_debugger;
++ __debugger_bpt = kgdb_breakpoint;
++ __debugger_sstep = kgdb_singlestep;
++ __debugger_iabr_match = kgdb_iabr_match;
++ __debugger_dabr_match = kgdb_dabr_match;
++ __debugger_fault_handler = kgdb_not_implemented;
++
++ return 0;
++}
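++
++/*
++ * Note (illustrative): the __debugger_* pointers are the hooks that the
++ * powerpc trap handlers consult, so the assignments above are what route
++ * breakpoints, single-steps and debugger IPIs into KGDB once the
++ * arch_initcall below has run.
++ */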
++
++arch_initcall(kgdb_arch_init);
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++ .text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
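++
++/*
++ * Usage sketch from C (illustrative, not part of this file):
++ *
++ *	if (kgdb_fault_setjmp(ctx) == 0)
++ *		... touch the possibly faulting address ...
++ *	else
++ *		... the access faulted and the handler longjmp'd back ...
++ *
++ * kgdb_fault_setjmp() returns 0 on the initial call; after
++ * kgdb_fault_longjmp() restores the context it "returns" a second
++ * time with a non-zero value (the saved stack pointer).
++ */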
++
++_GLOBAL(kgdb_fault_setjmp)
++ mflr r0
++ stw r0,0(r3)
++ stw r1,4(r3)
++ stw r2,8(r3)
++ mfcr r0
++ stw r0,12(r3)
++ stmw r13,16(r3)
++ li r3,0
++ blr
++
++_GLOBAL(kgdb_fault_longjmp)
++ lmw r13,16(r3)
++ lwz r0,12(r3)
++ mtcrf 0x38,r0
++ lwz r0,0(r3)
++ lwz r1,4(r3)
++ lwz r2,8(r3)
++ mtlr r0
++ mr r3,r1
++ blr
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,77 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++ .text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
++
++_GLOBAL(kgdb_fault_setjmp)
++ mflr r0
++ std r0,0(r3)
++ std r1,8(r3)
++ std r2,16(r3)
++ mfcr r0
++ std r0,24(r3)
++ std r13,32(r3)
++ std r14,40(r3)
++ std r15,48(r3)
++ std r16,56(r3)
++ std r17,64(r3)
++ std r18,72(r3)
++ std r19,80(r3)
++ std r20,88(r3)
++ std r21,96(r3)
++ std r22,104(r3)
++ std r23,112(r3)
++ std r24,120(r3)
++ std r25,128(r3)
++ std r26,136(r3)
++ std r27,144(r3)
++ std r28,152(r3)
++ std r29,160(r3)
++ std r30,168(r3)
++ std r31,176(r3)
++ li r3,0
++ blr
++
++_GLOBAL(kgdb_fault_longjmp)
++ ld r13,32(r3)
++ ld r14,40(r3)
++ ld r15,48(r3)
++ ld r16,56(r3)
++ ld r17,64(r3)
++ ld r18,72(r3)
++ ld r19,80(r3)
++ ld r20,88(r3)
++ ld r21,96(r3)
++ ld r22,104(r3)
++ ld r23,112(r3)
++ ld r24,120(r3)
++ ld r25,128(r3)
++ ld r26,136(r3)
++ ld r27,144(r3)
++ ld r28,152(r3)
++ ld r29,160(r3)
++ ld r30,168(r3)
++ ld r31,176(r3)
++ ld r0,24(r3)
++ mtcrf 0x38,r0
++ ld r0,0(r3)
++ ld r1,8(r3)
++ ld r2,16(r3)
++ mtlr r0
++ mr r3,r1
++ blr
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c
+--- linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c 2007-12-21 15:36:11.000000000 -0500
+@@ -11,6 +11,9 @@
+ #include <asm/udbg.h>
+ #include <asm/pci-bridge.h>
+ #include <asm/ppc-pci.h>
++#ifdef CONFIG_KGDB_8250
++#include <linux/kgdb.h>
++#endif
+
+ #undef DEBUG
+
+@@ -487,6 +490,9 @@
+ fixup_port_pio(i, np, port);
+ if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI))
+ fixup_port_mmio(i, np, port);
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_platform_port(i, port);
++#endif
+ }
+
+ DBG("Registering platform serial ports\n");
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/misc_32.S linux-2.6.22-591/arch/powerpc/kernel/misc_32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/misc_32.S 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/misc_32.S 2007-12-21 15:36:11.000000000 -0500
+@@ -392,7 +392,7 @@
+ mtspr SPRN_L1CSR0,r3
+ isync
+ blr
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+ mfspr r3,SPRN_L1CSR1
+ ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
+ mtspr SPRN_L1CSR1,r3
+@@ -419,7 +419,7 @@
+ _GLOBAL(__flush_icache_range)
+ BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+@@ -514,8 +514,8 @@
+ */
+ _GLOBAL(__flush_dcache_icache)
+ BEGIN_FTR_SECTION
+- blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++ blr
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ rlwinm r3,r3,0,0,19 /* Get page base address */
+ li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
+ mtctr r4
+@@ -543,7 +543,7 @@
+ _GLOBAL(__flush_dcache_icache_phys)
+ BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ mfmsr r10
+ rlwinm r0,r10,0,28,26 /* clear DR */
+ mtmsr r0
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/of_platform.c linux-2.6.22-591/arch/powerpc/kernel/of_platform.c
+--- linux-2.6.22-570/arch/powerpc/kernel/of_platform.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/of_platform.c 2007-12-21 15:36:11.000000000 -0500
+@@ -427,14 +427,6 @@
+ /* Process "ranges" property */
+ pci_process_bridge_OF_ranges(phb, dev->node, 0);
+
+- /* Setup IO space. We use the non-dynamic version of that code here,
+- * which doesn't quite support unplugging. Next kernel release will
+- * have a better fix for this.
+- * Note also that we don't do ISA, this will also be fixed with a
+- * more massive rework.
+- */
+- pci_setup_phb_io(phb, pci_io_base == 0);
+-
+ /* Init pci_dn data structures */
+ pci_devs_phb_init_dynamic(phb);
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/pci_64.c linux-2.6.22-591/arch/powerpc/kernel/pci_64.c
+--- linux-2.6.22-570/arch/powerpc/kernel/pci_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/pci_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -11,7 +11,7 @@
+ * 2 of the License, or (at your option) any later version.
+ */
+
+-#undef DEBUG
++#define DEBUG
+
+ #include <linux/kernel.h>
+ #include <linux/pci.h>
+@@ -22,6 +22,7 @@
+ #include <linux/list.h>
+ #include <linux/syscalls.h>
+ #include <linux/irq.h>
++#include <linux/vmalloc.h>
+
+ #include <asm/processor.h>
+ #include <asm/io.h>
+@@ -41,35 +42,26 @@
+
+ unsigned long pci_probe_only = 1;
+ int pci_assign_all_buses = 0;
+-static int pci_initial_scan_done;
+
+ static void fixup_resource(struct resource *res, struct pci_dev *dev);
+ static void do_bus_setup(struct pci_bus *bus);
+-static void phbs_remap_io(void);
+
+ /* pci_io_base -- the base address from which io bars are offsets.
+ * This is the lowest I/O base address (so bar values are always positive),
+ * and it *must* be the start of ISA space if an ISA bus exists because
+- * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
+- * page is mapped and isa_io_limit prevents access to it.
++ * ISA drivers use hard coded offsets.  If no ISA bus exists, nothing
++ * is mapped in the first 64K of IO space.
+ */
+-unsigned long isa_io_base; /* NULL if no ISA bus */
+-EXPORT_SYMBOL(isa_io_base);
+-unsigned long pci_io_base;
++unsigned long pci_io_base = ISA_IO_BASE;
+ EXPORT_SYMBOL(pci_io_base);
+
+-void iSeries_pcibios_init(void);
+-
+ LIST_HEAD(hose_list);
+
+ static struct dma_mapping_ops *pci_dma_ops;
+
++/* XXX kill that some day ... */
+ int global_phb_number; /* Global phb counter */
+
+-/* Cached ISA bridge dev. */
+-struct pci_dev *ppc64_isabridge_dev = NULL;
+-EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
+-
+ void set_pci_dma_ops(struct dma_mapping_ops *dma_ops)
+ {
+ pci_dma_ops = dma_ops;
+@@ -100,7 +92,7 @@
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+- offset = (unsigned long)hose->io_base_virt - pci_io_base;
++ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+@@ -119,7 +111,7 @@
+ return;
+
+ if (res->flags & IORESOURCE_IO)
+- offset = (unsigned long)hose->io_base_virt - pci_io_base;
++ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ if (res->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+@@ -156,7 +148,7 @@
+
+ if (res->flags & IORESOURCE_IO) {
+ unsigned long offset = (unsigned long)hose->io_base_virt -
+- pci_io_base;
++ _IO_BASE;
+ /* Make sure we start at our min on all hoses */
+ if (start - offset < PCIBIOS_MIN_IO)
+ start = PCIBIOS_MIN_IO + offset;
+@@ -535,10 +527,16 @@
+ bus->secondary = hose->first_busno;
+ hose->bus = bus;
+
++ if (!firmware_has_feature(FW_FEATURE_ISERIES))
++ pcibios_map_io_space(bus);
++
+ bus->resource[0] = res = &hose->io_resource;
+- if (res->flags && request_resource(&ioport_resource, res))
++ if (res->flags && request_resource(&ioport_resource, res)) {
+ printk(KERN_ERR "Failed to request PCI IO region "
+ "on PCI domain %04x\n", hose->global_number);
++ DBG("res->start = 0x%016lx, res->end = 0x%016lx\n",
++ res->start, res->end);
++ }
+
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+@@ -596,17 +594,6 @@
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
+- /* Cache the location of the ISA bridge (if we have one) */
+- ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+- if (ppc64_isabridge_dev != NULL)
+- printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+-
+- if (!firmware_has_feature(FW_FEATURE_ISERIES))
+- /* map in PCI I/O space */
+- phbs_remap_io();
+-
+- pci_initial_scan_done = 1;
+-
+ printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+
+ return 0;
+@@ -711,7 +698,7 @@
+ #endif
+ res_bit = IORESOURCE_MEM;
+ } else {
+- io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
++ io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+@@ -881,76 +868,6 @@
+ device_create_file(&pdev->dev, &dev_attr_devspec);
+ }
+
+-#define ISA_SPACE_MASK 0x1
+-#define ISA_SPACE_IO 0x1
+-
+-static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+- unsigned long phb_io_base_phys,
+- void __iomem * phb_io_base_virt)
+-{
+- /* Remove these asap */
+-
+- struct pci_address {
+- u32 a_hi;
+- u32 a_mid;
+- u32 a_lo;
+- };
+-
+- struct isa_address {
+- u32 a_hi;
+- u32 a_lo;
+- };
+-
+- struct isa_range {
+- struct isa_address isa_addr;
+- struct pci_address pci_addr;
+- unsigned int size;
+- };
+-
+- const struct isa_range *range;
+- unsigned long pci_addr;
+- unsigned int isa_addr;
+- unsigned int size;
+- int rlen = 0;
+-
+- range = of_get_property(isa_node, "ranges", &rlen);
+- if (range == NULL || (rlen < sizeof(struct isa_range))) {
+- printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+- "mapping 64k\n");
+- __ioremap_explicit(phb_io_base_phys,
+- (unsigned long)phb_io_base_virt,
+- 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+- return;
+- }
+-
+- /* From "ISA Binding to 1275"
+- * The ranges property is laid out as an array of elements,
+- * each of which comprises:
+- * cells 0 - 1: an ISA address
+- * cells 2 - 4: a PCI address
+- * (size depending on dev->n_addr_cells)
+- * cell 5: the size of the range
+- */
+- if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+- isa_addr = range->isa_addr.a_lo;
+- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+- range->pci_addr.a_lo;
+-
+- /* Assume these are both zero */
+- if ((pci_addr != 0) || (isa_addr != 0)) {
+- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+- __FUNCTION__);
+- return;
+- }
+-
+- size = PAGE_ALIGN(range->size);
+-
+- __ioremap_explicit(phb_io_base_phys,
+- (unsigned long) phb_io_base_virt,
+- size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+- }
+-}
+-
+ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int prim)
+ {
+@@ -1045,155 +962,122 @@
+ }
+ }
+
+-void __devinit pci_setup_phb_io(struct pci_controller *hose, int primary)
+-{
+- unsigned long size = hose->pci_io_size;
+- unsigned long io_virt_offset;
+- struct resource *res;
+- struct device_node *isa_dn;
+-
+- if (size == 0)
+- return;
+-
+- hose->io_base_virt = reserve_phb_iospace(size);
+- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+- hose->global_number, hose->io_base_phys,
+- (unsigned long) hose->io_base_virt);
+-
+- if (primary) {
+- pci_io_base = (unsigned long)hose->io_base_virt;
+- isa_dn = of_find_node_by_type(NULL, "isa");
+- if (isa_dn) {
+- isa_io_base = pci_io_base;
+- pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+- hose->io_base_virt);
+- of_node_put(isa_dn);
+- }
+- }
+-
+- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+- res = &hose->io_resource;
+- res->start += io_virt_offset;
+- res->end += io_virt_offset;
+-
+- /* If this is called after the initial PCI scan, then we need to
+- * proceed to IO mappings now
+- */
+- if (pci_initial_scan_done)
+- __ioremap_explicit(hose->io_base_phys,
+- (unsigned long)hose->io_base_virt,
+- hose->pci_io_size,
+- _PAGE_NO_CACHE | _PAGE_GUARDED);
+-}
++#ifdef CONFIG_HOTPLUG
+
+-void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+- int primary)
++int pcibios_unmap_io_space(struct pci_bus *bus)
+ {
+- unsigned long size = hose->pci_io_size;
+- unsigned long io_virt_offset;
+- struct resource *res;
++ struct pci_controller *hose;
+
+- if (size == 0)
+- return;
++ WARN_ON(bus == NULL);
+
+- hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+- _PAGE_NO_CACHE | _PAGE_GUARDED);
+- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+- hose->global_number, hose->io_base_phys,
+- (unsigned long) hose->io_base_virt);
+-
+- if (primary)
+- pci_io_base = (unsigned long)hose->io_base_virt;
+-
+- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+- res = &hose->io_resource;
+- res->start += io_virt_offset;
+- res->end += io_virt_offset;
+-}
++ /* If this is not a PHB, we only flush the hash table over
++ * the area mapped by this bridge. We don't play with the PTE
++ * mappings since we might have to deal with sub-page alignments,
++ * so flushing the hash table is the only sane way to make sure
++ * that no hash entries are covering that removed bridge area
++ * while still allowing other busses to overlap those pages
++ */
++ if (bus->self) {
++ struct resource *res = bus->resource[0];
+
++ DBG("IO unmapping for PCI-PCI bridge %s\n",
++ pci_name(bus->self));
+
+-static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+- unsigned long *start_virt, unsigned long *size)
+-{
+- struct pci_controller *hose = pci_bus_to_host(bus);
+- struct resource *res;
++ __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
++ res->end - res->start + 1);
++ return 0;
++ }
+
+- if (bus->self)
+- res = bus->resource[0];
+- else
+- /* Root Bus */
+- res = &hose->io_resource;
++ /* Get the host bridge */
++ hose = pci_bus_to_host(bus);
+
+- if (res->end == 0 && res->start == 0)
+- return 1;
++ /* Check if we have IOs allocated */
++ if (hose->io_base_alloc == 0)
++ return 0;
+
+- *start_virt = pci_io_base + res->start;
+- *start_phys = *start_virt + hose->io_base_phys
+- - (unsigned long) hose->io_base_virt;
++ DBG("IO unmapping for PHB %s\n",
++ ((struct device_node *)hose->arch_data)->full_name);
++ DBG(" alloc=0x%p\n", hose->io_base_alloc);
+
+- if (res->end > res->start)
+- *size = res->end - res->start + 1;
+- else {
+- printk("%s(): unexpected region 0x%lx->0x%lx\n",
+- __FUNCTION__, res->start, res->end);
+- return 1;
+- }
++ /* This is a PHB, we fully unmap the IO area */
++ vunmap(hose->io_base_alloc);
+
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
+
+-int unmap_bus_range(struct pci_bus *bus)
+-{
+- unsigned long start_phys;
+- unsigned long start_virt;
+- unsigned long size;
++#endif /* CONFIG_HOTPLUG */
+
+- if (!bus) {
+- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+- return 1;
+- }
++int __devinit pcibios_map_io_space(struct pci_bus *bus)
++{
++ struct vm_struct *area;
++ unsigned long phys_page;
++ unsigned long size_page;
++ unsigned long io_virt_offset;
++ struct pci_controller *hose;
+
+- if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+- return 1;
+- if (__iounmap_explicit((void __iomem *) start_virt, size))
+- return 1;
++ WARN_ON(bus == NULL);
+
++ /* If this is not a PHB, nothing to do: page tables still exist and
++ * thus HPTEs will be faulted in when needed
++ */
++ if (bus->self) {
++ DBG("IO mapping for PCI-PCI bridge %s\n",
++ pci_name(bus->self));
++ DBG(" virt=0x%016lx...0x%016lx\n",
++ bus->resource[0]->start + _IO_BASE,
++ bus->resource[0]->end + _IO_BASE);
+ return 0;
+-}
+-EXPORT_SYMBOL(unmap_bus_range);
+-
+-int remap_bus_range(struct pci_bus *bus)
+-{
+- unsigned long start_phys;
+- unsigned long start_virt;
+- unsigned long size;
+-
+- if (!bus) {
+- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+- return 1;
+ }
+
++ /* Get the host bridge */
++ hose = pci_bus_to_host(bus);
++ phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
++ size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+
+- if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+- return 1;
+- if (start_phys == 0)
+- return 1;
+- printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+- if (__ioremap_explicit(start_phys, start_virt, size,
+- _PAGE_NO_CACHE | _PAGE_GUARDED))
+- return 1;
++ /* Make sure IO area address is clear */
++ hose->io_base_alloc = NULL;
+
++ /* If there's no IO to map on that bus, get away too */
++ if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
+ return 0;
+-}
+-EXPORT_SYMBOL(remap_bus_range);
+
+-static void phbs_remap_io(void)
+-{
+- struct pci_controller *hose, *tmp;
++ /* Let's allocate some IO space for that guy. We don't pass
++ * VM_IOREMAP because we don't care about alignment tricks that
++ * the core does in that case.  Maybe we should, due to stupid cards
++ * with incomplete address decoding, but I'd rather not deal with
++ * those outside of the reserved 64K legacy region.
++ */
++ area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
++ if (area == NULL)
++ return -ENOMEM;
++ hose->io_base_alloc = area->addr;
++ hose->io_base_virt = (void __iomem *)(area->addr +
++ hose->io_base_phys - phys_page);
++
++ DBG("IO mapping for PHB %s\n",
++ ((struct device_node *)hose->arch_data)->full_name);
++ DBG(" phys=0x%016lx, virt=0x%p (alloc=0x%p)\n",
++ hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
++ DBG(" size=0x%016lx (alloc=0x%016lx)\n",
++ hose->pci_io_size, size_page);
++
++ /* Establish the mapping */
++ if (__ioremap_at(phys_page, area->addr, size_page,
++ _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
++ return -ENOMEM;
++
++ /* Fixup hose IO resource */
++ io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
++ hose->io_resource.start += io_virt_offset;
++ hose->io_resource.end += io_virt_offset;
+
+- list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+- remap_bus_range(hose->bus);
++ DBG(" hose->io_resource=0x%016lx...0x%016lx\n",
++ hose->io_resource.start, hose->io_resource.end);
++
++ return 0;
+ }
++EXPORT_SYMBOL_GPL(pcibios_map_io_space);
+
+ static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+ {
+@@ -1201,8 +1085,7 @@
+ unsigned long offset;
+
+ if (res->flags & IORESOURCE_IO) {
+- offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-
++ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ res->start += offset;
+ res->end += offset;
+ } else if (res->flags & IORESOURCE_MEM) {
+@@ -1217,9 +1100,20 @@
+ /* Update device resources. */
+ int i;
+
+- for (i = 0; i < PCI_NUM_RESOURCES; i++)
+- if (dev->resource[i].flags)
+- fixup_resource(&dev->resource[i], dev);
++ DBG("%s: Fixup resources:\n", pci_name(dev));
++ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++ struct resource *res = &dev->resource[i];
++ if (!res->flags)
++ continue;
++
++ DBG(" 0x%02x < %08lx:0x%016lx...0x%016lx\n",
++ i, res->flags, res->start, res->end);
++
++ fixup_resource(res, dev);
++
++ DBG(" > %08lx:0x%016lx...0x%016lx\n",
++ res->flags, res->start, res->end);
++ }
+ }
+ EXPORT_SYMBOL(pcibios_fixup_device_resources);
+
+@@ -1360,7 +1254,7 @@
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+- offset = (unsigned long)hose->io_base_virt - pci_io_base;
++ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ /* We pass a fully fixed up address to userland for MMIO instead of
+ * a BAR value because X is lame and expects to be able to use that
+@@ -1410,7 +1304,7 @@
+ if (address >= hose->io_base_phys &&
+ address < (hose->io_base_phys + hose->pci_io_size)) {
+ unsigned long base =
+- (unsigned long)hose->io_base_virt - pci_io_base;
++ (unsigned long)hose->io_base_virt - _IO_BASE;
+ return base + (address - hose->io_base_phys);
+ }
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/process.c linux-2.6.22-591/arch/powerpc/kernel/process.c
+--- linux-2.6.22-570/arch/powerpc/kernel/process.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/process.c 2007-12-21 15:36:11.000000000 -0500
+@@ -219,22 +219,26 @@
+ }
+ #endif /* CONFIG_SMP */
+
+-#ifdef CONFIG_PPC_MERGE /* XXX for now */
+ int set_dabr(unsigned long dabr)
+ {
++#ifdef CONFIG_PPC_MERGE /* XXX for now */
+ if (ppc_md.set_dabr)
+ return ppc_md.set_dabr(dabr);
++#endif
+
++ /* XXX should we have a CPU_FTR_HAS_DABR ? */
++#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+ mtspr(SPRN_DABR, dabr);
++#endif
+ return 0;
+ }
+-#endif
+
+ #ifdef CONFIG_PPC64
+ DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
+-static DEFINE_PER_CPU(unsigned long, current_dabr);
+ #endif
+
++static DEFINE_PER_CPU(unsigned long, current_dabr);
++
+ struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *new)
+ {
+@@ -299,12 +303,10 @@
+
+ #endif /* CONFIG_SMP */
+
+-#ifdef CONFIG_PPC64 /* for now */
+ if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) {
+ set_dabr(new->thread.dabr);
+ __get_cpu_var(current_dabr) = new->thread.dabr;
+ }
+-#endif /* CONFIG_PPC64 */
+
+ new_thread = &new->thread;
+ old_thread = &current->thread;
+@@ -474,12 +476,10 @@
+
+ discard_lazy_cpu_state();
+
+-#ifdef CONFIG_PPC64 /* for now */
+ if (current->thread.dabr) {
+ current->thread.dabr = 0;
+ set_dabr(0);
+ }
+-#endif
+ }
+
+ void
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/prom_init.c linux-2.6.22-591/arch/powerpc/kernel/prom_init.c
+--- linux-2.6.22-570/arch/powerpc/kernel/prom_init.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/prom_init.c 2007-12-21 15:36:11.000000000 -0500
+@@ -635,6 +635,7 @@
+ /* ibm,dynamic-reconfiguration-memory property supported */
+ #define OV5_DRCONF_MEMORY 0x20
+ #define OV5_LARGE_PAGES 0x10 /* large pages supported */
++#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */
+ /* PCIe/MSI support. Without MSI full PCIe is not supported */
+ #ifdef CONFIG_PCI_MSI
+ #define OV5_MSI 0x01 /* PCIe/MSI support */
+@@ -685,7 +686,8 @@
+ /* option vector 5: PAPR/OF options */
+ 3 - 2, /* length */
+ 0, /* don't ignore, don't halt */
+- OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_MSI,
++ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
++ OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+ };
+
+ /* Old method - ELF header with PT_NOTE sections */
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,161 +0,0 @@
+-/*
+- * Copyright (c) 2002 Stephen Rothwell, IBM Coproration
+- * Extracted from ptrace.c and ptrace32.c
+- *
+- * This file is subject to the terms and conditions of the GNU General
+- * Public License. See the file README.legal in the main directory of
+- * this archive for more details.
+- */
+-
+-#ifndef _PPC64_PTRACE_COMMON_H
+-#define _PPC64_PTRACE_COMMON_H
+-
+-#include <asm/system.h>
+-
+-/*
+- * Set of msr bits that gdb can change on behalf of a process.
+- */
+-#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1)
+-
+-/*
+- * Get contents of register REGNO in task TASK.
+- */
+-static inline unsigned long get_reg(struct task_struct *task, int regno)
+-{
+- unsigned long tmp = 0;
+-
+- /*
+- * Put the correct FP bits in, they might be wrong as a result
+- * of our lazy FP restore.
+- */
+- if (regno == PT_MSR) {
+- tmp = ((unsigned long *)task->thread.regs)[PT_MSR];
+- tmp |= task->thread.fpexc_mode;
+- } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+- tmp = ((unsigned long *)task->thread.regs)[regno];
+- }
+-
+- return tmp;
+-}
+-
+-/*
+- * Write contents of register REGNO in task TASK.
+- */
+-static inline int put_reg(struct task_struct *task, int regno,
+- unsigned long data)
+-{
+- if (regno < PT_SOFTE) {
+- if (regno == PT_MSR)
+- data = (data & MSR_DEBUGCHANGE)
+- | (task->thread.regs->msr & ~MSR_DEBUGCHANGE);
+- ((unsigned long *)task->thread.regs)[regno] = data;
+- return 0;
+- }
+- return -EIO;
+-}
+-
+-static inline void set_single_step(struct task_struct *task)
+-{
+- struct pt_regs *regs = task->thread.regs;
+- if (regs != NULL)
+- regs->msr |= MSR_SE;
+- set_tsk_thread_flag(task, TIF_SINGLESTEP);
+-}
+-
+-static inline void clear_single_step(struct task_struct *task)
+-{
+- struct pt_regs *regs = task->thread.regs;
+- if (regs != NULL)
+- regs->msr &= ~MSR_SE;
+- clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+-}
+-
+-#ifdef CONFIG_ALTIVEC
+-/*
+- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+- * The transfer totals 34 quadword. Quadwords 0-31 contain the
+- * corresponding vector registers. Quadword 32 contains the vscr as the
+- * last word (offset 12) within that quadword. Quadword 33 contains the
+- * vrsave as the first word (offset 0) within the quadword.
+- *
+- * This definition of the VMX state is compatible with the current PPC32
+- * ptrace interface. This allows signal handling and ptrace to use the
+- * same structures. This also simplifies the implementation of a bi-arch
+- * (combined (32- and 64-bit) gdb.
+- */
+-
+-/*
+- * Get contents of AltiVec register state in task TASK
+- */
+-static inline int get_vrregs(unsigned long __user *data,
+- struct task_struct *task)
+-{
+- unsigned long regsize;
+-
+- /* copy AltiVec registers VR[0] .. VR[31] */
+- regsize = 32 * sizeof(vector128);
+- if (copy_to_user(data, task->thread.vr, regsize))
+- return -EFAULT;
+- data += (regsize / sizeof(unsigned long));
+-
+- /* copy VSCR */
+- regsize = 1 * sizeof(vector128);
+- if (copy_to_user(data, &task->thread.vscr, regsize))
+- return -EFAULT;
+- data += (regsize / sizeof(unsigned long));
+-
+- /* copy VRSAVE */
+- if (put_user(task->thread.vrsave, (u32 __user *)data))
+- return -EFAULT;
+-
+- return 0;
+-}
+-
+-/*
+- * Write contents of AltiVec register state into task TASK.
+- */
+-static inline int set_vrregs(struct task_struct *task,
+- unsigned long __user *data)
+-{
+- unsigned long regsize;
+-
+- /* copy AltiVec registers VR[0] .. VR[31] */
+- regsize = 32 * sizeof(vector128);
+- if (copy_from_user(task->thread.vr, data, regsize))
+- return -EFAULT;
+- data += (regsize / sizeof(unsigned long));
+-
+- /* copy VSCR */
+- regsize = 1 * sizeof(vector128);
+- if (copy_from_user(&task->thread.vscr, data, regsize))
+- return -EFAULT;
+- data += (regsize / sizeof(unsigned long));
+-
+- /* copy VRSAVE */
+- if (get_user(task->thread.vrsave, (u32 __user *)data))
+- return -EFAULT;
+-
+- return 0;
+-}
+-#endif
+-
+-static inline int ptrace_set_debugreg(struct task_struct *task,
+- unsigned long addr, unsigned long data)
+-{
+- /* We only support one DABR and no IABRS at the moment */
+- if (addr > 0)
+- return -EINVAL;
+-
+- /* The bottom 3 bits are flags */
+- if ((data & ~0x7UL) >= TASK_SIZE)
+- return -EIO;
+-
+- /* Ensure translation is on */
+- if (data && !(data & DABR_TRANSLATION))
+- return -EIO;
+-
+- task->thread.dabr = data;
+- return 0;
+-}
+-
+-#endif /* _PPC64_PTRACE_COMMON_H */
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace.c linux-2.6.22-591/arch/powerpc/kernel/ptrace.c
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace.c 2007-12-21 15:36:11.000000000 -0500
+@@ -35,11 +35,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/system.h>
+
+-#ifdef CONFIG_PPC64
+-#include "ptrace-common.h"
+-#endif
++/*
++ * does not yet catch signals sent when the child dies.
++ * in exit.c or in signal.c.
++ */
+
+-#ifdef CONFIG_PPC32
+ /*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+@@ -48,65 +48,117 @@
+ #else
+ #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+ #endif
+-#endif /* CONFIG_PPC32 */
+
+ /*
+- * does not yet catch signals sent when the child dies.
+- * in exit.c or in signal.c.
++ * Max register writeable via put_reg
+ */
+-
+ #ifdef CONFIG_PPC32
++#define PT_MAX_PUT_REG PT_MQ
++#else
++#define PT_MAX_PUT_REG PT_CCR
++#endif
++
+ /*
+ * Get contents of register REGNO in task TASK.
+ */
+-static inline unsigned long get_reg(struct task_struct *task, int regno)
++unsigned long ptrace_get_reg(struct task_struct *task, int regno)
+ {
+- if (regno < sizeof(struct pt_regs) / sizeof(unsigned long)
+- && task->thread.regs != NULL)
++ unsigned long tmp = 0;
++
++ if (task->thread.regs == NULL)
++ return -EIO;
++
++ if (regno == PT_MSR) {
++ tmp = ((unsigned long *)task->thread.regs)[PT_MSR];
++ return tmp | task->thread.fpexc_mode;
++ }
++
++ if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long)))
+ return ((unsigned long *)task->thread.regs)[regno];
+- return (0);
++
++ return -EIO;
+ }
+
+ /*
+ * Write contents of register REGNO in task TASK.
+ */
+-static inline int put_reg(struct task_struct *task, int regno,
+- unsigned long data)
++int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+ {
+- if (regno <= PT_MQ && task->thread.regs != NULL) {
++ if (task->thread.regs == NULL)
++ return -EIO;
++
++ if (regno <= PT_MAX_PUT_REG || regno == PT_TRAP) {
+ if (regno == PT_MSR)
+ data = (data & MSR_DEBUGCHANGE)
+ | (task->thread.regs->msr & ~MSR_DEBUGCHANGE);
++ /* We prevent mucking around with the reserved bits of
++ * the trap word, which are used internally by the kernel
++ */
++ if (regno == PT_TRAP)
++ data &= 0xfff0;
+ ((unsigned long *)task->thread.regs)[regno] = data;
+ return 0;
+ }
+ return -EIO;
+ }
+
++
++static int get_fpregs(void __user *data, struct task_struct *task,
++ int has_fpscr)
++{
++ unsigned int count = has_fpscr ? 33 : 32;
++
++ if (copy_to_user(data, task->thread.fpr, count * sizeof(double)))
++ return -EFAULT;
++ return 0;
++}
++
++static int set_fpregs(void __user *data, struct task_struct *task,
++ int has_fpscr)
++{
++ unsigned int count = has_fpscr ? 33 : 32;
++
++ if (copy_from_user(task->thread.fpr, data, count * sizeof(double)))
++ return -EFAULT;
++ return 0;
++}
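++
++/*
++ * Buffer layout note (illustrative, not from the patch): data is an
++ * array of 32 doubles holding FPR0..FPR31, followed by one further
++ * double carrying the FPSCR when has_fpscr is set, i.e. 33 *
++ * sizeof(double) bytes for PTRACE_GETFPREGS/PTRACE_SETFPREGS.
++ */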
++
++
+ #ifdef CONFIG_ALTIVEC
+ /*
++ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
++ * The transfer totals 34 quadword. Quadwords 0-31 contain the
++ * corresponding vector registers. Quadword 32 contains the vscr as the
++ * last word (offset 12) within that quadword. Quadword 33 contains the
++ * vrsave as the first word (offset 0) within the quadword.
++ *
++ * This definition of the VMX state is compatible with the current PPC32
++ * ptrace interface. This allows signal handling and ptrace to use the
++ * same structures. This also simplifies the implementation of a bi-arch
++ * (combined 32- and 64-bit) gdb.
++ */
++
++/*
+ * Get contents of AltiVec register state in task TASK
+ */
+-static inline int get_vrregs(unsigned long __user *data, struct task_struct *task)
++static int get_vrregs(unsigned long __user *data, struct task_struct *task)
+ {
+- int i, j;
+-
+- if (!access_ok(VERIFY_WRITE, data, 133 * sizeof(unsigned long)))
+- return -EFAULT;
++ unsigned long regsize;
+
+ /* copy AltiVec registers VR[0] .. VR[31] */
+- for (i = 0; i < 32; i++)
+- for (j = 0; j < 4; j++, data++)
+- if (__put_user(task->thread.vr[i].u[j], data))
++ regsize = 32 * sizeof(vector128);
++ if (copy_to_user(data, task->thread.vr, regsize))
+ return -EFAULT;
++ data += (regsize / sizeof(unsigned long));
+
+ /* copy VSCR */
+- for (i = 0; i < 4; i++, data++)
+- if (__put_user(task->thread.vscr.u[i], data))
++ regsize = 1 * sizeof(vector128);
++ if (copy_to_user(data, &task->thread.vscr, regsize))
+ return -EFAULT;
++ data += (regsize / sizeof(unsigned long));
+
+ /* copy VRSAVE */
+- if (__put_user(task->thread.vrsave, data))
++ if (put_user(task->thread.vrsave, (u32 __user *)data))
+ return -EFAULT;
+
+ return 0;
+@@ -115,31 +167,29 @@
+ /*
+ * Write contents of AltiVec register state into task TASK.
+ */
+-static inline int set_vrregs(struct task_struct *task, unsigned long __user *data)
++static int set_vrregs(struct task_struct *task, unsigned long __user *data)
+ {
+- int i, j;
+-
+- if (!access_ok(VERIFY_READ, data, 133 * sizeof(unsigned long)))
+- return -EFAULT;
++ unsigned long regsize;
+
+ /* copy AltiVec registers VR[0] .. VR[31] */
+- for (i = 0; i < 32; i++)
+- for (j = 0; j < 4; j++, data++)
+- if (__get_user(task->thread.vr[i].u[j], data))
++ regsize = 32 * sizeof(vector128);
++ if (copy_from_user(task->thread.vr, data, regsize))
+ return -EFAULT;
++ data += (regsize / sizeof(unsigned long));
+
+ /* copy VSCR */
+- for (i = 0; i < 4; i++, data++)
+- if (__get_user(task->thread.vscr.u[i], data))
++ regsize = 1 * sizeof(vector128);
++ if (copy_from_user(&task->thread.vscr, data, regsize))
+ return -EFAULT;
++ data += (regsize / sizeof(unsigned long));
+
+ /* copy VRSAVE */
+- if (__get_user(task->thread.vrsave, data))
++ if (get_user(task->thread.vrsave, (u32 __user *)data))
+ return -EFAULT;
+
+ return 0;
+ }
+-#endif
++#endif /* CONFIG_ALTIVEC */
+
+ #ifdef CONFIG_SPE
+
+@@ -156,7 +206,7 @@
+ /*
+ * Get contents of SPE register state in task TASK.
+ */
+-static inline int get_evrregs(unsigned long *data, struct task_struct *task)
++static int get_evrregs(unsigned long *data, struct task_struct *task)
+ {
+ int i;
+
+@@ -182,7 +232,7 @@
+ /*
+ * Write contents of SPE register state into task TASK.
+ */
+-static inline int set_evrregs(struct task_struct *task, unsigned long *data)
++static int set_evrregs(struct task_struct *task, unsigned long *data)
+ {
+ int i;
+
+@@ -205,8 +255,8 @@
+ }
+ #endif /* CONFIG_SPE */
+
+-static inline void
+-set_single_step(struct task_struct *task)
++
++static void set_single_step(struct task_struct *task)
+ {
+ struct pt_regs *regs = task->thread.regs;
+
+@@ -221,8 +271,7 @@
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+ }
+
+-static inline void
+-clear_single_step(struct task_struct *task)
++static void clear_single_step(struct task_struct *task)
+ {
+ struct pt_regs *regs = task->thread.regs;
+
+@@ -236,7 +285,25 @@
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+ }
+-#endif /* CONFIG_PPC32 */
++
++static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
++ unsigned long data)
++{
++ /* We only support one DABR and no IABRS at the moment */
++ if (addr > 0)
++ return -EINVAL;
++
++ /* The bottom 3 bits are flags */
++ if ((data & ~0x7UL) >= TASK_SIZE)
++ return -EIO;
++
++ /* Ensure translation is on */
++ if (data && !(data & DABR_TRANSLATION))
++ return -EIO;
++
++ task->thread.dabr = data;
++ return 0;
++}
+
+ /*
+ * Called by kernel/ptrace.c when detaching..
+@@ -249,6 +316,62 @@
+ clear_single_step(child);
+ }
+
++/*
++ * Here are the old "legacy" powerpc-specific getregs/setregs ptrace calls;
++ * we mark them as obsolete now, and they will be removed in a future version
++ */
++static long arch_ptrace_old(struct task_struct *child, long request, long addr,
++ long data)
++{
++ int ret = -EPERM;
++
++ switch(request) {
++ case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
++ int i;
++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++ unsigned long __user *tmp = (unsigned long __user *)addr;
++
++ for (i = 0; i < 32; i++) {
++ ret = put_user(*reg, tmp);
++ if (ret)
++ break;
++ reg++;
++ tmp++;
++ }
++ break;
++ }
++
++ case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
++ int i;
++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++ unsigned long __user *tmp = (unsigned long __user *)addr;
++
++ for (i = 0; i < 32; i++) {
++ ret = get_user(*reg, tmp);
++ if (ret)
++ break;
++ reg++;
++ tmp++;
++ }
++ break;
++ }
++
++ case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
++ flush_fp_to_thread(child);
++ ret = get_fpregs((void __user *)addr, child, 0);
++ break;
++ }
++
++ case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */
++ flush_fp_to_thread(child);
++ ret = set_fpregs((void __user *)addr, child, 0);
++ break;
++ }
++
++ }
++ return ret;
++}
++
+ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+ {
+ int ret = -EPERM;
+@@ -284,11 +407,9 @@
+ #endif
+ break;
+
+-#ifdef CONFIG_PPC32
+ CHECK_FULL_REGS(child->thread.regs);
+-#endif
+ if (index < PT_FPR0) {
+- tmp = get_reg(child, (int) index);
++ tmp = ptrace_get_reg(child, (int) index);
+ } else {
+ flush_fp_to_thread(child);
+ tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
+@@ -323,13 +444,9 @@
+ #endif
+ break;
+
+-#ifdef CONFIG_PPC32
+ CHECK_FULL_REGS(child->thread.regs);
+-#endif
+- if (index == PT_ORIG_R3)
+- break;
+ if (index < PT_FPR0) {
+- ret = put_reg(child, index, data);
++ ret = ptrace_put_reg(child, index, data);
+ } else {
+ flush_fp_to_thread(child);
+ ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
+@@ -384,7 +501,6 @@
+ break;
+ }
+
+-#ifdef CONFIG_PPC64
+ case PTRACE_GET_DEBUGREG: {
+ ret = -EINVAL;
+ /* We only support one DABR and no IABRS at the moment */
+@@ -398,73 +514,61 @@
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+-#endif
+
+ case PTRACE_DETACH:
+ ret = ptrace_detach(child, data);
+ break;
+
+- case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+- unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+- for (i = 0; i < 32; i++) {
+- ret = put_user(*reg, tmp);
+- if (ret)
++#ifdef CONFIG_PPC64
++ case PTRACE_GETREGS64:
++#endif
++ case PTRACE_GETREGS: { /* Get all pt_regs from the child. */
++ int ui;
++ if (!access_ok(VERIFY_WRITE, (void __user *)data,
++ sizeof(struct pt_regs))) {
++ ret = -EIO;
+ break;
+- reg++;
+- tmp++;
+ }
+- break;
++ ret = 0;
++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++ ret |= __put_user(ptrace_get_reg(child, ui),
++ (unsigned long __user *) data);
++ data += sizeof(long);
+ }
+-
+- case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+- unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+- for (i = 0; i < 32; i++) {
+- ret = get_user(*reg, tmp);
+- if (ret)
+ break;
+- reg++;
+- tmp++;
+ }
++
++#ifdef CONFIG_PPC64
++ case PTRACE_SETREGS64:
++#endif
++ case PTRACE_SETREGS: { /* Set all gp regs in the child. */
++ unsigned long tmp;
++ int ui;
++ if (!access_ok(VERIFY_READ, (void __user *)data,
++ sizeof(struct pt_regs))) {
++ ret = -EIO;
+ break;
+ }
+-
+- case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+- unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+- flush_fp_to_thread(child);
+-
+- for (i = 0; i < 32; i++) {
+- ret = put_user(*reg, tmp);
++ ret = 0;
++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++ ret = __get_user(tmp, (unsigned long __user *) data);
+ if (ret)
+ break;
+- reg++;
+- tmp++;
++ ptrace_put_reg(child, ui, tmp);
++ data += sizeof(long);
+ }
+ break;
+ }
+
+- case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+- unsigned long __user *tmp = (unsigned long __user *)addr;
+-
++ case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */
+ flush_fp_to_thread(child);
+-
+- for (i = 0; i < 32; i++) {
+- ret = get_user(*reg, tmp);
+- if (ret)
++ ret = get_fpregs((void __user *)data, child, 1);
+ break;
+- reg++;
+- tmp++;
+ }
++
++ case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */
++ flush_fp_to_thread(child);
++ ret = set_fpregs((void __user *)data, child, 1);
+ break;
+ }
+
+@@ -499,11 +603,18 @@
+ break;
+ #endif
+
++ /* Old reverse-args ptrace calls */
++ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
++ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
++ case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
++ case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
++ ret = arch_ptrace_old(child, request, addr, data);
++ break;
++
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+-
+ return ret;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -33,13 +33,55 @@
+ #include <asm/pgtable.h>
+ #include <asm/system.h>
+
+-#include "ptrace-common.h"
+-
+ /*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
++/*
++ * Here are the old "legacy" powerpc-specific getregs/setregs ptrace calls;
++ * we mark them as obsolete now, and they will be removed in a future version
++ */
++static long compat_ptrace_old(struct task_struct *child, long request,
++ long addr, long data)
++{
++ int ret = -EPERM;
++
++ switch(request) {
++ case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
++ int i;
++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++ unsigned int __user *tmp = (unsigned int __user *)addr;
++
++ for (i = 0; i < 32; i++) {
++ ret = put_user(*reg, tmp);
++ if (ret)
++ break;
++ reg++;
++ tmp++;
++ }
++ break;
++ }
++
++ case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
++ int i;
++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++ unsigned int __user *tmp = (unsigned int __user *)addr;
++
++ for (i = 0; i < 32; i++) {
++ ret = get_user(*reg, tmp);
++ if (ret)
++ break;
++ reg++;
++ tmp++;
++ }
++ break;
++ }
++
++ }
++ return ret;
++}
++
+ long compat_sys_ptrace(int request, int pid, unsigned long addr,
+ unsigned long data)
+ {
+@@ -123,7 +165,7 @@
+ break;
+
+ if (index < PT_FPR0) {
+- tmp = get_reg(child, index);
++ tmp = ptrace_get_reg(child, index);
+ } else {
+ flush_fp_to_thread(child);
+ /*
+@@ -162,7 +204,9 @@
+ else
+ part = 0; /* want the 1st half of the register (left-most). */
+
+- /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */
++ /* Validate the input - check to see if address is on the wrong boundary
++ * or beyond the end of the user area
++ */
+ if ((addr & 3) || numReg > PT_FPSCR)
+ break;
+
+@@ -170,7 +214,7 @@
+ flush_fp_to_thread(child);
+ tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0];
+ } else { /* register within PT_REGS struct */
+- tmp = get_reg(child, numReg);
++ tmp = ptrace_get_reg(child, numReg);
+ }
+ reg32bits = ((u32*)&tmp)[part];
+ ret = put_user(reg32bits, (u32 __user *)data);
+@@ -226,10 +270,8 @@
+ if ((addr & 3) || (index > PT_FPSCR32))
+ break;
+
+- if (index == PT_ORIG_R3)
+- break;
+ if (index < PT_FPR0) {
+- ret = put_reg(child, index, data);
++ ret = ptrace_put_reg(child, index, data);
+ } else {
+ flush_fp_to_thread(child);
+ /*
+@@ -258,70 +300,25 @@
+ /* Determine which register the user wants */
+ index = (u64)addr >> 2;
+ numReg = index / 2;
++
+ /*
+ * Validate the input - check to see if address is on the
+ * wrong boundary or beyond the end of the user area
+ */
+ if ((addr & 3) || (numReg > PT_FPSCR))
+ break;
+- /* Insure it is a register we let them change */
+- if ((numReg == PT_ORIG_R3)
+- || ((numReg > PT_CCR) && (numReg < PT_FPR0)))
+- break;
+- if (numReg >= PT_FPR0) {
+- flush_fp_to_thread(child);
+- }
+- if (numReg == PT_MSR)
+- data = (data & MSR_DEBUGCHANGE)
+- | (child->thread.regs->msr & ~MSR_DEBUGCHANGE);
+- ((u32*)child->thread.regs)[index] = data;
+- ret = 0;
+- break;
+- }
+-
+- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+- case PTRACE_CONT: { /* restart after signal. */
+- ret = -EIO;
+- if (!valid_signal(data))
+- break;
+- if (request == PTRACE_SYSCALL)
+- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
++ if (numReg < PT_FPR0) {
++ unsigned long freg = ptrace_get_reg(child, numReg);
++ if (index % 2)
++ freg = (freg & ~0xfffffffful) | (data & 0xfffffffful);
+ else
+- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+- child->exit_code = data;
+- /* make sure the single step bit is not set. */
+- clear_single_step(child);
+- wake_up_process(child);
+- ret = 0;
+- break;
+- }
+-
+- /*
+- * make the child exit. Best I can do is send it a sigkill.
+- * perhaps it should be put in the status that it wants to
+- * exit.
+- */
+- case PTRACE_KILL: {
++ freg = (freg & 0xfffffffful) | (data << 32);
++ ret = ptrace_put_reg(child, numReg, freg);
++ } else {
++ flush_fp_to_thread(child);
++ ((unsigned int *)child->thread.regs)[index] = data;
+ ret = 0;
+- if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+- break;
+- child->exit_code = SIGKILL;
+- /* make sure the single step bit is not set. */
+- clear_single_step(child);
+- wake_up_process(child);
+- break;
+ }
+-
+- case PTRACE_SINGLESTEP: { /* set the trap flag. */
+- ret = -EIO;
+- if (!valid_signal(data))
+- break;
+- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+- set_single_step(child);
+- child->exit_code = data;
+- /* give it a chance to run. */
+- wake_up_process(child);
+- ret = 0;
+ break;
+ }
+
+@@ -334,95 +331,67 @@
+ break;
+ }
+
+- case PTRACE_SET_DEBUGREG:
+- ret = ptrace_set_debugreg(child, addr, data);
+- break;
+-
+- case PTRACE_DETACH:
+- ret = ptrace_detach(child, data);
++ case PTRACE_GETEVENTMSG:
++ ret = put_user(child->ptrace_message, (unsigned int __user *) data);
+ break;
+
+- case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+- unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+- for (i = 0; i < 32; i++) {
+- ret = put_user(*reg, tmp);
+- if (ret)
+- break;
+- reg++;
+- tmp++;
+- }
++ case PTRACE_GETREGS: { /* Get all pt_regs from the child. */
++ int ui;
++ if (!access_ok(VERIFY_WRITE, (void __user *)data,
++ PT_REGS_COUNT * sizeof(int))) {
++ ret = -EIO;
+ break;
+ }
+-
+- case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+- unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+- for (i = 0; i < 32; i++) {
+- ret = get_user(*reg, tmp);
+- if (ret)
+- break;
+- reg++;
+- tmp++;
++ ret = 0;
++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++ ret |= __put_user(ptrace_get_reg(child, ui),
++ (unsigned int __user *) data);
++ data += sizeof(int);
+ }
+ break;
+ }
+
+- case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+- unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+- flush_fp_to_thread(child);
+-
+- for (i = 0; i < 32; i++) {
+- ret = put_user(*reg, tmp);
+- if (ret)
+- break;
+- reg++;
+- tmp++;
+- }
++ case PTRACE_SETREGS: { /* Set all gp regs in the child. */
++ unsigned long tmp;
++ int ui;
++ if (!access_ok(VERIFY_READ, (void __user *)data,
++ PT_REGS_COUNT * sizeof(int))) {
++ ret = -EIO;
+ break;
+ }
+-
+- case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */
+- int i;
+- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+- unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+- flush_fp_to_thread(child);
+-
+- for (i = 0; i < 32; i++) {
+- ret = get_user(*reg, tmp);
++ ret = 0;
++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++ ret = __get_user(tmp, (unsigned int __user *) data);
+ if (ret)
+ break;
+- reg++;
+- tmp++;
++ ptrace_put_reg(child, ui, tmp);
++ data += sizeof(int);
+ }
+ break;
+ }
+
+- case PTRACE_GETEVENTMSG:
+- ret = put_user(child->ptrace_message, (unsigned int __user *) data);
+- break;
+-
+-#ifdef CONFIG_ALTIVEC
++ case PTRACE_GETFPREGS:
++ case PTRACE_SETFPREGS:
+ case PTRACE_GETVRREGS:
+- /* Get the child altivec register state. */
+- flush_altivec_to_thread(child);
+- ret = get_vrregs((unsigned long __user *)data, child);
++ case PTRACE_SETVRREGS:
++ case PTRACE_GETREGS64:
++ case PTRACE_SETREGS64:
++ case PPC_PTRACE_GETFPREGS:
++ case PPC_PTRACE_SETFPREGS:
++ case PTRACE_KILL:
++ case PTRACE_SINGLESTEP:
++ case PTRACE_DETACH:
++ case PTRACE_SET_DEBUGREG:
++ case PTRACE_SYSCALL:
++ case PTRACE_CONT:
++ ret = arch_ptrace(child, request, addr, data);
+ break;
+
+- case PTRACE_SETVRREGS:
+- /* Set the child altivec register state. */
+- flush_altivec_to_thread(child);
+- ret = set_vrregs(child, (unsigned long __user *)data);
++ /* Old reverse args ptrace calls */
++ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
++ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
++ ret = compat_ptrace_old(child, request, addr, data);
+ break;
+-#endif
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
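
The compat PEEKUSR/POKEUSR paths above treat each 64-bit GPR as a pair of
32-bit words: the 32-bit tracer addresses word `index`, and the kernel splices
that word into the high or low half of register `index / 2` depending on
parity. A minimal user-space sketch of the same splice (hypothetical helper
name, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Splice a 32-bit word into one half of a 64-bit register, the way
     * the compat POKEUSR path above does: odd word indices name the low
     * half, even indices the high half (big-endian layout). */
    static uint64_t splice_half(uint64_t reg, unsigned int index, uint32_t data)
    {
        if (index % 2)
            return (reg & ~0xffffffffULL) | data;
        return (reg & 0xffffffffULL) | ((uint64_t)data << 32);
    }

    int main(void)
    {
        uint64_t r = 0x1111111122222222ULL;
        printf("low:  %016llx\n", (unsigned long long)splice_half(r, 1, 0xdeadbeef));
        printf("high: %016llx\n", (unsigned long long)splice_half(r, 0, 0xdeadbeef));
        return 0;
    }
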
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c
+--- linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c 2007-12-21 15:36:11.000000000 -0500
+@@ -278,10 +278,8 @@
+ {
+ struct device_node *node;
+ struct pci_controller *phb;
+- unsigned int index;
+ struct device_node *root = of_find_node_by_path("/");
+
+- index = 0;
+ for (node = of_get_next_child(root, NULL);
+ node != NULL;
+ node = of_get_next_child(root, node)) {
+@@ -295,8 +293,7 @@
+ continue;
+ rtas_setup_phb(phb);
+ pci_process_bridge_OF_ranges(phb, node, 0);
+- pci_setup_phb_io(phb, index == 0);
+- index++;
++ isa_bridge_find_early(phb);
+ }
+
+ of_node_put(root);
+@@ -335,7 +332,7 @@
+ return 1;
+ }
+
+- rc = unmap_bus_range(b);
++ rc = pcibios_unmap_io_space(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __FUNCTION__, b->name);
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/setup_32.c linux-2.6.22-591/arch/powerpc/kernel/setup_32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/setup_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/setup_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -45,10 +45,6 @@
+
+ #define DBG(fmt...)
+
+-#if defined CONFIG_KGDB
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void bootx_init(unsigned long r4, unsigned long phys);
+
+ struct ide_machdep_calls ppc_ide_md;
+@@ -245,30 +241,16 @@
+
+ xmon_setup();
+
+-#if defined(CONFIG_KGDB)
+- if (ppc_md.kgdb_map_scc)
+- ppc_md.kgdb_map_scc();
+- set_debug_traps();
+- if (strstr(cmd_line, "gdb")) {
+- if (ppc_md.progress)
+- ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
+- printk("kgdb breakpoint activated\n");
+- breakpoint();
+- }
+-#endif
+-
+ /*
+ * Set cache line size based on type of cpu as a default.
+ * Systems with OF can look in the properties on the cpu node(s)
+ * for a possibly more accurate value.
+ */
+- if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) {
+ dcache_bsize = cur_cpu_spec->dcache_bsize;
+ icache_bsize = cur_cpu_spec->icache_bsize;
+ ucache_bsize = 0;
+- } else
+- ucache_bsize = dcache_bsize = icache_bsize
+- = cur_cpu_spec->dcache_bsize;
++ if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
++ ucache_bsize = icache_bsize = dcache_bsize;
+
+ /* reboot on panic */
+ panic_timeout = 180;
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.c linux-2.6.22-591/arch/powerpc/kernel/signal.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,180 @@
++/*
++ * Common signal handling code for both 32 and 64 bits
++ *
++ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
++ * Extracted from signal_32.c and signal_64.c
++ *
++ * This file is subject to the terms and conditions of the GNU General
++ * Public License. See the file README.legal in the main directory of
++ * this archive for more details.
++ */
++
++#include <linux/ptrace.h>
++#include <linux/signal.h>
++#include <asm/uaccess.h>
++#include <asm/unistd.h>
++
++#include "signal.h"
++
++/*
++ * Allocate space for the signal frame
++ */
++void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
++ size_t frame_size)
++{
++ unsigned long oldsp, newsp;
++
++ /* Default to using normal stack */
++ oldsp = regs->gpr[1];
++
++ /* Check for alt stack */
++ if ((ka->sa.sa_flags & SA_ONSTACK) &&
++ current->sas_ss_size && !on_sig_stack(oldsp))
++ oldsp = (current->sas_ss_sp + current->sas_ss_size);
++
++ /* Get aligned frame */
++ newsp = (oldsp - frame_size) & ~0xFUL;
++
++ /* Check access */
++ if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp))
++ return NULL;
++
++ return (void __user *)newsp;
++}
++
++
++/*
++ * Restore the user process's signal mask
++ */
++void restore_sigmask(sigset_t *set)
++{
++ sigdelsetmask(set, ~_BLOCKABLE);
++ spin_lock_irq(&current->sighand->siglock);
++ current->blocked = *set;
++ recalc_sigpending();
++ spin_unlock_irq(&current->sighand->siglock);
++}
++
++static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
++ int has_handler)
++{
++ unsigned long ret = regs->gpr[3];
++ int restart = 1;
++
++ /* syscall ? */
++ if (TRAP(regs) != 0x0C00)
++ return;
++
++ /* error signalled ? */
++ if (!(regs->ccr & 0x10000000))
++ return;
++
++ switch (ret) {
++ case ERESTART_RESTARTBLOCK:
++ case ERESTARTNOHAND:
++ /* ERESTARTNOHAND means that the syscall should only be
++ * restarted if there was no handler for the signal, and since
++ * we only get here if there is a handler, we don't restart.
++ */
++ restart = !has_handler;
++ break;
++ case ERESTARTSYS:
++ /* ERESTARTSYS means to restart the syscall if there is no
++ * handler or the handler was registered with SA_RESTART
++ */
++ restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0;
++ break;
++ case ERESTARTNOINTR:
++ /* ERESTARTNOINTR means that the syscall should be
++ * called again after the signal handler returns.
++ */
++ break;
++ default:
++ return;
++ }
++ if (restart) {
++ if (ret == ERESTART_RESTARTBLOCK)
++ regs->gpr[0] = __NR_restart_syscall;
++ else
++ regs->gpr[3] = regs->orig_gpr3;
++ regs->nip -= 4;
++ regs->result = 0;
++ } else {
++ regs->result = -EINTR;
++ regs->gpr[3] = EINTR;
++ regs->ccr |= 0x10000000;
++ }
++}
++
++int do_signal(sigset_t *oldset, struct pt_regs *regs)
++{
++ siginfo_t info;
++ int signr;
++ struct k_sigaction ka;
++ int ret;
++ int is32 = is_32bit_task();
++
++ if (test_thread_flag(TIF_RESTORE_SIGMASK))
++ oldset = &current->saved_sigmask;
++ else if (!oldset)
++ oldset = &current->blocked;
++
++ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
++
++ /* Is there any syscall restart business here? */
++ check_syscall_restart(regs, &ka, signr > 0);
++
++ if (signr <= 0) {
++ /* No signal to deliver -- put the saved sigmask back */
++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
++ clear_thread_flag(TIF_RESTORE_SIGMASK);
++ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
++ }
++ return 0; /* no signals delivered */
++ }
++
++ /*
++ * Reenable the DABR before delivering the signal to
++ * user space. The DABR will have been cleared if it
++ * triggered inside the kernel.
++ */
++ if (current->thread.dabr)
++ set_dabr(current->thread.dabr);
++
++ if (is32) {
++ if (ka.sa.sa_flags & SA_SIGINFO)
++ ret = handle_rt_signal32(signr, &ka, &info, oldset,
++ regs);
++ else
++ ret = handle_signal32(signr, &ka, &info, oldset,
++ regs);
++ } else {
++ ret = handle_rt_signal64(signr, &ka, &info, oldset, regs);
++ }
++
++ if (ret) {
++ spin_lock_irq(&current->sighand->siglock);
++ sigorsets(&current->blocked, &current->blocked,
++ &ka.sa.sa_mask);
++ if (!(ka.sa.sa_flags & SA_NODEFER))
++ sigaddset(&current->blocked, signr);
++ recalc_sigpending();
++ spin_unlock_irq(&current->sighand->siglock);
++
++ /*
++ * A signal was successfully delivered; the saved sigmask is in
++ * its frame, and we can clear the TIF_RESTORE_SIGMASK flag.
++ */
++ if (test_thread_flag(TIF_RESTORE_SIGMASK))
++ clear_thread_flag(TIF_RESTORE_SIGMASK);
++ }
++
++ return ret;
++}
++
++long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
++ unsigned long r5, unsigned long r6, unsigned long r7,
++ unsigned long r8, struct pt_regs *regs)
++{
++ return do_sigaltstack(uss, uoss, regs->gpr[1]);
++}
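
check_syscall_restart() above centralizes the restart policy that signal_32.c
and signal_64.c used to duplicate: ERESTARTNOINTR always restarts,
ERESTARTSYS restarts only when the handler was installed with SA_RESTART, and
ERESTARTNOHAND/ERESTART_RESTARTBLOCK never restart once a handler runs. The
user-visible half of that contract can be checked from user space; a small
POSIX demo (not kernel code):

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void on_alarm(int sig) { (void)sig; }

    /* Block in read(2) across a SIGALRM; with SA_RESTART the kernel
     * restarts the read (the ERESTARTSYS case above), without it the
     * call fails with EINTR. */
    static void run(int restart)
    {
        struct sigaction sa;
        int fds[2];
        char c;
        ssize_t n;

        memset(&sa, 0, sizeof sa);
        sa.sa_handler = on_alarm;
        sa.sa_flags = restart ? SA_RESTART : 0;
        sigaction(SIGALRM, &sa, NULL);

        pipe(fds);
        if (fork() == 0) {        /* writer arrives after the signal */
            sleep(2);
            write(fds[1], "x", 1);
            _exit(0);
        }
        alarm(1);                 /* interrupts the read at t=1s */
        n = read(fds[0], &c, 1);
        printf("SA_RESTART=%d: %s\n", restart,
               n == 1 ? "read restarted and completed" : strerror(errno));
        wait(NULL);
        close(fds[0]);
        close(fds[1]);
    }

    int main(void)
    {
        run(0);   /* expect EINTR */
        run(1);   /* expect a transparent restart */
        return 0;
    }
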
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.h linux-2.6.22-591/arch/powerpc/kernel/signal.h
+--- linux-2.6.22-570/arch/powerpc/kernel/signal.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/signal.h 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
++ * Extracted from signal_32.c and signal_64.c
++ *
++ * This file is subject to the terms and conditions of the GNU General
++ * Public License. See the file README.legal in the main directory of
++ * this archive for more details.
++ */
++
++#ifndef _POWERPC_ARCH_SIGNAL_H
++#define _POWERPC_ARCH_SIGNAL_H
++
++#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++
++extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
++ size_t frame_size);
++extern void restore_sigmask(sigset_t *set);
++
++extern int handle_signal32(unsigned long sig, struct k_sigaction *ka,
++ siginfo_t *info, sigset_t *oldset,
++ struct pt_regs *regs);
++
++extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
++ siginfo_t *info, sigset_t *oldset,
++ struct pt_regs *regs);
++
++
++#ifdef CONFIG_PPC64
++
++static inline int is_32bit_task(void)
++{
++ return test_thread_flag(TIF_32BIT);
++}
++
++extern int handle_rt_signal64(int signr, struct k_sigaction *ka,
++ siginfo_t *info, sigset_t *set,
++ struct pt_regs *regs);
++
++#else /* CONFIG_PPC64 */
++
++static inline int is_32bit_task(void)
++{
++ return 1;
++}
++
++static inline int handle_rt_signal64(int signr, struct k_sigaction *ka,
++ siginfo_t *info, sigset_t *set,
++ struct pt_regs *regs)
++{
++ return -EFAULT;
++}
++
++#endif /* !defined(CONFIG_PPC64) */
++
++#endif /* _POWERPC_ARCH_SIGNAL_H */
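
The stubs above are what let the merged do_signal() in signal.c compile on
both configurations: on PPC32, is_32bit_task() is the constant 1, so the
handle_rt_signal64() branch is statically dead and its -EFAULT stub is never
reached. The pattern in miniature (hypothetical names, user-space C):

    #include <stdio.h>

    #define IS_PPC64 0                /* flip for the "64-bit" build */

    static inline int is_32bit_task(void)
    {
    #if IS_PPC64
        return 0;                     /* stands in for test_thread_flag(TIF_32BIT) */
    #else
        return 1;                     /* a 32-bit kernel only runs 32-bit tasks */
    #endif
    }

    static int handle_signal32(void)    { puts("32-bit frame"); return 1; }
    static int handle_rt_signal64(void) { puts("64-bit frame"); return 1; }

    int main(void)
    {
        /* With is_32bit_task() constant-folded, the unused branch (and
         * its stub) can be discarded by the compiler. */
        return !(is_32bit_task() ? handle_signal32() : handle_rt_signal64());
    }
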
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_32.c linux-2.6.22-591/arch/powerpc/kernel/signal_32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/signal_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -51,12 +51,11 @@
+ #include <asm/pgtable.h>
+ #endif
+
+-#undef DEBUG_SIG
++#include "signal.h"
+
+-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++#undef DEBUG_SIG
+
+ #ifdef CONFIG_PPC64
+-#define do_signal do_signal32
+ #define sys_sigsuspend compat_sys_sigsuspend
+ #define sys_rt_sigsuspend compat_sys_rt_sigsuspend
+ #define sys_rt_sigreturn compat_sys_rt_sigreturn
+@@ -231,8 +230,6 @@
+
+ #endif /* CONFIG_PPC64 */
+
+-int do_signal(sigset_t *oldset, struct pt_regs *regs);
+-
+ /*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+@@ -251,14 +248,6 @@
+ return -ERESTARTNOHAND;
+ }
+
+-#ifdef CONFIG_PPC32
+-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, int r5,
+- int r6, int r7, int r8, struct pt_regs *regs)
+-{
+- return do_sigaltstack(uss, uoss, regs->gpr[1]);
+-}
+-#endif
+-
+ long sys_sigaction(int sig, struct old_sigaction __user *act,
+ struct old_sigaction __user *oact)
+ {
+@@ -293,14 +282,17 @@
+ /*
+ * When we have signals to deliver, we set up on the
+ * user stack, going down from the original stack pointer:
+- * a sigregs struct
++ * an ABI gap of 56 words
++ * an mcontext struct
+ * a sigcontext struct
+ * a gap of __SIGNAL_FRAMESIZE bytes
+ *
+- * Each of these things must be a multiple of 16 bytes in size.
++ * Each of these things must be a multiple of 16 bytes in size. The following
++ * structure represents all of this except the __SIGNAL_FRAMESIZE gap
+ *
+ */
+-struct sigregs {
++struct sigframe {
++ struct sigcontext sctx; /* the sigcontext */
+ struct mcontext mctx; /* all the register values */
+ /*
+ * Programs using the rs6000/xcoff abi can save up to 19 gp
+@@ -703,44 +695,22 @@
+ }
+ #endif /* CONFIG_PPC64 */
+
+-
+-/*
+- * Restore the user process's signal mask
+- */
+-#ifdef CONFIG_PPC64
+-extern void restore_sigmask(sigset_t *set);
+-#else /* CONFIG_PPC64 */
+-static void restore_sigmask(sigset_t *set)
+-{
+- sigdelsetmask(set, ~_BLOCKABLE);
+- spin_lock_irq(&current->sighand->siglock);
+- current->blocked = *set;
+- recalc_sigpending();
+- spin_unlock_irq(&current->sighand->siglock);
+-}
+-#endif
+-
+ /*
+ * Set up a signal frame for a "real-time" signal handler
+ * (one which gets siginfo).
+ */
+-static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
++int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset,
+- struct pt_regs *regs, unsigned long newsp)
++ struct pt_regs *regs)
+ {
+ struct rt_sigframe __user *rt_sf;
+ struct mcontext __user *frame;
+- unsigned long origsp = newsp;
++ unsigned long newsp = 0;
+
+ /* Set up Signal Frame */
+ /* Put a Real Time Context onto stack */
+- newsp -= sizeof(*rt_sf);
+- rt_sf = (struct rt_sigframe __user *)newsp;
+-
+- /* create a stack frame for the caller of the handler */
+- newsp -= __SIGNAL_FRAMESIZE + 16;
+-
+- if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp))
++ rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf));
++ if (unlikely(rt_sf == NULL))
+ goto badframe;
+
+ /* Put the siginfo & fill in most of the ucontext */
+@@ -770,8 +740,12 @@
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
++ /* create a stack frame for the caller of the handler */
++ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
++
++ /* Fill registers for signal handler */
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = sig;
+ regs->gpr[4] = (unsigned long) &rt_sf->info;
+@@ -1015,27 +989,18 @@
+ /*
+ * OK, we're invoking a handler
+ */
+-static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs,
+- unsigned long newsp)
++int handle_signal32(unsigned long sig, struct k_sigaction *ka,
++ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+ {
+ struct sigcontext __user *sc;
+- struct sigregs __user *frame;
+- unsigned long origsp = newsp;
++ struct sigframe __user *frame;
++ unsigned long newsp = 0;
+
+ /* Set up Signal Frame */
+- newsp -= sizeof(struct sigregs);
+- frame = (struct sigregs __user *) newsp;
+-
+- /* Put a sigcontext on the stack */
+- newsp -= sizeof(*sc);
+- sc = (struct sigcontext __user *) newsp;
+-
+- /* create a stack frame for the caller of the handler */
+- newsp -= __SIGNAL_FRAMESIZE;
+-
+- if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp))
++ frame = get_sigframe(ka, regs, sizeof(*frame));
++ if (unlikely(frame == NULL))
+ goto badframe;
++ sc = (struct sigcontext __user *) &frame->sctx;
+
+ #if _NSIG != 64
+ #error "Please adjust handle_signal()"
+@@ -1047,7 +1012,7 @@
+ #else
+ || __put_user(oldset->sig[1], &sc->_unused[3])
+ #endif
+- || __put_user(to_user_ptr(frame), &sc->regs)
++ || __put_user(to_user_ptr(&frame->mctx), &sc->regs)
+ || __put_user(sig, &sc->signal))
+ goto badframe;
+
+@@ -1063,8 +1028,11 @@
+
+ current->thread.fpscr.val = 0; /* turn off all fp exceptions */
+
++ /* create a stack frame for the caller of the handler */
++ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
++
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = sig;
+ regs->gpr[4] = (unsigned long) sc;
+@@ -1126,106 +1094,3 @@
+ force_sig(SIGSEGV, current);
+ return 0;
+ }
+-
+-/*
+- * Note that 'init' is a special process: it doesn't get signals it doesn't
+- * want to handle. Thus you cannot kill init even with a SIGKILL even by
+- * mistake.
+- */
+-int do_signal(sigset_t *oldset, struct pt_regs *regs)
+-{
+- siginfo_t info;
+- struct k_sigaction ka;
+- unsigned int newsp;
+- int signr, ret;
+-
+-#ifdef CONFIG_PPC32
+- if (try_to_freeze()) {
+- signr = 0;
+- if (!signal_pending(current))
+- goto no_signal;
+- }
+-#endif
+-
+- if (test_thread_flag(TIF_RESTORE_SIGMASK))
+- oldset = &current->saved_sigmask;
+- else if (!oldset)
+- oldset = &current->blocked;
+-
+- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+-#ifdef CONFIG_PPC32
+-no_signal:
+-#endif
+- if (TRAP(regs) == 0x0C00 /* System Call! */
+- && regs->ccr & 0x10000000 /* error signalled */
+- && ((ret = regs->gpr[3]) == ERESTARTSYS
+- || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR
+- || ret == ERESTART_RESTARTBLOCK)) {
+-
+- if (signr > 0
+- && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK
+- || (ret == ERESTARTSYS
+- && !(ka.sa.sa_flags & SA_RESTART)))) {
+- /* make the system call return an EINTR error */
+- regs->result = -EINTR;
+- regs->gpr[3] = EINTR;
+- /* note that the cr0.SO bit is already set */
+- } else {
+- regs->nip -= 4; /* Back up & retry system call */
+- regs->result = 0;
+- regs->trap = 0;
+- if (ret == ERESTART_RESTARTBLOCK)
+- regs->gpr[0] = __NR_restart_syscall;
+- else
+- regs->gpr[3] = regs->orig_gpr3;
+- }
+- }
+-
+- if (signr == 0) {
+- /* No signal to deliver -- put the saved sigmask back */
+- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+- clear_thread_flag(TIF_RESTORE_SIGMASK);
+- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+- }
+- return 0; /* no signals delivered */
+- }
+-
+- if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size
+- && !on_sig_stack(regs->gpr[1]))
+- newsp = current->sas_ss_sp + current->sas_ss_size;
+- else
+- newsp = regs->gpr[1];
+- newsp &= ~0xfUL;
+-
+-#ifdef CONFIG_PPC64
+- /*
+- * Reenable the DABR before delivering the signal to
+- * user space. The DABR will have been cleared if it
+- * triggered inside the kernel.
+- */
+- if (current->thread.dabr)
+- set_dabr(current->thread.dabr);
+-#endif
+-
+- /* Whee! Actually deliver the signal. */
+- if (ka.sa.sa_flags & SA_SIGINFO)
+- ret = handle_rt_signal(signr, &ka, &info, oldset, regs, newsp);
+- else
+- ret = handle_signal(signr, &ka, &info, oldset, regs, newsp);
+-
+- if (ret) {
+- spin_lock_irq(&current->sighand->siglock);
+- sigorsets(&current->blocked, &current->blocked,
+- &ka.sa.sa_mask);
+- if (!(ka.sa.sa_flags & SA_NODEFER))
+- sigaddset(&current->blocked, signr);
+- recalc_sigpending();
+- spin_unlock_irq(&current->sighand->siglock);
+- /* A signal was successfully delivered; the saved sigmask is in
+- its frame, and we can clear the TIF_RESTORE_SIGMASK flag */
+- if (test_thread_flag(TIF_RESTORE_SIGMASK))
+- clear_thread_flag(TIF_RESTORE_SIGMASK);
+- }
+-
+- return ret;
+-}
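
Folding the sigcontext and mcontext into one struct sigframe means the 32-bit
path now carves its whole frame with a single get_sigframe() call: push
sizeof(struct sigframe) below the old stack pointer, round down to 16 bytes,
then drop the caller's dummy frame below that. The arithmetic in isolation
(illustrative values; ppc32's __SIGNAL_FRAMESIZE is 64, the frame size is an
assumed figure):

    #include <stdint.h>
    #include <stdio.h>

    #define __SIGNAL_FRAMESIZE 64              /* ppc32 ABI back-chain frame */

    /* Mirror get_sigframe(): frame_size bytes below oldsp, 16-aligned. */
    static uintptr_t sigframe_addr(uintptr_t oldsp, size_t frame_size)
    {
        return (oldsp - frame_size) & ~(uintptr_t)0xF;
    }

    int main(void)
    {
        uintptr_t oldsp = 0xbfff1234;          /* hypothetical user SP */
        uintptr_t sf = sigframe_addr(oldsp, 0x2e0);  /* assumed sizeof(struct sigframe) */
        uintptr_t newsp = sf - __SIGNAL_FRAMESIZE;   /* handler's gpr[1] */

        printf("oldsp=%#lx sigframe=%#lx newsp=%#lx\n",
               (unsigned long)oldsp, (unsigned long)sf, (unsigned long)newsp);
        return 0;
    }
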
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_64.c linux-2.6.22-591/arch/powerpc/kernel/signal_64.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/kernel/signal_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,9 +34,9 @@
+ #include <asm/syscalls.h>
+ #include <asm/vdso.h>
+
+-#define DEBUG_SIG 0
++#include "signal.h"
+
+-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++#define DEBUG_SIG 0
+
+ #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+ #define FP_REGS_SIZE sizeof(elf_fpregset_t)
+@@ -64,14 +64,6 @@
+ char abigap[288];
+ } __attribute__ ((aligned (16)));
+
+-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5,
+- unsigned long r6, unsigned long r7, unsigned long r8,
+- struct pt_regs *regs)
+-{
+- return do_sigaltstack(uss, uoss, regs->gpr[1]);
+-}
+-
+-
+ /*
+ * Set up the sigcontext for the signal frame.
+ */
+@@ -208,25 +200,6 @@
+ }
+
+ /*
+- * Allocate space for the signal frame
+- */
+-static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+- size_t frame_size)
+-{
+- unsigned long newsp;
+-
+- /* Default to using normal stack */
+- newsp = regs->gpr[1];
+-
+- if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) {
+- if (! on_sig_stack(regs->gpr[1]))
+- newsp = (current->sas_ss_sp + current->sas_ss_size);
+- }
+-
+- return (void __user *)((newsp - frame_size) & -16ul);
+-}
+-
+-/*
+ * Setup the trampoline code on the stack
+ */
+ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+@@ -253,19 +226,6 @@
+ }
+
+ /*
+- * Restore the user process's signal mask (also used by signal32.c)
+- */
+-void restore_sigmask(sigset_t *set)
+-{
+- sigdelsetmask(set, ~_BLOCKABLE);
+- spin_lock_irq(&current->sighand->siglock);
+- current->blocked = *set;
+- recalc_sigpending();
+- spin_unlock_irq(&current->sighand->siglock);
+-}
+-
+-
+-/*
+ * Handle {get,set,swap}_context operations
+ */
+ int sys_swapcontext(struct ucontext __user *old_ctx,
+@@ -359,7 +319,7 @@
+ return 0;
+ }
+
+-static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
++int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+ {
+ /* Handler is *really* a pointer to the function descriptor for
+@@ -373,8 +333,7 @@
+ long err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+-
+- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
++ if (unlikely(frame == NULL))
+ goto badframe;
+
+ err |= __put_user(&frame->info, &frame->pinfo);
+@@ -411,7 +370,7 @@
+ funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
+
+ /* Allocate a dummy caller frame for the signal handler. */
+- newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE;
++ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
+
+ /* Set up "regs" so we "return" to the signal handler. */
+@@ -442,134 +401,3 @@
+ force_sigsegv(signr, current);
+ return 0;
+ }
+-
+-
+-/*
+- * OK, we're invoking a handler
+- */
+-static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+-{
+- int ret;
+-
+- /* Set up Signal Frame */
+- ret = setup_rt_frame(sig, ka, info, oldset, regs);
+-
+- if (ret) {
+- spin_lock_irq(&current->sighand->siglock);
+- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+- if (!(ka->sa.sa_flags & SA_NODEFER))
+- sigaddset(&current->blocked,sig);
+- recalc_sigpending();
+- spin_unlock_irq(&current->sighand->siglock);
+- }
+-
+- return ret;
+-}
+-
+-static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
+-{
+- switch ((int)regs->result) {
+- case -ERESTART_RESTARTBLOCK:
+- case -ERESTARTNOHAND:
+- /* ERESTARTNOHAND means that the syscall should only be
+- * restarted if there was no handler for the signal, and since
+- * we only get here if there is a handler, we dont restart.
+- */
+- regs->result = -EINTR;
+- regs->gpr[3] = EINTR;
+- regs->ccr |= 0x10000000;
+- break;
+- case -ERESTARTSYS:
+- /* ERESTARTSYS means to restart the syscall if there is no
+- * handler or the handler was registered with SA_RESTART
+- */
+- if (!(ka->sa.sa_flags & SA_RESTART)) {
+- regs->result = -EINTR;
+- regs->gpr[3] = EINTR;
+- regs->ccr |= 0x10000000;
+- break;
+- }
+- /* fallthrough */
+- case -ERESTARTNOINTR:
+- /* ERESTARTNOINTR means that the syscall should be
+- * called again after the signal handler returns.
+- */
+- regs->gpr[3] = regs->orig_gpr3;
+- regs->nip -= 4;
+- regs->result = 0;
+- break;
+- }
+-}
+-
+-/*
+- * Note that 'init' is a special process: it doesn't get signals it doesn't
+- * want to handle. Thus you cannot kill init even with a SIGKILL even by
+- * mistake.
+- */
+-int do_signal(sigset_t *oldset, struct pt_regs *regs)
+-{
+- siginfo_t info;
+- int signr;
+- struct k_sigaction ka;
+-
+- /*
+- * If the current thread is 32 bit - invoke the
+- * 32 bit signal handling code
+- */
+- if (test_thread_flag(TIF_32BIT))
+- return do_signal32(oldset, regs);
+-
+- if (test_thread_flag(TIF_RESTORE_SIGMASK))
+- oldset = &current->saved_sigmask;
+- else if (!oldset)
+- oldset = &current->blocked;
+-
+- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+- if (signr > 0) {
+- int ret;
+-
+- /* Whee! Actually deliver the signal. */
+- if (TRAP(regs) == 0x0C00)
+- syscall_restart(regs, &ka);
+-
+- /*
+- * Reenable the DABR before delivering the signal to
+- * user space. The DABR will have been cleared if it
+- * triggered inside the kernel.
+- */
+- if (current->thread.dabr)
+- set_dabr(current->thread.dabr);
+-
+- ret = handle_signal(signr, &ka, &info, oldset, regs);
+-
+- /* If a signal was successfully delivered, the saved sigmask is in
+- its frame, and we can clear the TIF_RESTORE_SIGMASK flag */
+- if (ret && test_thread_flag(TIF_RESTORE_SIGMASK))
+- clear_thread_flag(TIF_RESTORE_SIGMASK);
+-
+- return ret;
+- }
+-
+- if (TRAP(regs) == 0x0C00) { /* System Call! */
+- if ((int)regs->result == -ERESTARTNOHAND ||
+- (int)regs->result == -ERESTARTSYS ||
+- (int)regs->result == -ERESTARTNOINTR) {
+- regs->gpr[3] = regs->orig_gpr3;
+- regs->nip -= 4; /* Back up & retry system call */
+- regs->result = 0;
+- } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) {
+- regs->gpr[0] = __NR_restart_syscall;
+- regs->nip -= 4;
+- regs->result = 0;
+- }
+- }
+- /* No signal to deliver -- put the saved sigmask back */
+- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+- clear_thread_flag(TIF_RESTORE_SIGMASK);
+- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+- }
+-
+- return 0;
+-}
+-EXPORT_SYMBOL(do_signal);
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -773,6 +773,13 @@
+ return sys_truncate(path, (high << 32) | low);
+ }
+
++asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
++ u32 lenhi, u32 lenlo)
++{
++ return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
++ ((loff_t)lenhi << 32) | lenlo);
++}
++
+ asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
+ unsigned long low)
+ {
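
compat_sys_fallocate() above uses the usual convention for squeezing a 64-bit
loff_t through the 32-bit syscall ABI: user space passes each value as a
hi/lo pair of 32-bit registers and the kernel reassembles them with a shift
and an OR. The round trip, checked in isolation:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t off = 0x123456789abLL;       /* > 4GiB, needs both halves */
        uint32_t hi = (uint64_t)off >> 32;   /* what the 32-bit caller loads */
        uint32_t lo = (uint32_t)off;

        /* what the compat wrapper computes */
        int64_t joined = ((int64_t)hi << 32) | lo;
        assert(joined == off);
        return 0;
    }
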
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/vdso.c linux-2.6.22-591/arch/powerpc/kernel/vdso.c
+--- linux-2.6.22-570/arch/powerpc/kernel/vdso.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/kernel/vdso.c 2007-12-21 15:36:11.000000000 -0500
+@@ -671,7 +671,7 @@
+ /*
+ * Fill up the "systemcfg" stuff for backward compatiblity
+ */
+- strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
++ strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ vdso_data->version.major = SYSTEMCFG_MAJOR;
+ vdso_data->version.minor = SYSTEMCFG_MINOR;
+ vdso_data->processor = mfspr(SPRN_PVR);
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -12,7 +12,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -9,7 +9,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/Makefile linux-2.6.22-591/arch/powerpc/mm/Makefile
+--- linux-2.6.22-570/arch/powerpc/mm/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -11,8 +11,7 @@
+ hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
+ obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \
+ hash_utils_64.o hash_low_64.o tlb_64.o \
+- slb_low.o slb.o stab.o mmap.o imalloc.o \
+- $(hash-y)
++ slb_low.o slb.o stab.o mmap.o $(hash-y)
+ obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o
+ obj-$(CONFIG_40x) += 4xx_mmu.o
+ obj-$(CONFIG_44x) += 44x_mmu.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fault.c linux-2.6.22-591/arch/powerpc/mm/fault.c
+--- linux-2.6.22-570/arch/powerpc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/module.h>
+ #include <linux/kprobes.h>
+ #include <linux/kdebug.h>
++#include <linux/kgdb.h>
+
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -381,7 +382,7 @@
+ printk("VM: killing process %s(%d:#%u)\n",
+ current->comm, current->pid, current->xid);
+ if (user_mode(regs))
+- do_exit(SIGKILL);
++ do_group_exit(SIGKILL);
+ return SIGKILL;
+
+ do_sigbus:
+@@ -412,6 +413,13 @@
+ return;
+ }
+
++#ifdef CONFIG_KGDB
++ if (atomic_read(&debugger_active) && kgdb_may_fault)
++ /* Restore our previous state. */
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Not reached. */
++#endif
++
+ /* kernel has accessed a bad area */
+
+ switch (regs->trap) {
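
The hook above gives kgdb a way to survive its own bad memory probes: if a
kernel fault happens while the debugger is active, the handler longjmps back
to a saved context instead of falling through to the oops path. The
recover-by-longjmp idea looks like this in user space (an analogy using
sigsetjmp, not the kgdb implementation):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>

    static sigjmp_buf probe_env;

    static void on_segv(int sig)
    {
        (void)sig;
        siglongjmp(probe_env, 1);    /* "kgdb_fault_longjmp" analogue */
    }

    /* Try to read *addr; report failure instead of crashing. */
    static int probe_read(const volatile int *addr, int *out)
    {
        struct sigaction sa = { .sa_handler = on_segv };

        sigaction(SIGSEGV, &sa, NULL);
        if (sigsetjmp(probe_env, 1))
            return -1;               /* we faulted and jumped back here */
        *out = *addr;
        return 0;
    }

    int main(void)
    {
        int ok = 42, val;

        printf("good address: %d\n", probe_read(&ok, &val) ? -1 : val);
        printf("bad address: %s\n",
               probe_read((int *)8, &val) ? "fault caught" : "??");
        return 0;
    }
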
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -14,7 +14,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -104,7 +104,7 @@
+ spin_unlock(&native_tlbie_lock);
+ }
+
+-static inline void native_lock_hpte(hpte_t *hptep)
++static inline void native_lock_hpte(struct hash_pte *hptep)
+ {
+ unsigned long *word = &hptep->v;
+
+@@ -116,7 +116,7 @@
+ }
+ }
+
+-static inline void native_unlock_hpte(hpte_t *hptep)
++static inline void native_unlock_hpte(struct hash_pte *hptep)
+ {
+ unsigned long *word = &hptep->v;
+
+@@ -128,7 +128,7 @@
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
+ {
+- hpte_t *hptep = htab_address + hpte_group;
++ struct hash_pte *hptep = htab_address + hpte_group;
+ unsigned long hpte_v, hpte_r;
+ int i;
+
+@@ -177,7 +177,7 @@
+
+ static long native_hpte_remove(unsigned long hpte_group)
+ {
+- hpte_t *hptep;
++ struct hash_pte *hptep;
+ int i;
+ int slot_offset;
+ unsigned long hpte_v;
+@@ -217,7 +217,7 @@
+ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
+ {
+- hpte_t *hptep = htab_address + slot;
++ struct hash_pte *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
+
+@@ -233,15 +233,14 @@
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+- native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
+- native_unlock_hpte(hptep);
+ }
++ native_unlock_hpte(hptep);
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+@@ -251,7 +250,7 @@
+
+ static long native_hpte_find(unsigned long va, int psize)
+ {
+- hpte_t *hptep;
++ struct hash_pte *hptep;
+ unsigned long hash;
+ unsigned long i, j;
+ long slot;
+@@ -294,7 +293,7 @@
+ {
+ unsigned long vsid, va;
+ long slot;
+- hpte_t *hptep;
++ struct hash_pte *hptep;
+
+ vsid = get_kernel_vsid(ea);
+ va = (vsid << 28) | (ea & 0x0fffffff);
+@@ -315,7 +314,7 @@
+ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
+ int psize, int local)
+ {
+- hpte_t *hptep = htab_address + slot;
++ struct hash_pte *hptep = htab_address + slot;
+ unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+@@ -345,7 +344,7 @@
+ #define LP_BITS 8
+ #define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
+
+-static void hpte_decode(hpte_t *hpte, unsigned long slot,
++static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
+ int *psize, unsigned long *va)
+ {
+ unsigned long hpte_r = hpte->r;
+@@ -415,7 +414,7 @@
+ static void native_hpte_clear(void)
+ {
+ unsigned long slot, slots, flags;
+- hpte_t *hptep = htab_address;
++ struct hash_pte *hptep = htab_address;
+ unsigned long hpte_v, va;
+ unsigned long pteg_count;
+ int psize;
+@@ -462,7 +461,7 @@
+ static void native_flush_hash_range(unsigned long number, int local)
+ {
+ unsigned long va, hash, index, hidx, shift, slot;
+- hpte_t *hptep;
++ struct hash_pte *hptep;
+ unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
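
native_lock_hpte() spins on a lock flag stored inside the HPTE's first
doubleword itself; there is no room for a separate lock per hash-table slot,
so a reserved software bit in hptep->v doubles as one. The updatepp hunk
above then hoists the single native_unlock_hpte() past the if/else so both
the hit and miss paths unlock exactly once. The bit-spinlock idea, reduced to
portable C11 (bit position illustrative; the kernel uses test_and_set_bit):

    #include <stdatomic.h>
    #include <stdio.h>

    #define LOCK_BIT (1UL << 6)      /* HPTE_V_LOCK-style flag, position illustrative */

    /* Spin until we atomically set the lock bit in *word. */
    static void lock_word(_Atomic unsigned long *word)
    {
        unsigned long old;
        do {
            old = atomic_fetch_or(word, LOCK_BIT);
        } while (old & LOCK_BIT);    /* someone else held it; retry */
    }

    static void unlock_word(_Atomic unsigned long *word)
    {
        atomic_fetch_and(word, ~LOCK_BIT);
    }

    int main(void)
    {
        _Atomic unsigned long v = 0;

        lock_word(&v);
        /* ... update the protected entry ... */
        unlock_word(&v);
        printf("v = %#lx\n", (unsigned long)atomic_load(&v));
        return 0;
    }
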
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -87,7 +87,7 @@
+ static unsigned long _SDR1;
+ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+-hpte_t *htab_address;
++struct hash_pte *htab_address;
+ unsigned long htab_size_bytes;
+ unsigned long htab_hash_mask;
+ int mmu_linear_psize = MMU_PAGE_4K;
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/imalloc.c linux-2.6.22-591/arch/powerpc/mm/imalloc.c
+--- linux-2.6.22-570/arch/powerpc/mm/imalloc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/imalloc.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,313 +0,0 @@
+-/*
+- * c 2001 PPC 64 Team, IBM Corp
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <linux/slab.h>
+-#include <linux/vmalloc.h>
+-
+-#include <asm/uaccess.h>
+-#include <asm/pgalloc.h>
+-#include <asm/pgtable.h>
+-#include <linux/mutex.h>
+-#include <asm/cacheflush.h>
+-
+-#include "mmu_decl.h"
+-
+-static DEFINE_MUTEX(imlist_mutex);
+-struct vm_struct * imlist = NULL;
+-
+-static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
+-{
+- unsigned long addr;
+- struct vm_struct **p, *tmp;
+-
+- addr = ioremap_bot;
+- for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
+- if (size + addr < (unsigned long) tmp->addr)
+- break;
+- if ((unsigned long)tmp->addr >= ioremap_bot)
+- addr = tmp->size + (unsigned long) tmp->addr;
+- if (addr >= IMALLOC_END-size)
+- return 1;
+- }
+- *im_addr = addr;
+-
+- return 0;
+-}
+-
+-/* Return whether the region described by v_addr and size is a subset
+- * of the region described by parent
+- */
+-static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
+- struct vm_struct *parent)
+-{
+- return (int) (v_addr >= (unsigned long) parent->addr &&
+- v_addr < (unsigned long) parent->addr + parent->size &&
+- size < parent->size);
+-}
+-
+-/* Return whether the region described by v_addr and size is a superset
+- * of the region described by child
+- */
+-static int im_region_is_superset(unsigned long v_addr, unsigned long size,
+- struct vm_struct *child)
+-{
+- struct vm_struct parent;
+-
+- parent.addr = (void *) v_addr;
+- parent.size = size;
+-
+- return im_region_is_subset((unsigned long) child->addr, child->size,
+- &parent);
+-}
+-
+-/* Return whether the region described by v_addr and size overlaps
+- * the region described by vm. Overlapping regions meet the
+- * following conditions:
+- * 1) The regions share some part of the address space
+- * 2) The regions aren't identical
+- * 3) Neither region is a subset of the other
+- */
+-static int im_region_overlaps(unsigned long v_addr, unsigned long size,
+- struct vm_struct *vm)
+-{
+- if (im_region_is_superset(v_addr, size, vm))
+- return 0;
+-
+- return (v_addr + size > (unsigned long) vm->addr + vm->size &&
+- v_addr < (unsigned long) vm->addr + vm->size) ||
+- (v_addr < (unsigned long) vm->addr &&
+- v_addr + size > (unsigned long) vm->addr);
+-}
+-
+-/* Determine imalloc status of region described by v_addr and size.
+- * Can return one of the following:
+- * IM_REGION_UNUSED - Entire region is unallocated in imalloc space.
+- * IM_REGION_SUBSET - Region is a subset of a region that is already
+- * allocated in imalloc space.
+- * vm will be assigned to a ptr to the parent region.
+- * IM_REGION_EXISTS - Exact region already allocated in imalloc space.
+- * vm will be assigned to a ptr to the existing imlist
+- * member.
+- * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space.
+- * IM_REGION_SUPERSET - Region is a superset of a region that is already
+- * allocated in imalloc space.
+- */
+-static int im_region_status(unsigned long v_addr, unsigned long size,
+- struct vm_struct **vm)
+-{
+- struct vm_struct *tmp;
+-
+- for (tmp = imlist; tmp; tmp = tmp->next)
+- if (v_addr < (unsigned long) tmp->addr + tmp->size)
+- break;
+-
+- *vm = NULL;
+- if (tmp) {
+- if (im_region_overlaps(v_addr, size, tmp))
+- return IM_REGION_OVERLAP;
+-
+- *vm = tmp;
+- if (im_region_is_subset(v_addr, size, tmp)) {
+- /* Return with tmp pointing to superset */
+- return IM_REGION_SUBSET;
+- }
+- if (im_region_is_superset(v_addr, size, tmp)) {
+- /* Return with tmp pointing to first subset */
+- return IM_REGION_SUPERSET;
+- }
+- else if (v_addr == (unsigned long) tmp->addr &&
+- size == tmp->size) {
+- /* Return with tmp pointing to exact region */
+- return IM_REGION_EXISTS;
+- }
+- }
+-
+- return IM_REGION_UNUSED;
+-}
+-
+-static struct vm_struct * split_im_region(unsigned long v_addr,
+- unsigned long size, struct vm_struct *parent)
+-{
+- struct vm_struct *vm1 = NULL;
+- struct vm_struct *vm2 = NULL;
+- struct vm_struct *new_vm = NULL;
+-
+- vm1 = kmalloc(sizeof(*vm1), GFP_KERNEL);
+- if (vm1 == NULL) {
+- printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+- return NULL;
+- }
+-
+- if (v_addr == (unsigned long) parent->addr) {
+- /* Use existing parent vm_struct to represent child, allocate
+- * new one for the remainder of parent range
+- */
+- vm1->size = parent->size - size;
+- vm1->addr = (void *) (v_addr + size);
+- vm1->next = parent->next;
+-
+- parent->size = size;
+- parent->next = vm1;
+- new_vm = parent;
+- } else if (v_addr + size == (unsigned long) parent->addr +
+- parent->size) {
+- /* Allocate new vm_struct to represent child, use existing
+- * parent one for remainder of parent range
+- */
+- vm1->size = size;
+- vm1->addr = (void *) v_addr;
+- vm1->next = parent->next;
+- new_vm = vm1;
+-
+- parent->size -= size;
+- parent->next = vm1;
+- } else {
+- /* Allocate two new vm_structs for the new child and
+- * uppermost remainder, and use existing parent one for the
+- * lower remainder of parent range
+- */
+- vm2 = kmalloc(sizeof(*vm2), GFP_KERNEL);
+- if (vm2 == NULL) {
+- printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+- kfree(vm1);
+- return NULL;
+- }
+-
+- vm1->size = size;
+- vm1->addr = (void *) v_addr;
+- vm1->next = vm2;
+- new_vm = vm1;
+-
+- vm2->size = ((unsigned long) parent->addr + parent->size) -
+- (v_addr + size);
+- vm2->addr = (void *) v_addr + size;
+- vm2->next = parent->next;
+-
+- parent->size = v_addr - (unsigned long) parent->addr;
+- parent->next = vm1;
+- }
+-
+- return new_vm;
+-}
+-
+-static struct vm_struct * __add_new_im_area(unsigned long req_addr,
+- unsigned long size)
+-{
+- struct vm_struct **p, *tmp, *area;
+-
+- for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
+- if (req_addr + size <= (unsigned long)tmp->addr)
+- break;
+- }
+-
+- area = kmalloc(sizeof(*area), GFP_KERNEL);
+- if (!area)
+- return NULL;
+- area->flags = 0;
+- area->addr = (void *)req_addr;
+- area->size = size;
+- area->next = *p;
+- *p = area;
+-
+- return area;
+-}
+-
+-static struct vm_struct * __im_get_area(unsigned long req_addr,
+- unsigned long size,
+- int criteria)
+-{
+- struct vm_struct *tmp;
+- int status;
+-
+- status = im_region_status(req_addr, size, &tmp);
+- if ((criteria & status) == 0) {
+- return NULL;
+- }
+-
+- switch (status) {
+- case IM_REGION_UNUSED:
+- tmp = __add_new_im_area(req_addr, size);
+- break;
+- case IM_REGION_SUBSET:
+- tmp = split_im_region(req_addr, size, tmp);
+- break;
+- case IM_REGION_EXISTS:
+- /* Return requested region */
+- break;
+- case IM_REGION_SUPERSET:
+- /* Return first existing subset of requested region */
+- break;
+- default:
+- printk(KERN_ERR "%s() unexpected imalloc region status\n",
+- __FUNCTION__);
+- tmp = NULL;
+- }
+-
+- return tmp;
+-}
+-
+-struct vm_struct * im_get_free_area(unsigned long size)
+-{
+- struct vm_struct *area;
+- unsigned long addr;
+-
+- mutex_lock(&imlist_mutex);
+- if (get_free_im_addr(size, &addr)) {
+- printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
+- __FUNCTION__, size);
+- area = NULL;
+- goto next_im_done;
+- }
+-
+- area = __im_get_area(addr, size, IM_REGION_UNUSED);
+- if (area == NULL) {
+- printk(KERN_ERR
+- "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
+- __FUNCTION__, addr, size);
+- }
+-next_im_done:
+- mutex_unlock(&imlist_mutex);
+- return area;
+-}
+-
+-struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+- int criteria)
+-{
+- struct vm_struct *area;
+-
+- mutex_lock(&imlist_mutex);
+- area = __im_get_area(v_addr, size, criteria);
+- mutex_unlock(&imlist_mutex);
+- return area;
+-}
+-
+-void im_free(void * addr)
+-{
+- struct vm_struct **p, *tmp;
+-
+- if (!addr)
+- return;
+- if ((unsigned long) addr & ~PAGE_MASK) {
+- printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr);
+- return;
+- }
+- mutex_lock(&imlist_mutex);
+- for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
+- if (tmp->addr == addr) {
+- *p = tmp->next;
+- unmap_vm_area(tmp);
+- kfree(tmp);
+- mutex_unlock(&imlist_mutex);
+- return;
+- }
+- }
+- mutex_unlock(&imlist_mutex);
+- printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
+- addr);
+-}
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_32.c linux-2.6.22-591/arch/powerpc/mm/init_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/init_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/init_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -5,7 +5,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ * Derived from "arch/i386/mm/init.c"
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_64.c linux-2.6.22-591/arch/powerpc/mm/init_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/init_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/init_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -5,7 +5,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mem.c linux-2.6.22-591/arch/powerpc/mm/mem.c
+--- linux-2.6.22-570/arch/powerpc/mm/mem.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/mem.c 2007-12-21 15:36:11.000000000 -0500
+@@ -5,7 +5,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ * Derived from "arch/i386/mm/init.c"
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -11,7 +11,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h
+--- linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h 2007-12-21 15:36:11.000000000 -0500
+@@ -8,7 +8,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+@@ -40,8 +39,8 @@
+ extern unsigned long ioremap_base;
+ extern unsigned int rtas_data, rtas_size;
+
+-struct _PTE;
+-extern struct _PTE *Hash, *Hash_end;
++struct hash_pte;
++extern struct hash_pte *Hash, *Hash_end;
+ extern unsigned long Hash_size, Hash_mask;
+
+ extern unsigned int num_tlbcam_entries;
+@@ -90,16 +89,4 @@
+ else
+ _tlbie(va);
+ }
+-#else /* CONFIG_PPC64 */
+-/* imalloc region types */
+-#define IM_REGION_UNUSED 0x1
+-#define IM_REGION_SUBSET 0x2
+-#define IM_REGION_EXISTS 0x4
+-#define IM_REGION_OVERLAP 0x8
+-#define IM_REGION_SUPERSET 0x10
+-
+-extern struct vm_struct * im_get_free_area(unsigned long size);
+-extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+- int region_type);
+-extern void im_free(void *addr);
+ #endif
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -8,7 +8,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+@@ -37,7 +36,6 @@
+ unsigned long ioremap_base;
+ unsigned long ioremap_bot;
+ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
+-int io_bat_index;
+
+ #if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
+ #define HAVE_BATS 1
+@@ -300,51 +298,6 @@
+ }
+ }
+
+-/* is x a power of 4? */
+-#define is_power_of_4(x) is_power_of_2(x) && (ffs(x) & 1)
+-
+-/*
+- * Set up a mapping for a block of I/O.
+- * virt, phys, size must all be page-aligned.
+- * This should only be called before ioremap is called.
+- */
+-void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+- unsigned int size, int flags)
+-{
+- int i;
+-
+- if (virt > KERNELBASE && virt < ioremap_bot)
+- ioremap_bot = ioremap_base = virt;
+-
+-#ifdef HAVE_BATS
+- /*
+- * Use a BAT for this if possible...
+- */
+- if (io_bat_index < 2 && is_power_of_2(size)
+- && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+- setbat(io_bat_index, virt, phys, size, flags);
+- ++io_bat_index;
+- return;
+- }
+-#endif /* HAVE_BATS */
+-
+-#ifdef HAVE_TLBCAM
+- /*
+- * Use a CAM for this if possible...
+- */
+- if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
+- && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+- settlbcam(tlbcam_index, virt, phys, size, flags, 0);
+- ++tlbcam_index;
+- return;
+- }
+-#endif /* HAVE_TLBCAM */
+-
+- /* No BATs available, put it in the page tables. */
+- for (i = 0; i < size; i += PAGE_SIZE)
+- map_page(virt + i, phys + i, flags);
+-}
+-
+ /* Scan the real Linux page tables and return a PTE pointer for
+ * a virtual address in a context.
+ * Returns true (1) if PTE was found, zero otherwise. The pointer to
+@@ -379,82 +332,6 @@
+ return(retval);
+ }
+
+-/* Find physical address for this virtual address. Normally used by
+- * I/O functions, but anyone can call it.
+- */
+-unsigned long iopa(unsigned long addr)
+-{
+- unsigned long pa;
+-
+- /* I don't know why this won't work on PMacs or CHRP. It
+- * appears there is some bug, or there is some implicit
+- * mapping done not properly represented by BATs or in page
+- * tables.......I am actively working on resolving this, but
+- * can't hold up other stuff. -- Dan
+- */
+- pte_t *pte;
+- struct mm_struct *mm;
+-
+- /* Check the BATs */
+- pa = v_mapped_by_bats(addr);
+- if (pa)
+- return pa;
+-
+- /* Allow mapping of user addresses (within the thread)
+- * for DMA if necessary.
+- */
+- if (addr < TASK_SIZE)
+- mm = current->mm;
+- else
+- mm = &init_mm;
+-
+- pa = 0;
+- if (get_pteptr(mm, addr, &pte, NULL)) {
+- pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+- pte_unmap(pte);
+- }
+-
+- return(pa);
+-}
+-
+-/* This is will find the virtual address for a physical one....
+- * Swiped from APUS, could be dangerous :-).
+- * This is only a placeholder until I really find a way to make this
+- * work. -- Dan
+- */
+-unsigned long
+-mm_ptov (unsigned long paddr)
+-{
+- unsigned long ret;
+-#if 0
+- if (paddr < 16*1024*1024)
+- ret = ZTWO_VADDR(paddr);
+- else {
+- int i;
+-
+- for (i = 0; i < kmap_chunk_count;){
+- unsigned long phys = kmap_chunks[i++];
+- unsigned long size = kmap_chunks[i++];
+- unsigned long virt = kmap_chunks[i++];
+- if (paddr >= phys
+- && paddr < (phys + size)){
+- ret = virt + paddr - phys;
+- goto exit;
+- }
+- }
+-
+- ret = (unsigned long) __va(paddr);
+- }
+-exit:
+-#ifdef DEBUGPV
+- printk ("PTOV(%lx)=%lx\n", paddr, ret);
+-#endif
+-#else
+- ret = (unsigned long)paddr + KERNELBASE;
+-#endif
+- return ret;
+-}
+-
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+
+ static int __change_page_attr(struct page *page, pgprot_t prot)
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -7,7 +7,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+@@ -34,41 +33,27 @@
+ #include <linux/stddef.h>
+ #include <linux/vmalloc.h>
+ #include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/highmem.h>
+-#include <linux/idr.h>
+-#include <linux/nodemask.h>
+-#include <linux/module.h>
+
+ #include <asm/pgalloc.h>
+ #include <asm/page.h>
+ #include <asm/prom.h>
+-#include <asm/lmb.h>
+-#include <asm/rtas.h>
+ #include <asm/io.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pgtable.h>
+ #include <asm/mmu.h>
+-#include <asm/uaccess.h>
+ #include <asm/smp.h>
+ #include <asm/machdep.h>
+ #include <asm/tlb.h>
+-#include <asm/eeh.h>
+ #include <asm/processor.h>
+-#include <asm/mmzone.h>
+ #include <asm/cputable.h>
+ #include <asm/sections.h>
+ #include <asm/system.h>
+-#include <asm/iommu.h>
+ #include <asm/abs_addr.h>
+-#include <asm/vdso.h>
+ #include <asm/firmware.h>
+
+ #include "mmu_decl.h"
+
+-unsigned long ioremap_bot = IMALLOC_BASE;
+-static unsigned long phbs_io_bot = PHBS_IO_BASE;
++unsigned long ioremap_bot = IOREMAP_BASE;
+
+ /*
+ * map_io_page currently only called by __ioremap
+@@ -102,8 +87,8 @@
+ * entry in the hardware page table.
+ *
+ */
+- if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+- mmu_io_psize)) {
++ if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE,
++ pa, flags, mmu_io_psize)) {
+ printk(KERN_ERR "Failed to do bolted mapping IO "
+ "memory at %016lx !\n", pa);
+ return -ENOMEM;
+@@ -113,8 +98,11 @@
+ }
+
+
+-static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
+- unsigned long ea, unsigned long size,
++/**
++ * __ioremap_at - Low level function to establish the page tables
++ * for an IO mapping
++ */
++void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
+ unsigned long flags)
+ {
+ unsigned long i;
+@@ -122,17 +110,35 @@
+ if ((flags & _PAGE_PRESENT) == 0)
+ flags |= pgprot_val(PAGE_KERNEL);
+
++ WARN_ON(pa & ~PAGE_MASK);
++ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
++ WARN_ON(size & ~PAGE_MASK);
++
+ for (i = 0; i < size; i += PAGE_SIZE)
+- if (map_io_page(ea+i, pa+i, flags))
++ if (map_io_page((unsigned long)ea+i, pa+i, flags))
+ return NULL;
+
+- return (void __iomem *) (ea + (addr & ~PAGE_MASK));
++ return (void __iomem *)ea;
++}
++
++/**
++ * __iounmap_from - Low level function to tear down the page tables
++ * for an IO mapping. This is used for mappings that
++ * are manipulated manually, like partial unmapping of
++ * PCI IOs or ISA space.
++ */
++void __iounmap_at(void *ea, unsigned long size)
++{
++ WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
++ WARN_ON(size & ~PAGE_MASK);
++
++ unmap_kernel_range((unsigned long)ea, size);
+ }
+
+ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
+ unsigned long flags)
+ {
+- unsigned long pa, ea;
++ phys_addr_t paligned;
+ void __iomem *ret;
+
+ /*
+@@ -144,27 +150,30 @@
+ * IMALLOC_END
+ *
+ */
+- pa = addr & PAGE_MASK;
+- size = PAGE_ALIGN(addr + size) - pa;
++ paligned = addr & PAGE_MASK;
++ size = PAGE_ALIGN(addr + size) - paligned;
+
+- if ((size == 0) || (pa == 0))
++ if ((size == 0) || (paligned == 0))
+ return NULL;
+
+ if (mem_init_done) {
+ struct vm_struct *area;
+- area = im_get_free_area(size);
++
++ area = __get_vm_area(size, VM_IOREMAP,
++ ioremap_bot, IOREMAP_END);
+ if (area == NULL)
+ return NULL;
+- ea = (unsigned long)(area->addr);
+- ret = __ioremap_com(addr, pa, ea, size, flags);
++ ret = __ioremap_at(paligned, area->addr, size, flags);
+ if (!ret)
+- im_free(area->addr);
++ vunmap(area->addr);
+ } else {
+- ea = ioremap_bot;
+- ret = __ioremap_com(addr, pa, ea, size, flags);
++ ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
+ if (ret)
+ ioremap_bot += size;
+ }
++
++ if (ret)
++ ret += addr & ~PAGE_MASK;
+ return ret;
+ }
+
+@@ -187,62 +196,9 @@
+ }
+
+
+-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+-
+-int __ioremap_explicit(phys_addr_t pa, unsigned long ea,
+- unsigned long size, unsigned long flags)
+-{
+- struct vm_struct *area;
+- void __iomem *ret;
+-
+- /* For now, require page-aligned values for pa, ea, and size */
+- if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+- !IS_PAGE_ALIGNED(size)) {
+- printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
+- return 1;
+- }
+-
+- if (!mem_init_done) {
+- /* Two things to consider in this case:
+- * 1) No records will be kept (imalloc, etc) that the region
+- * has been remapped
+- * 2) It won't be easy to iounmap() the region later (because
+- * of 1)
+- */
+- ;
+- } else {
+- area = im_get_area(ea, size,
+- IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
+- if (area == NULL) {
+- /* Expected when PHB-dlpar is in play */
+- return 1;
+- }
+- if (ea != (unsigned long) area->addr) {
+- printk(KERN_ERR "unexpected addr return from "
+- "im_get_area\n");
+- return 1;
+- }
+- }
+-
+- ret = __ioremap_com(pa, pa, ea, size, flags);
+- if (ret == NULL) {
+- printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
+- return 1;
+- }
+- if (ret != (void *) ea) {
+- printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+ /*
+ * Unmap an IO region and remove it from imalloc'd list.
+ * Access to IO memory should be serialized by driver.
+- * This code is modeled after vmalloc code - unmap_vm_area()
+- *
+- * XXX what about calls before mem_init_done (ie python_countermeasures())
+ */
+ void __iounmap(volatile void __iomem *token)
+ {
+@@ -251,9 +207,14 @@
+ if (!mem_init_done)
+ return;
+
+- addr = (void *) ((unsigned long __force) token & PAGE_MASK);
+-
+- im_free(addr);
++ addr = (void *) ((unsigned long __force)
++ PCI_FIX_ADDR(token) & PAGE_MASK);
++ if ((unsigned long)addr < ioremap_bot) {
++ printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
++ " at 0x%p\n", addr);
++ return;
++ }
++ vunmap(addr);
+ }
+
+ void iounmap(volatile void __iomem *token)
+@@ -264,77 +225,8 @@
+ __iounmap(token);
+ }
+
+-static int iounmap_subset_regions(unsigned long addr, unsigned long size)
+-{
+- struct vm_struct *area;
+-
+- /* Check whether subsets of this region exist */
+- area = im_get_area(addr, size, IM_REGION_SUPERSET);
+- if (area == NULL)
+- return 1;
+-
+- while (area) {
+- iounmap((void __iomem *) area->addr);
+- area = im_get_area(addr, size,
+- IM_REGION_SUPERSET);
+- }
+-
+- return 0;
+-}
+-
+-int __iounmap_explicit(volatile void __iomem *start, unsigned long size)
+-{
+- struct vm_struct *area;
+- unsigned long addr;
+- int rc;
+-
+- addr = (unsigned long __force) start & PAGE_MASK;
+-
+- /* Verify that the region either exists or is a subset of an existing
+- * region. In the latter case, split the parent region to create
+- * the exact region
+- */
+- area = im_get_area(addr, size,
+- IM_REGION_EXISTS | IM_REGION_SUBSET);
+- if (area == NULL) {
+- /* Determine whether subset regions exist. If so, unmap */
+- rc = iounmap_subset_regions(addr, size);
+- if (rc) {
+- printk(KERN_ERR
+- "%s() cannot unmap nonexistent range 0x%lx\n",
+- __FUNCTION__, addr);
+- return 1;
+- }
+- } else {
+- iounmap((void __iomem *) area->addr);
+- }
+- /*
+- * FIXME! This can't be right:
+- iounmap(area->addr);
+- * Maybe it should be "iounmap(area);"
+- */
+- return 0;
+-}
+-
+ EXPORT_SYMBOL(ioremap);
+ EXPORT_SYMBOL(ioremap_flags);
+ EXPORT_SYMBOL(__ioremap);
+ EXPORT_SYMBOL(iounmap);
+ EXPORT_SYMBOL(__iounmap);
+-
+-static DEFINE_SPINLOCK(phb_io_lock);
+-
+-void __iomem * reserve_phb_iospace(unsigned long size)
+-{
+- void __iomem *virt_addr;
+-
+- if (phbs_io_bot >= IMALLOC_BASE)
+- panic("reserve_phb_iospace(): phb io space overflow\n");
+-
+- spin_lock(&phb_io_lock);
+- virt_addr = (void __iomem *) phbs_io_bot;
+- phbs_io_bot += size;
+- spin_unlock(&phb_io_lock);
+-
+- return virt_addr;
+-}
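The hunks above replace the imalloc-based __ioremap_com()/__ioremap_explicit()
machinery with the __ioremap_at()/__iounmap_at() pair, where the caller picks
the virtual address and the helpers only build or tear down page tables. A
minimal sketch of how PHB code might drive the new pair; phys_io_base,
virt_io_base and size are illustrative placeholders, not symbols from this
patch:

	/* Sketch only: map a PHB IO window at a caller-chosen address. */
	static void __iomem *map_phb_io_window(phys_addr_t phys_io_base,
					       void *virt_io_base,
					       unsigned long size)
	{
		/* All three arguments must be page aligned (the helper
		 * WARNs otherwise). */
		return __ioremap_at(phys_io_base, virt_io_base, size,
				    _PAGE_NO_CACHE | _PAGE_GUARDED);
	}

	static void unmap_phb_io_window(void *virt_io_base, unsigned long size)
	{
		/* Tears down only this window; adjacent mappings made by
		 * other callers stay intact. */
		__iounmap_at(virt_io_base, size);
	}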
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -11,7 +11,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+@@ -35,12 +34,12 @@
+
+ #include "mmu_decl.h"
+
+-PTE *Hash, *Hash_end;
++struct hash_pte *Hash, *Hash_end;
+ unsigned long Hash_size, Hash_mask;
+ unsigned long _SDR1;
+
+ union ubat { /* BAT register values to be loaded */
+- BAT bat;
++ struct ppc_bat bat;
+ u32 word[2];
+ } BATS[8][2]; /* 8 pairs of IBAT, DBAT */
+
+@@ -245,7 +244,7 @@
+ cacheable_memzero(Hash, Hash_size);
+ _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+
+- Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
++ Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+
+ printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
+ total_memory >> 20, Hash_size >> 10, Hash);
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_32.c linux-2.6.22-591/arch/powerpc/mm/tlb_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/tlb_32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/tlb_32.c 2007-12-21 15:36:11.000000000 -0500
+@@ -11,7 +11,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_64.c linux-2.6.22-591/arch/powerpc/mm/tlb_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/tlb_64.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/mm/tlb_64.c 2007-12-21 15:36:11.000000000 -0500
+@@ -8,7 +8,6 @@
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+@@ -239,3 +238,59 @@
+ pte_free_submit(*batchp);
+ *batchp = NULL;
+ }
++
++/**
++ * __flush_hash_table_range - Flush all HPTEs for a given address range
++ * from the hash table (and the TLB). But keeps
++ * the linux PTEs intact.
++ *
++ * @mm : mm_struct of the target address space (generally init_mm)
++ * @start : starting address
++ * @end : ending address (not included in the flush)
++ *
++ * This function is mostly to be used by some IO hotplug code in order
++ * to remove all hash entries from a given address range used to map IO
++ * space on a removed PCI-PCI bidge without tearing down the full mapping
++ * since 64K pages may overlap with other bridges when using 64K pages
++ * with 4K HW pages on IO space.
++ *
++ * Because of that usage pattern, it's only available with CONFIG_HOTPLUG
++ * and is implemented for small size rather than speed.
++ */
++#ifdef CONFIG_HOTPLUG
++
++void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
++ unsigned long end)
++{
++ unsigned long flags;
++
++ start = _ALIGN_DOWN(start, PAGE_SIZE);
++ end = _ALIGN_UP(end, PAGE_SIZE);
++
++ BUG_ON(!mm->pgd);
++
++ /* Note: Normally, we should only ever use a batch within a
++ * PTE locked section. This violates the rule, but will work
++ * since we don't actually modify the PTEs, we just flush the
++ * hash while leaving the PTEs intact (including their reference
++ * to being hashed). This is not the most performance oriented
++ * way to do things but is fine for our needs here.
++ */
++ local_irq_save(flags);
++ arch_enter_lazy_mmu_mode();
++ for (; start < end; start += PAGE_SIZE) {
++ pte_t *ptep = find_linux_pte(mm->pgd, start);
++ unsigned long pte;
++
++ if (ptep == NULL)
++ continue;
++ pte = pte_val(*ptep);
++ if (!(pte & _PAGE_HASHPTE))
++ continue;
++ hpte_need_flush(mm, start, ptep, pte, 0);
++ }
++ arch_leave_lazy_mmu_mode();
++ local_irq_restore(flags);
++}
++
++#endif /* CONFIG_HOTPLUG */
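As the comment above explains, this helper lets IO hotplug code scrub the
hash for one bridge's IO window while leaving the linux PTEs alone. A hedged
sketch of the intended call pattern; io_start and io_size are illustrative
placeholders for the hose's window, not names from this patch:

	/* Sketch: flush hash entries for a removed bridge's IO window. */
	static void scrub_bridge_io_window(unsigned long io_start,
					   unsigned long io_size)
	{
		/* Kernel IO mappings live in init_mm; the untouched linux
		 * PTEs simply re-fault into the hash on the next access. */
		__flush_hash_table_range(&init_mm, io_start,
					 io_start + io_size);
	}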
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig linux-2.6.22-591/arch/powerpc/platforms/Kconfig
+--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -16,13 +16,6 @@
+ bool "Embedded 6xx/7xx/7xxx-based board"
+ depends on PPC32 && (BROKEN||BROKEN_ON_SMP)
+
+-config APUS
+- bool "Amiga-APUS"
+- depends on PPC32 && BROKEN
+- help
+- Select APUS if configuring for a PowerUP Amiga.
+- More information is available at:
+- <http://linux-apus.sourceforge.net/>.
+ endchoice
+
+ source "arch/powerpc/platforms/pseries/Kconfig"
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype
+--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,252 @@
++config PPC64
++ bool "64-bit kernel"
++ default n
++ help
++ This option selects whether a 32-bit or a 64-bit kernel
++ will be built.
++
++menu "Processor support"
++choice
++ prompt "Processor Type"
++ depends on PPC32
++ default 6xx
++
++config CLASSIC32
++ bool "52xx/6xx/7xx/74xx"
++ select PPC_FPU
++ select 6xx
++ help
++ There are four families of PowerPC chips supported. The more common
++ types (601, 603, 604, 740, 750, 7400), the Motorola embedded
++ versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
++ embedded versions (403 and 405) and the high end 64 bit Power
++ processors (POWER 3, POWER4, and IBM PPC970 also known as G5).
++
++ This option is the catch-all for 6xx types, including some of the
++ embedded versions. Unless you see an option for the specific
++ chip family you are using, you want this option.
++
++ You do not want this if you are building a kernel for a 64 bit
++ IBM RS/6000 or an Apple G5; those machines should use the 64-bit
++ kernel option (PPC64) instead.
++
++ If unsure, select this option.
++
++ Note that the kernel runs in 32-bit mode even on 64-bit chips.
++
++config PPC_82xx
++ bool "Freescale 82xx"
++ select 6xx
++ select PPC_FPU
++
++config PPC_83xx
++ bool "Freescale 83xx"
++ select 6xx
++ select FSL_SOC
++ select 83xx
++ select PPC_FPU
++ select WANT_DEVICE_TREE
++
++config PPC_85xx
++ bool "Freescale 85xx"
++ select E500
++ select FSL_SOC
++ select 85xx
++ select WANT_DEVICE_TREE
++
++config PPC_86xx
++ bool "Freescale 86xx"
++ select 6xx
++ select FSL_SOC
++ select FSL_PCIE
++ select PPC_FPU
++ select ALTIVEC
++ help
++ The Freescale E600 SoCs have 74xx cores.
++
++config PPC_8xx
++ bool "Freescale 8xx"
++ select FSL_SOC
++ select 8xx
++
++config 40x
++ bool "AMCC 40x"
++ select PPC_DCR_NATIVE
++
++config 44x
++ bool "AMCC 44x"
++ select PPC_DCR_NATIVE
++ select WANT_DEVICE_TREE
++
++config E200
++ bool "Freescale e200"
++
++endchoice
++
++config POWER4_ONLY
++ bool "Optimize for POWER4"
++ depends on PPC64
++ default n
++ ---help---
++ Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
++ The resulting binary will not work on POWER3 or RS64 processors
++ when compiled with binutils 2.15 or later.
++
++config POWER3
++ bool
++ depends on PPC64
++ default y if !POWER4_ONLY
++
++config POWER4
++ depends on PPC64
++ def_bool y
++
++config 6xx
++ bool
++
++# this is temp to handle compat with arch=ppc
++config 8xx
++ bool
++
++# this is temp to handle compat with arch=ppc
++config 83xx
++ bool
++
++# this is temp to handle compat with arch=ppc
++config 85xx
++ bool
++
++config E500
++ bool
++
++config PPC_FPU
++ bool
++ default y if PPC64
++
++config 4xx
++ bool
++ depends on 40x || 44x
++ default y
++
++config BOOKE
++ bool
++ depends on E200 || E500 || 44x
++ default y
++
++config FSL_BOOKE
++ bool
++ depends on E200 || E500
++ default y
++
++config PTE_64BIT
++ bool
++ depends on 44x || E500
++ default y if 44x
++ default y if E500 && PHYS_64BIT
++
++config PHYS_64BIT
++ bool 'Large physical address support' if E500
++ depends on 44x || E500
++ select RESOURCES_64BIT
++ default y if 44x
++ ---help---
++ This option enables kernel support for larger than 32-bit physical
++ addresses. This feature is not available on all e500 cores.
++
++ If in doubt, say N here.
++
++config ALTIVEC
++ bool "AltiVec Support"
++ depends on CLASSIC32 || POWER4
++ ---help---
++ This option enables kernel support for the Altivec extensions to the
++ PowerPC processor. The kernel currently supports saving and restoring
++ altivec registers, and turning on the 'altivec enable' bit so user
++ processes can execute altivec instructions.
++
++ This option is only useful if you have a processor that supports
++ altivec (G4, otherwise known as the 74xx series), but it does not
++ have any effect on a non-altivec cpu (it does, however, add code
++ to the kernel).
++
++ If in doubt, say Y here.
++
++config SPE
++ bool "SPE Support"
++ depends on E200 || E500
++ default y
++ ---help---
++ This option enables kernel support for the Signal Processing
++ Extensions (SPE) to the PowerPC processor. The kernel currently
++ supports saving and restoring SPE registers, and turning on the
++ 'spe enable' bit so user processes can execute SPE instructions.
++
++ This option is only useful if you have a processor that supports
++ SPE (e500, otherwise known as 85xx series), but does not have any
++ effect on a non-spe cpu (it does, however, add code to the kernel).
++
++ If in doubt, say Y here.
++
++config PPC_STD_MMU
++ bool
++ depends on 6xx || POWER3 || POWER4 || PPC64
++ default y
++
++config PPC_STD_MMU_32
++ def_bool y
++ depends on PPC_STD_MMU && PPC32
++
++config PPC_MM_SLICES
++ bool
++ default y if HUGETLB_PAGE
++ default n
++
++config VIRT_CPU_ACCOUNTING
++ bool "Deterministic task and CPU time accounting"
++ depends on PPC64
++ default y
++ help
++ Select this option to enable more accurate task and CPU time
++ accounting. This is done by reading a CPU counter on each
++ kernel entry and exit and on transitions within the kernel
++ between system, softirq and hardirq state, so there is a
++ small performance impact. This also enables accounting of
++ stolen time on logically-partitioned systems running on
++ IBM POWER5-based machines.
++
++ If in doubt, say Y here.
++
++config SMP
++ depends on PPC_STD_MMU
++ bool "Symmetric multi-processing support"
++ ---help---
++ This enables support for systems with more than one CPU. If you have
++ a system with only one CPU, say N. If you have a system with more
++ than one CPU, say Y. Note that the kernel does not currently
++ support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
++ since they have inadequate hardware support for multiprocessor
++ operation.
++
++ If you say N here, the kernel will run on single and multiprocessor
++ machines, but will use only one CPU of a multiprocessor machine. If
++ you say Y here, the kernel will run on single-processor machines.
++ On a single-processor machine, the kernel will run faster if you say
++ N here.
++
++ If you don't know what to do here, say N.
++
++config NR_CPUS
++ int "Maximum number of CPUs (2-128)"
++ range 2 128
++ depends on SMP
++ default "32" if PPC64
++ default "4"
++
++config NOT_COHERENT_CACHE
++ bool
++ depends on 4xx || 8xx || E200
++ default y
++
++config CHECK_CACHE_COHERENCY
++ bool
++
++endmenu
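Each bool above surfaces to C code as a CONFIG_* preprocessor macro, so these
options gate code paths at compile time. A tiny illustrative fragment; the
helper is hypothetical, not part of this patch:

	/* On 2.6.22 the generated config header is pulled into every
	 * translation unit via -include include/linux/autoconf.h, so no
	 * explicit include is needed here. */
	static inline int cache_is_coherent(void)
	{
	#ifdef CONFIG_NOT_COHERENT_CACHE
		return 0;	/* 4xx/8xx/e200: flush/invalidate by hand */
	#else
		return 1;
	#endif
	}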
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig
+--- linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,130 +0,0 @@
+-
+-config AMIGA
+- bool
+- depends on APUS
+- default y
+- help
+- This option enables support for the Amiga series of computers.
+-
+-config ZORRO
+- bool
+- depends on APUS
+- default y
+- help
+- This enables support for the Zorro bus in the Amiga. If you have
+- expansion cards in your Amiga that conform to the Amiga
+- AutoConfig(tm) specification, say Y, otherwise N. Note that even
+- expansion cards that do not fit in the Zorro slots but fit in e.g.
+- the CPU slot may fall in this category, so you have to say Y to let
+- Linux use these.
+-
+-config ABSTRACT_CONSOLE
+- bool
+- depends on APUS
+- default y
+-
+-config APUS_FAST_EXCEPT
+- bool
+- depends on APUS
+- default y
+-
+-config AMIGA_PCMCIA
+- bool "Amiga 1200/600 PCMCIA support"
+- depends on APUS && EXPERIMENTAL
+- help
+- Include support in the kernel for pcmcia on Amiga 1200 and Amiga
+- 600. If you intend to use pcmcia cards say Y; otherwise say N.
+-
+-config AMIGA_BUILTIN_SERIAL
+- tristate "Amiga builtin serial support"
+- depends on APUS
+- help
+- If you want to use your Amiga's built-in serial port in Linux,
+- answer Y.
+-
+- To compile this driver as a module, choose M here.
+-
+-config GVPIOEXT
+- tristate "GVP IO-Extender support"
+- depends on APUS
+- help
+- If you want to use a GVP IO-Extender serial card in Linux, say Y.
+- Otherwise, say N.
+-
+-config GVPIOEXT_LP
+- tristate "GVP IO-Extender parallel printer support"
+- depends on GVPIOEXT
+- help
+- Say Y to enable driving a printer from the parallel port on your
+- GVP IO-Extender card, N otherwise.
+-
+-config GVPIOEXT_PLIP
+- tristate "GVP IO-Extender PLIP support"
+- depends on GVPIOEXT
+- help
+- Say Y to enable doing IP over the parallel port on your GVP
+- IO-Extender card, N otherwise.
+-
+-config MULTIFACE_III_TTY
+- tristate "Multiface Card III serial support"
+- depends on APUS
+- help
+- If you want to use a Multiface III card's serial port in Linux,
+- answer Y.
+-
+- To compile this driver as a module, choose M here.
+-
+-config A2232
+- tristate "Commodore A2232 serial support (EXPERIMENTAL)"
+- depends on EXPERIMENTAL && APUS
+- ---help---
+- This option supports the 2232 7-port serial card shipped with the
+- Amiga 2000 and other Zorro-bus machines, dating from 1989. At
+- a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip
+- each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The
+- ports were connected with 8 pin DIN connectors on the card bracket,
+- for which 8 pin to DB25 adapters were supplied. The card also had
+- jumpers internally to toggle various pinning configurations.
+-
+- This driver can be built as a module; but then "generic_serial"
+- will also be built as a module. This has to be loaded before
+- "ser_a2232". If you want to do this, answer M here.
+-
+-config WHIPPET_SERIAL
+- tristate "Hisoft Whippet PCMCIA serial support"
+- depends on AMIGA_PCMCIA
+- help
+- HiSoft has a web page at <http://www.hisoft.co.uk/>, but there
+- is no listing for the Whippet in their Amiga section.
+-
+-config APNE
+- tristate "PCMCIA NE2000 support"
+- depends on AMIGA_PCMCIA
+- help
+- If you have a PCMCIA NE2000 compatible adapter, say Y. Otherwise,
+- say N.
+-
+- To compile this driver as a module, choose M here: the
+- module will be called apne.
+-
+-config SERIAL_CONSOLE
+- bool "Support for serial port console"
+- depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y)
+-
+-config HEARTBEAT
+- bool "Use power LED as a heartbeat"
+- depends on APUS
+- help
+- Use the power-on LED on your machine as a load meter. The exact
+- behavior is platform-dependent, but normally the flash frequency is
+- a hyperbolic function of the 5-minute load average.
+-
+-config PROC_HARDWARE
+- bool "/proc/hardware support"
+- depends on APUS
+-
+-source "drivers/zorro/Kconfig"
+-
+-config PCI_PERMEDIA
+- bool "PCI for Permedia2"
+- depends on !4xx && !8xx && APUS
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c 2007-12-21 15:36:11.000000000 -0500
+@@ -102,7 +102,7 @@
+ vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+
+ /* Check if it's in allowed range for PIO */
+- if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE)
++ if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END)
+ return;
+
+ /* Try to find a PTE. If not, clear the paddr, we'll do
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c 2007-12-21 15:36:11.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/poll.h>
+ #include <linux/ptrace.h>
++#include <linux/seq_file.h>
+
+ #include <asm/io.h>
+ #include <asm/semaphore.h>
+@@ -39,6 +40,7 @@
+
+ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
++
+ static int
+ spufs_mem_open(struct inode *inode, struct file *file)
+ {
+@@ -1797,6 +1799,29 @@
+ return 0;
+ }
+
++static int spufs_caps_show(struct seq_file *s, void *private)
++{
++ struct spu_context *ctx = s->private;
++
++ if (!(ctx->flags & SPU_CREATE_NOSCHED))
++ seq_puts(s, "sched\n");
++ if (!(ctx->flags & SPU_CREATE_ISOLATE))
++ seq_puts(s, "step\n");
++ return 0;
++}
++
++static int spufs_caps_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx);
++}
++
++static const struct file_operations spufs_caps_fops = {
++ .open = spufs_caps_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
+ static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+ char __user *buf, size_t len, loff_t *pos)
+ {
+@@ -2015,6 +2040,7 @@
+ };
+
+ struct tree_descr spufs_dir_contents[] = {
++ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, },
+ { "regs", &spufs_regs_fops, 0666, },
+ { "mbox", &spufs_mbox_fops, 0444, },
+@@ -2050,6 +2076,7 @@
+ };
+
+ struct tree_descr spufs_dir_nosched_contents[] = {
++ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, },
+ { "mbox", &spufs_mbox_fops, 0444, },
+ { "ibox", &spufs_ibox_fops, 0444, },
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c 2007-12-21 15:36:11.000000000 -0500
+@@ -142,8 +142,12 @@
+ runcntl = SPU_RUNCNTL_RUNNABLE;
+ ctx->ops->runcntl_write(ctx, runcntl);
+ } else {
++ unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL;
+ spu_start_tick(ctx);
+ ctx->ops->npc_write(ctx, *npc);
++ if (test_thread_flag(TIF_SINGLESTEP))
++ mode = SPU_PRIVCNTL_MODE_SINGLE_STEP;
++ out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ }
+
+@@ -334,7 +338,8 @@
+ ret = spu_process_events(ctx);
+
+ } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+- SPU_STATUS_STOPPED_BY_HALT)));
++ SPU_STATUS_STOPPED_BY_HALT |
++ SPU_STATUS_SINGLE_STEP)));
+
+ ctx->ops->master_stop(ctx);
+ ret = spu_run_fini(ctx, npc, &status);
+@@ -344,10 +349,15 @@
+ if ((ret == 0) ||
+ ((ret == -ERESTARTSYS) &&
+ ((status & SPU_STATUS_STOPPED_BY_HALT) ||
++ (status & SPU_STATUS_SINGLE_STEP) ||
+ ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ ret = status;
+
++ /* Note: we don't need to force_sig SIGTRAP on single-step
++ * since we have TIF_SINGLESTEP set, thus the kernel will do
++ * it upon return from the syscall anyway
++ */
+ if ((status & SPU_STATUS_STOPPED_BY_STOP)
+ && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) {
+ force_sig(SIGTRAP, current);
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h 2007-12-21 15:36:11.000000000 -0500
+@@ -76,24 +76,25 @@
+ return compressedStatus;
+ }
+
+-static inline u64 HvCallHpt_findValid(hpte_t *hpte, u64 vpn)
++static inline u64 HvCallHpt_findValid(struct hash_pte *hpte, u64 vpn)
+ {
+ return HvCall3Ret16(HvCallHptFindValid, hpte, vpn, 0, 0);
+ }
+
+-static inline u64 HvCallHpt_findNextValid(hpte_t *hpte, u32 hpteIndex,
++static inline u64 HvCallHpt_findNextValid(struct hash_pte *hpte, u32 hpteIndex,
+ u8 bitson, u8 bitsoff)
+ {
+ return HvCall3Ret16(HvCallHptFindNextValid, hpte, hpteIndex,
+ bitson, bitsoff);
+ }
+
+-static inline void HvCallHpt_get(hpte_t *hpte, u32 hpteIndex)
++static inline void HvCallHpt_get(struct hash_pte *hpte, u32 hpteIndex)
+ {
+ HvCall2Ret16(HvCallHptGet, hpte, hpteIndex, 0);
+ }
+
+-static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, hpte_t *hpte)
++static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit,
++ struct hash_pte *hpte)
+ {
+ HvCall4(HvCallHptAddValidate, hpteIndex, hBit, hpte->v, hpte->r);
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c 2007-12-21 15:36:11.000000000 -0500
+@@ -44,7 +44,7 @@
+ unsigned long vflags, int psize)
+ {
+ long slot;
+- hpte_t lhpte;
++ struct hash_pte lhpte;
+ int secondary = 0;
+
+ BUG_ON(psize != MMU_PAGE_4K);
+@@ -99,7 +99,7 @@
+
+ static unsigned long iSeries_hpte_getword0(unsigned long slot)
+ {
+- hpte_t hpte;
++ struct hash_pte hpte;
+
+ HvCallHpt_get(&hpte, slot);
+ return hpte.v;
+@@ -144,7 +144,7 @@
+ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
+ {
+- hpte_t hpte;
++ struct hash_pte hpte;
+ unsigned long want_v;
+
+ iSeries_hlock(slot);
+@@ -176,7 +176,7 @@
+ */
+ static long iSeries_hpte_find(unsigned long vpn)
+ {
+- hpte_t hpte;
++ struct hash_pte hpte;
+ long slot;
+
+ /*
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c 2007-12-21 15:36:11.000000000 -0500
+@@ -742,6 +742,11 @@
+ /* Install IO hooks */
+ ppc_pci_io = iseries_pci_io;
+
++ /* iSeries has no IO space in the common sense; it needs to set
++ * the IO base to 0
++ */
++ pci_io_base = 0;
++
+ if (root == NULL) {
+ printk(KERN_CRIT "iSeries_pcibios_init: can't find root "
+ "of device tree\n");
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c 2007-12-21 15:36:11.000000000 -0500
+@@ -519,23 +519,6 @@
+ DBG(" <- maple_pci_irq_fixup\n");
+ }
+
+-static void __init maple_fixup_phb_resources(void)
+-{
+- struct pci_controller *hose, *tmp;
+-
+- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+- unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-
+- hose->io_resource.start += offset;
+- hose->io_resource.end += offset;
+-
+- printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n",
+- hose->global_number,
+- (unsigned long long)hose->io_resource.start,
+- (unsigned long long)hose->io_resource.end);
+- }
+-}
+-
+ void __init maple_pci_init(void)
+ {
+ struct device_node *np, *root;
+@@ -573,24 +556,6 @@
+ if (ht && add_bridge(ht) != 0)
+ of_node_put(ht);
+
+- /*
+- * We need to call pci_setup_phb_io for the HT bridge first
+- * so it gets the I/O port numbers starting at 0, and we
+- * need to call it for the AGP bridge after that so it gets
+- * small positive I/O port numbers.
+- */
+- if (u3_ht)
+- pci_setup_phb_io(u3_ht, 1);
+- if (u3_agp)
+- pci_setup_phb_io(u3_agp, 0);
+- if (u4_pcie)
+- pci_setup_phb_io(u4_pcie, 0);
+-
+- /* Fixup the IO resources on our host bridges as the common code
+- * does it only for childs of the host bridges
+- */
+- maple_fixup_phb_resources();
+-
+ /* Setup the linkage between OF nodes and PHBs */
+ pci_devs_phb_init();
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c 2007-12-21 15:36:11.000000000 -0500
+@@ -150,29 +150,11 @@
+ printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
+
+ /* Interpret the "ranges" property */
+- /* This also maps the I/O region and sets isa_io/mem_base */
+ pci_process_bridge_OF_ranges(hose, dev, 1);
+- pci_setup_phb_io(hose, 1);
+
+ return 0;
+ }
+
+-
+-static void __init pas_fixup_phb_resources(void)
+-{
+- struct pci_controller *hose, *tmp;
+-
+- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+- unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+- hose->io_resource.start += offset;
+- hose->io_resource.end += offset;
+- printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+- hose->global_number,
+- hose->io_resource.start, hose->io_resource.end);
+- }
+-}
+-
+-
+ void __init pas_pci_init(void)
+ {
+ struct device_node *np, *root;
+@@ -190,8 +172,6 @@
+
+ of_node_put(root);
+
+- pas_fixup_phb_resources();
+-
+ /* Setup the linkage between OF nodes and PHBs */
+ pci_devs_phb_init();
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1006,19 +1006,6 @@
+ #endif /* CONFIG_PPC32 */
+ }
+
+-#ifdef CONFIG_PPC64
+-static void __init pmac_fixup_phb_resources(void)
+-{
+- struct pci_controller *hose, *tmp;
+-
+- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+- printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+- hose->global_number,
+- hose->io_resource.start, hose->io_resource.end);
+- }
+-}
+-#endif
+-
+ void __init pmac_pci_init(void)
+ {
+ struct device_node *np, *root;
+@@ -1053,25 +1040,6 @@
+ if (ht && add_bridge(ht) != 0)
+ of_node_put(ht);
+
+- /*
+- * We need to call pci_setup_phb_io for the HT bridge first
+- * so it gets the I/O port numbers starting at 0, and we
+- * need to call it for the AGP bridge after that so it gets
+- * small positive I/O port numbers.
+- */
+- if (u3_ht)
+- pci_setup_phb_io(u3_ht, 1);
+- if (u3_agp)
+- pci_setup_phb_io(u3_agp, 0);
+- if (u4_pcie)
+- pci_setup_phb_io(u4_pcie, 0);
+-
+- /*
+- * On ppc64, fixup the IO resources on our host bridges as
+- * the common code does it only for children of the host bridges
+- */
+- pmac_fixup_phb_resources();
+-
+ /* Setup the linkage between OF nodes and PHBs */
+ pci_devs_phb_init();
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c
+--- linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -98,8 +98,6 @@
+ int sccdbg;
+ #endif
+
+-extern void zs_kgdb_hook(int tty_num);
+-
+ sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
+ EXPORT_SYMBOL(sys_ctrler);
+
+@@ -330,10 +328,6 @@
+ l2cr_init();
+ #endif /* CONFIG_PPC32 */
+
+-#ifdef CONFIG_KGDB
+- zs_kgdb_hook(0);
+-#endif
+-
+ find_via_cuda();
+ find_via_pmu();
+ smu_init();
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c
+--- linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,7 +34,7 @@
+ #define DBG(fmt...) do{if(0)printk(fmt);}while(0)
+ #endif
+
+-static hpte_t *htab;
++static struct hash_pte *htab;
+ static unsigned long htab_addr;
+ static unsigned char *bolttab;
+ static unsigned char *inusetab;
+@@ -44,8 +44,8 @@
+ #define debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g) \
+ _debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__)
+ static void _debug_dump_hpte(unsigned long pa, unsigned long va,
+- unsigned long group, unsigned long bitmap, hpte_t lhpte, int psize,
+- unsigned long slot, const char* func, int line)
++ unsigned long group, unsigned long bitmap, struct hash_pte lhpte,
++ int psize, unsigned long slot, const char* func, int line)
+ {
+ DBG("%s:%d: pa = %lxh\n", func, line, pa);
+ DBG("%s:%d: lpar = %lxh\n", func, line,
+@@ -63,7 +63,7 @@
+ unsigned long pa, unsigned long rflags, unsigned long vflags, int psize)
+ {
+ unsigned long slot;
+- hpte_t lhpte;
++ struct hash_pte lhpte;
+ int secondary = 0;
+ unsigned long result;
+ unsigned long bitmap;
+@@ -255,7 +255,7 @@
+
+ ppc64_pft_size = __ilog2(htab_size);
+
+- bitmap_size = htab_size / sizeof(hpte_t) / 8;
++ bitmap_size = htab_size / sizeof(struct hash_pte) / 8;
+
+ bolttab = __va(lmb_alloc(bitmap_size, 1));
+ inusetab = __va(lmb_alloc(bitmap_size, 1));
+@@ -273,7 +273,7 @@
+
+ result = lv1_map_htab(0, &htab_addr);
+
+- htab = (hpte_t *)__ioremap(htab_addr, htab_size,
++ htab = (struct hash_pte *)__ioremap(htab_addr, htab_size,
+ pgprot_val(PAGE_READONLY_X));
+
+ DBG("%s:%d: lpar %016lxh, virt %016lxh\n", __func__, __LINE__,
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -8,7 +8,7 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_XICS) += xics.o
+ obj-$(CONFIG_SCANLOG) += scanlog.o
+-obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o
++obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
+ obj-$(CONFIG_KEXEC) += kexec.o
+ obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
+ obj-$(CONFIG_PCI_MSI) += msi.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1,6 +1,8 @@
+ /*
+ * eeh.c
+- * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
++ * Copyright IBM Corporation 2001, 2005, 2006
++ * Copyright Dave Engebretsen & Todd Inglett 2001
++ * Copyright Linas Vepstas 2005, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -15,6 +17,8 @@
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+
+ #include <linux/delay.h>
+@@ -117,7 +121,6 @@
+ static unsigned long ignored_check;
+ static unsigned long total_mmio_ffs;
+ static unsigned long false_positives;
+-static unsigned long ignored_failures;
+ static unsigned long slot_resets;
+
+ #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+@@ -505,6 +508,7 @@
+ printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
+ ret, dn->full_name);
+ false_positives++;
++ pdn->eeh_false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+@@ -513,6 +517,7 @@
+ * they are empty when they don't have children. */
+ if ((rets[0] == 5) && (dn->child == NULL)) {
+ false_positives++;
++ pdn->eeh_false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+@@ -522,6 +527,7 @@
+ printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
+ ret, dn->full_name);
+ false_positives++;
++ pdn->eeh_false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+@@ -529,6 +535,7 @@
+ /* If not the kind of error we know about, punt. */
+ if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ false_positives++;
++ pdn->eeh_false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+@@ -921,6 +928,7 @@
+ pdn->eeh_mode = 0;
+ pdn->eeh_check_count = 0;
+ pdn->eeh_freeze_count = 0;
++ pdn->eeh_false_positives = 0;
+
+ if (status && strcmp(status, "ok") != 0)
+ return NULL; /* ignore devices with bad status */
+@@ -1139,7 +1147,8 @@
+ pdn = PCI_DN(dn);
+ pdn->pcidev = dev;
+
+- pci_addr_cache_insert_device (dev);
++ pci_addr_cache_insert_device(dev);
++ eeh_sysfs_add_device(dev);
+ }
+
+ void eeh_add_device_tree_late(struct pci_bus *bus)
+@@ -1178,6 +1187,7 @@
+ printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
+ #endif
+ pci_addr_cache_remove_device(dev);
++ eeh_sysfs_remove_device(dev);
+
+ dn = pci_device_to_OF_node(dev);
+ if (PCI_DN(dn)->pcidev) {
+@@ -1214,11 +1224,10 @@
+ "check not wanted=%ld\n"
+ "eeh_total_mmio_ffs=%ld\n"
+ "eeh_false_positives=%ld\n"
+- "eeh_ignored_failures=%ld\n"
+ "eeh_slot_resets=%ld\n",
+ no_device, no_dn, no_cfg_addr,
+ ignored_check, total_mmio_ffs,
+- false_positives, ignored_failures,
++ false_positives,
+ slot_resets);
+ }
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c 2007-12-21 15:36:11.000000000 -0500
+@@ -2,7 +2,8 @@
+ * eeh_cache.c
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+- * Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation
++ * Copyright IBM Corporation 2004
++ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -295,6 +296,8 @@
+ continue;
+ pci_dev_get (dev); /* matching put is in eeh_remove_device() */
+ PCI_DN(dn)->pcidev = dev;
++
++ eeh_sysfs_add_device(dev);
+ }
+
+ #ifdef DEBUG
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1,6 +1,7 @@
+ /*
+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+- * Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org>
++ * Copyright IBM Corp. 2004 2005
++ * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
+ *
+ * All rights reserved.
+ *
+@@ -19,8 +20,7 @@
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+- * Send feedback to <linas@us.ibm.com>
+- *
++ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+ #include <linux/delay.h>
+ #include <linux/interrupt.h>
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,87 @@
++/*
++ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
++ * Copyright IBM Corporation 2007
++ * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
++ *
++ * All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or (at
++ * your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
++ * NON INFRINGEMENT. See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
++ */
++#include <linux/pci.h>
++#include <asm/ppc-pci.h>
++#include <asm/pci-bridge.h>
++#include <linux/kobject.h>
++
++/**
++ * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
++ * @_name: name of file in sysfs directory
++ * @_memb: name of member in struct pci_dn to access
++ * @_format: printf format for display
++ *
++ * All of the attributes look very similar, so a single macro
++ * generates the show routine for each of them.
++ */
++#define EEH_SHOW_ATTR(_name,_memb,_format) \
++static ssize_t eeh_show_##_name(struct device *dev, \
++ struct device_attribute *attr, char *buf) \
++{ \
++ struct pci_dev *pdev = to_pci_dev(dev); \
++ struct device_node *dn = pci_device_to_OF_node(pdev); \
++ struct pci_dn *pdn; \
++ \
++ if (!dn || PCI_DN(dn) == NULL) \
++ return 0; \
++ \
++ pdn = PCI_DN(dn); \
++ return sprintf(buf, _format "\n", pdn->_memb); \
++} \
++static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
++
++
++EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x");
++EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x");
++EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x");
++EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d");
++EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d");
++EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d");
++
++void eeh_sysfs_add_device(struct pci_dev *pdev)
++{
++ int rc = 0;
++
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count);
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives);
++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count);
++
++ if (rc)
++ printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
++}
++
++void eeh_sysfs_remove_device(struct pci_dev *pdev)
++{
++ device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
++ device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
++ device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
++ device_remove_file(&pdev->dev, &dev_attr_eeh_check_count);
++ device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives);
++ device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count);
++}
++
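To make EEH_SHOW_ATTR concrete, here is what the macro expands to for one
attribute, hand-expanded as a sketch (the real expansion is produced by cpp
from the invocation above):

	static ssize_t eeh_show_eeh_check_count(struct device *dev,
			struct device_attribute *attr, char *buf)
	{
		struct pci_dev *pdev = to_pci_dev(dev);
		struct device_node *dn = pci_device_to_OF_node(pdev);
		struct pci_dn *pdn;

		if (!dn || PCI_DN(dn) == NULL)
			return 0;

		pdn = PCI_DN(dn);
		return sprintf(buf, "%d\n", pdn->eeh_check_count);
	}
	static DEVICE_ATTR(eeh_check_count, S_IRUGO,
			   eeh_show_eeh_check_count, NULL);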
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-12-21 15:36:11.000000000 -0500
+@@ -110,8 +110,6 @@
+ }
+ }
+ }
+-
+- eeh_add_device_tree_late(bus);
+ }
+ EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices);
+
+@@ -139,6 +137,8 @@
+
+ /* Make the discovered devices available */
+ pci_bus_add_devices(child_bus);
++
++ eeh_add_device_tree_late(child_bus);
+ return 0;
+ }
+
+@@ -171,6 +171,7 @@
+ if (!list_empty(&bus->devices)) {
+ pcibios_fixup_new_pci_devices(bus, 0);
+ pci_bus_add_devices(bus);
++ eeh_add_device_tree_late(bus);
+ }
+ } else if (mode == PCI_PROBE_NORMAL) {
+ /* use legacy probe */
+@@ -179,6 +180,7 @@
+ if (num) {
+ pcibios_fixup_new_pci_devices(bus, 1);
+ pci_bus_add_devices(bus);
++ eeh_add_device_tree_late(bus);
+ }
+
+ list_for_each_entry(dev, &bus->devices, bus_list)
+@@ -200,8 +202,6 @@
+ rtas_setup_phb(phb);
+ pci_process_bridge_OF_ranges(phb, dn, 0);
+
+- pci_setup_phb_io_dynamic(phb, primary);
+-
+ pci_devs_phb_init_dynamic(phb);
+
+ if (dn->child)
+@@ -210,6 +210,7 @@
+ scan_phb(phb);
+ pcibios_fixup_new_pci_devices(phb->bus, 0);
+ pci_bus_add_devices(phb->bus);
++ eeh_add_device_tree_late(phb->bus);
+
+ return phb;
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h 2007-12-21 15:36:11.000000000 -0500
+@@ -33,6 +33,8 @@
+ static inline void setup_kexec_cpu_down_mpic(void) { }
+ #endif
+
++extern void pSeries_final_fixup(void);
++
+ /* Poweron flag used for enabling auto ups restart */
+ extern unsigned long rtas_poweron_auto;
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -399,6 +399,7 @@
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
++ get_lppaca()->donate_dedicated_cpu = 1;
+
+ /*
+ * We come in with interrupts disabled, and need_resched()
+@@ -431,6 +432,7 @@
+
+ out:
+ HMT_medium();
++ get_lppaca()->donate_dedicated_cpu = 0;
+ get_lppaca()->idle = 0;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c
+--- linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c 2007-12-21 15:36:11.000000000 -0500
+@@ -72,12 +72,11 @@
+ int ret;
+
+ for (np = NULL, i = 0;
+- (np = of_find_compatible_node(np, "network", "tsi-ethernet")) != NULL;
++ (np = of_find_compatible_node(np, "network", "tsi108-ethernet")) != NULL;
+ i++) {
+ struct resource r[2];
+- struct device_node *phy;
++ struct device_node *phy, *mdio;
+ hw_info tsi_eth_data;
+- const unsigned int *id;
+ const unsigned int *phy_id;
+ const void *mac_addr;
+ const phandle *ph;
+@@ -111,6 +110,13 @@
+ if (mac_addr)
+ memcpy(tsi_eth_data.mac_addr, mac_addr, 6);
+
++ ph = of_get_property(np, "mdio-handle", NULL);
++ mdio = of_find_node_by_phandle(*ph);
++ ret = of_address_to_resource(mdio, 0, &res);
++ of_node_put(mdio);
++ if (ret)
++ goto unreg;
++
+ ph = of_get_property(np, "phy-handle", NULL);
+ phy = of_find_node_by_phandle(*ph);
+
+@@ -119,20 +125,25 @@
+ goto unreg;
+ }
+
+- id = of_get_property(phy, "reg", NULL);
+- phy_id = of_get_property(phy, "phy-id", NULL);
+- ret = of_address_to_resource(phy, 0, &res);
+- if (ret) {
+- of_node_put(phy);
+- goto unreg;
+- }
++ phy_id = of_get_property(phy, "reg", NULL);
++
+ tsi_eth_data.regs = r[0].start;
+ tsi_eth_data.phyregs = res.start;
+ tsi_eth_data.phy = *phy_id;
+ tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0);
+- if (of_device_is_compatible(phy, "bcm54xx"))
++
++ /* Some boards with the TSI108 bridge (e.g. Holly)
++ * have a miswiring of the ethernet PHYs which
++ * requires a workaround. The special
++ * "txc-rxc-delay-disable" property enables this
++ * workaround. FIXME: Need to port the tsi108_eth
++ * driver itself to phylib and use a non-misleading
++ * name for the workaround flag - it's not actually to
++ * do with the model of PHY in use */
++ if (of_get_property(phy, "txc-rxc-delay-disable", NULL))
+ tsi_eth_data.phy_type = TSI108_PHY_BCM54XX;
+ of_node_put(phy);
++
+ ret =
+ platform_device_add_data(tsi_eth_dev, &tsi_eth_data,
+ sizeof(hw_info));
+diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/enet.c linux-2.6.22-591/arch/ppc/8260_io/enet.c
+--- linux-2.6.22-570/arch/ppc/8260_io/enet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/8260_io/enet.c 2007-12-21 15:36:11.000000000 -0500
+@@ -477,9 +477,9 @@
+ }
+ else {
+ skb_put(skb,pkt_len-4); /* Make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)__va(bdp->cbd_bufaddr),
+- pkt_len-4, 0);
++ pkt_len-4);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ }
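This hunk, and the matching ones in fcc_enet.c, 8xx_io/enet.c and fec.c
below, swap the obsolete eth_copy_and_sum() for skb_copy_to_linear_data();
the dropped trailing 0 was a checksum seed that the old helper had long
since stopped using. The surrounding copy-and-push receive pattern, sketched
with placeholder names (rx_buf and pkt_len stand in for the buffer-descriptor
fields):

	/* Sketch; needs <linux/netdevice.h> and <linux/skbuff.h>. */
	static void rx_one_packet(struct net_device *dev,
				  const void *rx_buf, int pkt_len)
	{
		struct sk_buff *skb = dev_alloc_skb(pkt_len + 2);

		if (!skb)
			return;			/* drop on allocation failure */
		skb_reserve(skb, 2);		/* align the IP header */
		skb_put(skb, pkt_len);		/* make room */
		skb_copy_to_linear_data(skb, rx_buf, pkt_len);
		skb->protocol = eth_type_trans(skb, dev);
		netif_rx(skb);
	}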
+diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c
+--- linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c 2007-12-21 15:36:11.000000000 -0500
+@@ -734,9 +734,9 @@
+ }
+ else {
+ skb_put(skb,pkt_len); /* Make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)__va(bdp->cbd_bufaddr),
+- pkt_len, 0);
++ pkt_len);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/enet.c linux-2.6.22-591/arch/ppc/8xx_io/enet.c
+--- linux-2.6.22-570/arch/ppc/8xx_io/enet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/8xx_io/enet.c 2007-12-21 15:36:11.000000000 -0500
+@@ -506,9 +506,9 @@
+ }
+ else {
+ skb_put(skb,pkt_len-4); /* Make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ cep->rx_vaddr[bdp - cep->rx_bd_base],
+- pkt_len-4, 0);
++ pkt_len-4);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/fec.c linux-2.6.22-591/arch/ppc/8xx_io/fec.c
+--- linux-2.6.22-570/arch/ppc/8xx_io/fec.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/8xx_io/fec.c 2007-12-21 15:36:11.000000000 -0500
+@@ -725,7 +725,7 @@
+ fep->stats.rx_dropped++;
+ } else {
+ skb_put(skb,pkt_len-4); /* Make room */
+- eth_copy_and_sum(skb, data, pkt_len-4, 0);
++ skb_copy_to_linear_data(skb, data, pkt_len-4);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/Kconfig.debug linux-2.6.22-591/arch/ppc/Kconfig.debug
+--- linux-2.6.22-570/arch/ppc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -2,42 +2,6 @@
+
+ source "lib/Kconfig.debug"
+
+-config KGDB
+- bool "Include kgdb kernel debugger"
+- depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx)
+- select DEBUG_INFO
+- help
+- Include in-kernel hooks for kgdb, the Linux kernel source level
+- debugger. See <http://kgdb.sourceforge.net/> for more information.
+- Unless you are intending to debug the kernel, say N here.
+-
+-choice
+- prompt "Serial Port"
+- depends on KGDB
+- default KGDB_TTYS1
+-
+-config KGDB_TTYS0
+- bool "ttyS0"
+-
+-config KGDB_TTYS1
+- bool "ttyS1"
+-
+-config KGDB_TTYS2
+- bool "ttyS2"
+-
+-config KGDB_TTYS3
+- bool "ttyS3"
+-
+-endchoice
+-
+-config KGDB_CONSOLE
+- bool "Enable serial console thru kgdb port"
+- depends on KGDB && 8xx || CPM2
+- help
+- If you enable this, all serial console messages will be sent
+- over the gdb stub.
+- If unsure, say N.
+-
+ config XMON
+ bool "Include xmon kernel debugger"
+ depends on DEBUG_KERNEL
+diff -Nurb linux-2.6.22-570/arch/ppc/amiga/config.c linux-2.6.22-591/arch/ppc/amiga/config.c
+--- linux-2.6.22-570/arch/ppc/amiga/config.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/amiga/config.c 2007-12-21 15:36:11.000000000 -0500
+@@ -753,17 +753,11 @@
+ void amiga_serial_console_write(struct console *co, const char *s,
+ unsigned int count)
+ {
+-#if 0 /* def CONFIG_KGDB */
+- /* FIXME:APUS GDB doesn't seem to like O-packages before it is
+- properly connected with the target. */
+- __gdb_output_string (s, count);
+-#else
+ while (count--) {
+ if (*s == '\n')
+ amiga_serial_putc('\r');
+ amiga_serial_putc(*s++);
+ }
+-#endif
+ }
+
+ #ifdef CONFIG_SERIAL_CONSOLE
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/Makefile linux-2.6.22-591/arch/ppc/kernel/Makefile
+--- linux-2.6.22-570/arch/ppc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -14,7 +14,7 @@
+ obj-$(CONFIG_MODULES) += ppc_ksyms.o
+ obj-$(CONFIG_PCI) += pci.o
+ obj-$(CONFIG_RAPIDIO) += rio.o
+-obj-$(CONFIG_KGDB) += ppc-stub.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o
+ obj-$(CONFIG_SMP) += smp.o smp-tbsync.o
+ obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb.c linux-2.6.22-591/arch/ppc/kernel/kgdb.c
+--- linux-2.6.22-570/arch/ppc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/ppc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,348 @@
++/*
++ * arch/ppc/kernel/kgdb.c
++ *
++ * PowerPC backend to the KGDB stub.
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
++ * Copyright (C) 2003 Timesys Corporation.
++ * Copyright (C) 2004, 2006 MontaVista Software, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program as licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/smp.h>
++#include <linux/signal.h>
++#include <linux/ptrace.h>
++#include <asm/current.h>
++#include <asm/ptrace.h>
++#include <asm/processor.h>
++#include <asm/machdep.h>
++
++/*
++ * This table contains the mapping between PowerPC hardware trap types, and
++ * signals, which are primarily what GDB understands. GDB and the kernel
++ * don't always agree on values, so we use constants taken from gdb-6.2.
++ */
++static struct hard_trap_info
++{
++ unsigned int tt; /* Trap type code for powerpc */
++ unsigned char signo; /* Signal that we map this trap into */
++} hard_trap_info[] = {
++ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */
++ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */
++ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
++ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */
++ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */
++ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */
++ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
++ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
++ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
++ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
++#if defined(CONFIG_FSL_BOOKE)
++ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
++ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
++ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
++ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */
++ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */
++ { 0x2060, 0x04 /* SIGILL */ }, /* performance monitor */
++ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
++ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
++ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
++#else
++ { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */
++ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
++ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
++ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
++ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
++#endif
++#else
++ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
++#if defined(CONFIG_8xx)
++ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
++#else
++ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
++ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
++ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
++ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */
++ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */
++ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */
++ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */
++#endif
++#endif
++ { 0x0000, 0x00 } /* Must be last */
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int computeSignal(unsigned int tt)
++{
++ struct hard_trap_info *ht;
++
++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++ if (ht->tt == tt)
++ return ht->signo;
++
++ return SIGHUP; /* default for things we don't know about */
++}
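The table is sentinel-terminated by the { 0x0000, 0x00 } entry, so the loop
stops once either field is zero. As a worked example, a program check (trap
type 0x0700) yields 0x05, i.e. SIGTRAP, which GDB treats as a breakpoint,
while an unknown trap type falls through to SIGHUP. An illustrative
self-check, not part of the stub (it would have to live in this file, since
computeSignal() is static):

	static void computeSignal_selftest(void)
	{
		BUG_ON(computeSignal(0x0700) != SIGTRAP);  /* program check */
		BUG_ON(computeSignal(0xbeef) != SIGHUP);   /* unknown trap */
	}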
++
++/* KGDB functions to use existing PowerPC hooks. */
++static void kgdb_debugger(struct pt_regs *regs)
++{
++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++}
++
++static int kgdb_breakpoint(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
++ regs->nip += 4;
++
++ return 1;
++}
++
++static int kgdb_singlestep(struct pt_regs *regs)
++{
++ struct thread_info *thread_info, *exception_thread_info;
++
++ if (user_mode(regs))
++ return 0;
++ /*
++ * On Book E and perhaps other processors, singlestep is handled on
++ * the critical exception stack. This causes current_thread_info()
++ * to fail, since it locates the thread_info by masking off
++ * the low bits of the current stack pointer. We work around
++ * this issue by copying the thread_info from the kernel stack
++ * before calling kgdb_handle_exception, and copying it back
++ * afterwards. On most processors the copy is avoided since
++ * exception_thread_info == thread_info.
++ */
++ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
++ exception_thread_info = current_thread_info();
++
++ if (thread_info != exception_thread_info)
++ memcpy(exception_thread_info, thread_info, sizeof *thread_info);
++
++ kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++ if (thread_info != exception_thread_info)
++ memcpy(thread_info, exception_thread_info, sizeof *thread_info);
++
++ return 1;
++}
++
++int kgdb_iabr_match(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++ return 1;
++}
++
++int kgdb_dabr_match(struct pt_regs *regs)
++{
++ if (user_mode(regs))
++ return 0;
++
++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++ return 1;
++}
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ unsigned long *ptr = gdb_regs;
++ int reg;
++
++ memset(gdb_regs, 0, MAXREG * 4);
++
++ for (reg = 0; reg < 32; reg++)
++ *(ptr++) = regs->gpr[reg];
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ *(ptr++) = current->thread.evr[reg];
++#else
++ ptr += 32;
++#endif
++#else
++ ptr += 64;
++#endif
++
++ *(ptr++) = regs->nip;
++ *(ptr++) = regs->msr;
++ *(ptr++) = regs->ccr;
++ *(ptr++) = regs->link;
++ *(ptr++) = regs->ctr;
++ *(ptr++) = regs->xer;
++
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ *(ptr++) = current->thread.acc >> 32;
++ *(ptr++) = current->thread.acc & 0xffffffff;
++ *(ptr++) = current->thread.spefscr;
++#endif
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
++ STACK_FRAME_OVERHEAD);
++ unsigned long *ptr = gdb_regs;
++ int reg;
++
++ memset(gdb_regs, 0, MAXREG * 4);
++
++ /* Regs GPR0-2 */
++ for (reg = 0; reg < 3; reg++)
++ *(ptr++) = regs->gpr[reg];
++
++ /* Regs GPR3-13 are not saved */
++ ptr += 11;
++
++ /* Regs GPR14-31 */
++ for (reg = 14; reg < 32; reg++)
++ *(ptr++) = regs->gpr[reg];
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ *(ptr++) = p->thread.evr[reg];
++#else
++ ptr += 32;
++#endif
++#else
++ ptr += 64;
++#endif
++
++ *(ptr++) = regs->nip;
++ *(ptr++) = regs->msr;
++ *(ptr++) = regs->ccr;
++ *(ptr++) = regs->link;
++ *(ptr++) = regs->ctr;
++ *(ptr++) = regs->xer;
++
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ *(ptr++) = p->thread.acc >> 32;
++ *(ptr++) = p->thread.acc & 0xffffffff;
++ *(ptr++) = p->thread.spefscr;
++#endif
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ unsigned long *ptr = gdb_regs;
++ int reg;
++#ifdef CONFIG_SPE
++ union {
++ u32 v32[2];
++ u64 v64;
++ } acc;
++#endif
++
++ for (reg = 0; reg < 32; reg++)
++ regs->gpr[reg] = *(ptr++);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++ for (reg = 0; reg < 32; reg++)
++ current->thread.evr[reg] = *(ptr++);
++#else
++ ptr += 32;
++#endif
++#else
++ ptr += 64;
++#endif
++
++ regs->nip = *(ptr++);
++ regs->msr = *(ptr++);
++ regs->ccr = *(ptr++);
++ regs->link = *(ptr++);
++ regs->ctr = *(ptr++);
++ regs->xer = *(ptr++);
++
++#ifdef CONFIG_SPE
++ /* u64 acc */
++ acc.v32[0] = *(ptr++);
++ acc.v32[1] = *(ptr++);
++ current->thread.acc = acc.v64;
++ current->thread.spefscr = *(ptr++);
++#endif
++}
++
++/*
++ * This function does PowerPC specific processing for interfacing to gdb.
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++ char *remcom_in_buffer, char *remcom_out_buffer,
++ struct pt_regs *linux_regs)
++{
++ char *ptr = &remcom_in_buffer[1];
++ unsigned long addr;
++
++ switch (remcom_in_buffer[0])
++ {
++ /*
++ * sAA..AA Step one instruction from AA..AA
++ * This will return an error to gdb ..
++ */
++ case 's':
++ case 'c':
++ /* handle the optional parameter */
++ if (kgdb_hex2long (&ptr, &addr))
++ linux_regs->nip = addr;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ /* set the trace bit if we're stepping */
++ if (remcom_in_buffer[0] == 's') {
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++ mtspr(SPRN_DBCR0,
++ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
++ linux_regs->msr |= MSR_DE;
++#else
++ linux_regs->msr |= MSR_SE;
++#endif
++ debugger_step = 1;
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ smp_processor_id());
++ }
++ return 0;
++ }
++
++ return -1;
++}
++
++/*
++ * Global data
++ */
++struct kgdb_arch arch_kgdb_ops = {
++ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
++};
++
++int kgdb_arch_init(void)
++{
++ debugger = kgdb_debugger;
++ debugger_bpt = kgdb_breakpoint;
++ debugger_sstep = kgdb_singlestep;
++ debugger_iabr_match = kgdb_iabr_match;
++ debugger_dabr_match = kgdb_dabr_match;
++
++ return 0;
++}
++
++arch_initcall(kgdb_arch_init);
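For readers tracing the code above: hard_trap_info maps PowerPC exception vectors to the signal numbers gdb expects (gdb-6.2's numbering, per the comment at the top of the table, not the kernel's), and computeSignal() is a linear scan terminated by the all-zero sentinel entry. The sketch below reproduces that lookup in user space with a trimmed copy of the table so it can be compiled and run standalone; only the table values come from the patch, the rest is illustrative scaffolding.

#include <stdio.h>

/* User-space rendition of the kernel's trap-to-signal lookup. */
struct hard_trap_info {
	unsigned int tt;     /* PowerPC trap vector */
	unsigned char signo; /* signal number in gdb-6.2's numbering */
};

static const struct hard_trap_info hard_trap_info[] = {
	{ 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
	{ 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
	{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
	{ 0x0000, 0x00 }                /* must be last */
};

static int compute_signal(unsigned int tt)
{
	const struct hard_trap_info *ht;

	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
		if (ht->tt == tt)
			return ht->signo;
	return 1; /* SIGHUP: default for traps not in the table */
}

int main(void)
{
	printf("trap 0x0700 -> signal %d\n", compute_signal(0x0700)); /* 5 */
	printf("trap 0xdead -> signal %d\n", compute_signal(0xdead)); /* 1 */
	return 0;
}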
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S
+--- linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++ .text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
++
++_GLOBAL(kgdb_fault_setjmp)
++ mflr r0
++ stw r0,0(r3)
++ stw r1,4(r3)
++ stw r2,8(r3)
++ mfcr r0
++ stw r0,12(r3)
++ stmw r13,16(r3)
++ li r3,0
++ blr
++
++_GLOBAL(kgdb_fault_longjmp)
++ lmw r13,16(r3)
++ lwz r0,12(r3)
++ mtcrf 0x38,r0
++ lwz r0,0(r3)
++ lwz r1,4(r3)
++ lwz r2,8(r3)
++ mtlr r0
++ mr r3,r1
++ blr
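These two routines are the fault-recovery half of the stub: before kgdb touches a possibly bad address it records LR, r1, r2, CR and r13-r31 with kgdb_fault_setjmp(), and the page-fault path (see the arch/ppc/mm/fault.c hunk later in this patch) calls kgdb_fault_longjmp() to unwind instead of oopsing. Below is a minimal user-space sketch of the same control flow, substituting sigsetjmp/siglongjmp and a SIGSEGV handler for the kernel primitives; the names may_fault and probe_read are invented for illustration.

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static sigjmp_buf fault_jmp;            /* analogue of kgdb_fault_jmp_regs */
static volatile sig_atomic_t may_fault; /* analogue of kgdb_may_fault */

/* Stands in for the kernel fault handler patched in arch/ppc/mm/fault.c. */
static void segv_handler(int sig)
{
	(void)sig;
	if (may_fault)
		siglongjmp(fault_jmp, 1); /* "not reached" from the caller's view */
	_exit(1);                         /* a real fault: give up */
}

/* Read *addr, reporting failure instead of crashing. */
static int probe_read(const volatile int *addr, int *val)
{
	may_fault = 1;
	if (sigsetjmp(fault_jmp, 1)) { /* the fault path lands here */
		may_fault = 0;
		return -1;
	}
	*val = *addr; /* may fault */
	may_fault = 0;
	return 0;
}

int main(void)
{
	int ok = 42, out;

	signal(SIGSEGV, segv_handler);
	if (probe_read(&ok, &out) == 0)
		printf("good address: read %d\n", out);
	if (probe_read((int *)8, &out) != 0)
		printf("bad address: recovered via longjmp\n");
	return 0;
}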
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/misc.S linux-2.6.22-591/arch/ppc/kernel/misc.S
+--- linux-2.6.22-570/arch/ppc/kernel/misc.S 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/ppc/kernel/misc.S 2007-12-21 15:36:11.000000000 -0500
+@@ -328,7 +328,7 @@
+ mtspr SPRN_L1CSR0,r3
+ isync
+ blr
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+ mfspr r3,SPRN_L1CSR1
+ ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
+ mtspr SPRN_L1CSR1,r3
+@@ -355,7 +355,7 @@
+ _GLOBAL(__flush_icache_range)
+ BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ li r5,L1_CACHE_BYTES-1
+ andc r3,r3,r5
+ subf r4,r3,r4
+@@ -472,7 +472,7 @@
+ _GLOBAL(__flush_dcache_icache)
+ BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ rlwinm r3,r3,0,0,19 /* Get page base address */
+ li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
+ mtctr r4
+@@ -500,7 +500,7 @@
+ _GLOBAL(__flush_dcache_icache_phys)
+ BEGIN_FTR_SECTION
+ blr /* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ mfmsr r10
+ rlwinm r0,r10,0,28,26 /* clear DR */
+ mtmsr r0
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c
+--- linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,866 +0,0 @@
+-/*
+- * ppc-stub.c: KGDB support for the Linux kernel.
+- *
+- * adapted from arch/sparc/kernel/sparc-stub.c for the PowerPC
+- * some stuff borrowed from Paul Mackerras' xmon
+- * Copyright (C) 1998 Michael AK Tesch (tesch@cs.wisc.edu)
+- *
+- * Modifications to run under Linux
+- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+- *
+- * This file originally came from the gdb sources, and the
+- * copyright notices have been retained below.
+- */
+-
+-/****************************************************************************
+-
+- THIS SOFTWARE IS NOT COPYRIGHTED
+-
+- HP offers the following for use in the public domain. HP makes no
+- warranty with regard to the software or its performance and the
+- user accepts the software "AS IS" with all faults.
+-
+- HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD
+- TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+-
+-****************************************************************************/
+-
+-/****************************************************************************
+- * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $
+- *
+- * Module name: remcom.c $
+- * Revision: 1.34 $
+- * Date: 91/03/09 12:29:49 $
+- * Contributor: Lake Stevens Instrument Division$
+- *
+- * Description: low level support for gdb debugger. $
+- *
+- * Considerations: only works on target hardware $
+- *
+- * Written by: Glenn Engel $
+- * ModuleState: Experimental $
+- *
+- * NOTES: See Below $
+- *
+- * Modified for SPARC by Stu Grossman, Cygnus Support.
+- *
+- * This code has been extensively tested on the Fujitsu SPARClite demo board.
+- *
+- * To enable debugger support, two things need to happen. One, a
+- * call to set_debug_traps() is necessary in order to allow any breakpoints
+- * or error conditions to be properly intercepted and reported to gdb.
+- * Two, a breakpoint needs to be generated to begin communication. This
+- * is most easily accomplished by a call to breakpoint(). Breakpoint()
+- * simulates a breakpoint by executing a trap #1.
+- *
+- *************
+- *
+- * The following gdb commands are supported:
+- *
+- * command function Return value
+- *
+- * g return the value of the CPU registers hex data or ENN
+- * G set the value of the CPU registers OK or ENN
+- * qOffsets Get section offsets. Reply is Text=xxx;Data=yyy;Bss=zzz
+- *
+- * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN
+- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN
+- *
+- * c Resume at current address SNN ( signal NN)
+- * cAA..AA Continue at address AA..AA SNN
+- *
+- * s Step one instruction SNN
+- * sAA..AA Step one instruction from AA..AA SNN
+- *
+- * k kill
+- *
+- * ? What was the last sigval ? SNN (signal NN)
+- *
+- * bBB..BB Set baud rate to BB..BB OK or BNN, then sets
+- * baud rate
+- *
+- * All commands and responses are sent with a packet which includes a
+- * checksum. A packet consists of
+- *
+- * $<packet info>#<checksum>.
+- *
+- * where
+- * <packet info> :: <characters representing the command or response>
+- * <checksum> :: <two hex digits computed as modulo 256 sum of <packetinfo>>
+- *
+- * When a packet is received, it is first acknowledged with either '+' or '-'.
+- * '+' indicates a successful transfer. '-' indicates a failed transfer.
+- *
+- * Example:
+- *
+- * Host: Reply:
+- * $m0,10#2a +$00010203040506070809101112131415#42
+- *
+- ****************************************************************************/
+-
+-#include <linux/kernel.h>
+-#include <linux/string.h>
+-#include <linux/mm.h>
+-#include <linux/smp.h>
+-#include <linux/smp_lock.h>
+-#include <linux/init.h>
+-#include <linux/sysrq.h>
+-
+-#include <asm/cacheflush.h>
+-#include <asm/system.h>
+-#include <asm/signal.h>
+-#include <asm/kgdb.h>
+-#include <asm/pgtable.h>
+-#include <asm/ptrace.h>
+-
+-void breakinst(void);
+-
+-/*
+- * BUFMAX defines the maximum number of characters in inbound/outbound buffers
+- * at least NUMREGBYTES*2 are needed for register packets
+- */
+-#define BUFMAX 2048
+-static char remcomInBuffer[BUFMAX];
+-static char remcomOutBuffer[BUFMAX];
+-
+-static int initialized;
+-static int kgdb_active;
+-static int kgdb_started;
+-static u_int fault_jmp_buf[100];
+-static int kdebug;
+-
+-
+-static const char hexchars[]="0123456789abcdef";
+-
+-/* Place where we save old trap entries for restoration - sparc*/
+-/* struct tt_entry kgdb_savettable[256]; */
+-/* typedef void (*trapfunc_t)(void); */
+-
+-static void kgdb_fault_handler(struct pt_regs *regs);
+-static int handle_exception (struct pt_regs *regs);
+-
+-#if 0
+-/* Install an exception handler for kgdb */
+-static void exceptionHandler(int tnum, unsigned int *tfunc)
+-{
+- /* We are dorking with a live trap table, all irqs off */
+-}
+-#endif
+-
+-int
+-kgdb_setjmp(long *buf)
+-{
+- asm ("mflr 0; stw 0,0(%0);"
+- "stw 1,4(%0); stw 2,8(%0);"
+- "mfcr 0; stw 0,12(%0);"
+- "stmw 13,16(%0)"
+- : : "r" (buf));
+- /* XXX should save fp regs as well */
+- return 0;
+-}
+-void
+-kgdb_longjmp(long *buf, int val)
+-{
+- if (val == 0)
+- val = 1;
+- asm ("lmw 13,16(%0);"
+- "lwz 0,12(%0); mtcrf 0x38,0;"
+- "lwz 0,0(%0); lwz 1,4(%0); lwz 2,8(%0);"
+- "mtlr 0; mr 3,%1"
+- : : "r" (buf), "r" (val));
+-}
+-/* Convert ch from a hex digit to an int */
+-static int
+-hex(unsigned char ch)
+-{
+- if (ch >= 'a' && ch <= 'f')
+- return ch-'a'+10;
+- if (ch >= '0' && ch <= '9')
+- return ch-'0';
+- if (ch >= 'A' && ch <= 'F')
+- return ch-'A'+10;
+- return -1;
+-}
+-
+-/* Convert the memory pointed to by mem into hex, placing result in buf.
+- * Return a pointer to the last char put in buf (null), in case of mem fault,
+- * return 0.
+- */
+-static unsigned char *
+-mem2hex(const char *mem, char *buf, int count)
+-{
+- unsigned char ch;
+- unsigned short tmp_s;
+- unsigned long tmp_l;
+-
+- if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+- debugger_fault_handler = kgdb_fault_handler;
+-
+- /* Accessing 16 bit and 32 bit objects in a single
+- ** load instruction is required to avoid bad side
+- ** effects for some IO registers.
+- */
+-
+- if ((count == 2) && (((long)mem & 1) == 0)) {
+- tmp_s = *(unsigned short *)mem;
+- mem += 2;
+- *buf++ = hexchars[(tmp_s >> 12) & 0xf];
+- *buf++ = hexchars[(tmp_s >> 8) & 0xf];
+- *buf++ = hexchars[(tmp_s >> 4) & 0xf];
+- *buf++ = hexchars[tmp_s & 0xf];
+-
+- } else if ((count == 4) && (((long)mem & 3) == 0)) {
+- tmp_l = *(unsigned int *)mem;
+- mem += 4;
+- *buf++ = hexchars[(tmp_l >> 28) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 24) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 20) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 16) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 12) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 8) & 0xf];
+- *buf++ = hexchars[(tmp_l >> 4) & 0xf];
+- *buf++ = hexchars[tmp_l & 0xf];
+-
+- } else {
+- while (count-- > 0) {
+- ch = *mem++;
+- *buf++ = hexchars[ch >> 4];
+- *buf++ = hexchars[ch & 0xf];
+- }
+- }
+-
+- } else {
+- /* error condition */
+- }
+- debugger_fault_handler = NULL;
+- *buf = 0;
+- return buf;
+-}
+-
+-/* convert the hex array pointed to by buf into binary to be placed in mem
+- * return a pointer to the character AFTER the last byte written.
+-*/
+-static char *
+-hex2mem(char *buf, char *mem, int count)
+-{
+- unsigned char ch;
+- int i;
+- char *orig_mem;
+- unsigned short tmp_s;
+- unsigned long tmp_l;
+-
+- orig_mem = mem;
+-
+- if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+- debugger_fault_handler = kgdb_fault_handler;
+-
+- /* Accessing 16 bit and 32 bit objects in a single
+- ** store instruction is required to avoid bad side
+- ** effects for some IO registers.
+- */
+-
+- if ((count == 2) && (((long)mem & 1) == 0)) {
+- tmp_s = hex(*buf++) << 12;
+- tmp_s |= hex(*buf++) << 8;
+- tmp_s |= hex(*buf++) << 4;
+- tmp_s |= hex(*buf++);
+-
+- *(unsigned short *)mem = tmp_s;
+- mem += 2;
+-
+- } else if ((count == 4) && (((long)mem & 3) == 0)) {
+- tmp_l = hex(*buf++) << 28;
+- tmp_l |= hex(*buf++) << 24;
+- tmp_l |= hex(*buf++) << 20;
+- tmp_l |= hex(*buf++) << 16;
+- tmp_l |= hex(*buf++) << 12;
+- tmp_l |= hex(*buf++) << 8;
+- tmp_l |= hex(*buf++) << 4;
+- tmp_l |= hex(*buf++);
+-
+- *(unsigned long *)mem = tmp_l;
+- mem += 4;
+-
+- } else {
+- for (i=0; i<count; i++) {
+- ch = hex(*buf++) << 4;
+- ch |= hex(*buf++);
+- *mem++ = ch;
+- }
+- }
+-
+-
+- /*
+- ** Flush the data cache, invalidate the instruction cache.
+- */
+- flush_icache_range((int)orig_mem, (int)orig_mem + count - 1);
+-
+- } else {
+- /* error condition */
+- }
+- debugger_fault_handler = NULL;
+- return mem;
+-}
+-
+-/*
+- * While we find nice hex chars, build an int.
+- * Return number of chars processed.
+- */
+-static int
+-hexToInt(char **ptr, int *intValue)
+-{
+- int numChars = 0;
+- int hexValue;
+-
+- *intValue = 0;
+-
+- if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+- debugger_fault_handler = kgdb_fault_handler;
+- while (**ptr) {
+- hexValue = hex(**ptr);
+- if (hexValue < 0)
+- break;
+-
+- *intValue = (*intValue << 4) | hexValue;
+- numChars ++;
+-
+- (*ptr)++;
+- }
+- } else {
+- /* error condition */
+- }
+- debugger_fault_handler = NULL;
+-
+- return (numChars);
+-}
+-
+-/* scan for the sequence $<data>#<checksum> */
+-static void
+-getpacket(char *buffer)
+-{
+- unsigned char checksum;
+- unsigned char xmitcsum;
+- int i;
+- int count;
+- unsigned char ch;
+-
+- do {
+- /* wait around for the start character, ignore all other
+- * characters */
+- while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+-
+- checksum = 0;
+- xmitcsum = -1;
+-
+- count = 0;
+-
+- /* now, read until a # or end of buffer is found */
+- while (count < BUFMAX) {
+- ch = getDebugChar() & 0x7f;
+- if (ch == '#')
+- break;
+- checksum = checksum + ch;
+- buffer[count] = ch;
+- count = count + 1;
+- }
+-
+- if (count >= BUFMAX)
+- continue;
+-
+- buffer[count] = 0;
+-
+- if (ch == '#') {
+- xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+- xmitcsum |= hex(getDebugChar() & 0x7f);
+- if (checksum != xmitcsum)
+- putDebugChar('-'); /* failed checksum */
+- else {
+- putDebugChar('+'); /* successful transfer */
+- /* if a sequence char is present, reply the ID */
+- if (buffer[2] == ':') {
+- putDebugChar(buffer[0]);
+- putDebugChar(buffer[1]);
+- /* remove sequence chars from buffer */
+- count = strlen(buffer);
+- for (i=3; i <= count; i++)
+- buffer[i-3] = buffer[i];
+- }
+- }
+- }
+- } while (checksum != xmitcsum);
+-}
+-
+-/* send the packet in buffer. */
+-static void putpacket(unsigned char *buffer)
+-{
+- unsigned char checksum;
+- int count;
+- unsigned char ch, recv;
+-
+- /* $<packet info>#<checksum>. */
+- do {
+- putDebugChar('$');
+- checksum = 0;
+- count = 0;
+-
+- while ((ch = buffer[count])) {
+- putDebugChar(ch);
+- checksum += ch;
+- count += 1;
+- }
+-
+- putDebugChar('#');
+- putDebugChar(hexchars[checksum >> 4]);
+- putDebugChar(hexchars[checksum & 0xf]);
+- recv = getDebugChar();
+- } while ((recv & 0x7f) != '+');
+-}
+-
+-static void kgdb_flush_cache_all(void)
+-{
+- flush_instruction_cache();
+-}
+-
+-/* Set up exception handlers for tracing and breakpoints
+- * [could be called kgdb_init()]
+- */
+-void set_debug_traps(void)
+-{
+-#if 0
+- unsigned char c;
+-
+- save_and_cli(flags);
+-
+- /* In case GDB is started before us, ack any packets (presumably
+- * "$?#xx") sitting there.
+- *
+- * I've found this code causes more problems than it solves,
+- * so that's why it's commented out. GDB seems to work fine
+- * now starting either before or after the kernel -bwb
+- */
+-
+- while((c = getDebugChar()) != '$');
+- while((c = getDebugChar()) != '#');
+- c = getDebugChar(); /* eat first csum byte */
+- c = getDebugChar(); /* eat second csum byte */
+- putDebugChar('+'); /* ack it */
+-#endif
+- debugger = kgdb;
+- debugger_bpt = kgdb_bpt;
+- debugger_sstep = kgdb_sstep;
+- debugger_iabr_match = kgdb_iabr_match;
+- debugger_dabr_match = kgdb_dabr_match;
+-
+- initialized = 1;
+-}
+-
+-static void kgdb_fault_handler(struct pt_regs *regs)
+-{
+- kgdb_longjmp((long*)fault_jmp_buf, 1);
+-}
+-
+-int kgdb_bpt(struct pt_regs *regs)
+-{
+- return handle_exception(regs);
+-}
+-
+-int kgdb_sstep(struct pt_regs *regs)
+-{
+- return handle_exception(regs);
+-}
+-
+-void kgdb(struct pt_regs *regs)
+-{
+- handle_exception(regs);
+-}
+-
+-int kgdb_iabr_match(struct pt_regs *regs)
+-{
+- printk(KERN_ERR "kgdb doesn't support iabr, what?!?\n");
+- return handle_exception(regs);
+-}
+-
+-int kgdb_dabr_match(struct pt_regs *regs)
+-{
+- printk(KERN_ERR "kgdb doesn't support dabr, what?!?\n");
+- return handle_exception(regs);
+-}
+-
+-/* Convert the hardware trap type code to a unix signal number. */
+-/*
+- * This table contains the mapping between PowerPC hardware trap types, and
+- * signals, which are primarily what GDB understands.
+- */
+-static struct hard_trap_info
+-{
+- unsigned int tt; /* Trap type code for powerpc */
+- unsigned char signo; /* Signal that we map this trap into */
+-} hard_trap_info[] = {
+-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+- { 0x100, SIGINT }, /* critical input interrupt */
+- { 0x200, SIGSEGV }, /* machine check */
+- { 0x300, SIGSEGV }, /* data storage */
+- { 0x400, SIGBUS }, /* instruction storage */
+- { 0x500, SIGINT }, /* interrupt */
+- { 0x600, SIGBUS }, /* alignment */
+- { 0x700, SIGILL }, /* program */
+- { 0x800, SIGILL }, /* reserved */
+- { 0x900, SIGILL }, /* reserved */
+- { 0xa00, SIGILL }, /* reserved */
+- { 0xb00, SIGILL }, /* reserved */
+- { 0xc00, SIGCHLD }, /* syscall */
+- { 0xd00, SIGILL }, /* reserved */
+- { 0xe00, SIGILL }, /* reserved */
+- { 0xf00, SIGILL }, /* reserved */
+- /*
+- ** 0x1000 PIT
+- ** 0x1010 FIT
+- ** 0x1020 watchdog
+- ** 0x1100 data TLB miss
+- ** 0x1200 instruction TLB miss
+- */
+- { 0x2002, SIGTRAP}, /* debug */
+-#else
+- { 0x200, SIGSEGV }, /* machine check */
+- { 0x300, SIGSEGV }, /* address error (store) */
+- { 0x400, SIGBUS }, /* instruction bus error */
+- { 0x500, SIGINT }, /* interrupt */
+- { 0x600, SIGBUS }, /* alignment */
+- { 0x700, SIGTRAP }, /* breakpoint trap */
+- { 0x800, SIGFPE }, /* fpu unavail */
+- { 0x900, SIGALRM }, /* decrementer */
+- { 0xa00, SIGILL }, /* reserved */
+- { 0xb00, SIGILL }, /* reserved */
+- { 0xc00, SIGCHLD }, /* syscall */
+- { 0xd00, SIGTRAP }, /* single-step/watch */
+- { 0xe00, SIGFPE }, /* fp assist */
+-#endif
+- { 0, 0} /* Must be last */
+-
+-};
+-
+-static int computeSignal(unsigned int tt)
+-{
+- struct hard_trap_info *ht;
+-
+- for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+- if (ht->tt == tt)
+- return ht->signo;
+-
+- return SIGHUP; /* default for things we don't know about */
+-}
+-
+-#define PC_REGNUM 64
+-#define SP_REGNUM 1
+-
+-/*
+- * This function does all command processing for interfacing to gdb.
+- */
+-static int
+-handle_exception (struct pt_regs *regs)
+-{
+- int sigval;
+- int addr;
+- int length;
+- char *ptr;
+- unsigned int msr;
+-
+- /* We don't handle user-mode breakpoints. */
+- if (user_mode(regs))
+- return 0;
+-
+- if (debugger_fault_handler) {
+- debugger_fault_handler(regs);
+- panic("kgdb longjump failed!\n");
+- }
+- if (kgdb_active) {
+- printk(KERN_ERR "interrupt while in kgdb, returning\n");
+- return 0;
+- }
+-
+- kgdb_active = 1;
+- kgdb_started = 1;
+-
+-#ifdef KGDB_DEBUG
+- printk("kgdb: entering handle_exception; trap [0x%x]\n",
+- (unsigned int)regs->trap);
+-#endif
+-
+- kgdb_interruptible(0);
+- lock_kernel();
+- msr = mfmsr();
+- mtmsr(msr & ~MSR_EE); /* disable interrupts */
+-
+- if (regs->nip == (unsigned long)breakinst) {
+- /* Skip over breakpoint trap insn */
+- regs->nip += 4;
+- }
+-
+- /* reply to host that an exception has occurred */
+- sigval = computeSignal(regs->trap);
+- ptr = remcomOutBuffer;
+-
+- *ptr++ = 'T';
+- *ptr++ = hexchars[sigval >> 4];
+- *ptr++ = hexchars[sigval & 0xf];
+- *ptr++ = hexchars[PC_REGNUM >> 4];
+- *ptr++ = hexchars[PC_REGNUM & 0xf];
+- *ptr++ = ':';
+- ptr = mem2hex((char *)&regs->nip, ptr, 4);
+- *ptr++ = ';';
+- *ptr++ = hexchars[SP_REGNUM >> 4];
+- *ptr++ = hexchars[SP_REGNUM & 0xf];
+- *ptr++ = ':';
+- ptr = mem2hex(((char *)regs) + SP_REGNUM*4, ptr, 4);
+- *ptr++ = ';';
+- *ptr++ = 0;
+-
+- putpacket(remcomOutBuffer);
+- if (kdebug)
+- printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+-
+- /* XXX We may want to add some features dealing with poking the
+- * XXX page tables, ... (look at sparc-stub.c for more info)
+- * XXX also required hacking to the gdb sources directly...
+- */
+-
+- while (1) {
+- remcomOutBuffer[0] = 0;
+-
+- getpacket(remcomInBuffer);
+- switch (remcomInBuffer[0]) {
+- case '?': /* report most recent signal */
+- remcomOutBuffer[0] = 'S';
+- remcomOutBuffer[1] = hexchars[sigval >> 4];
+- remcomOutBuffer[2] = hexchars[sigval & 0xf];
+- remcomOutBuffer[3] = 0;
+- break;
+-#if 0
+- case 'q': /* this screws up gdb for some reason...*/
+- {
+- extern long _start, sdata, __bss_start;
+-
+- ptr = &remcomInBuffer[1];
+- if (strncmp(ptr, "Offsets", 7) != 0)
+- break;
+-
+- ptr = remcomOutBuffer;
+- sprintf(ptr, "Text=%8.8x;Data=%8.8x;Bss=%8.8x",
+- &_start, &sdata, &__bss_start);
+- break;
+- }
+-#endif
+- case 'd':
+- /* toggle debug flag */
+- kdebug ^= 1;
+- break;
+-
+- case 'g': /* return the value of the CPU registers.
+- * some of them are non-PowerPC names :(
+- * they are stored in gdb like:
+- * struct {
+- * u32 gpr[32];
+- * f64 fpr[32];
+- * u32 pc, ps, cnd, lr; (ps=msr)
+- * u32 cnt, xer, mq;
+- * }
+- */
+- {
+- int i;
+- ptr = remcomOutBuffer;
+- /* General Purpose Regs */
+- ptr = mem2hex((char *)regs, ptr, 32 * 4);
+- /* Floating Point Regs - FIXME */
+- /*ptr = mem2hex((char *), ptr, 32 * 8);*/
+- for(i=0; i<(32*8*2); i++) { /* 2chars/byte */
+- ptr[i] = '0';
+- }
+- ptr += 32*8*2;
+- /* pc, msr, cr, lr, ctr, xer, (mq is unused) */
+- ptr = mem2hex((char *)&regs->nip, ptr, 4);
+- ptr = mem2hex((char *)&regs->msr, ptr, 4);
+- ptr = mem2hex((char *)&regs->ccr, ptr, 4);
+- ptr = mem2hex((char *)&regs->link, ptr, 4);
+- ptr = mem2hex((char *)&regs->ctr, ptr, 4);
+- ptr = mem2hex((char *)&regs->xer, ptr, 4);
+- }
+- break;
+-
+- case 'G': /* set the value of the CPU registers */
+- {
+- ptr = &remcomInBuffer[1];
+-
+- /*
+- * If the stack pointer has moved, you should pray.
+- * (cause only god can help you).
+- */
+-
+- /* General Purpose Regs */
+- hex2mem(ptr, (char *)regs, 32 * 4);
+-
+- /* Floating Point Regs - FIXME?? */
+- /*ptr = hex2mem(ptr, ??, 32 * 8);*/
+- ptr += 32*8*2;
+-
+- /* pc, msr, cr, lr, ctr, xer, (mq is unused) */
+- ptr = hex2mem(ptr, (char *)&regs->nip, 4);
+- ptr = hex2mem(ptr, (char *)&regs->msr, 4);
+- ptr = hex2mem(ptr, (char *)&regs->ccr, 4);
+- ptr = hex2mem(ptr, (char *)&regs->link, 4);
+- ptr = hex2mem(ptr, (char *)&regs->ctr, 4);
+- ptr = hex2mem(ptr, (char *)&regs->xer, 4);
+-
+- strcpy(remcomOutBuffer,"OK");
+- }
+- break;
+- case 'H':
+- /* don't do anything, yet, just acknowledge */
+- hexToInt(&ptr, &addr);
+- strcpy(remcomOutBuffer,"OK");
+- break;
+-
+- case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
+- /* Try to read %x,%x. */
+-
+- ptr = &remcomInBuffer[1];
+-
+- if (hexToInt(&ptr, &addr) && *ptr++ == ','
+- && hexToInt(&ptr, &length)) {
+- if (mem2hex((char *)addr, remcomOutBuffer,
+- length))
+- break;
+- strcpy(remcomOutBuffer, "E03");
+- } else
+- strcpy(remcomOutBuffer, "E01");
+- break;
+-
+- case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */
+- /* Try to read '%x,%x:'. */
+-
+- ptr = &remcomInBuffer[1];
+-
+- if (hexToInt(&ptr, &addr) && *ptr++ == ','
+- && hexToInt(&ptr, &length)
+- && *ptr++ == ':') {
+- if (hex2mem(ptr, (char *)addr, length))
+- strcpy(remcomOutBuffer, "OK");
+- else
+- strcpy(remcomOutBuffer, "E03");
+- flush_icache_range(addr, addr+length);
+- } else
+- strcpy(remcomOutBuffer, "E02");
+- break;
+-
+-
+- case 'k': /* kill the program, actually just continue */
+- case 'c': /* cAA..AA Continue; address AA..AA optional */
+- /* try to read optional parameter, pc unchanged if no parm */
+-
+- ptr = &remcomInBuffer[1];
+- if (hexToInt(&ptr, &addr))
+- regs->nip = addr;
+-
+-/* Need to flush the instruction cache here, as we may have deposited a
+- * breakpoint, and the icache probably has no way of knowing that a data ref to
+- * some location may have changed something that is in the instruction cache.
+- */
+- kgdb_flush_cache_all();
+- mtmsr(msr);
+-
+- kgdb_interruptible(1);
+- unlock_kernel();
+- kgdb_active = 0;
+- if (kdebug) {
+- printk("remcomInBuffer: %s\n", remcomInBuffer);
+- printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+- }
+- return 1;
+-
+- case 's':
+- kgdb_flush_cache_all();
+-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+- mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC);
+- regs->msr |= MSR_DE;
+-#else
+- regs->msr |= MSR_SE;
+-#endif
+- unlock_kernel();
+- kgdb_active = 0;
+- if (kdebug) {
+- printk("remcomInBuffer: %s\n", remcomInBuffer);
+- printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+- }
+- return 1;
+-
+- case 'r': /* Reset (if user process..exit ???)*/
+- panic("kgdb reset.");
+- break;
+- } /* switch */
+- if (remcomOutBuffer[0] && kdebug) {
+- printk("remcomInBuffer: %s\n", remcomInBuffer);
+- printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+- }
+- /* reply to the request */
+- putpacket(remcomOutBuffer);
+- } /* while(1) */
+-}
+-
+-/* This function will generate a breakpoint exception. It is used at the
+- beginning of a program to sync up with a debugger and can be used
+- otherwise as a quick means to stop program execution and "break" into
+- the debugger. */
+-
+-void
+-breakpoint(void)
+-{
+- if (!initialized) {
+- printk("breakpoint() called b4 kgdb init\n");
+- return;
+- }
+-
+- asm(" .globl breakinst \n\
+- breakinst: .long 0x7d821008");
+-}
+-
+-#ifdef CONFIG_KGDB_CONSOLE
+-/* Output string in GDB O-packet format if GDB has connected. If nothing
+- output, returns 0 (caller must then handle output). */
+-int
+-kgdb_output_string (const char* s, unsigned int count)
+-{
+- char buffer[512];
+-
+- if (!kgdb_started)
+- return 0;
+-
+- count = (count <= (sizeof(buffer) / 2 - 2))
+- ? count : (sizeof(buffer) / 2 - 2);
+-
+- buffer[0] = 'O';
+- mem2hex (s, &buffer[1], count);
+- putpacket(buffer);
+-
+- return 1;
+-}
+-#endif
+-
+-static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs,
+- struct tty_struct *tty)
+-{
+- printk("Entering GDB stub\n");
+- breakpoint();
+-}
+-static struct sysrq_key_op sysrq_gdb_op = {
+- .handler = sysrq_handle_gdb,
+- .help_msg = "Gdb",
+- .action_msg = "GDB",
+-};
+-
+-static int gdb_register_sysrq(void)
+-{
+- printk("Registering GDB sysrq handler\n");
+- register_sysrq_key('g', &sysrq_gdb_op);
+- return 0;
+-}
+-module_init(gdb_register_sysrq);
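The stub removed above speaks the gdb remote serial protocol described in its header comment: each packet is framed as $<data>#<checksum>, where the checksum is the modulo-256 sum of the payload bytes printed as two hex digits, and the receiver acknowledges with '+' or '-'. A standalone sketch of the framing, which reproduces the worked example from the deleted header ($m0,10#2a):

#include <stdio.h>
#include <string.h>

/* Frame a gdb remote-protocol packet as $<data>#<checksum>. */
static void put_packet(const char *data)
{
	unsigned char sum = 0;
	size_t i;

	for (i = 0; i < strlen(data); i++)
		sum += (unsigned char)data[i]; /* modulo 256 via unsigned char wraparound */
	printf("$%s#%02x\n", data, sum);
}

int main(void)
{
	put_packet("m0,10"); /* read 0x10 bytes at address 0 -> $m0,10#2a */
	put_packet("OK");
	return 0;
}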
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/setup.c linux-2.6.22-591/arch/ppc/kernel/setup.c
+--- linux-2.6.22-570/arch/ppc/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -48,10 +48,6 @@
+ #include <asm/ppc_sys.h>
+ #endif
+
+-#if defined CONFIG_KGDB
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void platform_init(unsigned long r3, unsigned long r4,
+ unsigned long r5, unsigned long r6, unsigned long r7);
+ extern void reloc_got2(unsigned long offset);
+@@ -509,24 +505,12 @@
+ #endif /* CONFIG_XMON */
+ if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+
+-#if defined(CONFIG_KGDB)
+- if (ppc_md.kgdb_map_scc)
+- ppc_md.kgdb_map_scc();
+- set_debug_traps();
+- if (strstr(cmd_line, "gdb")) {
+- if (ppc_md.progress)
+- ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
+- printk("kgdb breakpoint activated\n");
+- breakpoint();
+- }
+-#endif
+-
+ /*
+ * Set cache line size based on type of cpu as a default.
+ * Systems with OF can look in the properties on the cpu node(s)
+ * for a possibly more accurate value.
+ */
+- if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) {
++ if (! cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) {
+ dcache_bsize = cur_cpu_spec->dcache_bsize;
+ icache_bsize = cur_cpu_spec->icache_bsize;
+ ucache_bsize = 0;
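Note that the sense of the cache feature test flips throughout this patch: the retired CPU_FTR_SPLIT_ID_CACHE flag asserted split instruction/data caches, while the replacement CPU_FTR_UNIFIED_ID_CACHE asserts the opposite property (a unified cache, as on the 601), so each former positive or IFCLR test becomes a negated or IFSET one. A toy sketch of the equivalence, using a made-up bit value rather than the kernel's:

#include <stdio.h>

#define CPU_FTR_UNIFIED_ID_CACHE 0x1UL /* hypothetical bit, not the kernel's value */

static unsigned long cpu_features;

static int cpu_has_feature(unsigned long feature)
{
	return (cpu_features & feature) != 0;
}

int main(void)
{
	cpu_features = CPU_FTR_UNIFIED_ID_CACHE; /* 601-class: unified cache */
	printf("601-class:   split caches? %d\n",
	       !cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)); /* prints 0 */

	cpu_features = 0; /* typical 6xx: split I/D caches */
	printf("typical 6xx: split caches? %d\n",
	       !cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)); /* prints 1 */
	return 0;
}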
+diff -Nurb linux-2.6.22-570/arch/ppc/mm/fault.c linux-2.6.22-591/arch/ppc/mm/fault.c
+--- linux-2.6.22-570/arch/ppc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/ppc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/highmem.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -330,6 +331,14 @@
+ return;
+ }
+
++#ifdef CONFIG_KGDB
++ if (atomic_read(&debugger_active) && kgdb_may_fault) {
++ /* Restore our previous state. */
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Not reached. */
++ }
++#endif
++
+ /* kernel has accessed a bad area */
+ #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
+ if (debugger_kernel_faults)
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+ #include <linux/ethtool.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -337,10 +338,13 @@
+ printk("Early serial init of port 0 failed\n");
+ }
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
+ port.membase = ioremap64(PPC440EP_UART1_ADDR, 8);
+ port.irq = 1;
+@@ -351,10 +355,13 @@
+ printk("Early serial init of port 1 failed\n");
+ }
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+
+ port.membase = ioremap64(PPC440EP_UART2_ADDR, 8);
+ port.irq = 3;
+@@ -365,10 +372,13 @@
+ printk("Early serial init of port 2 failed\n");
+ }
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(2, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(2, &port);
++#endif
+
+ port.membase = ioremap64(PPC440EP_UART3_ADDR, 8);
+ port.irq = 4;
+@@ -378,6 +388,10 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 3 failed\n");
+ }
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(3, &port);
++#endif
+ }
+
+ static void __init
+@@ -435,8 +449,5 @@
+
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = bamboo_early_serial_map;
+-#endif
+ }
+
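The bamboo change above sets the pattern repeated for every 8250-based board in this patch: the early console path stays behind CONFIG_SERIAL_8250 or CONFIG_SERIAL_TEXT_DEBUG, and the same struct uart_port is additionally registered with the kgdb I/O driver via kgdb8250_add_port(line, port) under CONFIG_KGDB_8250. The compilable user-space sketch below shows only that call shape; the stub types, address and clock values are placeholders, not a real board.

#include <stdio.h>

/* Toy stand-ins for the kernel types and hooks used by these board files. */
struct uart_port {
	int line;
	unsigned long mapbase;
	int irq;
	unsigned int uartclk;
};

static int early_serial_setup(struct uart_port *port)
{
	printf("console: registered ttyS%d\n", port->line);
	return 0; /* 0 on success, matching the checks in this patch */
}

static void kgdb8250_add_port(int line, struct uart_port *port)
{
	(void)port;
	printf("kgdb8250: debugger may attach on port %d\n", line);
}

/* Shape of a board's early_serial_map after this patch. */
static void board_early_serial_map(void)
{
	struct uart_port port = {
		.line    = 0,
		.mapbase = 0xef600300, /* placeholder address */
		.irq     = 0,
		.uartclk = 11059200,   /* placeholder clock */
	};

	if (early_serial_setup(&port) != 0) /* console path */
		printf("Early serial init of port 0 failed\n");
	kgdb8250_add_port(0, &port);        /* debugger path */
}

int main(void)
{
	board_early_serial_map();
	return 0;
}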
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c 2007-12-21 15:36:11.000000000 -0500
+@@ -4,7 +4,7 @@
+ * Author: SAW (IBM), derived from walnut.c.
+ * Maintained by MontaVista Software <source@mvista.com>
+ *
+- * 2003 (c) MontaVista Softare Inc. This file is licensed under the
++ * 2003-2004 (c) MontaVista Software Inc. This file is licensed under the
+ * terms of the GNU General Public License version 2. This program is
+ * licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+@@ -21,6 +21,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pci-bridge.h>
+@@ -30,7 +31,6 @@
+ #include <asm/time.h>
+ #include <asm/io.h>
+ #include <asm/todc.h>
+-#include <asm/kgdb.h>
+ #include <asm/ocp.h>
+ #include <asm/ibm_ocp_pci.h>
+
+@@ -100,17 +100,26 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
+ port.membase = (void*)ACTING_UART1_IO_BASE;
+ port.irq = ACTING_UART1_INT;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ void __init
+@@ -257,8 +266,4 @@
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+ #endif
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = bubinga_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c 2007-12-21 15:36:11.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -226,14 +227,20 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Purge TLB entry added in head_44x.S for early serial access */
+ _tlbie(UART0_IO_BASE);
+ #endif
+@@ -243,14 +250,18 @@
+ port.uartclk = clocks.uart1;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ static void __init
+@@ -327,8 +338,4 @@
+
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = ebony_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -283,6 +284,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 0 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
+ port.membase = ioremap64(PPC440SP_UART1_ADDR, 8);
+ port.irq = UART1_INT;
+@@ -292,6 +296,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 1 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+
+ port.membase = ioremap64(PPC440SP_UART2_ADDR, 8);
+ port.irq = UART2_INT;
+@@ -301,6 +308,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 2 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(2, &port);
++#endif
+ }
+
+ static void __init
+@@ -360,7 +370,4 @@
+ ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */
+
+ ppc_md.calibrate_decr = luan_calibrate_decr;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = luan_early_serial_map;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -249,14 +250,20 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Purge TLB entry added in head_44x.S for early serial access */
+ _tlbie(UART0_IO_BASE);
+ #endif
+@@ -266,14 +273,18 @@
+ port.uartclk = clocks.uart1;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ static void __init
+@@ -343,8 +354,5 @@
+
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = ocotea_early_serial_map;
+-#endif
+ ppc_md.init = ocotea_init;
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c 2007-12-21 15:36:11.000000000 -0500
+@@ -310,7 +310,7 @@
+ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
+
+@@ -326,7 +326,7 @@
+ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
+@@ -387,9 +387,6 @@
+
+ ppc_md.calibrate_decr = taishan_calibrate_decr;
+
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = taishan_early_serial_map;
+-#endif
+ ppc_md.init = taishan_init;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-12-21 15:36:11.000000000 -0500
+@@ -16,6 +16,8 @@
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
+ #include <linux/serialP.h>
++#include <linux/kgdb.h>
++
+ #include <asm/io.h>
+ #include <asm/machdep.h>
+
+@@ -41,9 +43,6 @@
+ * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c
+ * start_kernel init/main.c
+ * setup_arch arch/ppc/kernel/setup.c
+- * #if defined(CONFIG_KGDB)
+- * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc
+- * #endif
+ * *ppc_md.setup_arch == ml300_setup_arch this file
+ * ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c
+ * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-12-21 15:36:11.000000000 -0500
+@@ -43,9 +43,6 @@
+ * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c
+ * start_kernel init/main.c
+ * setup_arch arch/ppc/kernel/setup.c
+- * #if defined(CONFIG_KGDB)
+- * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc
+- * #endif
+ * *ppc_md.setup_arch == ml403_setup_arch this file
+ * ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c
+ * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c 2007-12-21 15:36:11.000000000 -0500
+@@ -386,7 +386,4 @@
+ ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */
+
+ ppc_md.calibrate_decr = yucca_calibrate_decr;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = yucca_early_serial_map;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c
+--- linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-12-21 15:36:11.000000000 -0500
+@@ -42,11 +42,11 @@
+ #include <asm/pci-bridge.h>
+ #include <asm/mpc83xx.h>
+ #include <asm/irq.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+
+ #include <syslib/ppc83xx_setup.h>
++#include <syslib/gen550.h>
+
+ #ifndef CONFIG_PCI
+ unsigned long isa_io_base = 0;
+@@ -114,7 +114,9 @@
+ /* setup PCI host bridges */
+ mpc83xx_setup_hose();
+ #endif
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ mpc83xx_early_serial_map();
++#endif
+
+ /* setup the board related info for the MDIO bus */
+ mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC83xx_MDIO);
+@@ -334,7 +336,6 @@
+ ppc_md.get_rtc_time = NULL;
+ ppc_md.calibrate_decr = mpc83xx_calibrate_decr;
+
+- ppc_md.early_serial_map = mpc83xx_early_serial_map;
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-12-21 15:36:11.000000000 -0500
+@@ -43,11 +43,11 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+
+ /* ************************************************************************
+ *
+@@ -77,7 +77,7 @@
+ mpc85xx_setup_hose();
+ #endif
+
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ mpc85xx_early_serial_map();
+ #endif
+
+@@ -215,9 +215,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+
+ if (ppc_md.progress)
+ ppc_md.progress("mpc8540ads_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-12-21 15:36:11.000000000 -0500
+@@ -44,7 +44,6 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <asm/cpm2.h>
+ #include <mm/mmu_decl.h>
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-12-21 15:36:11.000000000 -0500
+@@ -47,12 +47,12 @@
+ #include <asm/immap_85xx.h>
+ #include <asm/cpm2.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+
+ #include <mm/mmu_decl.h>
+ #include <syslib/cpm2_pic.h>
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+
+
+ #ifndef CONFIG_PCI
+@@ -436,7 +436,7 @@
+ mpc85xx_setup_hose();
+ #endif
+
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ mpc85xx_early_serial_map();
+ #endif
+
+@@ -590,9 +590,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+
+ if (ppc_md.progress)
+ ppc_md.progress("mpc85xx_cds_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c 2007-12-21 15:36:11.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/initrd.h>
+ #include <linux/module.h>
+ #include <linux/fsl_devices.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -43,14 +44,13 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+
+-#ifdef CONFIG_SERIAL_8250
+ static void __init
+ sbc8560_early_serial_map(void)
+ {
+@@ -66,12 +66,16 @@
+ uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART0_SIZE);
+ uart_req.type = PORT_16650;
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+- gen550_init(0, &uart_req);
+-#endif
+-
++#ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&uart_req) != 0)
+ printk("Early serial init of port 0 failed\n");
++#endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(0, &uart_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &uart_req);
++#endif
+
+ /* Assume early_serial_setup() doesn't modify uart_req */
+ uart_req.line = 1;
+@@ -79,14 +83,17 @@
+ uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART1_SIZE);
+ uart_req.irq = MPC85xx_IRQ_EXT10;
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+- gen550_init(1, &uart_req);
+-#endif
+-
++#ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&uart_req) != 0)
+- printk("Early serial init of port 1 failed\n");
+-}
++ printk("Early serial init of port 0 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(1, &uart_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &uart_req);
++#endif
++}
+
+ /* ************************************************************************
+ *
+@@ -115,9 +122,7 @@
+ /* setup PCI host bridges */
+ mpc85xx_setup_hose();
+ #endif
+-#ifdef CONFIG_SERIAL_8250
+ sbc8560_early_serial_map();
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Invalidate the entry we stole earlier the serial ports
+ * should be properly mapped */
+@@ -224,9 +229,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = sbc8560_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+
+ if (ppc_md.progress)
+ ppc_md.progress("sbc8560_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c 2007-12-21 15:36:11.000000000 -0500
+@@ -46,7 +46,6 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <asm/cpm2.h>
+ #include <mm/mmu_decl.h>
+@@ -55,6 +54,7 @@
+ #include <syslib/cpm2_pic.h>
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_rio.h>
++#include <syslib/gen550.h>
+
+ #ifndef CONFIG_PCI
+ unsigned long isa_io_base = 0;
+@@ -121,7 +121,7 @@
+ #endif
+
+ #ifndef CONFIG_MPC8560
+-#if defined(CONFIG_SERIAL_8250)
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ mpc85xx_early_serial_map();
+ #endif
+
+@@ -400,9 +400,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ #endif /* CONFIG_MPC8560 */
+
+ if (ppc_md.progress)
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/apus_setup.c linux-2.6.22-591/arch/ppc/platforms/apus_setup.c
+--- linux-2.6.22-570/arch/ppc/platforms/apus_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/apus_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -598,12 +598,6 @@
+ ciab.ddra |= (SER_DTR | SER_RTS); /* outputs */
+ ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR); /* inputs */
+
+-#ifdef CONFIG_KGDB
+- /* turn Rx interrupts on for GDB */
+- amiga_custom.intena = IF_SETCLR | IF_RBF;
+- ser_RTSon();
+-#endif
+-
+ return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/chestnut.c linux-2.6.22-591/arch/ppc/platforms/chestnut.c
+--- linux-2.6.22-570/arch/ppc/platforms/chestnut.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/chestnut.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,9 +34,9 @@
+ #include <asm/io.h>
+ #include <asm/hw_irq.h>
+ #include <asm/machdep.h>
+-#include <asm/kgdb.h>
+ #include <asm/bootinfo.h>
+ #include <asm/mv64x60.h>
++#include <syslib/gen550.h>
+ #include <platforms/chestnut.h>
+
+ static void __iomem *sram_base; /* Virtual addr of Internal SRAM */
+@@ -492,7 +492,7 @@
+ static void __init
+ chestnut_map_io(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ io_block_mapping(CHESTNUT_UART_BASE, CHESTNUT_UART_BASE, 0x100000,
+ _PAGE_IO);
+ #endif
+@@ -566,9 +566,6 @@
+ #if defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif
+-#if defined(CONFIG_KGDB)
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+
+ if (ppc_md.progress)
+ ppc_md.progress("chestnut_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/ev64260.c linux-2.6.22-591/arch/ppc/platforms/ev64260.c
+--- linux-2.6.22-570/arch/ppc/platforms/ev64260.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/ev64260.c 2007-12-21 15:36:11.000000000 -0500
+@@ -330,7 +330,7 @@
+ port.iotype = UPIO_MEM;
+ port.flags = STD_COM_FLAGS;
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ gen550_init(0, &port);
+ #endif
+
+@@ -568,7 +568,7 @@
+ return;
+ }
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ static void __init
+ ev64260_map_io(void)
+ {
+@@ -624,20 +624,12 @@
+ ppc_md.setup_io_mappings = ev64260_map_io;
+ ppc_md.progress = gen550_progress;
+ #endif
+-#if defined(CONFIG_KGDB)
+- ppc_md.setup_io_mappings = ev64260_map_io;
+- ppc_md.early_serial_map = ev64260_early_serial_map;
+-#endif
+ #elif defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.setup_io_mappings = ev64260_map_io;
+ ppc_md.progress = mv64x60_mpsc_progress;
+ mv64x60_progress_init(CONFIG_MV64X60_NEW_BASE);
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+- ppc_md.setup_io_mappings = ev64260_map_io;
+- ppc_md.early_serial_map = ev64260_early_serial_map;
+-#endif /* CONFIG_KGDB */
+
+ #endif
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/hdpu.c linux-2.6.22-591/arch/ppc/platforms/hdpu.c
+--- linux-2.6.22-570/arch/ppc/platforms/hdpu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/hdpu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -281,25 +281,6 @@
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ static void __init hdpu_early_serial_map(void)
+ {
+-#ifdef CONFIG_KGDB
+- static char first_time = 1;
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#else
+-#error "Invalid kgdb_tty port"
+-#endif
+-
+- if (first_time) {
+- gt_early_mpsc_init(KGDB_PORT,
+- B9600 | CS8 | CREAD | HUPCL | CLOCAL);
+- first_time = 0;
+- }
+-
+- return;
+-#endif
+ }
+ #endif
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/lopec.c linux-2.6.22-591/arch/ppc/platforms/lopec.c
+--- linux-2.6.22-570/arch/ppc/platforms/lopec.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/lopec.c 2007-12-21 15:36:11.000000000 -0500
+@@ -32,7 +32,8 @@
+ #include <asm/mpc10x.h>
+ #include <asm/hw_irq.h>
+ #include <asm/prep_nvram.h>
+-#include <asm/kgdb.h>
++
++#include <syslib/gen550.h>
+
+ /*
+ * Define all of the IRQ senses and polarities. Taken from the
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/pplus.c linux-2.6.22-591/arch/ppc/platforms/pplus.c
+--- linux-2.6.22-570/arch/ppc/platforms/pplus.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/pplus.c 2007-12-21 15:36:11.000000000 -0500
+@@ -35,9 +35,9 @@
+ #include <asm/hawk.h>
+ #include <asm/todc.h>
+ #include <asm/bootinfo.h>
+-#include <asm/kgdb.h>
+ #include <asm/reg.h>
+
++#include <syslib/gen550.h>
+ #include "pplus.h"
+
+ #undef DUMP_DBATS
+@@ -893,9 +893,6 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ #ifdef CONFIG_SMP
+ smp_ops = &pplus_smp_ops;
+ #endif /* CONFIG_SMP */
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c
+--- linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c 2007-12-21 15:36:11.000000000 -0500
+@@ -84,7 +84,7 @@
+ * Serial port code
+ *****************************************************************************/
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ static void __init ppc7d_early_serial_map(void)
+ {
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+@@ -113,10 +113,10 @@
+ if (early_serial_setup(&serial_req) != 0)
+ printk(KERN_ERR "Early serial init of port 1 failed\n");
+ #else
+-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
+ #endif
+ }
+-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */
++#endif /* CONFIG_SERIAL_TEXT_DEBUG */
+
+ /*****************************************************************************
+ * Low-level board support code
+@@ -1459,18 +1459,16 @@
+ PPC7D_CPLD_COMS_COM4_TXEN, PPC7D_CPLD_COMS);
+ #endif /* CONFIG_SERIAL_MPSC */
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+- ppc7d_early_serial_map();
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
++ ppc7d_early_serial_map();
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ ppc_md.progress = mv64x60_mpsc_progress;
+ #elif defined(CONFIG_SERIAL_8250)
+ ppc_md.progress = gen550_progress;
+ #else
+-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
+ #endif /* CONFIG_SERIAL_8250 */
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */
+
+ /* Enable write access to user flash. This is necessary for
+ * flash probe.
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/sandpoint.c linux-2.6.22-591/arch/ppc/platforms/sandpoint.c
+--- linux-2.6.22-570/arch/ppc/platforms/sandpoint.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/sandpoint.c 2007-12-21 15:36:11.000000000 -0500
+@@ -95,9 +95,9 @@
+ #include <asm/bootinfo.h>
+ #include <asm/mpc10x.h>
+ #include <asm/pci-bridge.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+
++#include <syslib/gen550.h>
+ #include "sandpoint.h"
+
+ /* Set non-zero if an X2 Sandpoint detected. */
+@@ -730,9 +730,6 @@
+ ppc_md.nvram_read_val = todc_mc146818_read_val;
+ ppc_md.nvram_write_val = todc_mc146818_write_val;
+
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/spruce.c linux-2.6.22-591/arch/ppc/platforms/spruce.c
+--- linux-2.6.22-570/arch/ppc/platforms/spruce.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/spruce.c 2007-12-21 15:36:11.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/serial.h>
+ #include <linux/tty.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -37,9 +38,9 @@
+ #include <asm/time.h>
+ #include <asm/todc.h>
+ #include <asm/bootinfo.h>
+-#include <asm/kgdb.h>
+
+ #include <syslib/cpc700.h>
++#include <syslib/gen550.h>
+
+ #include "spruce.h"
+
+@@ -178,26 +179,32 @@
+ serial_req.membase = (u_char *)UART0_IO_BASE;
+ serial_req.regshift = 0;
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+- gen550_init(0, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&serial_req) != 0)
+ printk("Early serial init of port 0 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(0, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &serial_req);
++#endif
+
+ /* Assume early_serial_setup() doesn't modify serial_req */
+ serial_req.line = 1;
+ serial_req.irq = UART1_INT;
+ serial_req.membase = (u_char *)UART1_IO_BASE;
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+- gen550_init(1, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&serial_req) != 0)
+ printk("Early serial init of port 1 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(1, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &serial_req);
++#endif
+ }
+
+ TODC_ALLOC();
+@@ -316,7 +323,4 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ }
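
The spruce conversion above is the shape this series gives every board file:
instead of routing the debugger through the old ppc_md.kgdb_map_scc and
putDebugChar()/getDebugChar() polled hooks, the platform fills in a single
struct uart_port and hands it to each early consumer -- early_serial_setup()
for the 8250 driver, gen550_init() for text-debug progress output, and
kgdb8250_add_port() for the debugger. A minimal sketch of that pattern,
assuming only the kgdb8250_add_port(int, struct uart_port *) signature used
in the hunks above; the EXAMPLE_* constants are hypothetical placeholders,
not symbols defined by this patch:

	#include <linux/string.h>
	#include <linux/serial_core.h>
	#include <linux/serial_8250.h>
	#include <linux/kgdb.h>

	#include <syslib/gen550.h>

	static void __init example_early_serial_map(void)
	{
		struct uart_port port;

		memset(&port, 0, sizeof(port));
		port.line = 0;
		port.iotype = UPIO_MEM;
		port.membase = (u_char *)EXAMPLE_UART0_BASE;	/* hypothetical */
		port.irq = EXAMPLE_UART0_IRQ;			/* hypothetical */
		port.uartclk = EXAMPLE_UART_CLK;		/* hypothetical */

	#ifdef CONFIG_SERIAL_8250
		/* register the port with the real 8250 driver */
		if (early_serial_setup(&port) != 0)
			printk("Early serial init of port 0 failed\n");
	#endif
	#ifdef CONFIG_SERIAL_TEXT_DEBUG
		/* polled progress output for ppc_md.progress */
		gen550_init(0, &port);
	#endif
	#ifdef CONFIG_KGDB_8250
		/* hand the same fully-described port to the kgdb stub */
		kgdb8250_add_port(0, &port);
	#endif
	}
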
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/Makefile linux-2.6.22-591/arch/ppc/syslib/Makefile
+--- linux-2.6.22-570/arch/ppc/syslib/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -77,7 +77,6 @@
+ obj-$(CONFIG_8260_PCI9) += m8260_pci_erratum9.o
+ obj-$(CONFIG_CPM2) += cpm2_common.o cpm2_pic.o
+ ifeq ($(CONFIG_PPC_GEN550),y)
+-obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o
+ obj-$(CONFIG_SERIAL_TEXT_DEBUG) += gen550_dbg.o
+ endif
+ ifeq ($(CONFIG_SERIAL_MPSC_CONSOLE),y)
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550.h linux-2.6.22-591/arch/ppc/syslib/gen550.h
+--- linux-2.6.22-570/arch/ppc/syslib/gen550.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/gen550.h 2007-12-21 15:36:11.000000000 -0500
+@@ -11,4 +11,3 @@
+
+ extern void gen550_progress(char *, unsigned short);
+ extern void gen550_init(int, struct uart_port *);
+-extern void gen550_kgdb_map_scc(void);
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c
+--- linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,83 +0,0 @@
+-/*
+- * Generic 16550 kgdb support intended to be useful on a variety
+- * of platforms. To enable this support, it is necessary to set
+- * the CONFIG_GEN550 option. Any virtual mapping of the serial
+- * port(s) to be used can be accomplished by setting
+- * ppc_md.early_serial_map to a platform-specific mapping function.
+- *
+- * Adapted from ppc4xx_kgdb.c.
+- *
+- * Author: Matt Porter <mporter@kernel.crashing.org>
+- *
+- * 2002-2004 (c) MontaVista Software, Inc. This file is licensed under
+- * the terms of the GNU General Public License version 2. This program
+- * is licensed "as is" without any warranty of any kind, whether express
+- * or implied.
+- */
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-
+-#include <asm/machdep.h>
+-
+-extern unsigned long serial_init(int, void *);
+-extern unsigned long serial_getc(unsigned long);
+-extern unsigned long serial_putc(unsigned long, unsigned char);
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#elif defined(CONFIG_KGDB_TTYS2)
+-#define KGDB_PORT 2
+-#elif defined(CONFIG_KGDB_TTYS3)
+-#define KGDB_PORT 3
+-#else
+-#error "invalid kgdb_tty port"
+-#endif
+-
+-static volatile unsigned int kgdb_debugport;
+-
+-void putDebugChar(unsigned char c)
+-{
+- if (kgdb_debugport == 0)
+- kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-
+- serial_putc(kgdb_debugport, c);
+-}
+-
+-int getDebugChar(void)
+-{
+- if (kgdb_debugport == 0)
+- kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-
+- return(serial_getc(kgdb_debugport));
+-}
+-
+-void kgdb_interruptible(int enable)
+-{
+- return;
+-}
+-
+-void putDebugString(char* str)
+-{
+- while (*str != '\0') {
+- putDebugChar(*str);
+- str++;
+- }
+- putDebugChar('\r');
+- return;
+-}
+-
+-/*
+- * Note: gen550_init() must be called already on the port we are going
+- * to use.
+- */
+-void
+-gen550_kgdb_map_scc(void)
+-{
+- printk(KERN_DEBUG "kgdb init\n");
+- if (ppc_md.early_serial_map)
+- ppc_md.early_serial_map();
+- kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-}
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c
+--- linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c 2007-12-21 15:36:11.000000000 -0500
+@@ -192,9 +192,6 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+
+ /*
+ * The Abatron BDI JTAG debugger does not tolerate others
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60.c linux-2.6.22-591/arch/ppc/syslib/mv64x60.c
+--- linux-2.6.22-570/arch/ppc/syslib/mv64x60.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/mv64x60.c 2007-12-21 15:36:11.000000000 -0500
+@@ -241,6 +241,12 @@
+ .end = MV64x60_IRQ_SDMA_0,
+ .flags = IORESOURCE_IRQ,
+ },
++ [4] = {
++ .name = "mpsc 0 irq",
++ .start = MV64x60_IRQ_MPSC_0,
++ .end = MV64x60_IRQ_MPSC_0,
++ .flags = IORESOURCE_IRQ,
++ },
+ };
+
+ static struct platform_device mpsc0_device = {
+@@ -298,6 +304,12 @@
+ .end = MV64360_IRQ_SDMA_1,
+ .flags = IORESOURCE_IRQ,
+ },
++ [4] = {
++ .name = "mpsc 1 irq",
++ .start = MV64360_IRQ_MPSC_1,
++ .end = MV64360_IRQ_MPSC_1,
++ .flags = IORESOURCE_IRQ,
++ },
+ };
+
+ static struct platform_device mpsc1_device = {
+@@ -1432,12 +1444,46 @@
+ static int __init
+ mv64x60_add_pds(void)
+ {
+- return platform_add_devices(mv64x60_pd_devs,
+- ARRAY_SIZE(mv64x60_pd_devs));
++ int i, ret = 0;
++
++ for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) {
++ if (mv64x60_pd_devs[i]) {
++ ret = platform_device_register(mv64x60_pd_devs[i]);
++ }
++ if (ret) {
++		/* skip entries already claimed (NULLed) by early users */
++		while (--i >= 0)
++			if (mv64x60_pd_devs[i])
++				platform_device_unregister(mv64x60_pd_devs[i]);
++ break;
++ }
++ }
++ return ret;
+ }
+ arch_initcall(mv64x60_add_pds);
+
+ /*
++ * mv64x60_early_get_pdev_data()
++ *
++ * Find a platform device by name and id, optionally removing it from the list.
++ */
++struct platform_device * __init
++mv64x60_early_get_pdev_data(const char *name, int id, int remove)
++{
++ int i;
++ struct platform_device *pdev;
++
++	for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) {
++ if ((pdev = mv64x60_pd_devs[i]) &&
++ pdev->id == id &&
++ !strcmp(pdev->name, name)) {
++ if (remove)
++ mv64x60_pd_devs[i] = NULL;
++ return pdev;
++ }
++ }
++ return NULL;
++}
++
++/*
+ *****************************************************************************
+ *
+ * GT64260-Specific Routines
+@@ -1770,6 +1816,11 @@
+ r->start = MV64x60_IRQ_SDMA_0;
+ r->end = MV64x60_IRQ_SDMA_0;
+ }
++ if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 1))
++ != NULL) {
++ r->start = GT64260_IRQ_MPSC_1;
++ r->end = GT64260_IRQ_MPSC_1;
++ }
+ #endif
+ }
+
+@@ -2415,7 +2466,6 @@
+ .attr = {
+ .name = "hs_reg",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = VAL_LEN_MAX,
+ .read = mv64xxx_hs_reg_read,
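
The mv64x60_add_pds() rework and the new mv64x60_early_get_pdev_data()
helper serve one purpose: early console or debugger code may need an MPSC
port long before the platform bus registers devices. A caller can claim the
platform device early, and a nonzero 'remove' NULLs its slot in
mv64x60_pd_devs[] so the arch_initcall loop skips it later. A hypothetical
early caller -- the "mpsc" name matches the mpsc0/mpsc1 devices above, and
the platform-data use is illustrative only:

	struct platform_device *pdev;

	/* claim MPSC 0 before mv64x60_add_pds() can register it */
	pdev = mv64x60_early_get_pdev_data("mpsc", 0, 1);
	if (pdev) {
		struct mpsc_pdata *pdata = pdev->dev.platform_data;
		/* ... program the controller from pdata before any
		 * driver has a chance to bind to the device ... */
	}
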
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c
+--- linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,7 +34,7 @@
+ void
+ mv64x60_progress_init(u32 base)
+ {
+- mv64x60_dbg_bh.v_base = base;
++	mv64x60_dbg_bh.v_base = (void *)base;
+ return;
+ }
+
+@@ -69,53 +69,3 @@
+ return;
+ }
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-
+-
+-#if defined(CONFIG_KGDB)
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#else
+-#error "Invalid kgdb_tty port"
+-#endif
+-
+-void
+-putDebugChar(unsigned char c)
+-{
+- mv64x60_polled_putc(KGDB_PORT, (char)c);
+-}
+-
+-int
+-getDebugChar(void)
+-{
+- unsigned char c;
+-
+- while (!mv64x60_polled_getc(KGDB_PORT, &c));
+- return (int)c;
+-}
+-
+-void
+-putDebugString(char* str)
+-{
+- while (*str != '\0') {
+- putDebugChar(*str);
+- str++;
+- }
+- putDebugChar('\r');
+- return;
+-}
+-
+-void
+-kgdb_interruptible(int enable)
+-{
+-}
+-
+-void
+-kgdb_map_scc(void)
+-{
+- if (ppc_md.early_serial_map)
+- ppc_md.early_serial_map();
+-}
+-#endif /* CONFIG_KGDB */
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -32,7 +32,6 @@
+ #include <asm/processor.h>
+ #include <asm/machdep.h>
+ #include <asm/page.h>
+-#include <asm/kgdb.h>
+ #include <asm/ibm4xx.h>
+ #include <asm/time.h>
+ #include <asm/todc.h>
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,12 +30,12 @@
+ #include <linux/tty.h> /* for linux/serial_core.h */
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
++#include <linux/kgdb.h>
+
+ #include <asm/time.h>
+ #include <asm/mpc83xx.h>
+ #include <asm/mmu.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+ #include <asm/delay.h>
+ #include <asm/machdep.h>
+
+@@ -44,6 +44,7 @@
+ #include <asm/delay.h>
+ #include <syslib/ppc83xx_pci.h>
+ #endif
++#include <syslib/gen550.h>
+
+ phys_addr_t immrbar;
+
+@@ -87,11 +88,11 @@
+ tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ }
+
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ void __init
+ mpc83xx_early_serial_map(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ struct uart_port serial_req;
+ #endif
+ struct plat_serial8250_port *pdata;
+@@ -103,27 +104,40 @@
+ pdata[0].mapbase += binfo->bi_immr_base;
+ pdata[0].membase = ioremap(pdata[0].mapbase, 0x100);
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ memset(&serial_req, 0, sizeof (serial_req));
+ serial_req.iotype = UPIO_MEM;
+ serial_req.mapbase = pdata[0].mapbase;
+ serial_req.membase = pdata[0].membase;
+ serial_req.regshift = 0;
++ serial_req.irq = pdata[0].irq;
++ serial_req.flags = pdata[0].flags;
++ serial_req.uartclk = pdata[0].uartclk;
+
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ gen550_init(0, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &serial_req);
++#endif
++#endif
+
+ pdata[1].uartclk = binfo->bi_busfreq;
+ pdata[1].mapbase += binfo->bi_immr_base;
+ pdata[1].membase = ioremap(pdata[1].mapbase, 0x100);
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Assume gen550_init() doesn't modify serial_req */
+ serial_req.mapbase = pdata[1].mapbase;
+ serial_req.membase = pdata[1].membase;
+
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ gen550_init(1, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &serial_req);
++#endif
++#endif
+ }
+ #endif
+
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -19,16 +19,17 @@
+ #include <linux/tty.h> /* for linux/serial_core.h */
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
++#include <linux/kgdb.h>
+
+ #include <asm/time.h>
+ #include <asm/mpc85xx.h>
+ #include <asm/immap_85xx.h>
+ #include <asm/mmu.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+ #include <asm/machdep.h>
+
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+
+ extern void abort(void);
+
+@@ -69,11 +70,11 @@
+ mtspr(SPRN_TCR, TCR_DIE);
+ }
+
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ void __init
+ mpc85xx_early_serial_map(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ struct uart_port serial_req;
+ #endif
+ struct plat_serial8250_port *pdata;
+@@ -85,27 +86,40 @@
+ pdata[0].mapbase += binfo->bi_immr_base;
+ pdata[0].membase = ioremap(pdata[0].mapbase, MPC85xx_UART0_SIZE);
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ memset(&serial_req, 0, sizeof (serial_req));
+ serial_req.iotype = UPIO_MEM;
+ serial_req.mapbase = pdata[0].mapbase;
+ serial_req.membase = pdata[0].membase;
+ serial_req.regshift = 0;
++ serial_req.irq = pdata[0].irq;
++ serial_req.flags = pdata[0].flags;
++ serial_req.uartclk = pdata[0].uartclk;
+
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ gen550_init(0, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &serial_req);
++#endif
++#endif
+
+ pdata[1].uartclk = binfo->bi_busfreq;
+ pdata[1].mapbase += binfo->bi_immr_base;
+ pdata[1].membase = ioremap(pdata[1].mapbase, MPC85xx_UART0_SIZE);
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Assume gen550_init() doesn't modify serial_req */
+ serial_req.mapbase = pdata[1].mapbase;
+ serial_req.membase = pdata[1].membase;
+
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ gen550_init(1, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &serial_req);
++#endif
++#endif
+ }
+ #endif
+
+@@ -363,5 +377,3 @@
+ return;
+ }
+ #endif /* CONFIG_PCI */
+-
+-
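
Note what the 83xx and 85xx hunks above add beyond the CONFIG renames: the
uart_port handed onward now also carries irq, flags, and uartclk copied from
the plat_serial8250_port. The polled gen550 text-debug path only ever needed
membase, but an interrupt-capable debugger stub plausibly needs the IRQ to
break in asynchronously and the UART clock to program a baud rate, so the
port must be fully described before kgdb8250_add_port() sees it:

	serial_req.irq = pdata[0].irq;		/* asynchronous break-in */
	serial_req.flags = pdata[0].flags;
	serial_req.uartclk = pdata[0].uartclk;	/* baud-rate programming */
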
+diff -Nurb linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c
+--- linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c 2007-12-21 15:36:14.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/errno.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+
+ #include "appldata.h"
+
+@@ -107,7 +108,7 @@
+ tx_dropped = 0;
+ collisions = 0;
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ stats = dev->get_stats(dev);
+ rx_packets += stats->rx_packets;
+ tx_packets += stats->tx_packets;
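
The appldata hunk tracks the network-namespace API change: for_each_netdev()
now takes the namespace to walk as its first argument, and pre-namespace code
that meant "all devices" iterates the initial namespace, &init_net, declared
in <net/net_namespace.h> (hence the added include). The resulting loop shape,
mirroring the hunk above:

	struct net_device *dev;

	read_lock(&dev_base_lock);
	for_each_netdev(&init_net, dev) {
		/* accumulate dev->get_stats(dev) counters as before */
	}
	read_unlock(&dev_base_lock);
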
+diff -Nurb linux-2.6.22-570/arch/s390/kernel/ipl.c linux-2.6.22-591/arch/s390/kernel/ipl.c
+--- linux-2.6.22-570/arch/s390/kernel/ipl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/s390/kernel/ipl.c 2007-12-21 15:36:11.000000000 -0500
+@@ -314,7 +314,6 @@
+ .attr = {
+ .name = "binary_parameter",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = PAGE_SIZE,
+ .read = &ipl_parameter_read,
+@@ -338,7 +337,6 @@
+ .attr = {
+ .name = "scp_data",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = PAGE_SIZE,
+ .read = &ipl_scp_data_read,
+diff -Nurb linux-2.6.22-570/arch/sh/Kconfig.debug linux-2.6.22-591/arch/sh/Kconfig.debug
+--- linux-2.6.22-570/arch/sh/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -78,82 +78,4 @@
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+-config SH_KGDB
+- bool "Include KGDB kernel debugger"
+- select FRAME_POINTER
+- select DEBUG_INFO
+- help
+- Include in-kernel hooks for kgdb, the Linux kernel source level
+- debugger. See <http://kgdb.sourceforge.net/> for more information.
+- Unless you are intending to debug the kernel, say N here.
+-
+-menu "KGDB configuration options"
+- depends on SH_KGDB
+-
+-config MORE_COMPILE_OPTIONS
+- bool "Add any additional compile options"
+- help
+- If you want to add additional CFLAGS to the kernel build, enable this
+- option and then enter what you would like to add in the next question.
+- Note however that -g is already appended with the selection of KGDB.
+-
+-config COMPILE_OPTIONS
+- string "Additional compile arguments"
+- depends on MORE_COMPILE_OPTIONS
+-
+-config KGDB_NMI
+- bool "Enter KGDB on NMI"
+- default n
+-
+-config SH_KGDB_CONSOLE
+- bool "Console messages through GDB"
+- depends on !SERIAL_SH_SCI_CONSOLE
+- select SERIAL_CORE_CONSOLE
+- default n
+-
+-config KGDB_SYSRQ
+- bool "Allow SysRq 'G' to enter KGDB"
+- default y
+-
+-comment "Serial port setup"
+-
+-config KGDB_DEFPORT
+- int "Port number (ttySCn)"
+- default "1"
+-
+-config KGDB_DEFBAUD
+- int "Baud rate"
+- default "115200"
+-
+-choice
+- prompt "Parity"
+- depends on SH_KGDB
+- default KGDB_DEFPARITY_N
+-
+-config KGDB_DEFPARITY_N
+- bool "None"
+-
+-config KGDB_DEFPARITY_E
+- bool "Even"
+-
+-config KGDB_DEFPARITY_O
+- bool "Odd"
+-
+-endchoice
+-
+-choice
+- prompt "Data bits"
+- depends on SH_KGDB
+- default KGDB_DEFBITS_8
+-
+-config KGDB_DEFBITS_8
+- bool "8"
+-
+-config KGDB_DEFBITS_7
+- bool "7"
+-
+-endchoice
+-
+-endmenu
+-
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/Makefile linux-2.6.22-591/arch/sh/kernel/Makefile
+--- linux-2.6.22-570/arch/sh/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -15,7 +15,7 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_CF_ENABLER) += cf-enabler.o
+ obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
+-obj-$(CONFIG_SH_KGDB) += kgdb_stub.o kgdb_jmp.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+ obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S
+--- linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S 2007-12-21 15:36:11.000000000 -0500
+@@ -45,7 +45,7 @@
+ .long exception_error ! reserved_instruction (filled by trap_init) /* 180 */
+ .long exception_error ! illegal_slot_instruction (filled by trap_init) /*1A0*/
+ ENTRY(nmi_slot)
+-#if defined (CONFIG_KGDB_NMI)
++#if defined (CONFIG_KGDB)
+ .long debug_enter /* 1C0 */ ! Allow trap to debugger
+ #else
+ .long exception_none /* 1C0 */ ! Not implemented yet
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,32 @@
++#include <linux/linkage.h>
++
++ENTRY(kgdb_fault_setjmp)
++ add #(9*4), r4
++ sts.l pr, @-r4
++ mov.l r15, @-r4
++ mov.l r14, @-r4
++ mov.l r13, @-r4
++ mov.l r12, @-r4
++ mov.l r11, @-r4
++ mov.l r10, @-r4
++ mov.l r9, @-r4
++ mov.l r8, @-r4
++ rts
++ mov #0, r0
++
++ENTRY(kgdb_fault_longjmp)
++ mov.l @r4+, r8
++ mov.l @r4+, r9
++ mov.l @r4+, r10
++ mov.l @r4+, r11
++ mov.l @r4+, r12
++ mov.l @r4+, r13
++ mov.l @r4+, r14
++ mov.l @r4+, r15
++ lds.l @r4+, pr
++ mov r5, r0
++ tst r0, r0
++ bf 1f
++	mov #1, r0	! in case val == 0
++1: rts
++ nop
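
kgdb-jmp.S supplies the setjmp/longjmp pair the KGDB core uses to survive
faulting memory accesses: kgdb_fault_setjmp() stores the SH callee-saved
registers r8-r15 plus pr (nine words) into the caller's buffer and returns 0,
and kgdb_fault_longjmp() restores them and returns the passed value, forced
to 1 when it is 0 so the setjmp site can always tell the two returns apart.
A sketch of the expected calling pattern; both extern declarations are
assumptions about the out-of-tree KGDB core, not definitions from this patch:

	#include <linux/errno.h>

	extern int kgdb_fault_setjmp(unsigned long *curr_context);
	extern unsigned long kgdb_fault_jmp_regs[];	/* >= 9 words on SH */

	static int example_probe_byte(unsigned long addr, unsigned char *val)
	{
		if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) == 0) {
			/* a fault here longjmps back with a nonzero return */
			*val = *(volatile unsigned char *)addr;
			return 0;
		}
		return -EFAULT;		/* the access faulted */
	}
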
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb.c linux-2.6.22-591/arch/sh/kernel/kgdb.c
+--- linux-2.6.22-570/arch/sh/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/sh/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,363 @@
++/*
++ * arch/sh/kernel/kgdb.c
++ *
++ * Contains SH-specific low-level support for KGDB.
++ *
++ * Contains extracts from code by Glenn Engel, Jim Kingdon,
++ * David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
++ * Amit S. Kale <akale@veritas.com>, William Gatliff <bgat@open-widgets.com>,
++ * Ben Lee, Steve Chamberlain and Benoit Miller <fulg@iname.com>,
++ * Henry Bell <henry.bell@st.com> and Jeremy Siegel <jsiegel@mvista.com>
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2004 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <linux/linkage.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/system.h>
++#include <asm/current.h>
++#include <asm/signal.h>
++#include <asm/pgtable.h>
++#include <asm/ptrace.h>
++
++extern void per_cpu_trap_init(void);
++extern atomic_t cpu_doing_single_step;
++
++/* Register state captured at exception entry, in the layout GDB expects */
++static struct kgdb_regs trap_registers;
++
++/* Globals. */
++char in_nmi; /* Set during NMI to prevent reentry */
++
++/* TRA differs sh3/4 */
++#if defined(CONFIG_CPU_SH3)
++#define TRA 0xffffffd0
++#elif defined(CONFIG_CPU_SH4)
++#define TRA 0xff000020
++#endif
++
++/* Macros for single step instruction identification */
++#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900)
++#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00)
++#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \
++ (((op) & 0x7f ) << 1))
++#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00)
++#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00)
++#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000)
++#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
++ (((op) & 0x7ff) << 1))
++#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023)
++#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8)
++#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000)
++#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
++ (((op) & 0x7ff) << 1))
++#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003)
++#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf)
++#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b)
++#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf)
++#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b)
++#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf)
++#define OPCODE_RTS(op) ((op) == 0xb)
++#define OPCODE_RTE(op) ((op) == 0x2b)
++
++#define SR_T_BIT_MASK 0x1
++#define STEP_OPCODE 0xc320
++#define BIOS_CALL_TRAP 0x3f
++
++/* Exception codes as per SH-4 core manual */
++#define ADDRESS_ERROR_LOAD_VEC 7
++#define ADDRESS_ERROR_STORE_VEC 8
++#define TRAP_VEC 11
++#define INVALID_INSN_VEC 12
++#define INVALID_SLOT_VEC 13
++#define NMI_VEC 14
++#define SERIAL_BREAK_VEC 58
++
++/* Misc static */
++static int stepped_address;
++static short stepped_opcode;
++
++/* Translate SH-3/4 exception numbers to unix-like signal values */
++static int compute_signal(const int excep_code)
++{
++ switch (excep_code) {
++ case INVALID_INSN_VEC:
++ case INVALID_SLOT_VEC:
++ return SIGILL;
++ case ADDRESS_ERROR_LOAD_VEC:
++ case ADDRESS_ERROR_STORE_VEC:
++ return SIGSEGV;
++ case SERIAL_BREAK_VEC:
++ case NMI_VEC:
++ return SIGINT;
++ default:
++ /* Act like it was a break/trap. */
++ return SIGTRAP;
++ }
++}
++
++/*
++ * Translate the registers of the system into the format that GDB wants. Since
++ * we use a local structure to store things, instead of getting them out
++ * of pt_regs, we can just do a memcpy.
++ */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++ memcpy(gdb_regs, &trap_registers, sizeof(trap_registers));
++}
++
++/*
++ * On SH we save: r1 (prev->thread.sp) r2 (prev->thread.pc) r4 (prev) r5 (next)
++ * r6 (next->thread.sp) r7 (next->thread.pc)
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ int count;
++
++ for (count = 0; count < 16; count++)
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = p->thread.pc;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++}
++
++/*
++ * Translate the register values that GDB has given us back into the
++ * format of the system. See the comment above about memcpy.
++ */
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++ memcpy(&trap_registers, gdb_regs, sizeof(trap_registers));
++}
++
++/* Calculate the new address for after a step */
++static short *get_step_address(void)
++{
++ short op = *(short *)trap_registers.pc;
++ long addr;
++
++ /* BT */
++ if (OPCODE_BT(op)) {
++ if (trap_registers.sr & SR_T_BIT_MASK)
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 2;
++ }
++
++ /* BTS */
++ else if (OPCODE_BTS(op)) {
++ if (trap_registers.sr & SR_T_BIT_MASK)
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 4; /* Not in delay slot */
++ }
++
++ /* BF */
++ else if (OPCODE_BF(op)) {
++ if (!(trap_registers.sr & SR_T_BIT_MASK))
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 2;
++ }
++
++ /* BFS */
++ else if (OPCODE_BFS(op)) {
++ if (!(trap_registers.sr & SR_T_BIT_MASK))
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 4; /* Not in delay slot */
++ }
++
++ /* BRA */
++ else if (OPCODE_BRA(op))
++ addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op);
++
++ /* BRAF */
++ else if (OPCODE_BRAF(op))
++ addr = trap_registers.pc + 4
++ + trap_registers.regs[OPCODE_BRAF_REG(op)];
++
++ /* BSR */
++ else if (OPCODE_BSR(op))
++ addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op);
++
++ /* BSRF */
++ else if (OPCODE_BSRF(op))
++ addr = trap_registers.pc + 4
++ + trap_registers.regs[OPCODE_BSRF_REG(op)];
++
++ /* JMP */
++ else if (OPCODE_JMP(op))
++ addr = trap_registers.regs[OPCODE_JMP_REG(op)];
++
++ /* JSR */
++ else if (OPCODE_JSR(op))
++ addr = trap_registers.regs[OPCODE_JSR_REG(op)];
++
++ /* RTS */
++ else if (OPCODE_RTS(op))
++ addr = trap_registers.pr;
++
++ /* RTE */
++ else if (OPCODE_RTE(op))
++ addr = trap_registers.regs[15];
++
++ /* Other */
++ else
++ addr = trap_registers.pc + 2;
++
++ kgdb_flush_icache_range(addr, addr + 2);
++ return (short *)addr;
++}
++
++/* The command loop, read and act on requests */
++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
++ char *remcom_in_buffer, char *remcom_out_buffer,
++ struct pt_regs *ign)
++{
++ unsigned long addr;
++ char *ptr = &remcom_in_buffer[1];
++
++ /* Examine first char of buffer to see what we need to do */
++ switch (remcom_in_buffer[0]) {
++ case 'c': /* Continue at address AA..AA (optional) */
++ case 's': /* Step one instruction from AA..AA */
++ /* Try to read optional parameter, PC unchanged if none */
++ if (kgdb_hex2long(&ptr, &addr))
++ trap_registers.pc = addr;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ if (remcom_in_buffer[0] == 's') {
++ /* Replace the instruction immediately after the
++ * current instruction (i.e. next in the expected
++ * flow of control) with a trap instruction, so that
++ * returning will cause only a single instruction to
++ * be executed. Note that this model is slightly
++ * broken for instructions with delay slots
++ * (e.g. B[TF]S, BSR, BRA etc), where both the branch
++ * and the instruction in the delay slot will be
++ * executed.
++ */
++ /* Determine where the target instruction will send
++ * us to */
++ unsigned short *next_addr = get_step_address();
++ stepped_address = (int)next_addr;
++
++ /* Replace it */
++ stepped_opcode = *(short *)next_addr;
++ *next_addr = STEP_OPCODE;
++
++ /* Flush and return */
++ kgdb_flush_icache_range((long)next_addr,
++ (long)next_addr + 2);
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ smp_processor_id());
++ }
++ return 0;
++ }
++ return -1;
++}
++
++/*
++ * When an exception has occurred, we are called. We need to set things
++ * up so that we can call kgdb_handle_exception to handle requests from
++ * the remote GDB.
++ */
++void kgdb_exception_handler(struct pt_regs *regs)
++{
++ int excep_code, vbr_val;
++ int count;
++
++ /* Copy kernel regs (from stack) */
++ for (count = 0; count < 16; count++)
++ trap_registers.regs[count] = regs->regs[count];
++ trap_registers.pc = regs->pc;
++ trap_registers.pr = regs->pr;
++ trap_registers.sr = regs->sr;
++ trap_registers.gbr = regs->gbr;
++ trap_registers.mach = regs->mach;
++ trap_registers.macl = regs->macl;
++
++ __asm__ __volatile__("stc vbr, %0":"=r"(vbr_val));
++ trap_registers.vbr = vbr_val;
++
++	/* Get the exception code. */
++ __asm__ __volatile__("stc r2_bank, %0":"=r"(excep_code));
++
++ excep_code >>= 5;
++
++ /* If we got an NMI, and KGDB is not yet initialized, call
++	 * breakpoint() to try to initialize everything for us. */
++ if (excep_code == NMI_VEC && !kgdb_initialized) {
++ breakpoint();
++ return;
++ }
++
++ /* TRAP_VEC exception indicates a software trap inserted in place of
++ * code by GDB so back up PC by one instruction, as this instruction
++ * will later be replaced by its original one. Do NOT do this for
++ * trap 0xff, since that indicates a compiled-in breakpoint which
++ * will not be replaced (and we would retake the trap forever) */
++ if (excep_code == TRAP_VEC &&
++ (*(volatile unsigned long *)TRA != (0xff << 2)))
++ trap_registers.pc -= 2;
++
++ /* If we have been single-stepping, put back the old instruction.
++ * We use stepped_address in case we have stopped more than one
++ * instruction away. */
++ if (stepped_opcode != 0) {
++ *(short *)stepped_address = stepped_opcode;
++ kgdb_flush_icache_range(stepped_address, stepped_address + 2);
++ }
++ stepped_opcode = 0;
++
++ /* Call the stub to do the processing. Note that not everything we
++ * need to send back and forth lives in pt_regs. */
++ kgdb_handle_exception(excep_code, compute_signal(excep_code), 0, regs);
++
++ /* Copy back the (maybe modified) registers */
++ for (count = 0; count < 16; count++)
++ regs->regs[count] = trap_registers.regs[count];
++ regs->pc = trap_registers.pc;
++ regs->pr = trap_registers.pr;
++ regs->sr = trap_registers.sr;
++ regs->gbr = trap_registers.gbr;
++ regs->mach = trap_registers.mach;
++ regs->macl = trap_registers.macl;
++
++ vbr_val = trap_registers.vbr;
++ __asm__ __volatile__("ldc %0, vbr": :"r"(vbr_val));
++}
++
++int __init kgdb_arch_init(void)
++{
++ per_cpu_trap_init();
++
++ return 0;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifdef CONFIG_CPU_LITTLE_ENDIAN
++ .gdb_bpt_instr = {0xff, 0xc3},
++#else
++ .gdb_bpt_instr = {0xc3, 0xff},
++#endif
++};
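
The single-step path above exists because SH has no hardware single-step:
get_step_address() decodes the instruction at the trapped PC and predicts the
next PC, and the branch-displacement macros sign-extend the encoded word
count and double it, since SH instructions are two bytes. A standalone
user-space check of OPCODE_BTF_DISP (macro copied verbatim from the file
above), compilable with any C compiler:

	#include <stdio.h>

	#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \
				     (((op) & 0x7f ) << 1))

	int main(void)
	{
		unsigned int pc = 0x8c001000;
		/* 0x8bfc encodes BF with an 8-bit displacement of -4 words */
		int disp = OPCODE_BTF_DISP(0x8bfc);

		/* branch target = pc + 4 + disp, as in get_step_address() */
		printf("disp = %d, target = 0x%08x\n", disp, pc + 4 + disp);
		/* prints: disp = -8, target = 0x8c000ffc */
		return 0;
	}
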
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S
+--- linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S 1969-12-31 19:00:00.000000000 -0500
+@@ -1,33 +0,0 @@
+-#include <linux/linkage.h>
+-
+-ENTRY(setjmp)
+- add #(9*4), r4
+- sts.l pr, @-r4
+- mov.l r15, @-r4
+- mov.l r14, @-r4
+- mov.l r13, @-r4
+- mov.l r12, @-r4
+- mov.l r11, @-r4
+- mov.l r10, @-r4
+- mov.l r9, @-r4
+- mov.l r8, @-r4
+- rts
+- mov #0, r0
+-
+-ENTRY(longjmp)
+- mov.l @r4+, r8
+- mov.l @r4+, r9
+- mov.l @r4+, r10
+- mov.l @r4+, r11
+- mov.l @r4+, r12
+- mov.l @r4+, r13
+- mov.l @r4+, r14
+- mov.l @r4+, r15
+- lds.l @r4+, pr
+- mov r5, r0
+- tst r0, r0
+- bf 1f
+- mov #1, r0 ! in case val==0
+-1: rts
+- nop
+-
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c
+--- linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,1093 +0,0 @@
+-/*
+- * May be copied or modified under the terms of the GNU General Public
+- * License. See linux/COPYING for more information.
+- *
+- * Contains extracts from code by Glenn Engel, Jim Kingdon,
+- * David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
+- * Amit S. Kale <akale@veritas.com>, William Gatliff <bgat@open-widgets.com>,
+- * Ben Lee, Steve Chamberlain and Benoit Miller <fulg@iname.com>.
+- *
+- * This version by Henry Bell <henry.bell@st.com>
+- * Minor modifications by Jeremy Siegel <jsiegel@mvista.com>
+- *
+- * Contains low-level support for remote debug using GDB.
+- *
+- * To enable debugger support, two things need to happen. A call to
+- * set_debug_traps() is necessary in order to allow any breakpoints
+- * or error conditions to be properly intercepted and reported to gdb.
+- * A breakpoint also needs to be generated to begin communication. This
+- * is most easily accomplished by a call to breakpoint() which does
+- * a trapa if the initialisation phase has been successfully completed.
+- *
+- * In this case, set_debug_traps() is not used to "take over" exceptions;
+- * other kernel code is modified instead to enter the kgdb functions here
+- * when appropriate (see entry.S for breakpoint traps and NMI interrupts,
+- * see traps.c for kernel error exceptions).
+- *
+- * The following gdb commands are supported:
+- *
+- * Command Function Return value
+- *
+- * g return the value of the CPU registers hex data or ENN
+- * G set the value of the CPU registers OK or ENN
+- *
+- * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN
+- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN
+- * XAA..AA,LLLL: Same, but data is binary (not hex) OK or ENN
+- *
+- * c Resume at current address SNN ( signal NN)
+- * cAA..AA Continue at address AA..AA SNN
+- * CNN; Resume at current address with signal SNN
+- * CNN;AA..AA Resume at address AA..AA with signal SNN
+- *
+- * s Step one instruction SNN
+- * sAA..AA Step one instruction from AA..AA SNN
+- * SNN; Step one instruction with signal SNN
+- * SNNAA..AA Step one instruction from AA..AA w/NN SNN
+- *
+- * k kill (Detach GDB)
+- *
+- * d Toggle debug flag
+- * D Detach GDB
+- *
+- * Hct Set thread t for operations, OK or ENN
+- * c = 'c' (step, cont), c = 'g' (other
+- * operations)
+- *
+- * qC Query current thread ID QCpid
+- * qfThreadInfo Get list of current threads (first) m<id>
+- * qsThreadInfo " " " " " (subsequent)
+- * qOffsets Get section offsets Text=x;Data=y;Bss=z
+- *
+- * TXX Find if thread XX is alive OK or ENN
+- * ? What was the last sigval ? SNN (signal NN)
+- * O Output to GDB console
+- *
+- * Remote communication protocol.
+- *
+- * A debug packet whose contents are <data> is encapsulated for
+- * transmission in the form:
+- *
+- * $ <data> # CSUM1 CSUM2
+- *
+- * <data> must be ASCII alphanumeric and cannot include characters
+- * '$' or '#'. If <data> starts with two characters followed by
+- * ':', then the existing stubs interpret this as a sequence number.
+- *
+- * CSUM1 and CSUM2 are ascii hex representation of an 8-bit
+- * checksum of <data>, the most significant nibble is sent first.
+- * the hex digits 0-9,a-f are used.
+- *
+- * Receiver responds with:
+- *
+- * + - if CSUM is correct and ready for next packet
+- * - - if CSUM is incorrect
+- *
+- * Responses can be run-length encoded to save space. A '*' means that
+- * the next character is an ASCII encoding giving a repeat count which
+- * stands for that many repetitions of the character preceding the '*'.
+- * The encoding is n+29, yielding a printable character where n >=3
+- * (which is where RLE starts to win). Don't use an n > 126.
+- *
+- * So "0* " means the same as "0000".
+- */
+-
+-#include <linux/string.h>
+-#include <linux/kernel.h>
+-#include <linux/sched.h>
+-#include <linux/smp.h>
+-#include <linux/spinlock.h>
+-#include <linux/delay.h>
+-#include <linux/linkage.h>
+-#include <linux/init.h>
+-#include <linux/console.h>
+-#include <linux/sysrq.h>
+-#include <asm/system.h>
+-#include <asm/cacheflush.h>
+-#include <asm/current.h>
+-#include <asm/signal.h>
+-#include <asm/pgtable.h>
+-#include <asm/ptrace.h>
+-#include <asm/kgdb.h>
+-#include <asm/io.h>
+-
+-/* Function pointers for linkage */
+-kgdb_debug_hook_t *kgdb_debug_hook;
+-kgdb_bus_error_hook_t *kgdb_bus_err_hook;
+-
+-int (*kgdb_getchar)(void);
+-void (*kgdb_putchar)(int);
+-
+-static void put_debug_char(int c)
+-{
+- if (!kgdb_putchar)
+- return;
+- (*kgdb_putchar)(c);
+-}
+-static int get_debug_char(void)
+-{
+- if (!kgdb_getchar)
+- return -1;
+- return (*kgdb_getchar)();
+-}
+-
+-/* Num chars in in/out bound buffers, register packets need NUMREGBYTES * 2 */
+-#define BUFMAX 1024
+-#define NUMREGBYTES (MAXREG*4)
+-#define OUTBUFMAX (NUMREGBYTES*2+512)
+-
+-enum regs {
+- R0 = 0, R1, R2, R3, R4, R5, R6, R7,
+- R8, R9, R10, R11, R12, R13, R14, R15,
+- PC, PR, GBR, VBR, MACH, MACL, SR,
+- /* */
+- MAXREG
+-};
+-
+-static unsigned int registers[MAXREG];
+-struct kgdb_regs trap_registers;
+-
+-char kgdb_in_gdb_mode;
+-char in_nmi; /* Set during NMI to prevent reentry */
+-int kgdb_nofault; /* Boolean to ignore bus errs (i.e. in GDB) */
+-int kgdb_enabled = 1; /* Default to enabled, cmdline can disable */
+-
+-/* Exposed for user access */
+-struct task_struct *kgdb_current;
+-unsigned int kgdb_g_imask;
+-int kgdb_trapa_val;
+-int kgdb_excode;
+-
+-/* Default values for SCI (can override via kernel args in setup.c) */
+-#ifndef CONFIG_KGDB_DEFPORT
+-#define CONFIG_KGDB_DEFPORT 1
+-#endif
+-
+-#ifndef CONFIG_KGDB_DEFBAUD
+-#define CONFIG_KGDB_DEFBAUD 115200
+-#endif
+-
+-#if defined(CONFIG_KGDB_DEFPARITY_E)
+-#define CONFIG_KGDB_DEFPARITY 'E'
+-#elif defined(CONFIG_KGDB_DEFPARITY_O)
+-#define CONFIG_KGDB_DEFPARITY 'O'
+-#else /* CONFIG_KGDB_DEFPARITY_N */
+-#define CONFIG_KGDB_DEFPARITY 'N'
+-#endif
+-
+-#ifdef CONFIG_KGDB_DEFBITS_7
+-#define CONFIG_KGDB_DEFBITS '7'
+-#else /* CONFIG_KGDB_DEFBITS_8 */
+-#define CONFIG_KGDB_DEFBITS '8'
+-#endif
+-
+-/* SCI/UART settings, used in kgdb_console_setup() */
+-int kgdb_portnum = CONFIG_KGDB_DEFPORT;
+-int kgdb_baud = CONFIG_KGDB_DEFBAUD;
+-char kgdb_parity = CONFIG_KGDB_DEFPARITY;
+-char kgdb_bits = CONFIG_KGDB_DEFBITS;
+-
+-/* Jump buffer for setjmp/longjmp */
+-static jmp_buf rem_com_env;
+-
+-/* TRA differs sh3/4 */
+-#if defined(CONFIG_CPU_SH3)
+-#define TRA 0xffffffd0
+-#elif defined(CONFIG_CPU_SH4)
+-#define TRA 0xff000020
+-#endif
+-
+-/* Macros for single step instruction identification */
+-#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900)
+-#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00)
+-#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \
+- (((op) & 0x7f ) << 1))
+-#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00)
+-#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00)
+-#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000)
+-#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
+- (((op) & 0x7ff) << 1))
+-#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023)
+-#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8)
+-#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000)
+-#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
+- (((op) & 0x7ff) << 1))
+-#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003)
+-#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf)
+-#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b)
+-#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf)
+-#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b)
+-#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf)
+-#define OPCODE_RTS(op) ((op) == 0xb)
+-#define OPCODE_RTE(op) ((op) == 0x2b)
+-
+-#define SR_T_BIT_MASK 0x1
+-#define STEP_OPCODE 0xc320
+-#define BIOS_CALL_TRAP 0x3f
+-
+-/* Exception codes as per SH-4 core manual */
+-#define ADDRESS_ERROR_LOAD_VEC 7
+-#define ADDRESS_ERROR_STORE_VEC 8
+-#define TRAP_VEC 11
+-#define INVALID_INSN_VEC 12
+-#define INVALID_SLOT_VEC 13
+-#define NMI_VEC 14
+-#define USER_BREAK_VEC 15
+-#define SERIAL_BREAK_VEC 58
+-
+-/* Misc static */
+-static int stepped_address;
+-static short stepped_opcode;
+-static char in_buffer[BUFMAX];
+-static char out_buffer[OUTBUFMAX];
+-
+-static void kgdb_to_gdb(const char *s);
+-
+-/* Convert ch to hex */
+-static int hex(const char ch)
+-{
+- if ((ch >= 'a') && (ch <= 'f'))
+- return (ch - 'a' + 10);
+- if ((ch >= '0') && (ch <= '9'))
+- return (ch - '0');
+- if ((ch >= 'A') && (ch <= 'F'))
+- return (ch - 'A' + 10);
+- return (-1);
+-}
+-
+-/* Convert the memory pointed to by mem into hex, placing result in buf.
+- Returns a pointer to the last char put in buf (null) */
+-static char *mem_to_hex(const char *mem, char *buf, const int count)
+-{
+- int i;
+- int ch;
+- unsigned short s_val;
+- unsigned long l_val;
+-
+- /* Check for 16 or 32 */
+- if (count == 2 && ((long) mem & 1) == 0) {
+- s_val = *(unsigned short *) mem;
+- mem = (char *) &s_val;
+- } else if (count == 4 && ((long) mem & 3) == 0) {
+- l_val = *(unsigned long *) mem;
+- mem = (char *) &l_val;
+- }
+- for (i = 0; i < count; i++) {
+- ch = *mem++;
+- *buf++ = highhex(ch);
+- *buf++ = lowhex(ch);
+- }
+- *buf = 0;
+- return (buf);
+-}
+-
+-/* Convert the hex array pointed to by buf into binary, to be placed in mem.
+- Return a pointer to the character after the last byte written */
+-static char *hex_to_mem(const char *buf, char *mem, const int count)
+-{
+- int i;
+- unsigned char ch;
+-
+- for (i = 0; i < count; i++) {
+- ch = hex(*buf++) << 4;
+- ch = ch + hex(*buf++);
+- *mem++ = ch;
+- }
+- return (mem);
+-}
+-
+-/* While finding valid hex chars, convert to an integer, then return it */
+-static int hex_to_int(char **ptr, int *int_value)
+-{
+- int num_chars = 0;
+- int hex_value;
+-
+- *int_value = 0;
+-
+- while (**ptr) {
+- hex_value = hex(**ptr);
+- if (hex_value >= 0) {
+- *int_value = (*int_value << 4) | hex_value;
+- num_chars++;
+- } else
+- break;
+- (*ptr)++;
+- }
+- return num_chars;
+-}
+-
+-/* Copy the binary array pointed to by buf into mem. Fix $, #,
+- and 0x7d escaped with 0x7d. Return a pointer to the character
+- after the last byte written. */
+-static char *ebin_to_mem(const char *buf, char *mem, int count)
+-{
+- for (; count > 0; count--, buf++) {
+- if (*buf == 0x7d)
+- *mem++ = *(++buf) ^ 0x20;
+- else
+- *mem++ = *buf;
+- }
+- return mem;
+-}
+-
+-/* Pack a hex byte */
+-static char *pack_hex_byte(char *pkt, int byte)
+-{
+- *pkt++ = hexchars[(byte >> 4) & 0xf];
+- *pkt++ = hexchars[(byte & 0xf)];
+- return pkt;
+-}
+-
+-/* Scan for the start char '$', read the packet and check the checksum */
+-static void get_packet(char *buffer, int buflen)
+-{
+- unsigned char checksum;
+- unsigned char xmitcsum;
+- int i;
+- int count;
+- char ch;
+-
+- do {
+- /* Ignore everything until the start character */
+- while ((ch = get_debug_char()) != '$');
+-
+- checksum = 0;
+- xmitcsum = -1;
+- count = 0;
+-
+- /* Now, read until a # or end of buffer is found */
+- while (count < (buflen - 1)) {
+- ch = get_debug_char();
+-
+- if (ch == '#')
+- break;
+-
+- checksum = checksum + ch;
+- buffer[count] = ch;
+- count = count + 1;
+- }
+-
+- buffer[count] = 0;
+-
+- /* Continue to read checksum following # */
+- if (ch == '#') {
+- xmitcsum = hex(get_debug_char()) << 4;
+- xmitcsum += hex(get_debug_char());
+-
+- /* Checksum */
+- if (checksum != xmitcsum)
+- put_debug_char('-'); /* Failed checksum */
+- else {
+- /* Ack successful transfer */
+- put_debug_char('+');
+-
+- /* If a sequence char is present, reply
+- the sequence ID */
+- if (buffer[2] == ':') {
+- put_debug_char(buffer[0]);
+- put_debug_char(buffer[1]);
+-
+- /* Remove sequence chars from buffer */
+- count = strlen(buffer);
+- for (i = 3; i <= count; i++)
+- buffer[i - 3] = buffer[i];
+- }
+- }
+- }
+- }
+- while (checksum != xmitcsum); /* Keep trying while we fail */
+-}
+-
+-/* Send the packet in the buffer with run-length encoding */
+-static void put_packet(char *buffer)
+-{
+- int checksum;
+- char *src;
+- int runlen;
+- int encode;
+-
+- do {
+- src = buffer;
+- put_debug_char('$');
+- checksum = 0;
+-
+- /* Continue while we still have chars left */
+- while (*src) {
+- /* Check for runs up to 99 chars long */
+- for (runlen = 1; runlen < 99; runlen++) {
+- if (src[0] != src[runlen])
+- break;
+- }
+-
+- if (runlen > 3) {
+- /* Got a useful amount, send encoding */
+- encode = runlen + ' ' - 4;
+- put_debug_char(*src); checksum += *src;
+- put_debug_char('*'); checksum += '*';
+- put_debug_char(encode); checksum += encode;
+- src += runlen;
+- } else {
+- /* Otherwise just send the current char */
+- put_debug_char(*src); checksum += *src;
+- src += 1;
+- }
+- }
+-
+- /* '#' Separator, put high and low components of checksum */
+- put_debug_char('#');
+- put_debug_char(highhex(checksum));
+- put_debug_char(lowhex(checksum));
+- }
+- while ((get_debug_char()) != '+'); /* While no ack */
+-}
+-
+-/* A bus error has occurred - perform a longjmp to return execution and
+- allow handling of the error */
+-static void kgdb_handle_bus_error(void)
+-{
+- longjmp(rem_com_env, 1);
+-}
+-
+-/* Translate SH-3/4 exception numbers to unix-like signal values */
+-static int compute_signal(const int excep_code)
+-{
+- int sigval;
+-
+- switch (excep_code) {
+-
+- case INVALID_INSN_VEC:
+- case INVALID_SLOT_VEC:
+- sigval = SIGILL;
+- break;
+- case ADDRESS_ERROR_LOAD_VEC:
+- case ADDRESS_ERROR_STORE_VEC:
+- sigval = SIGSEGV;
+- break;
+-
+- case SERIAL_BREAK_VEC:
+- case NMI_VEC:
+- sigval = SIGINT;
+- break;
+-
+- case USER_BREAK_VEC:
+- case TRAP_VEC:
+- sigval = SIGTRAP;
+- break;
+-
+- default:
+- sigval = SIGBUS; /* "software generated" */
+- break;
+- }
+-
+- return (sigval);
+-}
+-
+-/* Make a local copy of the registers passed into the handler (bletch) */
+-static void kgdb_regs_to_gdb_regs(const struct kgdb_regs *regs,
+- int *gdb_regs)
+-{
+- gdb_regs[R0] = regs->regs[R0];
+- gdb_regs[R1] = regs->regs[R1];
+- gdb_regs[R2] = regs->regs[R2];
+- gdb_regs[R3] = regs->regs[R3];
+- gdb_regs[R4] = regs->regs[R4];
+- gdb_regs[R5] = regs->regs[R5];
+- gdb_regs[R6] = regs->regs[R6];
+- gdb_regs[R7] = regs->regs[R7];
+- gdb_regs[R8] = regs->regs[R8];
+- gdb_regs[R9] = regs->regs[R9];
+- gdb_regs[R10] = regs->regs[R10];
+- gdb_regs[R11] = regs->regs[R11];
+- gdb_regs[R12] = regs->regs[R12];
+- gdb_regs[R13] = regs->regs[R13];
+- gdb_regs[R14] = regs->regs[R14];
+- gdb_regs[R15] = regs->regs[R15];
+- gdb_regs[PC] = regs->pc;
+- gdb_regs[PR] = regs->pr;
+- gdb_regs[GBR] = regs->gbr;
+- gdb_regs[MACH] = regs->mach;
+- gdb_regs[MACL] = regs->macl;
+- gdb_regs[SR] = regs->sr;
+- gdb_regs[VBR] = regs->vbr;
+-}
+-
+-/* Copy local gdb registers back to kgdb regs, for later copy to kernel */
+-static void gdb_regs_to_kgdb_regs(const int *gdb_regs,
+- struct kgdb_regs *regs)
+-{
+- regs->regs[R0] = gdb_regs[R0];
+- regs->regs[R1] = gdb_regs[R1];
+- regs->regs[R2] = gdb_regs[R2];
+- regs->regs[R3] = gdb_regs[R3];
+- regs->regs[R4] = gdb_regs[R4];
+- regs->regs[R5] = gdb_regs[R5];
+- regs->regs[R6] = gdb_regs[R6];
+- regs->regs[R7] = gdb_regs[R7];
+- regs->regs[R8] = gdb_regs[R8];
+- regs->regs[R9] = gdb_regs[R9];
+- regs->regs[R10] = gdb_regs[R10];
+- regs->regs[R11] = gdb_regs[R11];
+- regs->regs[R12] = gdb_regs[R12];
+- regs->regs[R13] = gdb_regs[R13];
+- regs->regs[R14] = gdb_regs[R14];
+- regs->regs[R15] = gdb_regs[R15];
+- regs->pc = gdb_regs[PC];
+- regs->pr = gdb_regs[PR];
+- regs->gbr = gdb_regs[GBR];
+- regs->mach = gdb_regs[MACH];
+- regs->macl = gdb_regs[MACL];
+- regs->sr = gdb_regs[SR];
+- regs->vbr = gdb_regs[VBR];
+-}
+-
+-/* Calculate the new address for after a step */
+-static short *get_step_address(void)
+-{
+- short op = *(short *) trap_registers.pc;
+- long addr;
+-
+- /* BT */
+- if (OPCODE_BT(op)) {
+- if (trap_registers.sr & SR_T_BIT_MASK)
+- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+- else
+- addr = trap_registers.pc + 2;
+- }
+-
+- /* BTS */
+- else if (OPCODE_BTS(op)) {
+- if (trap_registers.sr & SR_T_BIT_MASK)
+- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+- else
+- addr = trap_registers.pc + 4; /* Not in delay slot */
+- }
+-
+- /* BF */
+- else if (OPCODE_BF(op)) {
+- if (!(trap_registers.sr & SR_T_BIT_MASK))
+- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+- else
+- addr = trap_registers.pc + 2;
+- }
+-
+- /* BFS */
+- else if (OPCODE_BFS(op)) {
+- if (!(trap_registers.sr & SR_T_BIT_MASK))
+- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+- else
+- addr = trap_registers.pc + 4; /* Not in delay slot */
+- }
+-
+- /* BRA */
+- else if (OPCODE_BRA(op))
+- addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op);
+-
+- /* BRAF */
+- else if (OPCODE_BRAF(op))
+- addr = trap_registers.pc + 4
+- + trap_registers.regs[OPCODE_BRAF_REG(op)];
+-
+- /* BSR */
+- else if (OPCODE_BSR(op))
+- addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op);
+-
+- /* BSRF */
+- else if (OPCODE_BSRF(op))
+- addr = trap_registers.pc + 4
+- + trap_registers.regs[OPCODE_BSRF_REG(op)];
+-
+- /* JMP */
+- else if (OPCODE_JMP(op))
+- addr = trap_registers.regs[OPCODE_JMP_REG(op)];
+-
+- /* JSR */
+- else if (OPCODE_JSR(op))
+- addr = trap_registers.regs[OPCODE_JSR_REG(op)];
+-
+- /* RTS */
+- else if (OPCODE_RTS(op))
+- addr = trap_registers.pr;
+-
+- /* RTE */
+- else if (OPCODE_RTE(op))
+- addr = trap_registers.regs[15];
+-
+- /* Other */
+- else
+- addr = trap_registers.pc + 2;
+-
+- kgdb_flush_icache_range(addr, addr + 2);
+- return (short *) addr;
+-}
+-
+-/* Set up a single-step. Replace the instruction immediately after the
+- current instruction (i.e. next in the expected flow of control) with a
+- trap instruction, so that returning will cause only a single instruction
+- to be executed. Note that this model is slightly broken for instructions
+- with delay slots (e.g. B[TF]S, BSR, BRA etc), where both the branch
+- and the instruction in the delay slot will be executed. */
+-static void do_single_step(void)
+-{
+- unsigned short *addr = 0;
+-
+- /* Determine where the target instruction will send us to */
+- addr = get_step_address();
+- stepped_address = (int)addr;
+-
+- /* Replace it */
+- stepped_opcode = *(short *)addr;
+- *addr = STEP_OPCODE;
+-
+- /* Flush and return */
+- kgdb_flush_icache_range((long) addr, (long) addr + 2);
+- return;
+-}
+-
+-/* Undo a single step */
+-static void undo_single_step(void)
+-{
+- /* If we have stepped, put back the old instruction */
+- /* Use stepped_address in case we stopped elsewhere */
+- if (stepped_opcode != 0) {
+- *(short*)stepped_address = stepped_opcode;
+- kgdb_flush_icache_range(stepped_address, stepped_address + 2);
+- }
+- stepped_opcode = 0;
+-}
+-
+-/* Send a signal message */
+-static void send_signal_msg(const int signum)
+-{
+- out_buffer[0] = 'S';
+- out_buffer[1] = highhex(signum);
+- out_buffer[2] = lowhex(signum);
+- out_buffer[3] = 0;
+- put_packet(out_buffer);
+-}
+-
+-/* Reply that all was well */
+-static void send_ok_msg(void)
+-{
+- strcpy(out_buffer, "OK");
+- put_packet(out_buffer);
+-}
+-
+-/* Reply that an error occurred */
+-static void send_err_msg(void)
+-{
+- strcpy(out_buffer, "E01");
+- put_packet(out_buffer);
+-}
+-
+-/* Empty message indicates unrecognised command */
+-static void send_empty_msg(void)
+-{
+- put_packet("");
+-}
+-
+-/* Read memory due to 'm' message */
+-static void read_mem_msg(void)
+-{
+- char *ptr;
+- int addr;
+- int length;
+-
+- /* Jmp, disable bus error handler */
+- if (setjmp(rem_com_env) == 0) {
+-
+- kgdb_nofault = 1;
+-
+- /* Walk through, have m<addr>,<length> */
+- ptr = &in_buffer[1];
+- if (hex_to_int(&ptr, &addr) && (*ptr++ == ','))
+- if (hex_to_int(&ptr, &length)) {
+- ptr = 0;
+- if (length * 2 > OUTBUFMAX)
+- length = OUTBUFMAX / 2;
+- mem_to_hex((char *) addr, out_buffer, length);
+- }
+- if (ptr)
+- send_err_msg();
+- else
+- put_packet(out_buffer);
+- } else
+- send_err_msg();
+-
+- /* Restore bus error handler */
+- kgdb_nofault = 0;
+-}
+-
+-/* Write memory due to 'M' or 'X' message */
+-static void write_mem_msg(int binary)
+-{
+- char *ptr;
+- int addr;
+- int length;
+-
+- if (setjmp(rem_com_env) == 0) {
+-
+- kgdb_nofault = 1;
+-
+- /* Walk through, have M<addr>,<length>:<data> */
+- ptr = &in_buffer[1];
+- if (hex_to_int(&ptr, &addr) && (*ptr++ == ','))
+- if (hex_to_int(&ptr, &length) && (*ptr++ == ':')) {
+- if (binary)
+- ebin_to_mem(ptr, (char*)addr, length);
+- else
+- hex_to_mem(ptr, (char*)addr, length);
+- kgdb_flush_icache_range(addr, addr + length);
+- ptr = 0;
+- send_ok_msg();
+- }
+- if (ptr)
+- send_err_msg();
+- } else
+- send_err_msg();
+-
+- /* Restore bus error handler */
+- kgdb_nofault = 0;
+-}
+-
+-/* Continue message */
+-static void continue_msg(void)
+-{
+- /* Try to read optional parameter, PC unchanged if none */
+- char *ptr = &in_buffer[1];
+- int addr;
+-
+- if (hex_to_int(&ptr, &addr))
+- trap_registers.pc = addr;
+-}
+-
+-/* Continue message with signal */
+-static void continue_with_sig_msg(void)
+-{
+- int signal;
+- char *ptr = &in_buffer[1];
+- int addr;
+-
+- /* Report limitation */
+- kgdb_to_gdb("Cannot force signal in kgdb, continuing anyway.\n");
+-
+- /* Signal */
+- hex_to_int(&ptr, &signal);
+- if (*ptr == ';')
+- ptr++;
+-
+- /* Optional address */
+- if (hex_to_int(&ptr, &addr))
+- trap_registers.pc = addr;
+-}
+-
+-/* Step message */
+-static void step_msg(void)
+-{
+- continue_msg();
+- do_single_step();
+-}
+-
+-/* Step message with signal */
+-static void step_with_sig_msg(void)
+-{
+- continue_with_sig_msg();
+- do_single_step();
+-}
+-
+-/* Send register contents */
+-static void send_regs_msg(void)
+-{
+- kgdb_regs_to_gdb_regs(&trap_registers, registers);
+- mem_to_hex((char *) registers, out_buffer, NUMREGBYTES);
+- put_packet(out_buffer);
+-}
+-
+-/* Set register contents - currently can't set other thread's registers */
+-static void set_regs_msg(void)
+-{
+- kgdb_regs_to_gdb_regs(&trap_registers, registers);
+- hex_to_mem(&in_buffer[1], (char *) registers, NUMREGBYTES);
+- gdb_regs_to_kgdb_regs(registers, &trap_registers);
+- send_ok_msg();
+-}
+-
+-#ifdef CONFIG_SH_KGDB_CONSOLE
+-/*
+- * Bring up the ports..
+- */
+-static int kgdb_serial_setup(void)
+-{
+- extern int kgdb_console_setup(struct console *co, char *options);
+- struct console dummy;
+-
+- kgdb_console_setup(&dummy, 0);
+-
+- return 0;
+-}
+-#else
+-#define kgdb_serial_setup() 0
+-#endif
+-
+-/* The command loop, read and act on requests */
+-static void kgdb_command_loop(const int excep_code, const int trapa_value)
+-{
+- int sigval;
+-
+- if (excep_code == NMI_VEC) {
+-#ifndef CONFIG_KGDB_NMI
+- printk(KERN_NOTICE "KGDB: Ignoring unexpected NMI?\n");
+- return;
+-#else /* CONFIG_KGDB_NMI */
+- if (!kgdb_enabled) {
+- kgdb_enabled = 1;
+- kgdb_init();
+- }
+-#endif /* CONFIG_KGDB_NMI */
+- }
+-
+- /* Ignore if we're disabled */
+- if (!kgdb_enabled)
+- return;
+-
+- /* Enter GDB mode (e.g. after detach) */
+- if (!kgdb_in_gdb_mode) {
+- /* Do serial setup, notify user, issue preemptive ack */
+- printk(KERN_NOTICE "KGDB: Waiting for GDB\n");
+- kgdb_in_gdb_mode = 1;
+- put_debug_char('+');
+- }
+-
+- /* Reply to host that an exception has occurred */
+- sigval = compute_signal(excep_code);
+- send_signal_msg(sigval);
+-
+- /* TRAP_VEC exception indicates a software trap inserted in place of
+- code by GDB so back up PC by one instruction, as this instruction
+- will later be replaced by its original one. Do NOT do this for
+- trap 0xff, since that indicates a compiled-in breakpoint which
+- will not be replaced (and we would retake the trap forever) */
+- if ((excep_code == TRAP_VEC) && (trapa_value != (0x3c << 2)))
+- trap_registers.pc -= 2;
+-
+- /* Undo any stepping we may have done */
+- undo_single_step();
+-
+- while (1) {
+- out_buffer[0] = 0;
+- get_packet(in_buffer, BUFMAX);
+-
+- /* Examine first char of buffer to see what we need to do */
+- switch (in_buffer[0]) {
+- case '?': /* Send which signal we've received */
+- send_signal_msg(sigval);
+- break;
+-
+- case 'g': /* Return the values of the CPU registers */
+- send_regs_msg();
+- break;
+-
+- case 'G': /* Set the value of the CPU registers */
+- set_regs_msg();
+- break;
+-
+- case 'm': /* Read LLLL bytes address AA..AA */
+- read_mem_msg();
+- break;
+-
+- case 'M': /* Write LLLL bytes address AA..AA, ret OK */
+- write_mem_msg(0); /* 0 = data in hex */
+- break;
+-
+- case 'X': /* Write LLLL bytes esc bin address AA..AA */
+- if (kgdb_bits == '8')
+- write_mem_msg(1); /* 1 = data in binary */
+- else
+- send_empty_msg();
+- break;
+-
+- case 'C': /* Continue, signum included, we ignore it */
+- continue_with_sig_msg();
+- return;
+-
+- case 'c': /* Continue at address AA..AA (optional) */
+- continue_msg();
+- return;
+-
+- case 'S': /* Step, signum included, we ignore it */
+- step_with_sig_msg();
+- return;
+-
+- case 's': /* Step one instruction from AA..AA */
+- step_msg();
+- return;
+-
+- case 'k': /* 'Kill the program' with a kernel ? */
+- break;
+-
+- case 'D': /* Detach from program, send reply OK */
+- kgdb_in_gdb_mode = 0;
+- send_ok_msg();
+- get_debug_char();
+- return;
+-
+- default:
+- send_empty_msg();
+- break;
+- }
+- }
+-}
+-
+-/* There has been an exception, most likely a breakpoint. */
+-static void handle_exception(struct pt_regs *regs)
+-{
+- int excep_code, vbr_val;
+- int count;
+- int trapa_value = ctrl_inl(TRA);
+-
+- /* Copy kernel regs (from stack) */
+- for (count = 0; count < 16; count++)
+- trap_registers.regs[count] = regs->regs[count];
+- trap_registers.pc = regs->pc;
+- trap_registers.pr = regs->pr;
+- trap_registers.sr = regs->sr;
+- trap_registers.gbr = regs->gbr;
+- trap_registers.mach = regs->mach;
+- trap_registers.macl = regs->macl;
+-
+- asm("stc vbr, %0":"=r"(vbr_val));
+- trap_registers.vbr = vbr_val;
+-
+- /* Get excode for command loop call, user access */
+- asm("stc r2_bank, %0":"=r"(excep_code));
+- kgdb_excode = excep_code;
+-
+- /* Other interesting environment items for reference */
+- asm("stc r6_bank, %0":"=r"(kgdb_g_imask));
+- kgdb_current = current;
+- kgdb_trapa_val = trapa_value;
+-
+- /* Act on the exception */
+- kgdb_command_loop(excep_code, trapa_value);
+-
+- kgdb_current = NULL;
+-
+- /* Copy back the (maybe modified) registers */
+- for (count = 0; count < 16; count++)
+- regs->regs[count] = trap_registers.regs[count];
+- regs->pc = trap_registers.pc;
+- regs->pr = trap_registers.pr;
+- regs->sr = trap_registers.sr;
+- regs->gbr = trap_registers.gbr;
+- regs->mach = trap_registers.mach;
+- regs->macl = trap_registers.macl;
+-
+- vbr_val = trap_registers.vbr;
+- asm("ldc %0, vbr": :"r"(vbr_val));
+-}
+-
+-asmlinkage void kgdb_handle_exception(unsigned long r4, unsigned long r5,
+- unsigned long r6, unsigned long r7,
+- struct pt_regs __regs)
+-{
+- struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
+- handle_exception(regs);
+-}
+-
+-/* Initialise the KGDB data structures and serial configuration */
+-int kgdb_init(void)
+-{
+- if (!kgdb_enabled)
+- return 1;
+-
+- in_nmi = 0;
+- kgdb_nofault = 0;
+- stepped_opcode = 0;
+- kgdb_in_gdb_mode = 0;
+-
+- if (kgdb_serial_setup() != 0) {
+- printk(KERN_NOTICE "KGDB: serial setup error\n");
+- return -1;
+- }
+-
+- /* Init ptr to exception handler */
+- kgdb_debug_hook = handle_exception;
+- kgdb_bus_err_hook = kgdb_handle_bus_error;
+-
+- /* Enter kgdb now if requested, or just report init done */
+- printk(KERN_NOTICE "KGDB: stub is initialized.\n");
+-
+- return 0;
+-}
+-
+-/* Make function available for "user messages"; console will use it too. */
+-
+-char gdbmsgbuf[BUFMAX];
+-#define MAXOUT ((BUFMAX-2)/2)
+-
+-static void kgdb_msg_write(const char *s, unsigned count)
+-{
+- int i;
+- int wcount;
+- char *bufptr;
+-
+- /* 'O'utput */
+- gdbmsgbuf[0] = 'O';
+-
+- /* Fill and send buffers... */
+- while (count > 0) {
+- bufptr = gdbmsgbuf + 1;
+-
+- /* Calculate how many this time */
+- wcount = (count > MAXOUT) ? MAXOUT : count;
+-
+- /* Pack in hex chars */
+- for (i = 0; i < wcount; i++)
+- bufptr = pack_hex_byte(bufptr, s[i]);
+- *bufptr = '\0';
+-
+- /* Move up */
+- s += wcount;
+- count -= wcount;
+-
+- /* Write packet */
+- put_packet(gdbmsgbuf);
+- }
+-}
+-
+-static void kgdb_to_gdb(const char *s)
+-{
+- kgdb_msg_write(s, strlen(s));
+-}
+-
+-#ifdef CONFIG_SH_KGDB_CONSOLE
+-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+-{
+- /* Bail if we're not talking to GDB */
+- if (!kgdb_in_gdb_mode)
+- return;
+-
+- kgdb_msg_write(s, count);
+-}
+-#endif
+-
+-#ifdef CONFIG_KGDB_SYSRQ
+-static void sysrq_handle_gdb(int key, struct tty_struct *tty)
+-{
+- printk("Entering GDB stub\n");
+- breakpoint();
+-}
+-
+-static struct sysrq_key_op sysrq_gdb_op = {
+- .handler = sysrq_handle_gdb,
+- .help_msg = "Gdb",
+- .action_msg = "GDB",
+-};
+-
+-static int gdb_register_sysrq(void)
+-{
+- printk("Registering GDB sysrq handler\n");
+- register_sysrq_key('g', &sysrq_gdb_op);
+- return 0;
+-}
+-module_init(gdb_register_sysrq);
+-#endif
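
The stub removed above speaks GDB's remote serial protocol: every reply built by send_signal_msg(), send_ok_msg() and friends is handed to put_packet(), which frames it as $<payload>#<checksum>, the checksum being the modulo-256 sum of the payload bytes (the preemptive '+' sent when entering gdb mode is the protocol's acknowledgement byte). A minimal user-space sketch of that framing, with illustrative names only (the put_packet implementation itself is outside this hunk):

    #include <stdio.h>
    #include <string.h>

    /* Frame 'payload' as a GDB remote-serial-protocol packet:
     * "$<payload>#<checksum>", checksum = sum of payload bytes mod 256. */
    static void rsp_frame(const char *payload, char *out, size_t outlen)
    {
        unsigned char sum = 0;
        const char *p;

        for (p = payload; *p; p++)
            sum += (unsigned char)*p;
        snprintf(out, outlen, "$%s#%02x", payload, sum);
    }

    int main(void)
    {
        char pkt[128];

        rsp_frame("OK", pkt, sizeof(pkt));   /* -> "$OK#9a" */
        puts(pkt);
        rsp_frame("S05", pkt, sizeof(pkt));  /* a SIGTRAP stop reply */
        puts(pkt);
        return 0;
    }
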
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/time.c linux-2.6.22-591/arch/sh/kernel/time.c
+--- linux-2.6.22-570/arch/sh/kernel/time.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/time.c 2007-12-21 15:36:11.000000000 -0500
+@@ -259,11 +259,4 @@
+ ((sh_hpt_frequency + 500) / 1000) / 1000,
+ ((sh_hpt_frequency + 500) / 1000) % 1000);
+
+-#if defined(CONFIG_SH_KGDB)
+- /*
+- * Set up kgdb as requested. We do it here because the serial
+- * init uses the timer vars we just set up for figuring baud.
+- */
+- kgdb_init();
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/traps.c linux-2.6.22-591/arch/sh/kernel/traps.c
+--- linux-2.6.22-570/arch/sh/kernel/traps.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,16 +25,10 @@
+ #include <linux/limits.h>
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <linux/kgdb.h>
+
+-#ifdef CONFIG_SH_KGDB
+-#include <asm/kgdb.h>
+-#define CHK_REMOTE_DEBUG(regs) \
+-{ \
+- if (kgdb_debug_hook && !user_mode(regs))\
+- (*kgdb_debug_hook)(regs); \
+-}
+-#else
+-#define CHK_REMOTE_DEBUG(regs)
++#ifndef CONFIG_KGDB
++#define kgdb_handle_exception(t, s, e, r)
+ #endif
+
+ #ifdef CONFIG_CPU_SH2
+@@ -91,7 +85,9 @@
+
+ printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+
+- CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++ kgdb_handle_exception(1, SIGTRAP, err, regs);
++#endif
+ print_modules();
+ show_regs(regs);
+
+@@ -700,7 +696,9 @@
+ lookup_exception_vector(error_code);
+
+ local_irq_enable();
+- CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++ kgdb_handle_exception(1, SIGILL, err, regs);
++#endif
+ force_sig(SIGILL, tsk);
+ die_if_no_fixup("reserved instruction", regs, error_code);
+ }
+@@ -771,7 +769,9 @@
+ lookup_exception_vector(error_code);
+
+ local_irq_enable();
+- CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++ kgdb_handle_exception(1, SIGILL, err, regs);
++#endif
+ force_sig(SIGILL, tsk);
+ die_if_no_fixup("illegal slot instruction", regs, error_code);
+ }
+diff -Nurb linux-2.6.22-570/arch/sh/mm/extable.c linux-2.6.22-591/arch/sh/mm/extable.c
+--- linux-2.6.22-570/arch/sh/mm/extable.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/mm/extable.c 2007-12-21 15:36:11.000000000 -0500
+@@ -5,6 +5,7 @@
+ */
+
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/uaccess.h>
+
+ int fixup_exception(struct pt_regs *regs)
+@@ -16,6 +17,12 @@
+ regs->pc = fixup->fixup;
+ return 1;
+ }
++#ifdef CONFIG_KGDB
++ if (atomic_read(&debugger_active) && kgdb_may_fault)
++ /* Restore our previous state. */
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ /* Never reached. */
++#endif
+
+ return 0;
+ }
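
The hook added above lets kgdb recover from faults it provokes while poking at memory on gdb's behalf: if the faulting access is not covered by an exception-table fixup and the debugger is active with kgdb_may_fault set, control longjmps back to wherever kgdb_fault_setjmp() was last called. An illustrative sketch (not code from this patch) of how a kgdb memory accessor would use the pair; the 0/1 return convention matches the x86_64 kgdb-jmp.S added later in this patch:

    /* Sketch only: kgdb_may_fault and kgdb_fault_jmp_regs come from the
     * generic kgdb core, which is not part of this hunk. */
    static int sketch_kgdb_read_mem(char *addr, char *buf, int count)
    {
        kgdb_may_fault = 1;
        if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) != 0) {
            /* Arrived here via kgdb_fault_longjmp() from
             * fixup_exception(): the access faulted. */
            kgdb_may_fault = 0;
            return -EINVAL;
        }
        while (count--)
            *buf++ = *addr++;
        kgdb_may_fault = 0;
        return 0;
    }
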
+diff -Nurb linux-2.6.22-570/arch/sh/mm/fault-nommu.c linux-2.6.22-591/arch/sh/mm/fault-nommu.c
+--- linux-2.6.22-570/arch/sh/mm/fault-nommu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/mm/fault-nommu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -28,10 +28,6 @@
+ #include <asm/mmu_context.h>
+ #include <asm/cacheflush.h>
+
+-#if defined(CONFIG_SH_KGDB)
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void die(const char *,struct pt_regs *,long);
+
+ /*
+@@ -42,11 +38,6 @@
+ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+ unsigned long address)
+ {
+-#if defined(CONFIG_SH_KGDB)
+- if (kgdb_nofault && kgdb_bus_err_hook)
+- kgdb_bus_err_hook();
+-#endif
+-
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+@@ -68,11 +59,6 @@
+ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+ unsigned long address)
+ {
+-#if defined(CONFIG_SH_KGDB)
+- if (kgdb_nofault && kgdb_bus_err_hook)
+- kgdb_bus_err_hook();
+-#endif
+-
+ if (address >= TASK_SIZE)
+ return 1;
+
+diff -Nurb linux-2.6.22-570/arch/sh/mm/fault.c linux-2.6.22-591/arch/sh/mm/fault.c
+--- linux-2.6.22-570/arch/sh/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/sh/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -18,7 +18,6 @@
+ #include <asm/system.h>
+ #include <asm/mmu_context.h>
+ #include <asm/tlbflush.h>
+-#include <asm/kgdb.h>
+
+ /*
+ * This routine handles page faults. It determines the address,
+@@ -39,11 +38,6 @@
+ trace_hardirqs_on();
+ local_irq_enable();
+
+-#ifdef CONFIG_SH_KGDB
+- if (kgdb_nofault && kgdb_bus_err_hook)
+- kgdb_bus_err_hook();
+-#endif
+-
+ tsk = current;
+ mm = tsk->mm;
+ si_code = SEGV_MAPERR;
+@@ -189,6 +183,7 @@
+ }
+ die("Oops", regs, writeaccess);
+ do_exit(SIGKILL);
++ dump_stack();
+
+ /*
+ * We ran out of memory, or some other thing happened to us that made
+@@ -252,11 +247,6 @@
+ spinlock_t *ptl = NULL;
+ int ret = 1;
+
+-#ifdef CONFIG_SH_KGDB
+- if (kgdb_nofault && kgdb_bus_err_hook)
+- kgdb_bus_err_hook();
+-#endif
+-
+ /*
+ * We don't take page faults for P1, P2, and parts of P4, these
+ * are always mapped, whether it be due to legacy behaviour in
+diff -Nurb linux-2.6.22-570/arch/sparc64/kernel/power.c linux-2.6.22-591/arch/sparc64/kernel/power.c
+--- linux-2.6.22-570/arch/sparc64/kernel/power.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sparc64/kernel/power.c 2007-12-21 15:36:11.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/pm.h>
+ #include <linux/syscalls.h>
++#include <linux/reboot.h>
+
+ #include <asm/system.h>
+ #include <asm/auxio.h>
+@@ -33,14 +34,13 @@
+ #include <linux/pci.h>
+ static void __iomem *power_reg;
+
+-static DECLARE_WAIT_QUEUE_HEAD(powerd_wait);
+ static int button_pressed;
+
+ static irqreturn_t power_handler(int irq, void *dev_id)
+ {
+ if (button_pressed == 0) {
+ button_pressed = 1;
+- wake_up(&powerd_wait);
++ orderly_poweroff(true);
+ }
+
+ /* FIXME: Check registers for status... */
+@@ -77,36 +77,6 @@
+ EXPORT_SYMBOL(pm_power_off);
+
+ #ifdef CONFIG_PCI
+-static int powerd(void *__unused)
+-{
+- static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+- char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
+- DECLARE_WAITQUEUE(wait, current);
+-
+- daemonize("powerd");
+-
+- add_wait_queue(&powerd_wait, &wait);
+-again:
+- for (;;) {
+- set_task_state(current, TASK_INTERRUPTIBLE);
+- if (button_pressed)
+- break;
+- flush_signals(current);
+- schedule();
+- }
+- __set_current_state(TASK_RUNNING);
+- remove_wait_queue(&powerd_wait, &wait);
+-
+- /* Ok, down we go... */
+- button_pressed = 0;
+- if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
+- printk("powerd: shutdown execution failed\n");
+- add_wait_queue(&powerd_wait, &wait);
+- goto again;
+- }
+- return 0;
+-}
+-
+ static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
+ {
+ if (irq == PCI_IRQ_NONE)
+@@ -130,12 +100,6 @@
+ poweroff_method = machine_halt; /* able to use the standard halt */
+
+ if (has_button_interrupt(irq, op->node)) {
+- if (kernel_thread(powerd, NULL, CLONE_FS) < 0) {
+- printk("Failed to start power daemon.\n");
+- return 0;
+- }
+- printk("powerd running.\n");
+-
+ if (request_irq(irq,
+ power_handler, 0, "power", NULL) < 0)
+ printk("power: Error, cannot register IRQ handler.\n");
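
This hunk retires the dedicated powerd kernel thread and its kernel_execve() of /sbin/shutdown in favour of a single call to the generic orderly_poweroff(true) helper, which runs the configured userspace poweroff command from process context and, because force is true, falls back to powering off directly if the helper cannot run. Roughly, as a paraphrase (see kernel/sys.c for the real implementation; run_userspace_cmd is an illustrative name):

    /* Rough paraphrase of the generic helper this hunk switches to. */
    int orderly_poweroff_sketch(bool force)
    {
        int ret = run_userspace_cmd("/sbin/poweroff"); /* illustrative */

        if (ret && force) {
            /* Userspace helper failed: sync and power off directly. */
            emergency_sync();
            kernel_power_off();
        }
        return ret;
    }
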
+diff -Nurb linux-2.6.22-570/arch/sparc64/solaris/ioctl.c linux-2.6.22-591/arch/sparc64/solaris/ioctl.c
+--- linux-2.6.22-570/arch/sparc64/solaris/ioctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sparc64/solaris/ioctl.c 2007-12-21 15:36:14.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/compat.h>
+
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/termios.h>
+@@ -686,7 +687,7 @@
+ int i = 0;
+
+ read_lock_bh(&dev_base_lock);
+- for_each_netdev(d)
++ for_each_netdev(&init_net, d)
+ i++;
+ read_unlock_bh(&dev_base_lock);
+
+diff -Nurb linux-2.6.22-570/arch/um/Kconfig.debug linux-2.6.22-591/arch/um/Kconfig.debug
+--- linux-2.6.22-570/arch/um/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/um/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -47,4 +47,13 @@
+ If you're involved in UML kernel development and want to use gcov,
+ say Y. If you're unsure, say N.
+
++config DEBUG_STACK_USAGE
++ bool "Stack utilization instrumentation"
++ default n

++ help
++ Track the maximum kernel stack usage - this will look at each
++ kernel stack at process exit and log it if it's the deepest
++ stack seen so far.
++
++ This option will slow down process creation and destruction somewhat.
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/um/defconfig linux-2.6.22-591/arch/um/defconfig
+--- linux-2.6.22-570/arch/um/defconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/um/defconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -527,3 +527,4 @@
+ # CONFIG_RCU_TORTURE_TEST is not set
+ # CONFIG_GPROF is not set
+ # CONFIG_GCOV is not set
++# CONFIG_DEBUG_STACK_USAGE is not set
+diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig linux-2.6.22-591/arch/x86_64/Kconfig
+--- linux-2.6.22-570/arch/x86_64/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -698,6 +698,8 @@
+
+ source "arch/x86_64/kernel/cpufreq/Kconfig"
+
++source "drivers/cpuidle/Kconfig"
++
+ endmenu
+
+ menu "Bus options (PCI etc.)"
+diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig.debug linux-2.6.22-591/arch/x86_64/Kconfig.debug
+--- linux-2.6.22-570/arch/x86_64/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/x86_64/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500
+@@ -55,7 +55,4 @@
+
+ This option will slow down process creation somewhat.
+
+-#config X86_REMOTE_DEBUG
+-# bool "kgdb debugging stub"
+-
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/x86_64/Makefile linux-2.6.22-591/arch/x86_64/Makefile
+--- linux-2.6.22-570/arch/x86_64/Makefile 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -41,7 +41,9 @@
+ cflags-y += -mcmodel=kernel
+ cflags-y += -pipe
+ cflags-y += -Wno-sign-compare
++ifneq ($(CONFIG_UNWIND_INFO),y)
+ cflags-y += -fno-asynchronous-unwind-tables
++endif
+ ifneq ($(CONFIG_DEBUG_INFO),y)
+ # -fweb shrinks the kernel a bit, but the difference is very small
+ # it also messes up debugging, so don't use it for now.
+diff -Nurb linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S
+--- linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:36:11.000000000 -0500
+@@ -731,4 +731,7 @@
+ .quad compat_sys_signalfd
+ .quad compat_sys_timerfd
+ .quad sys_eventfd
++ .quad sys_revokeat
++ .quad sys_frevoke /* 325 */
++ .quad sys_fallocate
+ ia32_syscall_end:
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/Makefile linux-2.6.22-591/arch/x86_64/kernel/Makefile
+--- linux-2.6.22-570/arch/x86_64/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -33,10 +33,12 @@
+ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
+ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+ obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
+ obj-$(CONFIG_X86_VSMP) += vsmp.o
+ obj-$(CONFIG_K8_NB) += k8.o
+ obj-$(CONFIG_AUDIT) += audit.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_PCI) += early-quirks.o
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,65 @@
++/*
++ * arch/x86_64/kernel/kgdb-jmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 2001, 2003, 2004 Free Software Foundation, Inc.
++ * Copyright (C) 2005 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/linkage.h>
++
++#define JB_RBX 0
++#define JB_RBP 1
++#define JB_R12 2
++#define JB_R13 3
++#define JB_R14 4
++#define JB_R15 5
++#define JB_RSP 6
++#define JB_PC 7
++
++ .code64
++
++/* This must be called prior to kgdb_fault_longjmp and
++ * kgdb_fault_longjmp must not be called outside of the context of the
++ * last call to kgdb_fault_setjmp.
++ */
++ENTRY(kgdb_fault_setjmp)
++ /* Save registers. */
++ movq %rbx, (JB_RBX*8)(%rdi)
++ movq %rbp, (JB_RBP*8)(%rdi)
++ movq %r12, (JB_R12*8)(%rdi)
++ movq %r13, (JB_R13*8)(%rdi)
++ movq %r14, (JB_R14*8)(%rdi)
++ movq %r15, (JB_R15*8)(%rdi)
++ leaq 8(%rsp), %rdx /* Save SP as it will be after we return. */
++ movq %rdx, (JB_RSP*8)(%rdi)
++ movq (%rsp), %rax /* Save PC we are returning to now. */
++ movq %rax, (JB_PC*8)(%rdi)
++ /* Set return value for setjmp. */
++ mov $0,%eax
++ movq (JB_PC*8)(%rdi),%rdx
++ movq (JB_RSP*8)(%rdi),%rsp
++ jmpq *%rdx
++
++ENTRY(kgdb_fault_longjmp)
++ /* Restore registers. */
++ movq (JB_RBX*8)(%rdi),%rbx
++ movq (JB_RBP*8)(%rdi),%rbp
++ movq (JB_R12*8)(%rdi),%r12
++ movq (JB_R13*8)(%rdi),%r13
++ movq (JB_R14*8)(%rdi),%r14
++ movq (JB_R15*8)(%rdi),%r15
++ /* Set return value for setjmp. */
++ movq (JB_PC*8)(%rdi),%rdx
++ movq (JB_RSP*8)(%rdi),%rsp
++ mov $1,%eax
++ jmpq *%rdx
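
kgdb_fault_setjmp saves exactly the x86-64 callee-saved state (rbx, rbp, r12-r15, rsp, return PC), and kgdb_fault_longjmp re-enters at the saved PC with %eax set to 1, so callers see the classic "returns twice" behaviour: 0 from the direct call, 1 after a fault unwinds. The same control flow in portable C, for reference only:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf recover;

    static void faulting_operation(void)
    {
        /* Stand-in for a bus error / bad memory access. */
        longjmp(recover, 1);  /* like kgdb_fault_longjmp: "return" 1 */
    }

    int main(void)
    {
        if (setjmp(recover) == 0) {  /* first return is 0 */
            faulting_operation();
            puts("not reached");
        } else {
            puts("recovered from fault");
        }
        return 0;
    }
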
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb.c linux-2.6.22-591/arch/x86_64/kernel/kgdb.c
+--- linux-2.6.22-570/arch/x86_64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,461 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2004 Amit S. Kale <amitkale@linsyssoft.com>
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2002 Andi Kleen, SuSE Labs
++ * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd.
++ * Copyright (C) 2007 Jason Wessel, Wind River Systems, Inc.
++ */
++/****************************************************************************
++ * Contributor: Lake Stevens Instrument Division
++ * Written by: Glenn Engel
++ * Updated by: Amit Kale <akale@veritas.com>
++ * Modified for 386 by Jim Kingdon, Cygnus Support.
++ * Original kgdb, compatibility with 2.1.xx kernel by
++ * David Grothe <dave@gcom.com>
++ * Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com>
++ * X86_64 changes from Andi Kleen's patch merged by Jim Houston
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h> /* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/apicdef.h>
++#include <asm/mach_apic.h>
++#include <asm/kdebug.h>
++#include <asm/debugreg.h>
++
++/* Put the error code here just in case the user cares. */
++int gdb_x86_64errcode;
++/* Likewise, the vector number here (since GDB only gets the signal
++ number through the usual means, and that's not very specific). */
++int gdb_x86_64vector = -1;
++
++extern atomic_t cpu_doing_single_step;
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ gdb_regs[_RAX] = regs->rax;
++ gdb_regs[_RBX] = regs->rbx;
++ gdb_regs[_RCX] = regs->rcx;
++ gdb_regs[_RDX] = regs->rdx;
++ gdb_regs[_RSI] = regs->rsi;
++ gdb_regs[_RDI] = regs->rdi;
++ gdb_regs[_RBP] = regs->rbp;
++ gdb_regs[_PS] = regs->eflags;
++ gdb_regs[_PC] = regs->rip;
++ gdb_regs[_R8] = regs->r8;
++ gdb_regs[_R9] = regs->r9;
++ gdb_regs[_R10] = regs->r10;
++ gdb_regs[_R11] = regs->r11;
++ gdb_regs[_R12] = regs->r12;
++ gdb_regs[_R13] = regs->r13;
++ gdb_regs[_R14] = regs->r14;
++ gdb_regs[_R15] = regs->r15;
++ gdb_regs[_RSP] = regs->rsp;
++}
++
++extern void thread_return(void);
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ gdb_regs[_RAX] = 0;
++ gdb_regs[_RBX] = 0;
++ gdb_regs[_RCX] = 0;
++ gdb_regs[_RDX] = 0;
++ gdb_regs[_RSI] = 0;
++ gdb_regs[_RDI] = 0;
++ gdb_regs[_RBP] = *(unsigned long *)p->thread.rsp;
++ gdb_regs[_PS] = *(unsigned long *)(p->thread.rsp + 8);
++ gdb_regs[_PC] = (unsigned long)&thread_return;
++ gdb_regs[_R8] = 0;
++ gdb_regs[_R9] = 0;
++ gdb_regs[_R10] = 0;
++ gdb_regs[_R11] = 0;
++ gdb_regs[_R12] = 0;
++ gdb_regs[_R13] = 0;
++ gdb_regs[_R14] = 0;
++ gdb_regs[_R15] = 0;
++ gdb_regs[_RSP] = p->thread.rsp;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ regs->rax = gdb_regs[_RAX];
++ regs->rbx = gdb_regs[_RBX];
++ regs->rcx = gdb_regs[_RCX];
++ regs->rdx = gdb_regs[_RDX];
++ regs->rsi = gdb_regs[_RSI];
++ regs->rdi = gdb_regs[_RDI];
++ regs->rbp = gdb_regs[_RBP];
++ regs->eflags = gdb_regs[_PS];
++ regs->rip = gdb_regs[_PC];
++ regs->r8 = gdb_regs[_R8];
++ regs->r9 = gdb_regs[_R9];
++ regs->r10 = gdb_regs[_R10];
++ regs->r11 = gdb_regs[_R11];
++ regs->r12 = gdb_regs[_R12];
++ regs->r13 = gdb_regs[_R13];
++ regs->r14 = gdb_regs[_R14];
++ regs->r15 = gdb_regs[_R15];
++#if 0 /* can't change these */
++ regs->rsp = gdb_regs[_RSP];
++ regs->ss = gdb_regs[_SS];
++ regs->fs = gdb_regs[_FS];
++ regs->gs = gdb_regs[_GS];
++#endif
++
++} /* gdb_regs_to_regs */
++
++struct hw_breakpoint {
++ unsigned enabled;
++ unsigned type;
++ unsigned len;
++ unsigned long addr;
++} breakinfo[4] = {
++ { .enabled = 0 }, { .enabled = 0 },
++ { .enabled = 0 }, { .enabled = 0 },
++};
++
++static void kgdb_correct_hw_break(void)
++{
++ int breakno;
++ int correctit;
++ int breakbit;
++ unsigned long dr7;
++
++ get_debugreg(dr7, 7);
++ correctit = 0;
++ for (breakno = 0; breakno < 4; breakno++) {
++ breakbit = 2 << (breakno << 1);
++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
++ correctit = 1;
++ dr7 |= breakbit;
++ dr7 &= ~(0xf0000 << (breakno << 2));
++ dr7 |= (((breakinfo[breakno].len << 2) |
++ breakinfo[breakno].type) << 16) <<
++ (breakno << 2);
++ switch (breakno) {
++ case 0:
++ set_debugreg(breakinfo[breakno].addr, 0);
++ break;
++
++ case 1:
++ set_debugreg(breakinfo[breakno].addr, 1);
++ break;
++
++ case 2:
++ set_debugreg(breakinfo[breakno].addr, 2);
++ break;
++
++ case 3:
++ set_debugreg(breakinfo[breakno].addr, 3);
++ break;
++ }
++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
++ correctit = 1;
++ dr7 &= ~breakbit;
++ dr7 &= ~(0xf0000 << (breakno << 2));
++ }
++ }
++ if (correctit)
++ set_debugreg(dr7, 7);
++}
++
++static int kgdb_remove_hw_break(unsigned long addr, int len,
++ enum kgdb_bptype bptype)
++{
++ int i, idx = -1;
++ for (i = 0; i < 4; i++) {
++ if (breakinfo[i].addr == addr && breakinfo[i].enabled) {
++ idx = i;
++ break;
++ }
++ }
++ if (idx == -1)
++ return -1;
++
++ breakinfo[idx].enabled = 0;
++ return 0;
++}
++
++static void kgdb_remove_all_hw_break(void)
++{
++ int i;
++
++ for (i = 0; i < 4; i++) {
++ memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint));
++ }
++}
++
++static int kgdb_set_hw_break(unsigned long addr, int len,
++ enum kgdb_bptype bptype)
++{
++ int i, idx = -1;
++ for (i = 0; i < 4; i++) {
++ if (!breakinfo[i].enabled) {
++ idx = i;
++ break;
++ }
++ }
++ if (idx == -1)
++ return -1;
++ if (bptype == bp_hardware_breakpoint) {
++ breakinfo[idx].type = 0;
++ breakinfo[idx].len = 0;
++ } else if (bptype == bp_write_watchpoint) {
++ breakinfo[idx].type = 1;
++ if (len == 1 || len == 2 || len == 4)
++ breakinfo[idx].len = len - 1;
++ else
++ return -1;
++ } else if (bptype == bp_access_watchpoint) {
++ breakinfo[idx].type = 3;
++ if (len == 1 || len == 2 || len == 4)
++ breakinfo[idx].len = len - 1;
++ else
++ return -1;
++ } else
++ return -1;
++ breakinfo[idx].enabled = 1;
++ breakinfo[idx].addr = addr;
++ return 0;
++}
++
++void kgdb_disable_hw_debug(struct pt_regs *regs)
++{
++ /* Disable hardware debugging while we are in kgdb */
++ set_debugreg(0UL, 7);
++}
++
++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code)
++{
++ /* Master processor is completely in the debugger */
++ gdb_x86_64vector = e_vector;
++ gdb_x86_64errcode = err_code;
++}
++
++void kgdb_roundup_cpus(unsigned long flags)
++{
++ send_IPI_allbutself(APIC_DM_NMI);
++}
++
++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
++ char *remcomInBuffer, char *remcomOutBuffer,
++ struct pt_regs *linux_regs)
++{
++ unsigned long addr;
++ unsigned long breakno;
++ char *ptr;
++ int newPC;
++ unsigned long dr6;
++
++ switch (remcomInBuffer[0]) {
++ case 'c':
++ case 's':
++ /* try to read optional parameter, pc unchanged if no parm */
++ ptr = &remcomInBuffer[1];
++ if (kgdb_hex2long(&ptr, &addr))
++ linux_regs->rip = addr;
++ newPC = linux_regs->rip;
++
++ /* clear the trace bit */
++ linux_regs->eflags &= ~TF_MASK;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ /* set the trace bit if we're stepping */
++ if (remcomInBuffer[0] == 's') {
++ linux_regs->eflags |= TF_MASK;
++ debugger_step = 1;
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ raw_smp_processor_id());
++
++ }
++
++ get_debugreg(dr6, 6);
++ if (!(dr6 & 0x4000)) {
++ for (breakno = 0; breakno < 4; ++breakno) {
++ if (dr6 & (1 << breakno)) {
++ if (breakinfo[breakno].type == 0) {
++ /* Set restore flag */
++ linux_regs->eflags |=
++ X86_EFLAGS_RF;
++ break;
++ }
++ }
++ }
++ }
++ set_debugreg(0UL, 6);
++ kgdb_correct_hw_break();
++
++ return (0);
++ } /* switch */
++ return -1;
++}
++
++static struct pt_regs *in_interrupt_stack(unsigned long rsp, int cpu)
++{
++ struct pt_regs *regs;
++ unsigned long end = (unsigned long)cpu_pda(cpu)->irqstackptr;
++ if (rsp <= end && rsp >= end - IRQSTACKSIZE + 8) {
++ regs = *(((struct pt_regs **)end) - 1);
++ return regs;
++ }
++ return NULL;
++}
++
++static struct pt_regs *in_exception_stack(unsigned long rsp, int cpu)
++{
++ int i;
++ struct tss_struct *init_tss = &__get_cpu_var(init_tss);
++ for (i = 0; i < N_EXCEPTION_STACKS; i++)
++ if (rsp >= init_tss[cpu].ist[i] &&
++ rsp <= init_tss[cpu].ist[i] + EXCEPTION_STKSZ) {
++ struct pt_regs *r =
++ (void *)init_tss[cpu].ist[i] + EXCEPTION_STKSZ;
++ return r - 1;
++ }
++ return NULL;
++}
++
++void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, unsigned threadid)
++{
++ static char intr_desc[] = "Stack at interrupt entrypoint";
++ static char exc_desc[] = "Stack at exception entrypoint";
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ kgdb_mem2hex(intr_desc, buffer, strlen(intr_desc));
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ kgdb_mem2hex(exc_desc, buffer, strlen(exc_desc));
++}
++
++struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, int threadid)
++{
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ return current;
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ return current;
++
++ return NULL;
++}
++
++struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid)
++{
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ return stregs;
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ return stregs;
++
++ return NULL;
++}
++
++/* Register KGDB with the die_chain so that we hook into all of the right
++ * spots. */
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ struct die_args *args = ptr;
++ struct pt_regs *regs = args->regs;
++
++ if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active)
++ && kgdb_may_fault) {
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ return NOTIFY_STOP;
++ /* CPU roundup? */
++ } else if (atomic_read(&debugger_active) && cmd == DIE_NMI_IPI) {
++ kgdb_nmihook(raw_smp_processor_id(), regs);
++ return NOTIFY_STOP;
++ /* See if KGDB is interested. */
++ } else if (cmd == DIE_DEBUG
++ && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id()
++ && user_mode(regs)) {
++ /* single step exception from kernel space to user space so
++ * eat the exception and continue the process
++ */
++ printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n");
++ kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs);
++ return NOTIFY_STOP;
++ } else if (cmd == DIE_PAGE_FAULT || user_mode(regs) ||
++ cmd == DIE_NMI_IPI || (cmd == DIE_DEBUG &&
++ atomic_read(&debugger_active)))
++ /* Userpace events, normal watchdog event, or spurious
++ * debug exception. Ignore. */
++ return NOTIFY_DONE;
++
++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++
++ return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++ .notifier_call = kgdb_notify,
++ .priority = 0x7fffffff, /* we need to be notified first */
++};
++
++int kgdb_arch_init(void)
++{
++ register_die_notifier(&kgdb_notifier);
++ return 0;
++}
++/*
++ * Skip an int3 exception when it occurs after a breakpoint has been
++ * removed. Backtrack rip by 1 since the int3 would have caused it to
++ * increment by 1.
++ */
++
++int kgdb_skipexception(int exception, struct pt_regs *regs)
++{
++ if (exception == 3 && kgdb_isremovedbreak(regs->rip - 1)) {
++ regs->rip -= 1;
++ return 1;
++ }
++ return 0;
++}
++
++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++ if (exception == 3) {
++ return instruction_pointer(regs) - 1;
++ }
++ return instruction_pointer(regs);
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++ .gdb_bpt_instr = {0xcc},
++ .flags = KGDB_HW_BREAKPOINT,
++ .shadowth = 1,
++ .set_hw_breakpoint = kgdb_set_hw_break,
++ .remove_hw_breakpoint = kgdb_remove_hw_break,
++ .remove_all_hw_break = kgdb_remove_all_hw_break,
++ .correct_hw_break = kgdb_correct_hw_break,
++};
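
kgdb_correct_hw_break above packs each breakpoint slot into the x86 debug control register DR7: the global-enable bit for slot n is bit 2n+1 (hence 2 << (breakno << 1)), and each slot owns a 4-bit field at bit 16 + 4n holding the R/W type in the low two bits and the length code in the high two. A standalone sketch of that encoding:

    #include <stdio.h>

    /* Encode one x86 hardware-breakpoint slot into its DR7 bits.
     * type: 0 = execute, 1 = write, 3 = read/write;
     * len_code: 0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes. */
    static unsigned long dr7_bits(int slot, unsigned type, unsigned len_code)
    {
        unsigned long dr7 = 0;

        dr7 |= 2UL << (slot << 1);  /* global-enable bit for slot n */
        dr7 |= ((unsigned long)((len_code << 2) | type) << 16)
               << (slot << 2);     /* type/len field at bit 16 + 4n */
        return dr7;
    }

    int main(void)
    {
        /* 4-byte write watchpoint in slot 1 -> 0xd00008 */
        printf("dr7 |= %#lx\n", dr7_bits(1, 1, 3));
        return 0;
    }
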
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/mce.c linux-2.6.22-591/arch/x86_64/kernel/mce.c
+--- linux-2.6.22-570/arch/x86_64/kernel/mce.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/x86_64/kernel/mce.c 2007-12-21 15:36:11.000000000 -0500
+@@ -174,7 +174,7 @@
+ if (events != atomic_read(&mce_logged) && trigger[0]) {
+ /* Small race window, but should be harmless. */
+ atomic_set(&mce_logged, events);
+- call_usermodehelper(trigger, trigger_argv, NULL, -1);
++ call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+ }
+ }
+
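
The mce trigger call above previously passed the magic value -1 as call_usermodehelper()'s wait argument; the replacement spells out UMH_NO_WAIT, i.e. fire the helper and return without blocking, which matters here because the caller cannot sleep waiting for userspace. A hedged sketch of the wait modes as this editor understands them for this era (include/linux/kmod.h is authoritative; paths below are illustrative):

    char *argv[] = { "/sbin/mce-trigger", NULL };  /* illustrative path */
    char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };

    call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT);   /* don't block */
    call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); /* wait for exec */
    call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); /* wait for exit */
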
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/signal.c linux-2.6.22-591/arch/x86_64/kernel/signal.c
+--- linux-2.6.22-570/arch/x86_64/kernel/signal.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/x86_64/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500
+@@ -480,7 +480,7 @@
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+ {
+ struct task_struct *me = current;
+- if (exception_trace)
++ if (show_unhandled_signals && printk_ratelimit())
+ printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
+ me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax);
+
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/traps.c linux-2.6.22-591/arch/x86_64/kernel/traps.c
+--- linux-2.6.22-570/arch/x86_64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -96,6 +96,11 @@
+ }
+
+ int kstack_depth_to_print = 12;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+
+ #ifdef CONFIG_KALLSYMS
+ void printk_address(unsigned long address)
+@@ -198,6 +203,33 @@
+ return NULL;
+ }
+
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
++{
++ struct ops_and_data *oad = (struct ops_and_data *)context;
++ int n = 0;
++ unsigned long sp = UNW_SP(info);
++
++ if (arch_unw_user_mode(info))
++ return -1;
++ while (unwind(info) == 0 && UNW_PC(info)) {
++ n++;
++ oad->ops->address(oad->data, UNW_PC(info));
++ if (arch_unw_user_mode(info))
++ break;
++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++ && sp > UNW_SP(info))
++ break;
++ sp = UNW_SP(info);
++ touch_nmi_watchdog();
++ }
++ return n;
++}
++
+ #define MSG(txt) ops->warning(data, txt)
+
+ /*
+@@ -225,6 +257,40 @@
+ if (!tsk)
+ tsk = current;
+
++ if (call_trace >= 0) {
++ int unw_ret = 0;
++ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
++
++ if (regs) {
++ if (unwind_init_frame_info(&info, tsk, regs) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ } else if (tsk == current)
++ unw_ret = unwind_init_running(&info, dump_trace_unwind,
++ &oad);
++ else {
++ if (unwind_init_blocked(&info, tsk) == 0)
++ unw_ret = dump_trace_unwind(&info, &oad);
++ }
++ if (unw_ret > 0) {
++ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++ ops->warning_symbol(data,
++ "DWARF2 unwinder stuck at %s",
++ UNW_PC(&info));
++ if ((long)UNW_SP(&info) < 0) {
++ MSG("Leftover inexact backtrace:");
++ stack = (unsigned long *)UNW_SP(&info);
++ if (!stack)
++ goto out;
++ } else
++ MSG("Full inexact backtrace again:");
++ } else if (call_trace >= 1)
++ goto out;
++ else
++ MSG("Full inexact backtrace again:");
++ } else
++ MSG("Inexact backtrace:");
++ }
+ if (!stack) {
+ unsigned long dummy;
+ stack = &dummy;
+@@ -308,6 +374,7 @@
+ tinfo = task_thread_info(tsk);
+ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++out:
+ put_cpu();
+ }
+ EXPORT_SYMBOL(dump_trace);
+@@ -585,7 +652,8 @@
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+
+- if (exception_trace && unhandled_signal(tsk, signr))
++ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
++ printk_ratelimit())
+ printk(KERN_INFO
+ "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n",
+ tsk->comm, tsk->pid, tsk->xid, str,
+@@ -689,7 +757,8 @@
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+
+- if (exception_trace && unhandled_signal(tsk, SIGSEGV))
++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
++ printk_ratelimit())
+ printk(KERN_INFO
+ "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n",
+ tsk->comm, tsk->pid, tsk->xid,
+@@ -1128,3 +1197,21 @@
+ return 0;
+ }
+ early_param("kstack", kstack_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (!s)
++ return -EINVAL;
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 0;
++}
++early_param("call_trace", call_trace_setup);
++#endif
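
The parser above maps the call_trace= boot parameter onto the unwinder policy used by dump_trace(): "old" (-1) keeps only the conservative stack scanner, "both" (0) runs the DWARF2 unwinder followed by the scanner, "newfallback" (1, the compiled-in default under CONFIG_STACK_UNWIND) prefers the unwinder and falls back on trouble, and "new" (2) trusts the unwinder alone. For example, on the kernel command line:

    # illustrative bootloader entries selecting a backtrace mode
    kernel /vmlinuz ro root=/dev/sda1 call_trace=both
    kernel /vmlinuz ro root=/dev/sda1 call_trace=old
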
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/unwind.S linux-2.6.22-591/arch/x86_64/kernel/unwind.S
+--- linux-2.6.22-570/arch/x86_64/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,38 @@
++/* Assembler support for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/segment.h>
++#include <asm/ptrace.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++ CFI_STARTPROC
++ movq %r15, R15(%rdi)
++ movq %r14, R14(%rdi)
++ xchgq %rsi, %rdx
++ movq %r13, R13(%rdi)
++ movq %r12, R12(%rdi)
++ xorl %eax, %eax
++ movq %rbp, RBP(%rdi)
++ movq %rbx, RBX(%rdi)
++ movq (%rsp), %rcx
++ movq %rax, R11(%rdi)
++ movq %rax, R10(%rdi)
++ movq %rax, R9(%rdi)
++ movq %rax, R8(%rdi)
++ movq %rax, RAX(%rdi)
++ movq %rax, RCX(%rdi)
++ movq %rax, RDX(%rdi)
++ movq %rax, RSI(%rdi)
++ movq %rax, RDI(%rdi)
++ movq %rax, ORIG_RAX(%rdi)
++ movq %rcx, RIP(%rdi)
++ leaq 8(%rsp), %rcx
++ movq $__KERNEL_CS, CS(%rdi)
++ movq %rax, EFLAGS(%rdi)
++ movq %rcx, RSP(%rdi)
++ movq $__KERNEL_DS, SS(%rdi)
++ jmpq *%rdx
++ CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
++
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S
+--- linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S 2007-12-21 15:36:11.000000000 -0500
+@@ -219,7 +219,9 @@
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.exitcall.exit)
++#ifndef CONFIG_UNWIND_INFO
+ *(.eh_frame)
++#endif
+ }
+
+ STABS_DEBUG
+diff -Nurb linux-2.6.22-570/arch/x86_64/mm/fault.c linux-2.6.22-591/arch/x86_64/mm/fault.c
+--- linux-2.6.22-570/arch/x86_64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -221,16 +221,6 @@
+ return 0;
+ }
+
+-int unhandled_signal(struct task_struct *tsk, int sig)
+-{
+- if (is_init(tsk))
+- return 1;
+- if (tsk->ptrace & PT_PTRACED)
+- return 0;
+- return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
+- (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
+-}
+-
+ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code)
+ {
+@@ -302,7 +292,7 @@
+ }
+
+ int page_fault_trace = 0;
+-int exception_trace = 1;
++int show_unhandled_signals = 1;
+
+ /*
+ * This routine handles page faults. It determines the address,
+@@ -534,6 +524,10 @@
+ if (is_errata93(regs, address))
+ return;
+
++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++ error_code, 14, SIGSEGV) == NOTIFY_STOP)
++ return;
++
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+diff -Nurb linux-2.6.22-570/arch/x86_64/mm/init.c linux-2.6.22-591/arch/x86_64/mm/init.c
+--- linux-2.6.22-570/arch/x86_64/mm/init.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/mm/init.c 2007-12-21 15:36:11.000000000 -0500
+@@ -697,41 +697,6 @@
+ return pfn_valid(pte_pfn(*pte));
+ }
+
+-#ifdef CONFIG_SYSCTL
+-#include <linux/sysctl.h>
+-
+-extern int exception_trace, page_fault_trace;
+-
+-static ctl_table debug_table2[] = {
+- {
+- .ctl_name = 99,
+- .procname = "exception-trace",
+- .data = &exception_trace,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec
+- },
+- {}
+-};
+-
+-static ctl_table debug_root_table2[] = {
+- {
+- .ctl_name = CTL_DEBUG,
+- .procname = "debug",
+- .mode = 0555,
+- .child = debug_table2
+- },
+- {}
+-};
+-
+-static __init int x8664_sysctl_init(void)
+-{
+- register_sysctl_table(debug_root_table2);
+- return 0;
+-}
+-__initcall(x8664_sysctl_init);
+-#endif
+-
+ /* A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ not need special handling anymore. */
+diff -Nurb linux-2.6.22-570/creatinst.sh linux-2.6.22-591/creatinst.sh
+--- linux-2.6.22-570/creatinst.sh 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/creatinst.sh 2007-12-23 02:56:35.000000000 -0500
+@@ -0,0 +1,12 @@
++rm -fR inst
++mkdir inst
++make install INSTALL_PATH=inst
++make modules_install INSTALL_MOD_PATH=inst
++tar cfz inst.tar.gz inst
++scp -i ~/newvici inst.tar.gz root@vici-03:/tmp
++ssh -i ~/newvici root@vici-03 "cd /tmp;tar xvfz inst.tar.gz"
++ssh -i ~/newvici root@vici-03 "wget www/~sapanb/vgup;sh vgup"
++ssh -i ~/newvici root@vici-03 "cp -R /tmp/inst/lib/* /mnt/lib/"
++ssh -i ~/newvici root@vici-03 "rm -fR /tmp/inst/lib; mv /tmp/inst/* /mnt/boot"
++sleep 5
++ssh -i ~/newvici root@vici-03 reboot
+diff -Nurb linux-2.6.22-570/creatinst.sh.orig linux-2.6.22-591/creatinst.sh.orig
+--- linux-2.6.22-570/creatinst.sh.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/creatinst.sh.orig 2007-12-22 19:17:36.000000000 -0500
+@@ -0,0 +1,5 @@
++rm -fR inst
++mkdir inst
++make install INSTALL_PATH=inst
++make modules_install INSTALL_MOD_PATH=inst
++tar cfz inst.tar.gz inst
+diff -Nurb linux-2.6.22-570/crypto/Kconfig linux-2.6.22-591/crypto/Kconfig
+--- linux-2.6.22-570/crypto/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/crypto/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -1,7 +1,17 @@
+ #
+-# Cryptographic API Configuration
++# Generic algorithms support
++#
++config XOR_BLOCKS
++ tristate
++
+ #
++# async_tx api: hardware offloaded memory transfer/transform support
++#
++source "crypto/async_tx/Kconfig"
+
++#
++# Cryptographic API Configuration
++#
+ menu "Cryptographic options"
+
+ config CRYPTO
+diff -Nurb linux-2.6.22-570/crypto/Makefile linux-2.6.22-591/crypto/Makefile
+--- linux-2.6.22-570/crypto/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/crypto/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -50,3 +50,9 @@
+ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+
+ obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
++
++#
++# generic algorithms and the async_tx api
++#
++obj-$(CONFIG_XOR_BLOCKS) += xor.o
++obj-$(CONFIG_ASYNC_CORE) += async_tx/
+diff -Nurb linux-2.6.22-570/crypto/async_tx/Kconfig linux-2.6.22-591/crypto/async_tx/Kconfig
+--- linux-2.6.22-570/crypto/async_tx/Kconfig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,16 @@
++config ASYNC_CORE
++ tristate
++
++config ASYNC_MEMCPY
++ tristate
++ select ASYNC_CORE
++
++config ASYNC_XOR
++ tristate
++ select ASYNC_CORE
++ select XOR_BLOCKS
++
++config ASYNC_MEMSET
++ tristate
++ select ASYNC_CORE
++
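
None of these symbols carries a prompt, so they are never offered to the user directly; a subsystem that wants the offloaded operations pulls them in via select, which transitively enables ASYNC_CORE (and XOR_BLOCKS for the xor path). An illustrative consumer fragment, not part of this patch:

    # Hypothetical driver Kconfig pulling in the async_tx helpers
    config MY_RAID_DRIVER
    	tristate "Example RAID driver"
    	select ASYNC_MEMCPY
    	select ASYNC_XOR
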
+diff -Nurb linux-2.6.22-570/crypto/async_tx/Makefile linux-2.6.22-591/crypto/async_tx/Makefile
+--- linux-2.6.22-570/crypto/async_tx/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,4 @@
++obj-$(CONFIG_ASYNC_CORE) += async_tx.o
++obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
++obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
++obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memcpy.c linux-2.6.22-591/crypto/async_tx/async_memcpy.c
+--- linux-2.6.22-570/crypto/async_tx/async_memcpy.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/async_memcpy.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,131 @@
++/*
++ * copy offload engine support
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ * Dan Williams <dan.j.williams@intel.com>
++ *
++ * with architecture considerations by:
++ * Neil Brown <neilb@suse.de>
++ * Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/highmem.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/async_tx.h>
++
++/**
++ * async_memcpy - attempt to copy memory with a dma engine.
++ * @dest: destination page
++ * @src: src page
++ * @dest_offset, @src_offset: offsets in bytes into dest/src to start the transaction
++ * @len: length in bytes
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
++ * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
++ * @depend_tx: memcpy depends on the result of this transaction
++ * @cb_fn: function to call when the memcpy completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
++ unsigned int src_offset, size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
++ struct dma_device *device = chan ? chan->device : NULL;
++ int int_en = cb_fn ? 1 : 0;
++ struct dma_async_tx_descriptor *tx = device ?
++ device->device_prep_dma_memcpy(chan, len,
++ int_en) : NULL;
++
++ if (tx) { /* run the memcpy asynchronously */
++ dma_addr_t addr;
++ enum dma_data_direction dir;
++
++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_FROM_DEVICE;
++
++ addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
++ tx->tx_set_dest(addr, tx, 0);
++
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_TO_DEVICE;
++
++ addr = dma_map_page(device->dev, src, src_offset, len, dir);
++ tx->tx_set_src(addr, tx, 0);
++
++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++ } else { /* run the memcpy synchronously */
++ void *dest_buf, *src_buf;
++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++ /* wait for any prerequisite operations */
++ if (depend_tx) {
++ /* if ack is already set then we cannot be sure
++ * we are referring to the correct operation
++ */
++ BUG_ON(depend_tx->ack);
++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++ panic("%s: DMA_ERROR waiting for depend_tx\n",
++ __FUNCTION__);
++ }
++
++ if (flags & ASYNC_TX_KMAP_DST)
++ dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
++ else
++ dest_buf = page_address(dest) + dest_offset;
++
++ if (flags & ASYNC_TX_KMAP_SRC)
++ src_buf = kmap_atomic(src, KM_USER0) + src_offset;
++ else
++ src_buf = page_address(src) + src_offset;
++
++ memcpy(dest_buf, src_buf, len);
++
++ if (flags & ASYNC_TX_KMAP_DST)
++ kunmap_atomic(dest_buf, KM_USER0);
++
++ if (flags & ASYNC_TX_KMAP_SRC)
++ kunmap_atomic(src_buf, KM_USER0);
++
++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++ }
++
++ return tx;
++}
++EXPORT_SYMBOL_GPL(async_memcpy);
++
++static int __init async_memcpy_init(void)
++{
++ return 0;
++}
++
++static void __exit async_memcpy_exit(void)
++{
++ do { } while (0);
++}
++
++module_init(async_memcpy_init);
++module_exit(async_memcpy_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous memcpy api");
++MODULE_LICENSE("GPL");
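
async_memcpy() returns either a descriptor for a hardware-offloaded copy or NULL after performing the copy synchronously (in which case the callback has already run via async_tx_sync_epilog()). A hedged usage sketch against the API declared above; the completion plumbing and page setup are illustrative:

    /* Sketch only: copy one page and wait for it to finish. */
    static void done(void *param)
    {
        complete((struct completion *)param);
    }

    static void copy_one_page(struct page *dst, struct page *src,
                              struct completion *wait)
    {
        struct dma_async_tx_descriptor *tx;

        tx = async_memcpy(dst, src, 0, 0, PAGE_SIZE,
                          ASYNC_TX_ACK, NULL, done, wait);
        if (tx)
            async_tx_issue_pending_all(); /* kick the dma engine */
        wait_for_completion(wait);
    }
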
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memset.c linux-2.6.22-591/crypto/async_tx/async_memset.c
+--- linux-2.6.22-570/crypto/async_tx/async_memset.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/async_memset.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,109 @@
++/*
++ * memory fill offload engine support
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ * Dan Williams <dan.j.williams@intel.com>
++ *
++ * with architecture considerations by:
++ * Neil Brown <neilb@suse.de>
++ * Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/async_tx.h>
++
++/**
++ * async_memset - attempt to fill memory with a dma engine.
++ * @dest: destination page
++ * @val: fill value
++ * @offset: offset in bytes into the page to start the transaction
++ * @len: length in bytes
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: memset depends on the result of this transaction
++ * @cb_fn: function to call when the memset completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_memset(struct page *dest, int val, unsigned int offset,
++ size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
++ struct dma_device *device = chan ? chan->device : NULL;
++ int int_en = cb_fn ? 1 : 0;
++ struct dma_async_tx_descriptor *tx = device ?
++ device->device_prep_dma_memset(chan, val, len,
++ int_en) : NULL;
++
++ if (tx) { /* run the memset asynchronously */
++ dma_addr_t dma_addr;
++ enum dma_data_direction dir;
++
++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_FROM_DEVICE;
++
++ dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
++ tx->tx_set_dest(dma_addr, tx, 0);
++
++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++ } else { /* run the memset synchronously */
++ void *dest_buf;
++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++ dest_buf = (void *) (((char *) page_address(dest)) + offset);
++
++ /* wait for any prerequisite operations */
++ if (depend_tx) {
++ /* if ack is already set then we cannot be sure
++ * we are referring to the correct operation
++ */
++ BUG_ON(depend_tx->ack);
++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++ panic("%s: DMA_ERROR waiting for depend_tx\n",
++ __FUNCTION__);
++ }
++
++ memset(dest_buf, val, len);
++
++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++ }
++
++ return tx;
++}
++EXPORT_SYMBOL_GPL(async_memset);
++
++static int __init async_memset_init(void)
++{
++ return 0;
++}
++
++static void __exit async_memset_exit(void)
++{
++ do { } while (0);
++}
++
++module_init(async_memset_init);
++module_exit(async_memset_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous memset api");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_tx.c linux-2.6.22-591/crypto/async_tx/async_tx.c
+--- linux-2.6.22-570/crypto/async_tx/async_tx.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/async_tx.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,497 @@
++/*
++ * core routines for the asynchronous memory transfer/transform api
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ * Dan Williams <dan.j.williams@intel.com>
++ *
++ * with architecture considerations by:
++ * Neil Brown <neilb@suse.de>
++ * Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/async_tx.h>
++
++#ifdef CONFIG_DMA_ENGINE
++static enum dma_state_client
++dma_channel_add_remove(struct dma_client *client,
++ struct dma_chan *chan, enum dma_state state);
++
++static struct dma_client async_tx_dma = {
++ .event_callback = dma_channel_add_remove,
++ /* .cap_mask == 0 defaults to all channels */
++};
++
++/**
++ * dma_cap_mask_all - enable iteration over all operation types
++ */
++static dma_cap_mask_t dma_cap_mask_all;
++
++/**
++ * chan_ref_percpu - tracks channel allocations per core/operation
++ */
++struct chan_ref_percpu {
++ struct dma_chan_ref *ref;
++};
++
++static int channel_table_initialized;
++static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
++
++/**
++ * async_tx_lock - protect modification of async_tx_master_list and serialize
++ * rebalance operations
++ */
++static spinlock_t async_tx_lock;
++
++static struct list_head
++async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
++
++/* async_tx_issue_pending_all - start all transactions on all channels */
++void async_tx_issue_pending_all(void)
++{
++ struct dma_chan_ref *ref;
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++ ref->chan->device->device_issue_pending(ref->chan);
++ rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
++
++/* dma_wait_for_async_tx - spin wait for a transaction to complete
++ * @tx: transaction to wait on
++ */
++enum dma_status
++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
++{
++ enum dma_status status;
++ struct dma_async_tx_descriptor *iter;
++
++ if (!tx)
++ return DMA_SUCCESS;
++
++ /* poll through the dependency chain, return when tx is complete */
++ do {
++ iter = tx;
++ while (iter->cookie == -EBUSY)
++ iter = iter->parent;
++
++ status = dma_sync_wait(iter->chan, iter->cookie);
++ } while (status == DMA_IN_PROGRESS || (iter != tx));
++
++ return status;
++}
++EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
++
++/* async_tx_run_dependencies - helper routine for dma drivers to process
++ * (start) dependent operations on their target channel
++ * @tx: transaction with dependencies
++ */
++void
++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
++{
++ struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
++ struct dma_device *dev;
++ struct dma_chan *chan;
++
++ list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
++ depend_node) {
++ chan = dep_tx->chan;
++ dev = chan->device;
++ /* we can't depend on ourselves */
++ BUG_ON(chan == tx->chan);
++ list_del(&dep_tx->depend_node);
++ tx->tx_submit(dep_tx);
++
++ /* we need to poke the engine as client code does not
++ * know about dependency submission events
++ */
++ dev->device_issue_pending(chan);
++ }
++}
++EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
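++
++/* Note (not in the original patch): drivers are expected to call this
++ * from their descriptor clean-up path (e.g. a completion tasklet) once
++ * @tx itself has finished, so that operations queued behind it on other
++ * channels are started.
++ */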
++
++static void
++free_dma_chan_ref(struct rcu_head *rcu)
++{
++ struct dma_chan_ref *ref;
++ ref = container_of(rcu, struct dma_chan_ref, rcu);
++ kfree(ref);
++}
++
++static void
++init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
++{
++ INIT_LIST_HEAD(&ref->node);
++ INIT_RCU_HEAD(&ref->rcu);
++ ref->chan = chan;
++ atomic_set(&ref->count, 0);
++}
++
++/**
++ * get_chan_ref_by_cap - returns the nth channel with the given capability;
++ * falls back to the channel with the desired capability and the lowest
++ * reference count if the index cannot be satisfied
++ * @cap: capability to match
++ * @index: nth channel desired, passing -1 has the effect of forcing the
++ * default return value
++ */
++static struct dma_chan_ref *
++get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
++{
++ struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++ if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
++ if (!min_ref)
++ min_ref = ref;
++ else if (atomic_read(&ref->count) <
++ atomic_read(&min_ref->count))
++ min_ref = ref;
++
++ if (index-- == 0) {
++ ret_ref = ref;
++ break;
++ }
++ }
++ rcu_read_unlock();
++
++ if (!ret_ref)
++ ret_ref = min_ref;
++
++ if (ret_ref)
++ atomic_inc(&ret_ref->count);
++
++ return ret_ref;
++}
++
++/**
++ * async_tx_rebalance - redistribute the available channels, optimize
++ * for cpu isolation in the SMP case, and operation isolation in the
++ * uniprocessor case
++ */
++static void async_tx_rebalance(void)
++{
++ int cpu, cap, cpu_idx = 0;
++ unsigned long flags;
++
++ if (!channel_table_initialized)
++ return;
++
++ spin_lock_irqsave(&async_tx_lock, flags);
++
++ /* undo the last distribution */
++ for_each_dma_cap_mask(cap, dma_cap_mask_all)
++ for_each_possible_cpu(cpu) {
++ struct dma_chan_ref *ref =
++ per_cpu_ptr(channel_table[cap], cpu)->ref;
++ if (ref) {
++ atomic_set(&ref->count, 0);
++ per_cpu_ptr(channel_table[cap], cpu)->ref =
++ NULL;
++ }
++ }
++
++ for_each_dma_cap_mask(cap, dma_cap_mask_all)
++ for_each_online_cpu(cpu) {
++ struct dma_chan_ref *new;
++ if (NR_CPUS > 1)
++ new = get_chan_ref_by_cap(cap, cpu_idx++);
++ else
++ new = get_chan_ref_by_cap(cap, -1);
++
++ per_cpu_ptr(channel_table[cap], cpu)->ref = new;
++ }
++
++ spin_unlock_irqrestore(&async_tx_lock, flags);
++}
++
++static enum dma_state_client
++dma_channel_add_remove(struct dma_client *client,
++ struct dma_chan *chan, enum dma_state state)
++{
++ unsigned long found, flags;
++ struct dma_chan_ref *master_ref, *ref;
++ enum dma_state_client ack = DMA_DUP; /* default: take no action */
++
++ switch (state) {
++ case DMA_RESOURCE_AVAILABLE:
++ found = 0;
++ rcu_read_lock();
++ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++ if (ref->chan == chan) {
++ found = 1;
++ break;
++ }
++ rcu_read_unlock();
++
++ pr_debug("async_tx: dma resource available [%s]\n",
++ found ? "old" : "new");
++
++ if (!found)
++ ack = DMA_ACK;
++ else
++ break;
++
++ /* add the channel to the generic management list */
++ master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
++ if (master_ref) {
++ /* keep a reference until async_tx is unloaded */
++ dma_chan_get(chan);
++ init_dma_chan_ref(master_ref, chan);
++ spin_lock_irqsave(&async_tx_lock, flags);
++ list_add_tail_rcu(&master_ref->node,
++ &async_tx_master_list);
++ spin_unlock_irqrestore(&async_tx_lock,
++ flags);
++ } else {
++ printk(KERN_WARNING "async_tx: unable to create"
++ " new master entry in response to"
++ " a DMA_RESOURCE_ADDED event"
++ " (-ENOMEM)\n");
++ return 0;
++ }
++
++ async_tx_rebalance();
++ break;
++ case DMA_RESOURCE_REMOVED:
++ found = 0;
++ spin_lock_irqsave(&async_tx_lock, flags);
++ list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++ if (ref->chan == chan) {
++ /* permit backing devices to go away */
++ dma_chan_put(ref->chan);
++ list_del_rcu(&ref->node);
++ call_rcu(&ref->rcu, free_dma_chan_ref);
++ found = 1;
++ break;
++ }
++ spin_unlock_irqrestore(&async_tx_lock, flags);
++
++ pr_debug("async_tx: dma resource removed [%s]\n",
++ found ? "ours" : "not ours");
++
++ if (found)
++ ack = DMA_ACK;
++ else
++ break;
++
++ async_tx_rebalance();
++ break;
++ case DMA_RESOURCE_SUSPEND:
++ case DMA_RESOURCE_RESUME:
++ printk(KERN_WARNING "async_tx: does not support dma channel"
++ " suspend/resume\n");
++ break;
++ default:
++ BUG();
++ }
++
++ return ack;
++}
++
++static int __init
++async_tx_init(void)
++{
++ enum dma_transaction_type cap;
++
++ spin_lock_init(&async_tx_lock);
++ bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
++
++ /* an interrupt will never be an explicit operation type.
++ * clearing this bit prevents allocation to a slot in 'channel_table'
++ */
++ clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
++
++ for_each_dma_cap_mask(cap, dma_cap_mask_all) {
++ channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
++ if (!channel_table[cap])
++ goto err;
++ }
++
++ channel_table_initialized = 1;
++ dma_async_client_register(&async_tx_dma);
++ dma_async_client_chan_request(&async_tx_dma);
++
++ printk(KERN_INFO "async_tx: api initialized (async)\n");
++
++ return 0;
++err:
++ printk(KERN_ERR "async_tx: initialization failure\n");
++
++ while (--cap >= 0)
++ free_percpu(channel_table[cap]);
++
++ return 1;
++}
++
++static void __exit async_tx_exit(void)
++{
++ enum dma_transaction_type cap;
++
++ channel_table_initialized = 0;
++
++ for_each_dma_cap_mask(cap, dma_cap_mask_all)
++ if (channel_table[cap])
++ free_percpu(channel_table[cap]);
++
++ dma_async_client_unregister(&async_tx_dma);
++}
++
++/**
++ * async_tx_find_channel - find a channel to carry out the operation or let
++ * the transaction execute synchronously
++ * @depend_tx: transaction dependency
++ * @tx_type: transaction type
++ */
++struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++ enum dma_transaction_type tx_type)
++{
++ /* see if we can keep the chain on one channel */
++ if (depend_tx &&
++ dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
++ return depend_tx->chan;
++ else if (likely(channel_table_initialized)) {
++ struct dma_chan_ref *ref;
++ int cpu = get_cpu();
++ ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
++ put_cpu();
++ return ref ? ref->chan : NULL;
++ } else
++ return NULL;
++}
++EXPORT_SYMBOL_GPL(async_tx_find_channel);
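++
++/*
++ * Sketch of the pattern the async_<op> wrappers build on (illustrative
++ * only, not part of the original patch):
++ *
++ *	chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
++ *	device = chan ? chan->device : NULL;
++ *	tx = device ? device->device_prep_dma_memcpy(chan, len, int_en)
++ *		    : NULL;
++ *	if (tx)
++ *		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++ *	else
++ *		... perform the copy synchronously and run the epilog ...
++ */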
++#else
++static int __init async_tx_init(void)
++{
++ printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
++ return 0;
++}
++
++static void __exit async_tx_exit(void)
++{
++ do { } while (0);
++}
++#endif
++
++void
++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ tx->callback = cb_fn;
++ tx->callback_param = cb_param;
++
++ /* set this new tx to run after depend_tx if:
++ * 1/ a dependency exists (depend_tx is non-NULL)
++ * 2/ the tx cannot be submitted to the current channel
++ */
++ if (depend_tx && depend_tx->chan != chan) {
++ /* if ack is already set then we cannot be sure
++ * we are referring to the correct operation
++ */
++ BUG_ON(depend_tx->ack);
++
++ tx->parent = depend_tx;
++ spin_lock_bh(&depend_tx->lock);
++ list_add_tail(&tx->depend_node, &depend_tx->depend_list);
++ if (depend_tx->cookie == 0) {
++ struct dma_chan *dep_chan = depend_tx->chan;
++ struct dma_device *dep_dev = dep_chan->device;
++ dep_dev->device_dependency_added(dep_chan);
++ }
++ spin_unlock_bh(&depend_tx->lock);
++
++ /* schedule an interrupt to trigger the channel switch */
++ async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
++ } else {
++ tx->parent = NULL;
++ tx->tx_submit(tx);
++ }
++
++ if (flags & ASYNC_TX_ACK)
++ async_tx_ack(tx);
++
++ if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
++ async_tx_ack(depend_tx);
++}
++EXPORT_SYMBOL_GPL(async_tx_submit);
++
++/**
++ * async_trigger_callback - schedules the callback function to be run after
++ * any dependent operations have been completed.
++ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: 'callback' requires the completion of this transaction
++ * @cb_fn: function to call after depend_tx completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_trigger_callback(enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ struct dma_chan *chan;
++ struct dma_device *device;
++ struct dma_async_tx_descriptor *tx;
++
++ if (depend_tx) {
++ chan = depend_tx->chan;
++ device = chan->device;
++
++ /* see if we can schedule an interrupt
++ * otherwise poll for completion
++ */
++ if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
++ device = NULL;
++
++ tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
++ } else
++ tx = NULL;
++
++ if (tx) {
++ pr_debug("%s: (async)\n", __FUNCTION__);
++
++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++ } else {
++ pr_debug("%s: (sync)\n", __FUNCTION__);
++
++ /* wait for any prerequisite operations */
++ if (depend_tx) {
++ /* if ack is already set then we cannot be sure
++ * we are referring to the correct operation
++ */
++ BUG_ON(depend_tx->ack);
++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++ panic("%s: DMA_ERROR waiting for depend_tx\n",
++ __FUNCTION__);
++ }
++
++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++ }
++
++ return tx;
++}
++EXPORT_SYMBOL_GPL(async_trigger_callback);
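++
++/*
++ * Usage sketch (illustrative, not part of the original patch): run a
++ * completion handler once a previously submitted chain has finished,
++ * acking both descriptors:
++ *
++ *	async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
++ *			       completion_fn, completion_arg);
++ *
++ * completion_fn/completion_arg are placeholders for caller context.
++ */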
++
++module_init(async_tx_init);
++module_exit(async_tx_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_xor.c linux-2.6.22-591/crypto/async_tx/async_xor.c
+--- linux-2.6.22-570/crypto/async_tx/async_xor.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/async_tx/async_xor.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,327 @@
++/*
++ * xor offload engine api
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ * Dan Williams <dan.j.williams@intel.com>
++ *
++ * with architecture considerations by:
++ * Neil Brown <neilb@suse.de>
++ * Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/raid/xor.h>
++#include <linux/async_tx.h>
++
++static void
++do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
++ struct dma_chan *chan, struct page *dest, struct page **src_list,
++ unsigned int offset, unsigned int src_cnt, size_t len,
++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ dma_addr_t dma_addr;
++ enum dma_data_direction dir;
++ int i;
++
++ pr_debug("%s: len: %zu\n", __FUNCTION__, len);
++
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_FROM_DEVICE;
++
++ dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
++ tx->tx_set_dest(dma_addr, tx, 0);
++
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_TO_DEVICE;
++
++ for (i = 0; i < src_cnt; i++) {
++ dma_addr = dma_map_page(device->dev, src_list[i],
++ offset, len, dir);
++ tx->tx_set_src(dma_addr, tx, i);
++ }
++
++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++}
++
++static void
++do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
++ unsigned int src_cnt, size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ void *_dest;
++ int i;
++
++ pr_debug("%s: len: %zu\n", __FUNCTION__, len);
++
++ /* reuse the 'src_list' array to convert to buffer pointers */
++ for (i = 0; i < src_cnt; i++)
++ src_list[i] = (struct page *)
++ (page_address(src_list[i]) + offset);
++
++ /* set destination address */
++ _dest = page_address(dest) + offset;
++
++ if (flags & ASYNC_TX_XOR_ZERO_DST)
++ memset(_dest, 0, len);
++
++ xor_blocks(src_cnt, len, _dest,
++ (void **) src_list);
++
++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++}
++
++/**
++ * async_xor - attempt to xor a set of blocks with a dma engine.
++ * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
++ * flag must be set to not include dest data in the calculation. The
++ * assumption with dma engines is that they only use the destination
++ * buffer as a source when it is explicitly specified in the source list.
++ * @dest: destination page
++ * @src_list: array of source pages (if the dest is also a source it must be
++ * at index zero). The contents of this array may be overwritten.
++ * @offset: offset in bytes within the src/dst pages to start the transaction
++ * @src_cnt: number of source pages
++ * @len: length in bytes
++ * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST,
++ * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: xor depends on the result of this transaction.
++ * @cb_fn: function to call when the xor completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_xor(struct page *dest, struct page **src_list, unsigned int offset,
++ int src_cnt, size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
++ struct dma_device *device = chan ? chan->device : NULL;
++ struct dma_async_tx_descriptor *tx = NULL;
++ dma_async_tx_callback _cb_fn;
++ void *_cb_param;
++ unsigned long local_flags;
++ int xor_src_cnt;
++ int i = 0, src_off = 0, int_en;
++
++ BUG_ON(src_cnt <= 1);
++
++ while (src_cnt) {
++ local_flags = flags;
++ if (device) { /* run the xor asynchronously */
++ xor_src_cnt = min(src_cnt, device->max_xor);
++ /* if we are submitting additional xors
++ * only set the callback on the last transaction
++ */
++ if (src_cnt > xor_src_cnt) {
++ local_flags &= ~ASYNC_TX_ACK;
++ _cb_fn = NULL;
++ _cb_param = NULL;
++ } else {
++ _cb_fn = cb_fn;
++ _cb_param = cb_param;
++ }
++
++ int_en = _cb_fn ? 1 : 0;
++
++ tx = device->device_prep_dma_xor(
++ chan, xor_src_cnt, len, int_en);
++
++ if (tx) {
++ do_async_xor(tx, device, chan, dest,
++ &src_list[src_off], offset, xor_src_cnt, len,
++ local_flags, depend_tx, _cb_fn,
++ _cb_param);
++ } else /* fall through */
++ goto xor_sync;
++ } else { /* run the xor synchronously */
++xor_sync:
++ /* in the sync case the dest is an implied source
++ * (assumes the dest is at the src_off index)
++ */
++ if (flags & ASYNC_TX_XOR_DROP_DST) {
++ src_cnt--;
++ src_off++;
++ }
++
++ /* process up to 'MAX_XOR_BLOCKS' sources */
++ xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
++
++ /* if we are submitting additional xors
++ * only set the callback on the last transaction
++ */
++ if (src_cnt > xor_src_cnt) {
++ local_flags &= ~ASYNC_TX_ACK;
++ _cb_fn = NULL;
++ _cb_param = NULL;
++ } else {
++ _cb_fn = cb_fn;
++ _cb_param = cb_param;
++ }
++
++ /* wait for any prerequisite operations */
++ if (depend_tx) {
++ /* if ack is already set then we cannot be sure
++ * we are referring to the correct operation
++ */
++ BUG_ON(depend_tx->ack);
++ if (dma_wait_for_async_tx(depend_tx) ==
++ DMA_ERROR)
++ panic("%s: DMA_ERROR waiting for "
++ "depend_tx\n",
++ __FUNCTION__);
++ }
++
++ do_sync_xor(dest, &src_list[src_off], offset,
++ xor_src_cnt, len, local_flags, depend_tx,
++ _cb_fn, _cb_param);
++ }
++
++ /* the previous tx is hidden from the client,
++ * so ack it
++ */
++ if (i && depend_tx)
++ async_tx_ack(depend_tx);
++
++ depend_tx = tx;
++
++ if (src_cnt > xor_src_cnt) {
++ /* drop completed sources */
++ src_cnt -= xor_src_cnt;
++ src_off += xor_src_cnt;
++
++ /* unconditionally preserve the destination */
++ flags &= ~ASYNC_TX_XOR_ZERO_DST;
++
++ /* use the intermediate result as a source, but remember
++ * that in the sync case it is implied and therefore dropped
++ */
++ src_list[--src_off] = dest;
++ src_cnt++;
++ flags |= ASYNC_TX_XOR_DROP_DST;
++ } else
++ src_cnt = 0;
++ i++;
++ }
++
++ return tx;
++}
++EXPORT_SYMBOL_GPL(async_xor);
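++
++/*
++ * Usage sketch (illustrative only, not part of the original patch):
++ * compute raid5-style parity over two data pages into 'parity':
++ *
++ *	struct page *srcs[2] = { data0, data1 };
++ *	struct dma_async_tx_descriptor *tx;
++ *
++ *	tx = async_xor(parity, srcs, 0, 2, PAGE_SIZE,
++ *		       ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
++ *		       NULL, NULL, NULL);
++ *
++ * data0/data1/parity are placeholders; ASYNC_TX_XOR_ZERO_DST keeps the
++ * prior contents of 'parity' out of the calculation.
++ */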
++
++static int page_is_zero(struct page *p, unsigned int offset, size_t len)
++{
++ char *a = page_address(p) + offset;
++ return ((*(u32 *) a) == 0 &&
++ memcmp(a, a + 4, len - 4) == 0);
++}
++
++/**
++ * async_xor_zero_sum - attempt an xor parity check with a dma engine.
++ * @dest: destination page used if the xor is performed synchronously
++ * @src_list: array of source pages. The dest page must be listed as a source
++ * at index zero. The contents of this array may be overwritten.
++ * @offset: offset in bytes within the pages to start the transaction
++ * @src_cnt: number of source pages
++ * @len: length in bytes
++ * @result: 0 if sum == 0 else non-zero
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: xor depends on the result of this transaction.
++ * @cb_fn: function to call when the xor completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_xor_zero_sum(struct page *dest, struct page **src_list,
++ unsigned int offset, int src_cnt, size_t len,
++ u32 *result, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_param)
++{
++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
++ struct dma_device *device = chan ? chan->device : NULL;
++ int int_en = cb_fn ? 1 : 0;
++ struct dma_async_tx_descriptor *tx = device ?
++ device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
++ int_en) : NULL;
++ int i;
++
++ BUG_ON(src_cnt <= 1);
++
++ if (tx) {
++ dma_addr_t dma_addr;
++ enum dma_data_direction dir;
++
++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++
++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++ DMA_NONE : DMA_TO_DEVICE;
++
++ for (i = 0; i < src_cnt; i++) {
++ dma_addr = dma_map_page(device->dev, src_list[i],
++ offset, len, dir);
++ tx->tx_set_src(dma_addr, tx, i);
++ }
++
++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++ } else {
++ unsigned long xor_flags = flags;
++
++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++ xor_flags |= ASYNC_TX_XOR_DROP_DST;
++ xor_flags &= ~ASYNC_TX_ACK;
++
++ tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
++ depend_tx, NULL, NULL);
++
++ if (tx) {
++ if (dma_wait_for_async_tx(tx) == DMA_ERROR)
++ panic("%s: DMA_ERROR waiting for tx\n",
++ __FUNCTION__);
++ async_tx_ack(tx);
++ }
++
++ *result = page_is_zero(dest, offset, len) ? 0 : 1;
++
++ tx = NULL;
++
++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++ }
++
++ return tx;
++}
++EXPORT_SYMBOL_GPL(async_xor_zero_sum);
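++
++/*
++ * Usage sketch (illustrative only, not part of the original patch):
++ * check that 'parity' is consistent with two data pages:
++ *
++ *	u32 bad;
++ *	struct page *srcs[3] = { parity, data0, data1 };
++ *
++ *	tx = async_xor_zero_sum(parity, srcs, 0, 3, PAGE_SIZE, &bad,
++ *				ASYNC_TX_ACK, NULL, NULL, NULL);
++ *
++ * once the operation completes, bad == 0 means the blocks xor to zero;
++ * note the dest page is listed as the source at index zero.
++ */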
++
++static int __init async_xor_init(void)
++{
++ return 0;
++}
++
++static void __exit async_xor_exit(void)
++{
++ do { } while (0);
++}
++
++module_init(async_xor_init);
++module_exit(async_xor_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/crypto/xor.c linux-2.6.22-591/crypto/xor.c
+--- linux-2.6.22-570/crypto/xor.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/crypto/xor.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,155 @@
++/*
++ * xor.c : Multiple Devices driver for Linux
++ *
++ * Copyright (C) 1996, 1997, 1998, 1999, 2000,
++ * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
++ *
++ * Dispatch optimized RAID-5 checksumming functions.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * You should have received a copy of the GNU General Public License
++ * (for example /usr/src/linux/COPYING); if not, write to the Free
++ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ */
++
++#define BH_TRACE 0
++#include <linux/module.h>
++#include <linux/raid/md.h>
++#include <linux/raid/xor.h>
++#include <asm/xor.h>
++
++/* The xor routines to use. */
++static struct xor_block_template *active_template;
++
++void
++xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
++{
++ unsigned long *p1, *p2, *p3, *p4;
++
++ p1 = (unsigned long *) srcs[0];
++ if (src_count == 1) {
++ active_template->do_2(bytes, dest, p1);
++ return;
++ }
++
++ p2 = (unsigned long *) srcs[1];
++ if (src_count == 2) {
++ active_template->do_3(bytes, dest, p1, p2);
++ return;
++ }
++
++ p3 = (unsigned long *) srcs[2];
++ if (src_count == 3) {
++ active_template->do_4(bytes, dest, p1, p2, p3);
++ return;
++ }
++
++ p4 = (unsigned long *) srcs[3];
++ active_template->do_5(bytes, dest, p1, p2, p3, p4);
++}
++EXPORT_SYMBOL(xor_blocks);
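++
++/* Usage sketch (illustrative, not part of the original patch): the
++ * destination is always an implied source, i.e. this computes
++ * dest ^= b1 ^ b2:
++ *
++ *	void *srcs[2] = { b1, b2 };
++ *
++ *	xor_blocks(2, PAGE_SIZE, dest, srcs);
++ */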
++
++/* Set of all registered templates. */
++static struct xor_block_template *template_list;
++
++#define BENCH_SIZE (PAGE_SIZE)
++
++static void
++do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
++{
++ int speed;
++ unsigned long now;
++ int i, count, max;
++
++ tmpl->next = template_list;
++ template_list = tmpl;
++
++ /*
++ * Count the number of XORs done during a whole jiffy, and use
++ * this to calculate the speed of checksumming. We use a 2-page
++ * allocation to have guaranteed color L1-cache layout.
++ */
++ max = 0;
++ for (i = 0; i < 5; i++) {
++ now = jiffies;
++ count = 0;
++ while (jiffies == now) {
++ mb(); /* prevent loop optimization */
++ tmpl->do_2(BENCH_SIZE, b1, b2);
++ mb();
++ count++;
++ mb();
++ }
++ if (count > max)
++ max = count;
++ }
++
++ speed = max * (HZ * BENCH_SIZE / 1024);
++ tmpl->speed = speed;
++
++ printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name,
++ speed / 1000, speed % 1000);
++}
++
++static int __init
++calibrate_xor_blocks(void)
++{
++ void *b1, *b2;
++ struct xor_block_template *f, *fastest;
++
++ b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
++ if (!b1) {
++ printk(KERN_WARNING "xor: Yikes! No memory available.\n");
++ return -ENOMEM;
++ }
++ b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
++
++ /*
++ * If this arch/cpu has a short-circuited selection, don't loop through
++ * all the possible functions; just test the best one.
++ */
++
++ fastest = NULL;
++
++#ifdef XOR_SELECT_TEMPLATE
++ fastest = XOR_SELECT_TEMPLATE(fastest);
++#endif
++
++#define xor_speed(templ) do_xor_speed((templ), b1, b2)
++
++ if (fastest) {
++ printk(KERN_INFO "xor: automatically using best "
++ "checksumming function: %s\n",
++ fastest->name);
++ xor_speed(fastest);
++ } else {
++ printk(KERN_INFO "xor: measuring software checksum speed\n");
++ XOR_TRY_TEMPLATES;
++ fastest = template_list;
++ for (f = fastest; f; f = f->next)
++ if (f->speed > fastest->speed)
++ fastest = f;
++ }
++
++ printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
++ fastest->name, fastest->speed / 1000, fastest->speed % 1000);
++
++#undef xor_speed
++
++ free_pages((unsigned long)b1, 2);
++
++ active_template = fastest;
++ return 0;
++}
++
++static __exit void xor_exit(void) { }
++
++MODULE_LICENSE("GPL");
++
++/* when built in, xor.o must initialize before drivers/md/md.o */
++core_initcall(calibrate_xor_blocks);
++module_exit(xor_exit);
+diff -Nurb linux-2.6.22-570/drivers/Makefile linux-2.6.22-591/drivers/Makefile
+--- linux-2.6.22-570/drivers/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -70,6 +70,7 @@
+ obj-$(CONFIG_MCA) += mca/
+ obj-$(CONFIG_EISA) += eisa/
+ obj-$(CONFIG_CPU_FREQ) += cpufreq/
++obj-$(CONFIG_CPU_IDLE) += cpuidle/
+ obj-$(CONFIG_MMC) += mmc/
+ obj-$(CONFIG_NEW_LEDS) += leds/
+ obj-$(CONFIG_INFINIBAND) += infiniband/
+diff -Nurb linux-2.6.22-570/drivers/acpi/Kconfig linux-2.6.22-591/drivers/acpi/Kconfig
+--- linux-2.6.22-570/drivers/acpi/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -124,7 +124,7 @@
+
+ config ACPI_VIDEO
+ tristate "Video"
+- depends on X86 && BACKLIGHT_CLASS_DEVICE
++ depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL
+ help
+ This driver implement the ACPI Extensions For Display Adapters
+ for integrated graphics devices on motherboard, as specified in
+@@ -280,6 +280,14 @@
+ of verbosity. Saying Y enables these statements. This will increase
+ your kernel size by around 50K.
+
++config ACPI_DEBUG_FUNC_TRACE
++ bool "Additionally enable ACPI function tracing"
++ default n
++ depends on ACPI_DEBUG
++ help
++ ACPI Debug Statements slow down ACPI processing. Function trace
++ is about half of the penalty and is rarely useful.
++
+ config ACPI_EC
+ bool
+ default y
+diff -Nurb linux-2.6.22-570/drivers/acpi/battery.c linux-2.6.22-591/drivers/acpi/battery.c
+--- linux-2.6.22-570/drivers/acpi/battery.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/battery.c 2007-12-21 15:36:11.000000000 -0500
+@@ -43,21 +43,30 @@
+ #define ACPI_BATTERY_CLASS "battery"
+ #define ACPI_BATTERY_HID "PNP0C0A"
+ #define ACPI_BATTERY_DEVICE_NAME "Battery"
+-#define ACPI_BATTERY_FILE_INFO "info"
+-#define ACPI_BATTERY_FILE_STATUS "state"
+-#define ACPI_BATTERY_FILE_ALARM "alarm"
+ #define ACPI_BATTERY_NOTIFY_STATUS 0x80
+ #define ACPI_BATTERY_NOTIFY_INFO 0x81
+ #define ACPI_BATTERY_UNITS_WATTS "mW"
+ #define ACPI_BATTERY_UNITS_AMPS "mA"
+
+ #define _COMPONENT ACPI_BATTERY_COMPONENT
++
++#define ACPI_BATTERY_UPDATE_TIME 0
++
++#define ACPI_BATTERY_NONE_UPDATE 0
++#define ACPI_BATTERY_EASY_UPDATE 1
++#define ACPI_BATTERY_INIT_UPDATE 2
++
+ ACPI_MODULE_NAME("battery");
+
+ MODULE_AUTHOR("Paul Diefenbaugh");
+ MODULE_DESCRIPTION("ACPI Battery Driver");
+ MODULE_LICENSE("GPL");
+
++static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME;
++
++/* 0 - update on every read; > 0 - reuse cached data for update_time seconds */
++module_param(update_time, uint, 0644);
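++
++/* e.g. booting with battery.update_time=30, or writing 30 to
++ * /sys/module/battery/parameters/update_time, lets cached _BIF/_BST
++ * data satisfy reads for up to 30 seconds (illustrative note, not part
++ * of the original patch).
++ */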
++
+ extern struct proc_dir_entry *acpi_lock_battery_dir(void);
+ extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
+
+@@ -76,7 +85,7 @@
+ },
+ };
+
+-struct acpi_battery_status {
++struct acpi_battery_state {
+ acpi_integer state;
+ acpi_integer present_rate;
+ acpi_integer remaining_capacity;
+@@ -99,33 +108,111 @@
+ acpi_string oem_info;
+ };
+
+-struct acpi_battery_flags {
+- u8 present:1; /* Bay occupied? */
+- u8 power_unit:1; /* 0=watts, 1=apms */
+- u8 alarm:1; /* _BTP present? */
+- u8 reserved:5;
++enum acpi_battery_files{
++ ACPI_BATTERY_INFO = 0,
++ ACPI_BATTERY_STATE,
++ ACPI_BATTERY_ALARM,
++ ACPI_BATTERY_NUMFILES,
+ };
+
+-struct acpi_battery_trips {
+- unsigned long warning;
+- unsigned long low;
++struct acpi_battery_flags {
++ u8 battery_present_prev;
++ u8 alarm_present;
++ u8 init_update;
++ u8 update[ACPI_BATTERY_NUMFILES];
++ u8 power_unit;
+ };
+
+ struct acpi_battery {
+- struct acpi_device * device;
++ struct mutex mutex;
++ struct acpi_device *device;
+ struct acpi_battery_flags flags;
+- struct acpi_battery_trips trips;
++ struct acpi_buffer bif_data;
++ struct acpi_buffer bst_data;
+ unsigned long alarm;
+- struct acpi_battery_info *info;
++ unsigned long update_time[ACPI_BATTERY_NUMFILES];
+ };
+
++inline int acpi_battery_present(struct acpi_battery *battery)
++{
++ return battery->device->status.battery_present;
++}
++inline char *acpi_battery_power_units(struct acpi_battery *battery)
++{
++ if (battery->flags.power_unit)
++ return ACPI_BATTERY_UNITS_AMPS;
++ else
++ return ACPI_BATTERY_UNITS_WATTS;
++}
++
++inline acpi_handle acpi_battery_handle(struct acpi_battery *battery)
++{
++ return battery->device->handle;
++}
++
+ /* --------------------------------------------------------------------------
+ Battery Management
+ -------------------------------------------------------------------------- */
+
+-static int
+-acpi_battery_get_info(struct acpi_battery *battery,
+- struct acpi_battery_info **bif)
++static void acpi_battery_check_result(struct acpi_battery *battery, int result)
++{
++ if (!battery)
++ return;
++
++ if (result) {
++ battery->flags.init_update = 1;
++ }
++}
++
++static int acpi_battery_extract_package(struct acpi_battery *battery,
++ union acpi_object *package,
++ struct acpi_buffer *format,
++ struct acpi_buffer *data,
++ char *package_name)
++{
++ acpi_status status = AE_OK;
++ struct acpi_buffer data_null = { 0, NULL };
++
++ status = acpi_extract_package(package, format, &data_null);
++ if (status != AE_BUFFER_OVERFLOW) {
++ ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s",
++ package_name));
++ return -ENODEV;
++ }
++
++ if (data_null.length != data->length) {
++ kfree(data->pointer);
++ data->pointer = kzalloc(data_null.length, GFP_KERNEL);
++ if (!data->pointer) {
++ ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()"));
++ return -ENOMEM;
++ }
++ data->length = data_null.length;
++ }
++
++ status = acpi_extract_package(package, format, data);
++ if (ACPI_FAILURE(status)) {
++ ACPI_EXCEPTION((AE_INFO, status, "Extracting %s",
++ package_name));
++ return -ENODEV;
++ }
++
++ return 0;
++}
++
++static int acpi_battery_get_status(struct acpi_battery *battery)
++{
++ int result = 0;
++
++ result = acpi_bus_get_status(battery->device);
++ if (result) {
++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA"));
++ return -ENODEV;
++ }
++ return result;
++}
++
++static int acpi_battery_get_info(struct acpi_battery *battery)
+ {
+ int result = 0;
+ acpi_status status = 0;
+@@ -133,16 +220,20 @@
+ struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF),
+ ACPI_BATTERY_FORMAT_BIF
+ };
+- struct acpi_buffer data = { 0, NULL };
+ union acpi_object *package = NULL;
++ struct acpi_buffer *data = NULL;
++ struct acpi_battery_info *bif = NULL;
+
++ battery->update_time[ACPI_BATTERY_INFO] = get_seconds();
+
+- if (!battery || !bif)
+- return -EINVAL;
++ if (!acpi_battery_present(battery))
++ return 0;
+
+- /* Evalute _BIF */
++ /* Evaluate _BIF */
+
+- status = acpi_evaluate_object(battery->device->handle, "_BIF", NULL, &buffer);
++ status =
++ acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL,
++ &buffer);
+ if (ACPI_FAILURE(status)) {
+ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF"));
+ return -ENODEV;
+@@ -150,41 +241,29 @@
+
+ package = buffer.pointer;
+
+- /* Extract Package Data */
+-
+- status = acpi_extract_package(package, &format, &data);
+- if (status != AE_BUFFER_OVERFLOW) {
+- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF"));
+- result = -ENODEV;
+- goto end;
+- }
++ data = &battery->bif_data;
+
+- data.pointer = kzalloc(data.length, GFP_KERNEL);
+- if (!data.pointer) {
+- result = -ENOMEM;
+- goto end;
+- }
++ /* Extract Package Data */
+
+- status = acpi_extract_package(package, &format, &data);
+- if (ACPI_FAILURE(status)) {
+- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF"));
+- kfree(data.pointer);
+- result = -ENODEV;
++ result =
++ acpi_battery_extract_package(battery, package, &format, data,
++ "_BIF");
++ if (result)
+ goto end;
+- }
+
+ end:
++
+ kfree(buffer.pointer);
+
+- if (!result)
+- (*bif) = data.pointer;
++ if (!result) {
++ bif = data->pointer;
++ battery->flags.power_unit = bif->power_unit;
++ }
+
+ return result;
+ }
+
+-static int
+-acpi_battery_get_status(struct acpi_battery *battery,
+- struct acpi_battery_status **bst)
++static int acpi_battery_get_state(struct acpi_battery *battery)
+ {
+ int result = 0;
+ acpi_status status = 0;
+@@ -192,16 +271,19 @@
+ struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST),
+ ACPI_BATTERY_FORMAT_BST
+ };
+- struct acpi_buffer data = { 0, NULL };
+ union acpi_object *package = NULL;
++ struct acpi_buffer *data = NULL;
+
++ battery->update_time[ACPI_BATTERY_STATE] = get_seconds();
+
+- if (!battery || !bst)
+- return -EINVAL;
++ if (!acpi_battery_present(battery))
++ return 0;
+
+- /* Evalute _BST */
++ /* Evaluate _BST */
+
+- status = acpi_evaluate_object(battery->device->handle, "_BST", NULL, &buffer);
++ status =
++ acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL,
++ &buffer);
+ if (ACPI_FAILURE(status)) {
+ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST"));
+ return -ENODEV;
+@@ -209,55 +291,49 @@
+
+ package = buffer.pointer;
+
+- /* Extract Package Data */
++ data = &battery->bst_data;
+
+- status = acpi_extract_package(package, &format, &data);
+- if (status != AE_BUFFER_OVERFLOW) {
+- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST"));
+- result = -ENODEV;
+- goto end;
+- }
+-
+- data.pointer = kzalloc(data.length, GFP_KERNEL);
+- if (!data.pointer) {
+- result = -ENOMEM;
+- goto end;
+- }
++ /* Extract Package Data */
+
+- status = acpi_extract_package(package, &format, &data);
+- if (ACPI_FAILURE(status)) {
+- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST"));
+- kfree(data.pointer);
+- result = -ENODEV;
++ result =
++ acpi_battery_extract_package(battery, package, &format, data,
++ "_BST");
++ if (result)
+ goto end;
+- }
+
+ end:
+ kfree(buffer.pointer);
+
+- if (!result)
+- (*bst) = data.pointer;
+-
+ return result;
+ }
+
+-static int
+-acpi_battery_set_alarm(struct acpi_battery *battery, unsigned long alarm)
++static int acpi_battery_get_alarm(struct acpi_battery *battery)
++{
++ battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
++
++ return 0;
++}
++
++static int acpi_battery_set_alarm(struct acpi_battery *battery,
++ unsigned long alarm)
+ {
+ acpi_status status = 0;
+ union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+ struct acpi_object_list arg_list = { 1, &arg0 };
+
++ battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
+
+- if (!battery)
+- return -EINVAL;
++ if (!acpi_battery_present(battery))
++ return -ENODEV;
+
+- if (!battery->flags.alarm)
++ if (!battery->flags.alarm_present)
+ return -ENODEV;
+
+ arg0.integer.value = alarm;
+
+- status = acpi_evaluate_object(battery->device->handle, "_BTP", &arg_list, NULL);
++ status =
++ acpi_evaluate_object(acpi_battery_handle(battery), "_BTP",
++ &arg_list, NULL);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+@@ -268,65 +344,114 @@
+ return 0;
+ }
+
+-static int acpi_battery_check(struct acpi_battery *battery)
++static int acpi_battery_init_alarm(struct acpi_battery *battery)
+ {
+ int result = 0;
+ acpi_status status = AE_OK;
+ acpi_handle handle = NULL;
+- struct acpi_device *device = NULL;
+- struct acpi_battery_info *bif = NULL;
++ struct acpi_battery_info *bif = battery->bif_data.pointer;
++ unsigned long alarm = battery->alarm;
+
++ /* See if alarms are supported, and if so, set default */
+
+- if (!battery)
+- return -EINVAL;
++ status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle);
++ if (ACPI_SUCCESS(status)) {
++ battery->flags.alarm_present = 1;
++ if (!alarm && bif) {
++ alarm = bif->design_capacity_warning;
++ }
++ result = acpi_battery_set_alarm(battery, alarm);
++ if (result)
++ goto end;
++ } else {
++ battery->flags.alarm_present = 0;
++ }
+
+- device = battery->device;
++ end:
+
+- result = acpi_bus_get_status(device);
+- if (result)
+ return result;
++}
+
+- /* Insertion? */
+-
+- if (!battery->flags.present && device->status.battery_present) {
++static int acpi_battery_init_update(struct acpi_battery *battery)
++{
++ int result = 0;
+
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery inserted\n"));
++ result = acpi_battery_get_status(battery);
++ if (result)
++ return result;
+
+- /* Evalute _BIF to get certain static information */
++ battery->flags.battery_present_prev = acpi_battery_present(battery);
+
+- result = acpi_battery_get_info(battery, &bif);
++ if (acpi_battery_present(battery)) {
++ result = acpi_battery_get_info(battery);
++ if (result)
++ return result;
++ result = acpi_battery_get_state(battery);
+ if (result)
+ return result;
+
+- battery->flags.power_unit = bif->power_unit;
+- battery->trips.warning = bif->design_capacity_warning;
+- battery->trips.low = bif->design_capacity_low;
+- kfree(bif);
++ acpi_battery_init_alarm(battery);
++ }
+
+- /* See if alarms are supported, and if so, set default */
++ return result;
++}
+
+- status = acpi_get_handle(battery->device->handle, "_BTP", &handle);
+- if (ACPI_SUCCESS(status)) {
+- battery->flags.alarm = 1;
+- acpi_battery_set_alarm(battery, battery->trips.warning);
++static int acpi_battery_update(struct acpi_battery *battery,
++ int update, int *update_result_ptr)
++{
++ int result = 0;
++ int update_result = ACPI_BATTERY_NONE_UPDATE;
++
++ if (!acpi_battery_present(battery)) {
++ update = 1;
++ }
++
++ if (battery->flags.init_update) {
++ result = acpi_battery_init_update(battery);
++ if (result)
++ goto end;
++ update_result = ACPI_BATTERY_INIT_UPDATE;
++ } else if (update) {
++ result = acpi_battery_get_status(battery);
++ if (result)
++ goto end;
++ if ((!battery->flags.battery_present_prev & acpi_battery_present(battery))
++ || (battery->flags.battery_present_prev & !acpi_battery_present(battery))) {
++ result = acpi_battery_init_update(battery);
++ if (result)
++ goto end;
++ update_result = ACPI_BATTERY_INIT_UPDATE;
++ } else {
++ update_result = ACPI_BATTERY_EASY_UPDATE;
+ }
+ }
+
+- /* Removal? */
++ end:
+
+- else if (battery->flags.present && !device->status.battery_present) {
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery removed\n"));
+- }
++ battery->flags.init_update = (result != 0);
+
+- battery->flags.present = device->status.battery_present;
++ *update_result_ptr = update_result;
+
+ return result;
+ }
+
+-static void acpi_battery_check_present(struct acpi_battery *battery)
++static void acpi_battery_notify_update(struct acpi_battery *battery)
+ {
+- if (!battery->flags.present) {
+- acpi_battery_check(battery);
++ acpi_battery_get_status(battery);
++
++ if (battery->flags.init_update) {
++ return;
++ }
++
++ if ((!battery->flags.battery_present_prev &
++ acpi_battery_present(battery)) ||
++ (battery->flags.battery_present_prev &
++ !acpi_battery_present(battery))) {
++ battery->flags.init_update = 1;
++ } else {
++ battery->flags.update[ACPI_BATTERY_INFO] = 1;
++ battery->flags.update[ACPI_BATTERY_STATE] = 1;
++ battery->flags.update[ACPI_BATTERY_ALARM] = 1;
+ }
+ }
+
+@@ -335,37 +460,33 @@
+ -------------------------------------------------------------------------- */
+
+ static struct proc_dir_entry *acpi_battery_dir;
+-static int acpi_battery_read_info(struct seq_file *seq, void *offset)
++
++static int acpi_battery_print_info(struct seq_file *seq, int result)
+ {
+- int result = 0;
+ struct acpi_battery *battery = seq->private;
+ struct acpi_battery_info *bif = NULL;
+ char *units = "?";
+
+-
+- if (!battery)
++ if (result)
+ goto end;
+
+- acpi_battery_check_present(battery);
+-
+- if (battery->flags.present)
++ if (acpi_battery_present(battery))
+ seq_printf(seq, "present: yes\n");
+ else {
+ seq_printf(seq, "present: no\n");
+ goto end;
+ }
+
+- /* Battery Info (_BIF) */
+-
+- result = acpi_battery_get_info(battery, &bif);
+- if (result || !bif) {
+- seq_printf(seq, "ERROR: Unable to read battery information\n");
++ bif = battery->bif_data.pointer;
++ if (!bif) {
++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL"));
++ result = -ENODEV;
+ goto end;
+ }
+
+- units =
+- bif->
+- power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
++ /* Battery Units */
++
++ units = acpi_battery_power_units(battery);
+
+ if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+ seq_printf(seq, "design capacity: unknown\n");
+@@ -396,7 +517,6 @@
+ else
+ seq_printf(seq, "design voltage: %d mV\n",
+ (u32) bif->design_voltage);
+-
+ seq_printf(seq, "design capacity warning: %d %sh\n",
+ (u32) bif->design_capacity_warning, units);
+ seq_printf(seq, "design capacity low: %d %sh\n",
+@@ -411,50 +531,40 @@
+ seq_printf(seq, "OEM info: %s\n", bif->oem_info);
+
+ end:
+- kfree(bif);
+
+- return 0;
+-}
++ if (result)
++ seq_printf(seq, "ERROR: Unable to read battery info\n");
+
+-static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
+-{
+- return single_open(file, acpi_battery_read_info, PDE(inode)->data);
++ return result;
+ }
+
+-static int acpi_battery_read_state(struct seq_file *seq, void *offset)
++static int acpi_battery_print_state(struct seq_file *seq, int result)
+ {
+- int result = 0;
+ struct acpi_battery *battery = seq->private;
+- struct acpi_battery_status *bst = NULL;
++ struct acpi_battery_state *bst = NULL;
+ char *units = "?";
+
+-
+- if (!battery)
++ if (result)
+ goto end;
+
+- acpi_battery_check_present(battery);
+-
+- if (battery->flags.present)
++ if (acpi_battery_present(battery))
+ seq_printf(seq, "present: yes\n");
+ else {
+ seq_printf(seq, "present: no\n");
+ goto end;
+ }
+
+- /* Battery Units */
+-
+- units =
+- battery->flags.
+- power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
+-
+- /* Battery Status (_BST) */
+-
+- result = acpi_battery_get_status(battery, &bst);
+- if (result || !bst) {
+- seq_printf(seq, "ERROR: Unable to read battery status\n");
++ bst = battery->bst_data.pointer;
++ if (!bst) {
++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL"));
++ result = -ENODEV;
+ goto end;
+ }
+
++ /* Battery Units */
++
++ units = acpi_battery_power_units(battery);
++
+ if (!(bst->state & 0x04))
+ seq_printf(seq, "capacity state: ok\n");
+ else
+@@ -490,48 +600,43 @@
+ (u32) bst->present_voltage);
+
+ end:
+- kfree(bst);
+
+- return 0;
+-}
++ if (result) {
++ seq_printf(seq, "ERROR: Unable to read battery state\n");
++ }
+
+-static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
+-{
+- return single_open(file, acpi_battery_read_state, PDE(inode)->data);
++ return result;
+ }
+
+-static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
++static int acpi_battery_print_alarm(struct seq_file *seq, int result)
+ {
+ struct acpi_battery *battery = seq->private;
+ char *units = "?";
+
+-
+- if (!battery)
++ if (result)
+ goto end;
+
+- acpi_battery_check_present(battery);
+-
+- if (!battery->flags.present) {
++ if (!acpi_battery_present(battery)) {
+ seq_printf(seq, "present: no\n");
+ goto end;
+ }
+
+ /* Battery Units */
+
+- units =
+- battery->flags.
+- power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
+-
+- /* Battery Alarm */
++ units = acpi_battery_power_units(battery);
+
+ seq_printf(seq, "alarm: ");
+ if (!battery->alarm)
+ seq_printf(seq, "unsupported\n");
+ else
+- seq_printf(seq, "%d %sh\n", (u32) battery->alarm, units);
++ seq_printf(seq, "%lu %sh\n", battery->alarm, units);
+
+ end:
+- return 0;
++
++ if (result)
++ seq_printf(seq, "ERROR: Unable to read battery alarm\n");
++
++ return result;
+ }
+
+ static ssize_t
+@@ -543,27 +648,113 @@
+ char alarm_string[12] = { '\0' };
+ struct seq_file *m = file->private_data;
+ struct acpi_battery *battery = m->private;
+-
++ int update_result = ACPI_BATTERY_NONE_UPDATE;
+
+ if (!battery || (count > sizeof(alarm_string) - 1))
+ return -EINVAL;
+
+- acpi_battery_check_present(battery);
++ mutex_lock(&battery->mutex);
+
+- if (!battery->flags.present)
+- return -ENODEV;
++ result = acpi_battery_update(battery, 1, &update_result);
++ if (result) {
++ result = -ENODEV;
++ goto end;
++ }
+
+- if (copy_from_user(alarm_string, buffer, count))
+- return -EFAULT;
++ if (!acpi_battery_present(battery)) {
++ result = -ENODEV;
++ goto end;
++ }
++
++ if (copy_from_user(alarm_string, buffer, count)) {
++ result = -EFAULT;
++ goto end;
++ }
+
+ alarm_string[count] = '\0';
+
+ result = acpi_battery_set_alarm(battery,
+ simple_strtoul(alarm_string, NULL, 0));
+ if (result)
++ goto end;
++
++ end:
++
++ acpi_battery_check_result(battery, result);
++
++ if (!result)
++ result = count;
++
++ mutex_unlock(&battery->mutex);
++
+ return result;
++}
++
++typedef int(*print_func)(struct seq_file *seq, int result);
++typedef int(*get_func)(struct acpi_battery *battery);
++
++static struct acpi_read_mux {
++ print_func print;
++ get_func get;
++} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = {
++ {.get = acpi_battery_get_info, .print = acpi_battery_print_info},
++ {.get = acpi_battery_get_state, .print = acpi_battery_print_state},
++ {.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm},
++};
++
++static int acpi_battery_read(int fid, struct seq_file *seq)
++{
++ struct acpi_battery *battery = seq->private;
++ int result = 0;
++ int update_result = ACPI_BATTERY_NONE_UPDATE;
++ int update = 0;
++
++ mutex_lock(&battery->mutex);
++
++ update = (get_seconds() - battery->update_time[fid] >= update_time);
++ update = (update | battery->flags.update[fid]);
++
++ result = acpi_battery_update(battery, update, &update_result);
++ if (result)
++ goto end;
++
++ if (update_result == ACPI_BATTERY_EASY_UPDATE) {
++ result = acpi_read_funcs[fid].get(battery);
++ if (result)
++ goto end;
++ }
+
+- return count;
++ end:
++ result = acpi_read_funcs[fid].print(seq, result);
++ acpi_battery_check_result(battery, result);
++ battery->flags.update[fid] = result;
++ mutex_unlock(&battery->mutex);
++ return result;
++}
++
++static int acpi_battery_read_info(struct seq_file *seq, void *offset)
++{
++ return acpi_battery_read(ACPI_BATTERY_INFO, seq);
++}
++
++static int acpi_battery_read_state(struct seq_file *seq, void *offset)
++{
++ return acpi_battery_read(ACPI_BATTERY_STATE, seq);
++}
++
++static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
++{
++ return acpi_battery_read(ACPI_BATTERY_ALARM, seq);
++}
++
++static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
++{
++ return single_open(file, acpi_battery_read_info, PDE(inode)->data);
++}
++
++static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
++{
++ return single_open(file, acpi_battery_read_state, PDE(inode)->data);
+ }
+
+ static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
+@@ -571,35 +762,51 @@
+ return single_open(file, acpi_battery_read_alarm, PDE(inode)->data);
+ }
+
+-static const struct file_operations acpi_battery_info_ops = {
++static struct battery_file {
++ struct file_operations ops;
++ mode_t mode;
++ char *name;
++} acpi_battery_file[] = {
++ {
++ .name = "info",
++ .mode = S_IRUGO,
++ .ops = {
+ .open = acpi_battery_info_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+-};
+-
+-static const struct file_operations acpi_battery_state_ops = {
++ },
++ },
++ {
++ .name = "state",
++ .mode = S_IRUGO,
++ .ops = {
+ .open = acpi_battery_state_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+-};
+-
+-static const struct file_operations acpi_battery_alarm_ops = {
++ },
++ },
++ {
++ .name = "alarm",
++ .mode = S_IFREG | S_IRUGO | S_IWUSR,
++ .ops = {
+ .open = acpi_battery_alarm_open_fs,
+ .read = seq_read,
+ .write = acpi_battery_write_alarm,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
++ },
++ },
+ };
+
+ static int acpi_battery_add_fs(struct acpi_device *device)
+ {
+ struct proc_dir_entry *entry = NULL;
+-
++ int i;
+
+ if (!acpi_device_dir(device)) {
+ acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device),
+@@ -609,38 +816,16 @@
+ acpi_device_dir(device)->owner = THIS_MODULE;
+ }
+
+- /* 'info' [R] */
+- entry = create_proc_entry(ACPI_BATTERY_FILE_INFO,
+- S_IRUGO, acpi_device_dir(device));
++ for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
++ entry = create_proc_entry(acpi_battery_file[i].name,
++ acpi_battery_file[i].mode, acpi_device_dir(device));
+ if (!entry)
+ return -ENODEV;
+ else {
+- entry->proc_fops = &acpi_battery_info_ops;
++ entry->proc_fops = &acpi_battery_file[i].ops;
+ entry->data = acpi_driver_data(device);
+ entry->owner = THIS_MODULE;
+ }
+-
+- /* 'status' [R] */
+- entry = create_proc_entry(ACPI_BATTERY_FILE_STATUS,
+- S_IRUGO, acpi_device_dir(device));
+- if (!entry)
+- return -ENODEV;
+- else {
+- entry->proc_fops = &acpi_battery_state_ops;
+- entry->data = acpi_driver_data(device);
+- entry->owner = THIS_MODULE;
+- }
+-
+- /* 'alarm' [R/W] */
+- entry = create_proc_entry(ACPI_BATTERY_FILE_ALARM,
+- S_IFREG | S_IRUGO | S_IWUSR,
+- acpi_device_dir(device));
+- if (!entry)
+- return -ENODEV;
+- else {
+- entry->proc_fops = &acpi_battery_alarm_ops;
+- entry->data = acpi_driver_data(device);
+- entry->owner = THIS_MODULE;
+ }
+
+ return 0;
+@@ -648,15 +833,12 @@
+
+ static int acpi_battery_remove_fs(struct acpi_device *device)
+ {
+-
++ int i;
+ if (acpi_device_dir(device)) {
+- remove_proc_entry(ACPI_BATTERY_FILE_ALARM,
++ for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
++ remove_proc_entry(acpi_battery_file[i].name,
+ acpi_device_dir(device));
+- remove_proc_entry(ACPI_BATTERY_FILE_STATUS,
+- acpi_device_dir(device));
+- remove_proc_entry(ACPI_BATTERY_FILE_INFO,
+- acpi_device_dir(device));
+-
++ }
+ remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+ acpi_device_dir(device) = NULL;
+ }
+@@ -673,7 +855,6 @@
+ struct acpi_battery *battery = data;
+ struct acpi_device *device = NULL;
+
+-
+ if (!battery)
+ return;
+
+@@ -684,8 +865,10 @@
+ case ACPI_BATTERY_NOTIFY_INFO:
+ case ACPI_NOTIFY_BUS_CHECK:
+ case ACPI_NOTIFY_DEVICE_CHECK:
+- acpi_battery_check(battery);
+- acpi_bus_generate_event(device, event, battery->flags.present);
++ device = battery->device;
++ acpi_battery_notify_update(battery);
++ acpi_bus_generate_event(device, event,
++ acpi_battery_present(battery));
+ break;
+ default:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+@@ -702,7 +885,6 @@
+ acpi_status status = 0;
+ struct acpi_battery *battery = NULL;
+
+-
+ if (!device)
+ return -EINVAL;
+
+@@ -710,15 +892,21 @@
+ if (!battery)
+ return -ENOMEM;
+
++ mutex_init(&battery->mutex);
++
++ mutex_lock(&battery->mutex);
++
+ battery->device = device;
+ strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
+ strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
+ acpi_driver_data(device) = battery;
+
+- result = acpi_battery_check(battery);
++ result = acpi_battery_get_status(battery);
+ if (result)
+ goto end;
+
++ battery->flags.init_update = 1;
++
+ result = acpi_battery_add_fs(device);
+ if (result)
+ goto end;
+@@ -727,6 +915,7 @@
+ ACPI_ALL_NOTIFY,
+ acpi_battery_notify, battery);
+ if (ACPI_FAILURE(status)) {
++ ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler"));
+ result = -ENODEV;
+ goto end;
+ }
+@@ -736,11 +925,14 @@
+ device->status.battery_present ? "present" : "absent");
+
+ end:
++
+ if (result) {
+ acpi_battery_remove_fs(device);
+ kfree(battery);
+ }
+
++ mutex_unlock(&battery->mutex);
++
+ return result;
+ }
+
+@@ -749,18 +941,27 @@
+ acpi_status status = 0;
+ struct acpi_battery *battery = NULL;
+
+-
+ if (!device || !acpi_driver_data(device))
+ return -EINVAL;
+
+ battery = acpi_driver_data(device);
+
++ mutex_lock(&battery->mutex);
++
+ status = acpi_remove_notify_handler(device->handle,
+ ACPI_ALL_NOTIFY,
+ acpi_battery_notify);
+
+ acpi_battery_remove_fs(device);
+
++ kfree(battery->bif_data.pointer);
++
++ kfree(battery->bst_data.pointer);
++
++ mutex_unlock(&battery->mutex);
++
++ mutex_destroy(&battery->mutex);
++
+ kfree(battery);
+
+ return 0;
+@@ -775,7 +976,10 @@
+ return -EINVAL;
+
+ battery = device->driver_data;
+- return acpi_battery_check(battery);
++
++ battery->flags.init_update = 1;
++
++ return 0;
+ }
+
+ static int __init acpi_battery_init(void)
+@@ -800,7 +1004,6 @@
+
+ static void __exit acpi_battery_exit(void)
+ {
+-
+ acpi_bus_unregister_driver(&acpi_battery_driver);
+
+ acpi_unlock_battery_dir(acpi_battery_dir);
+diff -Nurb linux-2.6.22-570/drivers/acpi/bay.c linux-2.6.22-591/drivers/acpi/bay.c
+--- linux-2.6.22-570/drivers/acpi/bay.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/bay.c 2007-12-21 15:36:11.000000000 -0500
+@@ -288,6 +288,11 @@
+ new_bay->pdev = pdev;
+ platform_set_drvdata(pdev, new_bay);
+
++ /*
++ * we want the bay driver to be able to send uevents
++ */
++ pdev->dev.uevent_suppress = 0;
++
+ if (acpi_bay_add_fs(new_bay)) {
+ platform_device_unregister(new_bay->pdev);
+ goto bay_add_err;
+@@ -328,18 +333,12 @@
+ {
+ struct bay *bay_dev = (struct bay *)data;
+ struct device *dev = &bay_dev->pdev->dev;
++ char event_string[32]; /* holds "BAY_EVENT=<n>\n" */
++ char *envp[] = { event_string, NULL };
+
+ bay_dprintk(handle, "Bay event");
+-
+- switch(event) {
+- case ACPI_NOTIFY_BUS_CHECK:
+- case ACPI_NOTIFY_DEVICE_CHECK:
+- case ACPI_NOTIFY_EJECT_REQUEST:
+- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+- break;
+- default:
+- printk(KERN_ERR PREFIX "Bay: unknown event %d\n", event);
+- }
++ sprintf(event_string, "BAY_EVENT=%d\n", event);
++ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
+ }
+
+ static acpi_status
+diff -Nurb linux-2.6.22-570/drivers/acpi/dock.c linux-2.6.22-591/drivers/acpi/dock.c
+--- linux-2.6.22-570/drivers/acpi/dock.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/acpi/dock.c 2007-12-21 15:36:11.000000000 -0500
+@@ -40,8 +40,15 @@
+ MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION);
+ MODULE_LICENSE("GPL");
+
++static int immediate_undock = 1;
++module_param(immediate_undock, bool, 0644);
++MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to "
++ "undock immediately when the undock button is pressed, 0 will cause"
++ " the driver to wait for userspace to write the undock sysfs file "
++ " before undocking");
++
+ static struct atomic_notifier_head dock_notifier_list;
+-static struct platform_device dock_device;
++static struct platform_device *dock_device;
+ static char dock_device_name[] = "dock";
+
+ struct dock_station {
+@@ -63,6 +70,7 @@
+ };
+
+ #define DOCK_DOCKING 0x00000001
++#define DOCK_UNDOCKING 0x00000002
+ #define DOCK_EVENT 3
+ #define UNDOCK_EVENT 2
+
+@@ -327,12 +335,20 @@
+
+ static void dock_event(struct dock_station *ds, u32 event, int num)
+ {
+- struct device *dev = &dock_device.dev;
++ struct device *dev = &dock_device->dev;
++ char event_string[7];
++ char *envp[] = { event_string, NULL };
++
++ if (num == UNDOCK_EVENT)
++ sprintf(event_string, "UNDOCK");
++ else
++ sprintf(event_string, "DOCK");
++
+ /*
+ * Indicate that the status of the dock station has
+ * changed.
+ */
+- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
++ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
+ }
+
+ /**
+@@ -420,6 +436,16 @@
+ ds->last_dock_time = jiffies;
+ }
+
++static inline void begin_undock(struct dock_station *ds)
++{
++ ds->flags |= DOCK_UNDOCKING;
++}
++
++static inline void complete_undock(struct dock_station *ds)
++{
++ ds->flags &= ~(DOCK_UNDOCKING);
++}
++
+ /**
+ * dock_in_progress - see if we are in the middle of handling a dock event
+ * @ds: the dock station
+@@ -550,7 +576,7 @@
+ printk(KERN_ERR PREFIX "Unable to undock!\n");
+ return -EBUSY;
+ }
+-
++ complete_undock(ds);
+ return 0;
+ }
+
+@@ -594,7 +620,11 @@
+ * to the driver who wish to hotplug.
+ */
+ case ACPI_NOTIFY_EJECT_REQUEST:
++ begin_undock(ds);
++ if (immediate_undock)
+ handle_eject_request(ds, event);
++ else
++ dock_event(ds, event, UNDOCK_EVENT);
+ break;
+ default:
+ printk(KERN_ERR PREFIX "Unknown dock event %d\n", event);
+@@ -653,6 +683,17 @@
+ DEVICE_ATTR(docked, S_IRUGO, show_docked, NULL);
+
+ /*
++ * show_flags - read method for flags file in sysfs
++ */
++static ssize_t show_flags(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, "%d\n", dock_station->flags);
++
++}
++DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
++
++/*
+ * write_undock - write method for "undock" file in sysfs
+ */
+ static ssize_t write_undock(struct device *dev, struct device_attribute *attr,
+@@ -675,16 +716,15 @@
+ struct device_attribute *attr, char *buf)
+ {
+ unsigned long lbuf;
+- acpi_status status = acpi_evaluate_integer(dock_station->handle, "_UID", NULL, &lbuf);
+- if(ACPI_FAILURE(status)) {
++ acpi_status status = acpi_evaluate_integer(dock_station->handle,
++ "_UID", NULL, &lbuf);
++ if (ACPI_FAILURE(status))
+ return 0;
+- }
++
+ return snprintf(buf, PAGE_SIZE, "%lx\n", lbuf);
+ }
+ DEVICE_ATTR(uid, S_IRUGO, show_dock_uid, NULL);
+
+-
+-
+ /**
+ * dock_add - add a new dock station
+ * @handle: the dock station handle
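
With immediate_undock set to 0, the driver above only raises the UNDOCK uevent and waits for userspace to write the undock file, so a small agent is expected on the other side. A minimal sketch of such an agent follows; the platform device path "dock.0" and the "1" written to the undock attribute are assumptions for illustration, not guaranteed by this excerpt:

    /* undock-agent.c -- hypothetical userspace helper (sketch only) */
    #include <stdio.h>

    #define DOCK_SYSFS "/sys/devices/platform/dock.0"  /* assumed path */
    #define DOCK_UNDOCKING 0x02                        /* mirrors dock.c */

    int main(void)
    {
            char path[128];
            FILE *f;
            int flags = 0;

            /* "flags" is the new read-only attribute added above */
            snprintf(path, sizeof(path), "%s/flags", DOCK_SYSFS);
            f = fopen(path, "r");
            if (!f)
                    return 1;
            if (fscanf(f, "%d", &flags) != 1)
                    flags = 0;
            fclose(f);

            if (flags & DOCK_UNDOCKING) {
                    /* finish the undock the button press began */
                    snprintf(path, sizeof(path), "%s/undock", DOCK_SYSFS);
                    f = fopen(path, "w");
                    if (!f)
                            return 1;
                    fputs("1\n", f);
                    fclose(f);
            }
            return 0;
    }
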
+diff -Nurb linux-2.6.22-570/drivers/acpi/ec.c linux-2.6.22-591/drivers/acpi/ec.c
+--- linux-2.6.22-570/drivers/acpi/ec.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/ec.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,25 +34,26 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/interrupt.h>
++#include <linux/list.h>
+ #include <asm/io.h>
+ #include <acpi/acpi_bus.h>
+ #include <acpi/acpi_drivers.h>
+ #include <acpi/actypes.h>
+
+-#define _COMPONENT ACPI_EC_COMPONENT
+-ACPI_MODULE_NAME("ec");
+-#define ACPI_EC_COMPONENT 0x00100000
+ #define ACPI_EC_CLASS "embedded_controller"
+ #define ACPI_EC_HID "PNP0C09"
+ #define ACPI_EC_DEVICE_NAME "Embedded Controller"
+ #define ACPI_EC_FILE_INFO "info"
++
+ #undef PREFIX
+ #define PREFIX "ACPI: EC: "
++
+ /* EC status register */
+ #define ACPI_EC_FLAG_OBF 0x01 /* Output buffer full */
+ #define ACPI_EC_FLAG_IBF 0x02 /* Input buffer full */
+ #define ACPI_EC_FLAG_BURST 0x10 /* burst mode */
+ #define ACPI_EC_FLAG_SCI 0x20 /* EC-SCI occurred */
++
+ /* EC commands */
+ enum ec_command {
+ ACPI_EC_COMMAND_READ = 0x80,
+@@ -61,6 +62,7 @@
+ ACPI_EC_BURST_DISABLE = 0x83,
+ ACPI_EC_COMMAND_QUERY = 0x84,
+ };
++
+ /* EC events */
+ enum ec_event {
+ ACPI_EC_EVENT_OBF_1 = 1, /* Output buffer full */
+@@ -94,6 +96,16 @@
+
+ /* If we find an EC via the ECDT, we need to keep a ptr to its context */
+ /* External interfaces use first EC only, so remember */
++typedef int (*acpi_ec_query_func) (void *data);
++
++struct acpi_ec_query_handler {
++ struct list_head node;
++ acpi_ec_query_func func;
++ acpi_handle handle;
++ void *data;
++ u8 query_bit;
++};
++
+ static struct acpi_ec {
+ acpi_handle handle;
+ unsigned long gpe;
+@@ -104,6 +116,7 @@
+ atomic_t query_pending;
+ atomic_t event_count;
+ wait_queue_head_t wait;
++ struct list_head list;
+ } *boot_ec, *first_ec;
+
+ /* --------------------------------------------------------------------------
+@@ -245,7 +258,7 @@
+
+ status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0, 0, 0);
+ if (status) {
+- printk(KERN_DEBUG PREFIX
++ printk(KERN_ERR PREFIX
+ "input buffer is not empty, aborting transaction\n");
+ goto end;
+ }
+@@ -394,21 +407,67 @@
+ /* --------------------------------------------------------------------------
+ Event Management
+ -------------------------------------------------------------------------- */
++int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
++ acpi_handle handle, acpi_ec_query_func func,
++ void *data)
++{
++ struct acpi_ec_query_handler *handler =
++ kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL);
++ if (!handler)
++ return -ENOMEM;
++
++ handler->query_bit = query_bit;
++ handler->handle = handle;
++ handler->func = func;
++ handler->data = data;
++ mutex_lock(&ec->lock);
++ list_add_tail(&handler->node, &ec->list);
++ mutex_unlock(&ec->lock);
++ return 0;
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler);
++
++void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
++{
++ struct acpi_ec_query_handler *handler;
++ mutex_lock(&ec->lock);
++ list_for_each_entry(handler, &ec->list, node) {
++ if (query_bit == handler->query_bit) {
++ list_del(&handler->node);
++ kfree(handler);
++ break;
++ }
++ }
++ mutex_unlock(&ec->lock);
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
+
+ static void acpi_ec_gpe_query(void *ec_cxt)
+ {
+ struct acpi_ec *ec = ec_cxt;
+ u8 value = 0;
+- char object_name[8];
++ struct acpi_ec_query_handler *handler, copy;
+
+ if (!ec || acpi_ec_query(ec, &value))
+ return;
+-
+- snprintf(object_name, 8, "_Q%2.2X", value);
+-
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name));
+-
+- acpi_evaluate_object(ec->handle, object_name, NULL, NULL);
++ mutex_lock(&ec->lock);
++ list_for_each_entry(handler, &ec->list, node) {
++ if (value == handler->query_bit) {
++ /* have custom handler for this bit */
++ memcpy(&copy, handler, sizeof(copy));
++ mutex_unlock(&ec->lock);
++ if (copy.func) {
++ copy.func(copy.data);
++ } else if (copy.handle) {
++ acpi_evaluate_object(copy.handle, NULL, NULL, NULL);
++ }
++ return;
++ }
++ }
++ mutex_unlock(&ec->lock);
++ printk(KERN_ERR PREFIX "Handler for query 0x%x is not found!\n", value);
+ }
+
+ static u32 acpi_ec_gpe_handler(void *data)
+@@ -427,8 +486,7 @@
+ if ((value & ACPI_EC_FLAG_SCI) && !atomic_read(&ec->query_pending)) {
+ atomic_set(&ec->query_pending, 1);
+ status =
+- acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query,
+- ec);
++ acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query, ec);
+ }
+
+ return status == AE_OK ?
+@@ -454,57 +512,35 @@
+ }
+
+ static acpi_status
+-acpi_ec_space_handler(u32 function,
+- acpi_physical_address address,
+- u32 bit_width,
+- acpi_integer * value,
++acpi_ec_space_handler(u32 function, acpi_physical_address address,
++ u32 bits, acpi_integer *value,
+ void *handler_context, void *region_context)
+ {
+- int result = 0;
+ struct acpi_ec *ec = handler_context;
+- u64 temp = *value;
+- acpi_integer f_v = 0;
+- int i = 0;
++ int result = 0, i = 0;
++ u8 temp = 0;
+
+ if ((address > 0xFF) || !value || !handler_context)
+ return AE_BAD_PARAMETER;
+
+- if (bit_width != 8 && acpi_strict) {
++ if (function != ACPI_READ && function != ACPI_WRITE)
+ return AE_BAD_PARAMETER;
+- }
+
+- next_byte:
+- switch (function) {
+- case ACPI_READ:
+- temp = 0;
+- result = acpi_ec_read(ec, (u8) address, (u8 *) & temp);
+- break;
+- case ACPI_WRITE:
+- result = acpi_ec_write(ec, (u8) address, (u8) temp);
+- break;
+- default:
+- result = -EINVAL;
+- goto out;
+- break;
+- }
+-
+- bit_width -= 8;
+- if (bit_width) {
+- if (function == ACPI_READ)
+- f_v |= temp << 8 * i;
+- if (function == ACPI_WRITE)
+- temp >>= 8;
+- i++;
+- address++;
+- goto next_byte;
+- }
++ if (bits != 8 && acpi_strict)
++ return AE_BAD_PARAMETER;
+
++ if (function == ACPI_READ)
++ (*value) = 0; /* bytes are OR-ed in below, as the old f_v was */
++
++ while (bits - i > 0) {
+ if (function == ACPI_READ) {
+- f_v |= temp << 8 * i;
+- *value = f_v;
++ result = acpi_ec_read(ec, address, &temp);
++ (*value) |= ((acpi_integer)temp) << i;
++ } else {
++ temp = 0xff & ((*value) >> i);
++ result = acpi_ec_write(ec, address, temp);
++ }
++ i += 8;
++ ++address;
+ }
+
+- out:
+ switch (result) {
+ case -EINVAL:
+ return AE_BAD_PARAMETER;
+@@ -597,9 +633,6 @@
+ static acpi_status
+ ec_parse_io_ports(struct acpi_resource *resource, void *context);
+
+-static acpi_status
+-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval);
+-
+ static struct acpi_ec *make_acpi_ec(void)
+ {
+ struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
+@@ -610,13 +643,52 @@
+ atomic_set(&ec->event_count, 1);
+ mutex_init(&ec->lock);
+ init_waitqueue_head(&ec->wait);
++ INIT_LIST_HEAD(&ec->list);
+
+ return ec;
+ }
+
++static acpi_status
++acpi_ec_register_query_methods(acpi_handle handle, u32 level,
++ void *context, void **return_value)
++{
++ struct acpi_namespace_node *node = handle;
++ struct acpi_ec *ec = context;
++ int value = 0;
++ if (sscanf(node->name.ascii, "_Q%x", &value) == 1) {
++ acpi_ec_add_query_handler(ec, value, handle, NULL, NULL);
++ }
++ return AE_OK;
++}
++
++static int ec_parse_device(struct acpi_ec *ec, acpi_handle handle)
++{
++ if (ACPI_FAILURE(acpi_walk_resources(handle, METHOD_NAME__CRS,
++ ec_parse_io_ports, ec)))
++ return -EINVAL;
++
++ /* Get GPE bit assignment (EC events). */
++ /* TODO: Add support for _GPE returning a package */
++ if (ACPI_FAILURE(acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe)))
++ return -EINVAL;
++
++ /* Use the global lock for all EC transactions? */
++ acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
++
++ /* Find and register all query methods */
++ acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1,
++ acpi_ec_register_query_methods, ec, NULL);
++
++ ec->handle = handle;
++
++ printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx",
++ ec->gpe, ec->command_addr, ec->data_addr);
++
++ return 0;
++}
++
+ static int acpi_ec_add(struct acpi_device *device)
+ {
+- acpi_status status = AE_OK;
+ struct acpi_ec *ec = NULL;
+
+ if (!device)
+@@ -629,8 +701,7 @@
+ if (!ec)
+ return -ENOMEM;
+
+- status = ec_parse_device(device->handle, 0, ec, NULL);
+- if (status != AE_CTRL_TERMINATE) {
++ if (ec_parse_device(ec, device->handle)) {
+ kfree(ec);
+ return -EINVAL;
+ }
+@@ -641,6 +712,8 @@
+ /* We might have incorrect info for GL at boot time */
+ mutex_lock(&boot_ec->lock);
+ boot_ec->global_lock = ec->global_lock;
++ /* Copy handlers from new ec into boot ec */
++ list_splice(&ec->list, &boot_ec->list);
+ mutex_unlock(&boot_ec->lock);
+ kfree(ec);
+ ec = boot_ec;
+@@ -651,22 +724,24 @@
+ acpi_driver_data(device) = ec;
+
+ acpi_ec_add_fs(device);
+-
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s [%s] (gpe %d) interrupt mode.",
+- acpi_device_name(device), acpi_device_bid(device),
+- (u32) ec->gpe));
+-
+ return 0;
+ }
+
+ static int acpi_ec_remove(struct acpi_device *device, int type)
+ {
+ struct acpi_ec *ec;
++ struct acpi_ec_query_handler *handler, *tmp;
+
+ if (!device)
+ return -EINVAL;
+
+ ec = acpi_driver_data(device);
++ mutex_lock(&ec->lock);
++ list_for_each_entry_safe(handler, tmp, &ec->list, node) {
++ list_del(&handler->node);
++ kfree(handler);
++ }
++ mutex_unlock(&ec->lock);
+ acpi_ec_remove_fs(device);
+ acpi_driver_data(device) = NULL;
+ if (ec == first_ec)
+@@ -722,15 +797,13 @@
+ return -ENODEV;
+ }
+
+- /* EC is fully operational, allow queries */
+- atomic_set(&ec->query_pending, 0);
+-
+ return 0;
+ }
+
+ static int acpi_ec_start(struct acpi_device *device)
+ {
+ struct acpi_ec *ec;
++ int ret = 0;
+
+ if (!device)
+ return -EINVAL;
+@@ -740,14 +813,14 @@
+ if (!ec)
+ return -EINVAL;
+
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02lx, ports=0x%2lx,0x%2lx",
+- ec->gpe, ec->command_addr, ec->data_addr));
+-
+ /* Boot EC is already working */
+- if (ec == boot_ec)
+- return 0;
++ if (ec != boot_ec)
++ ret = ec_install_handlers(ec);
+
+- return ec_install_handlers(ec);
++ /* EC is fully operational, allow queries */
++ atomic_set(&ec->query_pending, 0);
++
++ return ret;
+ }
+
+ static int acpi_ec_stop(struct acpi_device *device, int type)
+@@ -779,34 +852,6 @@
+ return 0;
+ }
+
+-static acpi_status
+-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
+-{
+- acpi_status status;
+-
+- struct acpi_ec *ec = context;
+- status = acpi_walk_resources(handle, METHOD_NAME__CRS,
+- ec_parse_io_ports, ec);
+- if (ACPI_FAILURE(status))
+- return status;
+-
+- /* Get GPE bit assignment (EC events). */
+- /* TODO: Add support for _GPE returning a package */
+- status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe);
+- if (ACPI_FAILURE(status))
+- return status;
+-
+- /* Use the global lock for all EC transactions? */
+- acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
+-
+- ec->handle = handle;
+-
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx",
+- ec->gpe, ec->command_addr, ec->data_addr));
+-
+- return AE_CTRL_TERMINATE;
+-}
+-
+ int __init acpi_ec_ecdt_probe(void)
+ {
+ int ret;
+@@ -825,7 +870,7 @@
+ if (ACPI_FAILURE(status))
+ goto error;
+
+- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT"));
++ printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n");
+
+ boot_ec->command_addr = ecdt_ptr->control.address;
+ boot_ec->data_addr = ecdt_ptr->data.address;
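
The two exported helpers above replace hard-coded _Qxx evaluation with a registration API. A hedged sketch of a consumer follows; the extern declarations restate the signatures from this patch, and how the caller obtains the struct acpi_ec pointer (my_ec) is an assumption:

    /* sketch only -- consumer of the new EC query-handler API */
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/init.h>
    #include <acpi/acpi.h>

    struct acpi_ec;                        /* opaque here; defined in ec.c */
    typedef int (*acpi_ec_query_func)(void *data);
    extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
                                         acpi_handle handle,
                                         acpi_ec_query_func func, void *data);
    extern void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);

    static struct acpi_ec *my_ec;          /* assumed: found during probe */

    static int my_query_handler(void *data)
    {
            /* called instead of evaluating _Q50 when the EC raises 0x50 */
            printk(KERN_INFO "EC query 0x50 handled by driver\n");
            return 0;
    }

    static int __init my_init(void)
    {
            return acpi_ec_add_query_handler(my_ec, 0x50, NULL,
                                             my_query_handler, NULL);
    }

    static void __exit my_exit(void)
    {
            acpi_ec_remove_query_handler(my_ec, 0x50);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");
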
+diff -Nurb linux-2.6.22-570/drivers/acpi/osl.c linux-2.6.22-591/drivers/acpi/osl.c
+--- linux-2.6.22-570/drivers/acpi/osl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/osl.c 2007-12-21 15:36:11.000000000 -0500
+@@ -77,13 +77,7 @@
+ #define OSI_STRING_LENGTH_MAX 64 /* arbitrary */
+ static char osi_additional_string[OSI_STRING_LENGTH_MAX];
+
+-#define OSI_LINUX_ENABLED
+-#ifdef OSI_LINUX_ENABLED
+-int osi_linux = 1; /* enable _OSI(Linux) by default */
+-#else
+ int osi_linux; /* disable _OSI(Linux) by default */
+-#endif
+-
+
+ #ifdef CONFIG_DMI
+ static struct __initdata dmi_system_id acpi_osl_dmi_table[];
+@@ -1056,6 +1050,17 @@
+
+ EXPORT_SYMBOL(max_cstate);
+
++void (*acpi_do_set_cstate_limit)(void);
++EXPORT_SYMBOL(acpi_do_set_cstate_limit);
++
++void acpi_set_cstate_limit(unsigned int new_limit)
++{
++ max_cstate = new_limit;
++ if (acpi_do_set_cstate_limit)
++ acpi_do_set_cstate_limit();
++}
++EXPORT_SYMBOL(acpi_set_cstate_limit);
++
+ /*
+ * Acquire a spinlock.
+ *
+@@ -1183,17 +1188,10 @@
+ if (!strcmp("Linux", interface)) {
+ printk(KERN_WARNING PREFIX
+ "System BIOS is requesting _OSI(Linux)\n");
+-#ifdef OSI_LINUX_ENABLED
+- printk(KERN_WARNING PREFIX
+- "Please test with \"acpi_osi=!Linux\"\n"
+- "Please send dmidecode "
+- "to linux-acpi@vger.kernel.org\n");
+-#else
+ printk(KERN_WARNING PREFIX
+ "If \"acpi_osi=Linux\" works better,\n"
+ "Please send dmidecode "
+ "to linux-acpi@vger.kernel.org\n");
+-#endif
+ if(osi_linux)
+ return AE_OK;
+ }
+@@ -1227,36 +1225,14 @@
+ }
+
+ #ifdef CONFIG_DMI
+-#ifdef OSI_LINUX_ENABLED
+-static int dmi_osi_not_linux(struct dmi_system_id *d)
+-{
+- printk(KERN_NOTICE "%s detected: requires not _OSI(Linux)\n", d->ident);
+- enable_osi_linux(0);
+- return 0;
+-}
+-#else
+ static int dmi_osi_linux(struct dmi_system_id *d)
+ {
+- printk(KERN_NOTICE "%s detected: requires _OSI(Linux)\n", d->ident);
++ printk(KERN_NOTICE "%s detected: enabling _OSI(Linux)\n", d->ident);
+ enable_osi_linux(1);
+ return 0;
+ }
+-#endif
+
+ static struct dmi_system_id acpi_osl_dmi_table[] __initdata = {
+-#ifdef OSI_LINUX_ENABLED
+- /*
+- * Boxes that need NOT _OSI(Linux)
+- */
+- {
+- .callback = dmi_osi_not_linux,
+- .ident = "Toshiba Satellite P100",
+- .matches = {
+- DMI_MATCH(DMI_BOARD_VENDOR, "TOSHIBA"),
+- DMI_MATCH(DMI_BOARD_NAME, "Satellite P100"),
+- },
+- },
+-#else
+ /*
+ * Boxes that need _OSI(Linux)
+ */
+@@ -1268,7 +1244,6 @@
+ DMI_MATCH(DMI_BOARD_NAME, "MPAD-MSAE Customer Reference Boards"),
+ },
+ },
+-#endif
+ {}
+ };
+ #endif /* CONFIG_DMI */
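
The acpi_do_set_cstate_limit indirection added above keeps osl.c (always built in) decoupled from the processor module: callers use acpi_set_cstate_limit(), and cpuidle re-detection only happens if the processor driver has installed its hook. A sketch of a caller, with the extern simply restating the definition above:

    extern void acpi_set_cstate_limit(unsigned int new_limit);

    static void clamp_idle_for_low_latency(void)
    {
            /* takes effect immediately; re-detection of cpuidle states
               happens only when the processor driver's hook is set */
            acpi_set_cstate_limit(1);
    }
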
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_core.c linux-2.6.22-591/drivers/acpi/processor_core.c
+--- linux-2.6.22-570/drivers/acpi/processor_core.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/acpi/processor_core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -44,6 +44,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/dmi.h>
+ #include <linux/moduleparam.h>
++#include <linux/cpuidle.h>
+
+ #include <asm/io.h>
+ #include <asm/system.h>
+@@ -66,6 +67,7 @@
+ #define ACPI_PROCESSOR_FILE_LIMIT "limit"
+ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
+ #define ACPI_PROCESSOR_NOTIFY_POWER 0x81
++#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
+
+ #define ACPI_PROCESSOR_LIMIT_USER 0
+ #define ACPI_PROCESSOR_LIMIT_THERMAL 1
+@@ -84,6 +86,8 @@
+ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data);
+ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
+ static int acpi_processor_handle_eject(struct acpi_processor *pr);
++extern int acpi_processor_tstate_has_changed(struct acpi_processor *pr);
++
+
+ static struct acpi_driver acpi_processor_driver = {
+ .name = "processor",
+@@ -701,6 +705,9 @@
+ acpi_processor_cst_has_changed(pr);
+ acpi_bus_generate_event(device, event, 0);
+ break;
++ case ACPI_PROCESSOR_NOTIFY_THROTTLING:
++ acpi_processor_tstate_has_changed(pr);
++ acpi_bus_generate_event(device, event, 0);
++ break;
+ default:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "Unsupported event [0x%x]\n", event));
+@@ -1024,11 +1031,15 @@
+
+ acpi_processor_ppc_init();
+
++ cpuidle_register_driver(&acpi_idle_driver);
++ acpi_do_set_cstate_limit = acpi_max_cstate_changed;
+ return 0;
+ }
+
+ static void __exit acpi_processor_exit(void)
+ {
++ acpi_do_set_cstate_limit = NULL;
++ cpuidle_unregister_driver(&acpi_idle_driver);
+
+ acpi_processor_ppc_exit();
+
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_idle.c linux-2.6.22-591/drivers/acpi/processor_idle.c
+--- linux-2.6.22-570/drivers/acpi/processor_idle.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/acpi/processor_idle.c 2007-12-21 15:36:11.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/sched.h> /* need_resched() */
+ #include <linux/latency.h>
+ #include <linux/clockchips.h>
++#include <linux/cpuidle.h>
+
+ /*
+ * Include the apic definitions for x86 to have the APIC timer related defines
+@@ -62,25 +63,34 @@
+ #define _COMPONENT ACPI_PROCESSOR_COMPONENT
+ ACPI_MODULE_NAME("processor_idle");
+ #define ACPI_PROCESSOR_FILE_POWER "power"
+-#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+-#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
+-#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
+-static void (*pm_idle_save) (void) __read_mostly;
+-module_param(max_cstate, uint, 0644);
++#define PM_TIMER_TICKS_TO_US(p) (((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
++#define C2_OVERHEAD 1 /* 1us */
++#define C3_OVERHEAD 1 /* 1us */
++
++void acpi_max_cstate_changed(void)
++{
++ /* Driver will reset devices' max cstate limit */
++ cpuidle_force_redetect_devices(&acpi_idle_driver);
++}
++
++static int change_max_cstate(const char *val, struct kernel_param *kp)
++{
++ int max;
++
++ max = simple_strtol(val, NULL, 0);
++ if (!max)
++ return -EINVAL;
++ max_cstate = max;
++ if (acpi_do_set_cstate_limit)
++ acpi_do_set_cstate_limit();
++ return 0;
++}
++
++module_param_call(max_cstate, change_max_cstate, param_get_uint, &max_cstate, 0644);
+
+ static unsigned int nocst __read_mostly;
+ module_param(nocst, uint, 0000);
+
+-/*
+- * bm_history -- bit-mask with a bit per jiffy of bus-master activity
+- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
+- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
+- * 100 HZ: 0x0000000F: 4 jiffies = 40ms
+- * reduce history for more aggressive entry into C3
+- */
+-static unsigned int bm_history __read_mostly =
+- (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+-module_param(bm_history, uint, 0644);
+ /* --------------------------------------------------------------------------
+ Power Management
+ -------------------------------------------------------------------------- */
+@@ -166,88 +176,6 @@
+ {},
+ };
+
+-static inline u32 ticks_elapsed(u32 t1, u32 t2)
+-{
+- if (t2 >= t1)
+- return (t2 - t1);
+- else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+- return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+- else
+- return ((0xFFFFFFFF - t1) + t2);
+-}
+-
+-static void
+-acpi_processor_power_activate(struct acpi_processor *pr,
+- struct acpi_processor_cx *new)
+-{
+- struct acpi_processor_cx *old;
+-
+- if (!pr || !new)
+- return;
+-
+- old = pr->power.state;
+-
+- if (old)
+- old->promotion.count = 0;
+- new->demotion.count = 0;
+-
+- /* Cleanup from old state. */
+- if (old) {
+- switch (old->type) {
+- case ACPI_STATE_C3:
+- /* Disable bus master reload */
+- if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
+- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+- break;
+- }
+- }
+-
+- /* Prepare to use new state. */
+- switch (new->type) {
+- case ACPI_STATE_C3:
+- /* Enable bus master reload */
+- if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
+- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+- break;
+- }
+-
+- pr->power.state = new;
+-
+- return;
+-}
+-
+-static void acpi_safe_halt(void)
+-{
+- current_thread_info()->status &= ~TS_POLLING;
+- /*
+- * TS_POLLING-cleared state must be visible before we
+- * test NEED_RESCHED:
+- */
+- smp_mb();
+- if (!need_resched())
+- safe_halt();
+- current_thread_info()->status |= TS_POLLING;
+-}
+-
+-static atomic_t c3_cpu_count;
+-
+-/* Common C-state entry for C2, C3, .. */
+-static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
+-{
+- if (cstate->space_id == ACPI_CSTATE_FFH) {
+- /* Call into architectural FFH based C-state */
+- acpi_processor_ffh_cstate_enter(cstate);
+- } else {
+- int unused;
+- /* IO port based C-state */
+- inb(cstate->address);
+- /* Dummy wait op - must do something useless after P_LVL2 read
+- because chipsets cannot guarantee that STPCLK# signal
+- gets asserted in time to freeze execution properly. */
+- unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+- }
+-}
+-
+ #ifdef ARCH_APICTIMER_STOPS_ON_C3
+
+ /*
+@@ -341,6 +269,7 @@
+ return 0;
+ }
+
+ static void acpi_processor_idle(void)
+ {
+ struct acpi_processor *pr = NULL;
+@@ -712,6 +641,8 @@
+ return 0;
+ }
+
+ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
+ {
+
+@@ -929,7 +860,7 @@
+ * Normalize the C2 latency to expedite policy
+ */
+ cx->valid = 1;
+- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++ cx->latency_ticks = cx->latency;
+
+ return;
+ }
+@@ -1003,7 +934,7 @@
+ * use this in our C3 policy
+ */
+ cx->valid = 1;
+- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++ cx->latency_ticks = cx->latency;
+
+ return;
+ }
+@@ -1069,18 +1000,6 @@
+ pr->power.count = acpi_processor_power_verify(pr);
+
+ /*
+- * Set Default Policy
+- * ------------------
+- * Now that we know which states are supported, set the default
+- * policy. Note that this policy can be changed dynamically
+- * (e.g. encourage deeper sleeps to conserve battery life when
+- * not on AC).
+- */
+- result = acpi_processor_set_power_policy(pr);
+- if (result)
+- return result;
+-
+- /*
+ * if one state of type C2 or C3 is available, mark this
+ * CPU as being "idle manageable"
+ */
+@@ -1097,9 +1016,6 @@
+
+ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+ {
+- int result = 0;
+-
+-
+ if (!pr)
+ return -EINVAL;
+
+@@ -1110,16 +1026,9 @@
+ if (!pr->flags.power_setup_done)
+ return -ENODEV;
+
+- /* Fall back to the default idle loop */
+- pm_idle = pm_idle_save;
+- synchronize_sched(); /* Relies on interrupts forcing exit from idle. */
+-
+- pr->flags.power = 0;
+- result = acpi_processor_get_power_info(pr);
+- if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
+- pm_idle = acpi_processor_idle;
+-
+- return result;
++ acpi_processor_get_power_info(pr);
++ return cpuidle_force_redetect(per_cpu(cpuidle_devices, pr->id),
++ &acpi_idle_driver);
+ }
+
+ /* proc interface */
+@@ -1205,30 +1114,6 @@
+ .release = single_release,
+ };
+
+-#ifdef CONFIG_SMP
+-static void smp_callback(void *v)
+-{
+- /* we already woke the CPU up, nothing more to do */
+-}
+-
+-/*
+- * This function gets called when a part of the kernel has a new latency
+- * requirement. This means we need to get all processors out of their C-state,
+- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+- * wakes them all right up.
+- */
+-static int acpi_processor_latency_notify(struct notifier_block *b,
+- unsigned long l, void *v)
+-{
+- smp_call_function(smp_callback, NULL, 0, 1);
+- return NOTIFY_OK;
+-}
+-
+-static struct notifier_block acpi_processor_latency_notifier = {
+- .notifier_call = acpi_processor_latency_notify,
+-};
+-#endif
+-
+ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
+ struct acpi_device *device)
+ {
+@@ -1245,9 +1130,6 @@
+ "ACPI: processor limited to max C-state %d\n",
+ max_cstate);
+ first_run++;
+-#ifdef CONFIG_SMP
+- register_latency_notifier(&acpi_processor_latency_notifier);
+-#endif
+ }
+
+ if (!pr)
+@@ -1264,6 +1146,7 @@
+
+ acpi_processor_get_power_info(pr);
+
++
+ /*
+ * Install the idle handler if processor power management is supported.
+ * Note that the previously set idle handler will be used on
+@@ -1276,11 +1159,6 @@
+ printk(" C%d[C%d]", i,
+ pr->power.states[i].type);
+ printk(")\n");
+-
+- if (pr->id == 0) {
+- pm_idle_save = pm_idle;
+- pm_idle = acpi_processor_idle;
+- }
+ }
+
+ /* 'power' [R] */
+@@ -1308,21 +1186,332 @@
+ if (acpi_device_dir(device))
+ remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
+ acpi_device_dir(device));
++ return 0;
++}
++
++/**
++ * ticks_elapsed - a helper function that determines how many ticks (in US)
++ * have elapsed between two PM Timer timestamps
++ * @t1: the start time
++ * @t2: the end time
++ */
++static inline u32 ticks_elapsed(u32 t1, u32 t2)
++{
++ if (t2 >= t1)
++ return PM_TIMER_TICKS_TO_US(t2 - t1);
++ else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
++ return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
++ else
++ return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
++}
+
+- /* Unregister the idle handler when processor #0 is removed. */
+- if (pr->id == 0) {
+- pm_idle = pm_idle_save;
++/**
++ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
++ * @pr: the processor
++ * @target: the new target state
++ */
++static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
++ struct acpi_processor_cx *target)
++{
++ if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
++ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
++ pr->flags.bm_rld_set = 0;
++ }
+
++ if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
++ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
++ pr->flags.bm_rld_set = 1;
++ }
++}
++
++/**
++ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
++ * @cx: cstate data
++ */
++static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
++{
++ if (cx->space_id == ACPI_CSTATE_FFH) {
++ /* Call into architectural FFH based C-state */
++ acpi_processor_ffh_cstate_enter(cx);
++ } else {
++ int unused;
++ /* IO port based C-state */
++ inb(cx->address);
++ /* Dummy wait op - must do something useless after P_LVL2 read
++ because chipsets cannot guarantee that STPCLK# signal
++ gets asserted in time to freeze execution properly. */
++ unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
++ }
++}
++
++/**
++ * acpi_idle_enter_c1 - enters an ACPI C1 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ *
++ * This is equivalent to the HALT instruction.
++ */
++static int acpi_idle_enter_c1(struct cpuidle_device *dev,
++ struct cpuidle_state *state)
++{
++ struct acpi_processor *pr;
++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++ pr = processors[smp_processor_id()];
++
++ if (unlikely(!pr))
++ return 0;
++
++ if (pr->flags.bm_check)
++ acpi_idle_update_bm_rld(pr, cx);
++
++ current_thread_info()->status &= ~TS_POLLING;
+ /*
+- * We are about to unload the current idle thread pm callback
+- * (pm_idle), Wait for all processors to update cached/local
+- * copies of pm_idle before proceeding.
+- */
+- cpu_idle_wait();
+-#ifdef CONFIG_SMP
+- unregister_latency_notifier(&acpi_processor_latency_notifier);
++ * TS_POLLING-cleared state must be visible before we test
++ * NEED_RESCHED:
++ */
++ smp_mb();
++ if (!need_resched())
++ safe_halt();
++ current_thread_info()->status |= TS_POLLING;
++
++ cx->usage++;
++
++ return 0;
++}
++
++/**
++ * acpi_idle_enter_c2 - enters an ACPI C2 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ */
++static int acpi_idle_enter_c2(struct cpuidle_device *dev,
++ struct cpuidle_state *state)
++{
++ struct acpi_processor *pr;
++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++ u32 t1, t2;
++ pr = processors[smp_processor_id()];
++
++ if (unlikely(!pr))
++ return 0;
++
++ if (pr->flags.bm_check)
++ acpi_idle_update_bm_rld(pr, cx);
++
++ local_irq_disable();
++ current_thread_info()->status &= ~TS_POLLING;
++ /*
++ * TS_POLLING-cleared state must be visible before we test
++ * NEED_RESCHED:
++ */
++ smp_mb();
++
++ if (unlikely(need_resched())) {
++ current_thread_info()->status |= TS_POLLING;
++ local_irq_enable();
++ return 0;
++ }
++
++ t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++ acpi_state_timer_broadcast(pr, cx, 1);
++ acpi_idle_do_entry(cx);
++ t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++
++#ifdef CONFIG_GENERIC_TIME
++ /* TSC halts in C2, so notify users */
++ mark_tsc_unstable("possible TSC halt in C2");
+ #endif
++
++ local_irq_enable();
++ current_thread_info()->status |= TS_POLLING;
++
++ cx->usage++;
++
++ acpi_state_timer_broadcast(pr, cx, 0);
++ return ticks_elapsed(t1, t2);
++}
++
++static int c3_cpu_count;
++static DEFINE_SPINLOCK(c3_lock);
++
++/**
++ * acpi_idle_enter_c3 - enters an ACPI C3 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ *
++ * Similar to C2 entry, except special bus master handling is needed.
++ */
++static int acpi_idle_enter_c3(struct cpuidle_device *dev,
++ struct cpuidle_state *state)
++{
++ struct acpi_processor *pr;
++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++ u32 t1, t2;
++ pr = processors[smp_processor_id()];
++
++ if (unlikely(!pr))
++ return 0;
++
++ if (pr->flags.bm_check)
++ acpi_idle_update_bm_rld(pr, cx);
++
++ local_irq_disable();
++ current_thread_info()->status &= ~TS_POLLING;
++ /*
++ * TS_POLLING-cleared state must be visible before we test
++ * NEED_RESCHED:
++ */
++ smp_mb();
++
++ if (unlikely(need_resched())) {
++ current_thread_info()->status |= TS_POLLING;
++ local_irq_enable();
++ return 0;
++ }
++
++ /* disable bus master */
++ if (pr->flags.bm_check) {
++ spin_lock(&c3_lock);
++ c3_cpu_count++;
++ if (c3_cpu_count == num_online_cpus()) {
++ /*
++ * All CPUs are trying to go to C3
++ * Disable bus master arbitration
++ */
++ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
++ }
++ spin_unlock(&c3_lock);
++ } else {
++ /* SMP with no shared cache... Invalidate cache */
++ ACPI_FLUSH_CPU_CACHE();
++ }
++
++ /* Get start time (ticks) */
++ t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++ acpi_state_timer_broadcast(pr, cx, 1);
++ acpi_idle_do_entry(cx);
++ t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++
++ if (pr->flags.bm_check) {
++ spin_lock(&c3_lock);
++ /* Enable bus master arbitration */
++ if (c3_cpu_count == num_online_cpus())
++ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
++ c3_cpu_count--;
++ spin_unlock(&c3_lock);
+ }
+
++#ifdef CONFIG_GENERIC_TIME
++ /* TSC halts in C3, so notify users */
++ mark_tsc_unstable("TSC halts in C3");
++#endif
++
++ local_irq_enable();
++ current_thread_info()->status |= TS_POLLING;
++
++ cx->usage++;
++
++ acpi_state_timer_broadcast(pr, cx, 0);
++ return ticks_elapsed(t1, t2);
++}
++
++/**
++ * acpi_idle_bm_check - checks if bus master activity was detected
++ */
++static int acpi_idle_bm_check(void)
++{
++ u32 bm_status = 0;
++
++ acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
++ if (bm_status)
++ acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
++ /*
++ * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
++ * the true state of bus mastering activity; forcing us to
++ * manually check the BMIDEA bit of each IDE channel.
++ */
++ else if (errata.piix4.bmisx) {
++ if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
++ || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
++ bm_status = 1;
++ }
++ return bm_status;
++}
++
++/**
++ * acpi_idle_init - attaches the driver to a CPU
++ * @dev: the CPU
++ */
++static int acpi_idle_init(struct cpuidle_device *dev)
++{
++ int cpu = dev->cpu;
++ int i, count = 0;
++ struct acpi_processor_cx *cx;
++ struct cpuidle_state *state;
++
++ struct acpi_processor *pr = processors[cpu];
++
++ if (!pr->flags.power_setup_done)
++ return -EINVAL;
++
++ if (pr->flags.power == 0) {
++ return -EINVAL;
++ }
++
++ for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
++ cx = &pr->power.states[i];
++ state = &dev->states[count];
++
++ if (!cx->valid)
++ continue;
++
++#ifdef CONFIG_HOTPLUG_CPU
++ if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
++ !pr->flags.has_cst &&
++ !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
++ continue;
++#endif
++ cpuidle_set_statedata(state, cx);
++
++ state->exit_latency = cx->latency;
++ state->target_residency = cx->latency * 6;
++ state->power_usage = cx->power;
++
++ state->flags = 0;
++ switch (cx->type) {
++ case ACPI_STATE_C1:
++ state->flags |= CPUIDLE_FLAG_SHALLOW;
++ state->enter = acpi_idle_enter_c1;
++ break;
++
++ case ACPI_STATE_C2:
++ state->flags |= CPUIDLE_FLAG_BALANCED;
++ state->flags |= CPUIDLE_FLAG_TIME_VALID;
++ state->enter = acpi_idle_enter_c2;
++ break;
++
++ case ACPI_STATE_C3:
++ state->flags |= CPUIDLE_FLAG_DEEP;
++ state->flags |= CPUIDLE_FLAG_TIME_VALID;
++ state->flags |= CPUIDLE_FLAG_CHECK_BM;
++ state->enter = acpi_idle_enter_c3;
++ break;
++ }
++
++ count++;
++ }
++
++ if (!count)
++ return -EINVAL;
++
++ dev->state_count = count;
+ return 0;
+ }
++
++struct cpuidle_driver acpi_idle_driver = {
++ .name = "acpi_idle",
++ .init = acpi_idle_init,
++ .redetect = acpi_idle_init,
++ .bm_check = acpi_idle_bm_check,
++ .owner = THIS_MODULE,
++};
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_throttling.c linux-2.6.22-591/drivers/acpi/processor_throttling.c
+--- linux-2.6.22-570/drivers/acpi/processor_throttling.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/processor_throttling.c 2007-12-21 15:36:11.000000000 -0500
+@@ -44,17 +44,231 @@
+ #define _COMPONENT ACPI_PROCESSOR_COMPONENT
+ ACPI_MODULE_NAME("processor_throttling");
+
++static int acpi_processor_get_throttling(struct acpi_processor *pr);
++int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
++
++static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
++{
++ acpi_status status = 0;
++ unsigned long tpc = 0;
++
++ if (!pr)
++ return -EINVAL;
++ status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc);
++ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC"));
++ return -ENODEV;
++ }
++ pr->throttling_platform_limit = (int)tpc;
++ return 0;
++}
++
++int acpi_processor_tstate_has_changed(struct acpi_processor *pr)
++{
++ return acpi_processor_get_platform_limit(pr);
++}
++
++/* --------------------------------------------------------------------------
++ _PTC, _TSS, _TSD support
++ -------------------------------------------------------------------------- */
++static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
++{
++ int result = 0;
++ acpi_status status = 0;
++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++ union acpi_object *ptc = NULL;
++ union acpi_object obj = { 0 };
++
++ status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer);
++ if (ACPI_FAILURE(status)) {
++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC"));
++ return -ENODEV;
++ }
++
++ ptc = (union acpi_object *)buffer.pointer;
++ if (!ptc || (ptc->type != ACPI_TYPE_PACKAGE)
++ || (ptc->package.count != 2)) {
++ printk(KERN_ERR PREFIX "Invalid _PTC data\n");
++ result = -EFAULT;
++ goto end;
++ }
++
++ /*
++ * control_register
++ */
++
++ obj = ptc->package.elements[0];
++
++ if ((obj.type != ACPI_TYPE_BUFFER)
++ || (obj.buffer.length < sizeof(struct acpi_ptc_register))
++ || (obj.buffer.pointer == NULL)) {
++ printk(KERN_ERR PREFIX
++ "Invalid _PTC data (control_register)\n");
++ result = -EFAULT;
++ goto end;
++ }
++ memcpy(&pr->throttling.control_register, obj.buffer.pointer,
++ sizeof(struct acpi_ptc_register));
++
++ /*
++ * status_register
++ */
++
++ obj = ptc->package.elements[1];
++
++ if ((obj.type != ACPI_TYPE_BUFFER)
++ || (obj.buffer.length < sizeof(struct acpi_ptc_register))
++ || (obj.buffer.pointer == NULL)) {
++ printk(KERN_ERR PREFIX "Invalid _PTC data (status_register)\n");
++ result = -EFAULT;
++ goto end;
++ }
++
++ memcpy(&pr->throttling.status_register, obj.buffer.pointer,
++ sizeof(struct acpi_ptc_register));
++
++ end:
++ kfree(buffer.pointer);
++
++ return result;
++}
++static int acpi_processor_get_throttling_states(struct acpi_processor *pr)
++{
++ int result = 0;
++ acpi_status status = AE_OK;
++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++ struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
++ struct acpi_buffer state = { 0, NULL };
++ union acpi_object *tss = NULL;
++ int i;
++
++ status = acpi_evaluate_object(pr->handle, "_TSS", NULL, &buffer);
++ if (ACPI_FAILURE(status)) {
++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS"));
++ return -ENODEV;
++ }
++
++ tss = buffer.pointer;
++ if (!tss || (tss->type != ACPI_TYPE_PACKAGE)) {
++ printk(KERN_ERR PREFIX "Invalid _TSS data\n");
++ result = -EFAULT;
++ goto end;
++ }
++
++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d throttling states\n",
++ tss->package.count));
++
++ pr->throttling.state_count = tss->package.count;
++ pr->throttling.states_tss =
++ kmalloc(sizeof(struct acpi_processor_tx_tss) * tss->package.count,
++ GFP_KERNEL);
++ if (!pr->throttling.states_tss) {
++ result = -ENOMEM;
++ goto end;
++ }
++
++ for (i = 0; i < pr->throttling.state_count; i++) {
++
++ struct acpi_processor_tx_tss *tx =
++ (struct acpi_processor_tx_tss *)&(pr->throttling.
++ states_tss[i]);
++
++ state.length = sizeof(struct acpi_processor_tx_tss);
++ state.pointer = tx;
++
++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Extracting state %d\n", i));
++
++ status = acpi_extract_package(&(tss->package.elements[i]),
++ &format, &state);
++ if (ACPI_FAILURE(status)) {
++ ACPI_EXCEPTION((AE_INFO, status, "Invalid _TSS data"));
++ result = -EFAULT;
++ kfree(pr->throttling.states_tss);
++ goto end;
++ }
++
++ if (!tx->freqpercentage) {
++ printk(KERN_ERR PREFIX
++ "Invalid _TSS data: freq is zero\n");
++ result = -EFAULT;
++ kfree(pr->throttling.states_tss);
++ goto end;
++ }
++ }
++
++ end:
++ kfree(buffer.pointer);
++
++ return result;
++}
++static int acpi_processor_get_tsd(struct acpi_processor *pr)
++{
++ int result = 0;
++ acpi_status status = AE_OK;
++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++ struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
++ struct acpi_buffer state = { 0, NULL };
++ union acpi_object *tsd = NULL;
++ struct acpi_tsd_package *pdomain;
++
++ status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer);
++ if (ACPI_FAILURE(status)) {
++ return -ENODEV;
++ }
++
++ tsd = buffer.pointer;
++ if (!tsd || (tsd->type != ACPI_TYPE_PACKAGE)) {
++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++ result = -EFAULT;
++ goto end;
++ }
++
++ if (tsd->package.count != 1) {
++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++ result = -EFAULT;
++ goto end;
++ }
++
++ pdomain = &(pr->throttling.domain_info);
++
++ state.length = sizeof(struct acpi_tsd_package);
++ state.pointer = pdomain;
++
++ status = acpi_extract_package(&(tsd->package.elements[0]),
++ &format, &state);
++ if (ACPI_FAILURE(status)) {
++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++ result = -EFAULT;
++ goto end;
++ }
++
++ if (pdomain->num_entries != ACPI_TSD_REV0_ENTRIES) {
++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:num_entries\n"));
++ result = -EFAULT;
++ goto end;
++ }
++
++ if (pdomain->revision != ACPI_TSD_REV0_REVISION) {
++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:revision\n"));
++ result = -EFAULT;
++ goto end;
++ }
++
++ end:
++ kfree(buffer.pointer);
++ return result;
++}
++
+ /* --------------------------------------------------------------------------
+ Throttling Control
+ -------------------------------------------------------------------------- */
+-static int acpi_processor_get_throttling(struct acpi_processor *pr)
++static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr)
+ {
+ int state = 0;
+ u32 value = 0;
+ u32 duty_mask = 0;
+ u32 duty_value = 0;
+
+-
+ if (!pr)
+ return -EINVAL;
+
+@@ -94,13 +308,114 @@
+ return 0;
+ }
+
+-int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
++static int acpi_read_throttling_status(struct acpi_processor_throttling
++ *throttling)
++{
++ int value = -1;
++ switch (throttling->status_register.space_id) {
++ case ACPI_ADR_SPACE_SYSTEM_IO:
++ acpi_os_read_port((acpi_io_address) throttling->status_register.
++ address, &value,
++ (u32) throttling->status_register.bit_width);
++ break;
++ case ACPI_ADR_SPACE_FIXED_HARDWARE:
++ printk(KERN_ERR PREFIX
++ "HARDWARE addr space,NOT supported yet\n");
++ break;
++ default:
++ printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++ (u32) (throttling->status_register.space_id));
++ }
++ return value;
++}
++
++static int acpi_write_throttling_state(struct acpi_processor_throttling
++ *throttling, int value)
++{
++ int ret = -1;
++
++ switch (throttling->control_register.space_id) {
++ case ACPI_ADR_SPACE_SYSTEM_IO:
++ acpi_os_write_port((acpi_io_address) throttling->
++ control_register.address, value,
++ (u32) throttling->control_register.bit_width);
++ ret = 0;
++ break;
++ case ACPI_ADR_SPACE_FIXED_HARDWARE:
++ printk(KERN_ERR PREFIX
++ "HARDWARE addr space,NOT supported yet\n");
++ break;
++ default:
++ printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++ (u32) (throttling->control_register.space_id));
++ }
++ return ret;
++}
++
++static int acpi_get_throttling_state(struct acpi_processor *pr, int value)
++{
++ int i;
++
++ for (i = 0; i < pr->throttling.state_count; i++) {
++ struct acpi_processor_tx_tss *tx =
++ (struct acpi_processor_tx_tss *)&(pr->throttling.
++ states_tss[i]);
++ if (tx->control == value)
++ break;
++ }
++ if (i == pr->throttling.state_count)
++ i = -1;
++ return i;
++}
++
++static int acpi_get_throttling_value(struct acpi_processor *pr, int state)
++{
++ int value = -1;
++ if (state >= 0 && state < pr->throttling.state_count) {
++ struct acpi_processor_tx_tss *tx =
++ (struct acpi_processor_tx_tss *)&(pr->throttling.
++ states_tss[state]);
++ value = tx->control;
++ }
++ return value;
++}
++
++static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
++{
++ int state = 0;
++ int value = 0;
++
++ if (!pr)
++ return -EINVAL;
++
++ if (!pr->flags.throttling)
++ return -ENODEV;
++
++ pr->throttling.state = 0;
++ local_irq_disable();
++ value = acpi_read_throttling_status(&pr->throttling);
++ if (value >= 0) {
++ state = acpi_get_throttling_state(pr, value);
++ pr->throttling.state = state;
++ }
++ local_irq_enable();
++
++ return 0;
++}
++
++static int acpi_processor_get_throttling(struct acpi_processor *pr)
++{
++ return pr->throttling.acpi_processor_get_throttling(pr);
++}
++
++int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, int state)
+ {
+ u32 value = 0;
+ u32 duty_mask = 0;
+ u32 duty_value = 0;
+
+-
+ if (!pr)
+ return -EINVAL;
+
+@@ -113,6 +428,8 @@
+ if (state == pr->throttling.state)
+ return 0;
+
++ if (state < pr->throttling_platform_limit)
++ return -EPERM;
+ /*
+ * Calculate the duty_value and duty_mask.
+ */
+@@ -165,12 +482,50 @@
+ return 0;
+ }
+
++int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state)
++{
++ int value = 0;
++
++ if (!pr)
++ return -EINVAL;
++
++ if ((state < 0) || (state > (pr->throttling.state_count - 1)))
++ return -EINVAL;
++
++ if (!pr->flags.throttling)
++ return -ENODEV;
++
++ if (state == pr->throttling.state)
++ return 0;
++
++ if (state < pr->throttling_platform_limit)
++ return -EPERM;
++
++ local_irq_disable();
++
++ value = acpi_get_throttling_value(pr, state);
++ if (value >= 0) {
++ acpi_write_throttling_state(&pr->throttling, value);
++ pr->throttling.state = state;
++ }
++ local_irq_enable();
++
++ return 0;
++}
++
++int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
++{
++ return pr->throttling.acpi_processor_set_throttling(pr, state);
++}
++
+ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
+ {
+ int result = 0;
+ int step = 0;
+ int i = 0;
+-
++ int no_ptc = 0;
++ int no_tss = 0;
++ int no_tsd = 0;
+
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n",
+@@ -182,6 +537,21 @@
+ return -EINVAL;
+
+ /* TBD: Support ACPI 2.0 objects */
++ no_ptc = acpi_processor_get_throttling_control(pr);
++ no_tss = acpi_processor_get_throttling_states(pr);
++ no_tsd = acpi_processor_get_tsd(pr);
++
++ if (no_ptc || no_tss) {
++ pr->throttling.acpi_processor_get_throttling =
++ &acpi_processor_get_throttling_fadt;
++ pr->throttling.acpi_processor_set_throttling =
++ &acpi_processor_set_throttling_fadt;
++ } else {
++ pr->throttling.acpi_processor_get_throttling =
++ &acpi_processor_get_throttling_ptc;
++ pr->throttling.acpi_processor_set_throttling =
++ &acpi_processor_set_throttling_ptc;
++ }
+
+ if (!pr->throttling.address) {
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No throttling register\n"));
+@@ -262,7 +632,6 @@
+ int i = 0;
+ int result = 0;
+
+-
+ if (!pr)
+ goto end;
+
+@@ -280,15 +649,27 @@
+ }
+
+ seq_printf(seq, "state count: %d\n"
+- "active state: T%d\n",
+- pr->throttling.state_count, pr->throttling.state);
++ "active state: T%d\n"
++ "state available: T%d to T%d\n",
++ pr->throttling.state_count, pr->throttling.state,
++ pr->throttling_platform_limit,
++ pr->throttling.state_count - 1);
+
+ seq_puts(seq, "states:\n");
++ if (pr->throttling.acpi_processor_get_throttling ==
++ acpi_processor_get_throttling_fadt) {
+ for (i = 0; i < pr->throttling.state_count; i++)
+ seq_printf(seq, " %cT%d: %02d%%\n",
+ (i == pr->throttling.state ? '*' : ' '), i,
+ (pr->throttling.states[i].performance ? pr->
+ throttling.states[i].performance / 10 : 0));
++ } else {
++ for (i = 0; i < pr->throttling.state_count; i++)
++ seq_printf(seq, " %cT%d: %02d%%\n",
++ (i == pr->throttling.state ? '*' : ' '), i,
++ (int)pr->throttling.states_tss[i].
++ freqpercentage);
++ }
+
+ end:
+ return 0;
+@@ -301,7 +682,7 @@
+ PDE(inode)->data);
+ }
+
+-static ssize_t acpi_processor_write_throttling(struct file * file,
++static ssize_t acpi_processor_write_throttling(struct file *file,
+ const char __user * buffer,
+ size_t count, loff_t * data)
+ {
+@@ -310,7 +691,6 @@
+ struct acpi_processor *pr = m->private;
+ char state_string[12] = { '\0' };
+
+-
+ if (!pr || (count > sizeof(state_string) - 1))
+ return -EINVAL;
+
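
Taken together, the probing in acpi_processor_get_throttling_info() implements a small strategy pattern: evaluate _PTC/_TSS/_TSD once, then route every get/set through the per-processor function pointers. A standalone rendering of that shape (all names below are illustrative, not the kernel's):

    #include <stdio.h>

    struct throttling_ops {
            int (*get_state)(void);
            int (*set_state)(int state);
    };

    static int get_fadt(void) { return 0; }            /* legacy duty-cycle I/O */
    static int set_fadt(int s) { (void)s; return 0; }
    static int get_ptc(void) { return 0; }             /* _PTC status register */
    static int set_ptc(int s) { (void)s; return 0; }

    int main(void)
    {
            struct throttling_ops ops;
            int no_ptc = 0, no_tss = 0;                /* probe results */

            if (no_ptc || no_tss) {                    /* fall back to FADT */
                    ops.get_state = get_fadt;
                    ops.set_state = set_fadt;
            } else {
                    ops.get_state = get_ptc;
                    ops.set_state = set_ptc;
            }
            printf("T%d\n", ops.get_state());
            return ops.set_state(0);
    }
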
+diff -Nurb linux-2.6.22-570/drivers/acpi/sbs.c linux-2.6.22-591/drivers/acpi/sbs.c
+--- linux-2.6.22-570/drivers/acpi/sbs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/sbs.c 2007-12-21 15:36:11.000000000 -0500
+@@ -127,7 +127,7 @@
+ static struct acpi_driver acpi_sbs_driver = {
+ .name = "sbs",
+ .class = ACPI_SBS_CLASS,
+- .ids = ACPI_SBS_HID,
++ .ids = "ACPI0001,ACPI0005",
+ .ops = {
+ .add = acpi_sbs_add,
+ .remove = acpi_sbs_remove,
+@@ -176,10 +176,8 @@
+ };
+
+ struct acpi_sbs {
+- acpi_handle handle;
+ int base;
+ struct acpi_device *device;
+- struct acpi_ec_smbus *smbus;
+ struct mutex mutex;
+ int sbsm_present;
+ int sbsm_batteries_supported;
+@@ -511,7 +509,7 @@
+ "acpi_sbs_read_word() failed"));
+ goto end;
+ }
+-
++ sbs->sbsm_present = 1;
+ sbs->sbsm_batteries_supported = battery_system_info & 0x000f;
+
+ end:
+@@ -1630,13 +1628,12 @@
+ {
+ struct acpi_sbs *sbs = NULL;
+ int result = 0, remove_result = 0;
+- unsigned long sbs_obj;
+ int id;
+ acpi_status status = AE_OK;
+ unsigned long val;
+
+ status =
+- acpi_evaluate_integer(device->parent->handle, "_EC", NULL, &val);
++ acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
+ if (ACPI_FAILURE(status)) {
+ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC"));
+ return -EIO;
+@@ -1653,7 +1650,7 @@
+
+ sbs_mutex_lock(sbs);
+
+- sbs->base = (val & 0xff00ull) >> 8;
++ sbs->base = 0xff & (val >> 8);
+ sbs->device = device;
+
+ strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME);
+@@ -1665,24 +1662,10 @@
+ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed"));
+ goto end;
+ }
+- status = acpi_evaluate_integer(device->handle, "_SBS", NULL, &sbs_obj);
+- if (status) {
+- ACPI_EXCEPTION((AE_INFO, status,
+- "acpi_evaluate_integer() failed"));
+- result = -EIO;
+- goto end;
+- }
+- if (sbs_obj > 0) {
+- result = acpi_sbsm_get_info(sbs);
+- if (result) {
+- ACPI_EXCEPTION((AE_INFO, AE_ERROR,
+- "acpi_sbsm_get_info() failed"));
+- goto end;
+- }
+- sbs->sbsm_present = 1;
+- }
+
+- if (sbs->sbsm_present == 0) {
++ acpi_sbsm_get_info(sbs);
++
++ if (!sbs->sbsm_present) {
+ result = acpi_battery_add(sbs, 0);
+ if (result) {
+ ACPI_EXCEPTION((AE_INFO, AE_ERROR,
+@@ -1702,8 +1685,6 @@
+ }
+ }
+
+- sbs->handle = device->handle;
+-
+ init_timer(&sbs->update_timer);
+ result = acpi_check_update_proc(sbs);
+ if (result)
+diff -Nurb linux-2.6.22-570/drivers/acpi/system.c linux-2.6.22-591/drivers/acpi/system.c
+--- linux-2.6.22-570/drivers/acpi/system.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/system.c 2007-12-21 15:36:11.000000000 -0500
+@@ -39,15 +39,12 @@
+
+ #define ACPI_SYSTEM_CLASS "system"
+ #define ACPI_SYSTEM_DEVICE_NAME "System"
+-#define ACPI_SYSTEM_FILE_INFO "info"
+-#define ACPI_SYSTEM_FILE_EVENT "event"
+-#define ACPI_SYSTEM_FILE_DSDT "dsdt"
+-#define ACPI_SYSTEM_FILE_FADT "fadt"
+
+ /*
+ * Make ACPICA version work as module param
+ */
+-static int param_get_acpica_version(char *buffer, struct kernel_param *kp) {
++static int param_get_acpica_version(char *buffer, struct kernel_param *kp)
++{
+ int result;
+
+ result = sprintf(buffer, "%x", ACPI_CA_VERSION);
+@@ -58,9 +55,126 @@
+ module_param_call(acpica_version, NULL, param_get_acpica_version, NULL, 0444);
+
+ /* --------------------------------------------------------------------------
++ FS Interface (/sys)
++ -------------------------------------------------------------------------- */
++static LIST_HEAD(acpi_table_attr_list);
++static struct kobject tables_kobj;
++
++struct acpi_table_attr {
++ struct bin_attribute attr;
++ char name[8];
++ int instance;
++ struct list_head node;
++};
++
++static ssize_t acpi_table_show(struct kobject *kobj,
++ struct bin_attribute *bin_attr, char *buf,
++ loff_t offset, size_t count)
++{
++ struct acpi_table_attr *table_attr =
++ container_of(bin_attr, struct acpi_table_attr, attr);
++ struct acpi_table_header *table_header = NULL;
++ acpi_status status;
++ ssize_t ret_count = count;
++
++ status =
++ acpi_get_table(table_attr->name, table_attr->instance,
++ &table_header);
++ if (ACPI_FAILURE(status))
++ return -ENODEV;
++
++ if (offset >= table_header->length) {
++ ret_count = 0;
++ goto end;
++ }
++
++ if (offset + ret_count > table_header->length)
++ ret_count = table_header->length - offset;
++
++ memcpy(buf, ((char *)table_header) + offset, ret_count);
++
++ end:
++ return ret_count;
++}
++
++static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
++ struct acpi_table_header *table_header)
++{
++ struct acpi_table_header *header = NULL;
++ struct acpi_table_attr *attr = NULL;
++
++ memcpy(table_attr->name, table_header->signature, ACPI_NAME_SIZE);
++
++ list_for_each_entry(attr, &acpi_table_attr_list, node) {
++ if (!memcmp(table_header->signature, attr->name,
++ ACPI_NAME_SIZE))
++ if (table_attr->instance < attr->instance)
++ table_attr->instance = attr->instance;
++ }
++ table_attr->instance++;
++
++ if (table_attr->instance > 1 || (table_attr->instance == 1 &&
++ !acpi_get_table(table_header->
++ signature, 2,
++ &header)))
++ sprintf(table_attr->name + 4, "%d", table_attr->instance);
++
++ table_attr->attr.size = 0;
++ table_attr->attr.read = acpi_table_show;
++ table_attr->attr.attr.name = table_attr->name;
++ table_attr->attr.attr.mode = 0444;
++ table_attr->attr.attr.owner = THIS_MODULE;
++
++ return;
++}
++
++static int acpi_system_sysfs_init(void)
++{
++ struct acpi_table_attr *table_attr;
++ struct acpi_table_header *table_header = NULL;
++ int table_index = 0;
++ int result;
++
++ tables_kobj.parent = &acpi_subsys.kobj;
++ kobject_set_name(&tables_kobj, "tables");
++ result = kobject_register(&tables_kobj);
++ if (result)
++ return result;
++
++ do {
++ result = acpi_get_table_by_index(table_index, &table_header);
++ if (!result) {
++ table_index++;
++ table_attr = NULL;
++ table_attr =
++ kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL);
++ if (!table_attr)
++ return -ENOMEM;
++
++ acpi_table_attr_init(table_attr, table_header);
++ result =
++ sysfs_create_bin_file(&tables_kobj,
++ &table_attr->attr);
++ if (result) {
++ kfree(table_attr);
++ return result;
++ } else
++ list_add_tail(&table_attr->node,
++ &acpi_table_attr_list);
++ }
++ } while (!result);
++
++ return 0;
++}
++
++/* --------------------------------------------------------------------------
+ FS Interface (/proc)
+ -------------------------------------------------------------------------- */
+ #ifdef CONFIG_ACPI_PROCFS
++#define ACPI_SYSTEM_FILE_INFO "info"
++#define ACPI_SYSTEM_FILE_EVENT "event"
++#define ACPI_SYSTEM_FILE_DSDT "dsdt"
++#define ACPI_SYSTEM_FILE_FADT "fadt"
+
+ static int acpi_system_read_info(struct seq_file *seq, void *offset)
+ {
+@@ -80,7 +194,6 @@
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+-#endif
+
+ static ssize_t acpi_system_read_dsdt(struct file *, char __user *, size_t,
+ loff_t *);
+@@ -97,13 +210,11 @@
+ struct acpi_table_header *dsdt = NULL;
+ ssize_t res;
+
+-
+ status = acpi_get_table(ACPI_SIG_DSDT, 1, &dsdt);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+- res = simple_read_from_buffer(buffer, count, ppos,
+- dsdt, dsdt->length);
++ res = simple_read_from_buffer(buffer, count, ppos, dsdt, dsdt->length);
+
+ return res;
+ }
+@@ -123,28 +234,21 @@
+ struct acpi_table_header *fadt = NULL;
+ ssize_t res;
+
+-
+ status = acpi_get_table(ACPI_SIG_FADT, 1, &fadt);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+- res = simple_read_from_buffer(buffer, count, ppos,
+- fadt, fadt->length);
++ res = simple_read_from_buffer(buffer, count, ppos, fadt, fadt->length);
+
+ return res;
+ }
+
+-static int __init acpi_system_init(void)
++static int acpi_system_procfs_init(void)
+ {
+ struct proc_dir_entry *entry;
+ int error = 0;
+ char *name;
+
+-
+- if (acpi_disabled)
+- return 0;
+-
+-#ifdef CONFIG_ACPI_PROCFS
+ /* 'info' [R] */
+ name = ACPI_SYSTEM_FILE_INFO;
+ entry = create_proc_entry(name, S_IRUGO, acpi_root_dir);
+@@ -153,7 +257,6 @@
+ else {
+ entry->proc_fops = &acpi_system_info_ops;
+ }
+-#endif
+
+ /* 'dsdt' [R] */
+ name = ACPI_SYSTEM_FILE_DSDT;
+@@ -177,12 +280,32 @@
+ Error:
+ remove_proc_entry(ACPI_SYSTEM_FILE_FADT, acpi_root_dir);
+ remove_proc_entry(ACPI_SYSTEM_FILE_DSDT, acpi_root_dir);
+-#ifdef CONFIG_ACPI_PROCFS
+ remove_proc_entry(ACPI_SYSTEM_FILE_INFO, acpi_root_dir);
+-#endif
+
+ error = -EFAULT;
+ goto Done;
+ }
++#else
++static int acpi_system_procfs_init(void)
++{
++ return 0;
++}
++#endif
++
++static int __init acpi_system_init(void)
++{
++ int result = 0;
++
++ if (acpi_disabled)
++ return 0;
++
++ result = acpi_system_procfs_init();
++ if (result)
++ return result;
++
++ result = acpi_system_sysfs_init();
++
++ return result;
++}
+
+ subsys_initcall(acpi_system_init);
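
The tables_kobj registered above hangs off acpi_subsys, so the raw tables should surface to userspace as binary files; the /sys/firmware/acpi/tables path below is an assumption based on where the acpi subsystem normally lives. A minimal reader:

    #include <stdio.h>

    int main(int argc, char **argv)
    {
            const char *sig = argc > 1 ? argv[1] : "DSDT";
            char path[64], buf[4096];
            size_t n, total = 0;
            FILE *f;

            snprintf(path, sizeof(path), "/sys/firmware/acpi/tables/%s", sig);
            f = fopen(path, "rb");
            if (!f) {
                    perror(path);
                    return 1;
            }
            while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
                    total += n;          /* a real tool would write it out */
            fclose(f);
            printf("%s: %zu bytes\n", sig, total);
            return 0;
    }
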
+diff -Nurb linux-2.6.22-570/drivers/acpi/thermal.c linux-2.6.22-591/drivers/acpi/thermal.c
+--- linux-2.6.22-570/drivers/acpi/thermal.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/thermal.c 2007-12-21 15:36:11.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/jiffies.h>
+ #include <linux/kmod.h>
+ #include <linux/seq_file.h>
++#include <linux/reboot.h>
+ #include <asm/uaccess.h>
+
+ #include <acpi/acpi_bus.h>
+@@ -59,7 +60,6 @@
+ #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
+ #define ACPI_THERMAL_NOTIFY_HOT 0xF1
+ #define ACPI_THERMAL_MODE_ACTIVE 0x00
+-#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
+
+ #define ACPI_THERMAL_MAX_ACTIVE 10
+ #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
+@@ -419,26 +419,6 @@
+ return 0;
+ }
+
+-static int acpi_thermal_call_usermode(char *path)
+-{
+- char *argv[2] = { NULL, NULL };
+- char *envp[3] = { NULL, NULL, NULL };
+-
+-
+- if (!path)
+- return -EINVAL;
+-
+- argv[0] = path;
+-
+- /* minimal command environment */
+- envp[0] = "HOME=/";
+- envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+-
+- call_usermodehelper(argv[0], argv, envp, 0);
+-
+- return 0;
+-}
+-
+ static int acpi_thermal_critical(struct acpi_thermal *tz)
+ {
+ if (!tz || !tz->trips.critical.flags.valid)
+@@ -456,7 +436,7 @@
+ acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
+ tz->trips.critical.flags.enabled);
+
+- acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
++ orderly_poweroff(true);
+
+ return 0;
+ }
+@@ -1114,7 +1094,6 @@
+ break;
+ case ACPI_THERMAL_NOTIFY_THRESHOLDS:
+ acpi_thermal_get_trip_points(tz);
+- acpi_thermal_check(tz);
+ acpi_bus_generate_event(device, event, 0);
+ break;
+ case ACPI_THERMAL_NOTIFY_DEVICES:
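The thermal hunk above replaces a hand-rolled usermode-helper invocation of /sbin/poweroff with orderly_poweroff(). A minimal sketch of the two patterns, assuming the orderly_poweroff(bool force) helper from <linux/reboot.h> introduced alongside this change; the fallback behaviour described in the comment is an assumption based on that era's implementation:

	#include <linux/kmod.h>
	#include <linux/reboot.h>

	/* Old pattern (removed above): spawn /sbin/poweroff by hand
	 * with a minimal environment. */
	static int poweroff_by_helper(void)
	{
		char *argv[] = { "/sbin/poweroff", NULL };
		char *envp[] = { "HOME=/",
				 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

		return call_usermodehelper(argv[0], argv, envp, 0);
	}

	/* New pattern: the core tries the userspace helper and, since
	 * force is true, is expected to fall back to an emergency sync
	 * plus kernel_power_off() if userspace cannot be reached. */
	static void poweroff_on_critical_trip(void)
	{
		orderly_poweroff(true);
	}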
+diff -Nurb linux-2.6.22-570/drivers/acpi/utilities/uteval.c linux-2.6.22-591/drivers/acpi/utilities/uteval.c
+--- linux-2.6.22-570/drivers/acpi/utilities/uteval.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/utilities/uteval.c 2007-12-21 15:36:11.000000000 -0500
+@@ -62,16 +62,13 @@
+ static char *acpi_interfaces_supported[] = {
+ /* Operating System Vendor Strings */
+
+- "Windows 2000",
+- "Windows 2001",
+- "Windows 2001 SP0",
+- "Windows 2001 SP1",
+- "Windows 2001 SP2",
+- "Windows 2001 SP3",
+- "Windows 2001 SP4",
+- "Windows 2001.1",
+- "Windows 2001.1 SP1", /* Added 03/2006 */
+- "Windows 2006", /* Added 03/2006 */
++ "Windows 2000", /* Windows 2000 */
++ "Windows 2001", /* Windows XP */
++ "Windows 2001 SP1", /* Windows XP SP1 */
++ "Windows 2001 SP2", /* Windows XP SP2 */
++ "Windows 2001.1", /* Windows Server 2003 */
++ "Windows 2001.1 SP1", /* Windows Server 2003 SP1 - Added 03/2006 */
++ "Windows 2006", /* Windows Vista - Added 03/2006 */
+
+ /* Feature Group Strings */
+
+diff -Nurb linux-2.6.22-570/drivers/acpi/video.c linux-2.6.22-591/drivers/acpi/video.c
+--- linux-2.6.22-570/drivers/acpi/video.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/acpi/video.c 2007-12-21 15:36:14.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/seq_file.h>
+
+ #include <linux/backlight.h>
++#include <linux/video_output.h>
+ #include <asm/uaccess.h>
+
+ #include <acpi/acpi_bus.h>
+@@ -169,6 +170,7 @@
+ struct acpi_device *dev;
+ struct acpi_video_device_brightness *brightness;
+ struct backlight_device *backlight;
++ struct output_device *output_dev;
+ };
+
+ /* bus */
+@@ -272,6 +274,10 @@
+ u32 level_current, u32 event);
+ static void acpi_video_switch_brightness(struct acpi_video_device *device,
+ int event);
++static int acpi_video_device_get_state(struct acpi_video_device *device,
++ unsigned long *state);
++static int acpi_video_output_get(struct output_device *od);
++static int acpi_video_device_set_state(struct acpi_video_device *device, int state);
+
+ /*backlight device sysfs support*/
+ static int acpi_video_get_brightness(struct backlight_device *bd)
+@@ -297,6 +303,28 @@
+ .update_status = acpi_video_set_brightness,
+ };
+
++/*video output device sysfs support*/
++static int acpi_video_output_get(struct output_device *od)
++{
++ unsigned long state;
++ struct acpi_video_device *vd =
++ (struct acpi_video_device *)class_get_devdata(&od->class_dev);
++ acpi_video_device_get_state(vd, &state);
++ return (int)state;
++}
++
++static int acpi_video_output_set(struct output_device *od)
++{
++ unsigned long state = od->request_state;
++ struct acpi_video_device *vd =
++ (struct acpi_video_device *)class_get_devdata(&od->class_dev);
++ return acpi_video_device_set_state(vd, state);
++}
++
++static struct output_properties acpi_output_properties = {
++ .set_state = acpi_video_output_set,
++ .get_status = acpi_video_output_get,
++};
+ /* --------------------------------------------------------------------------
+ Video Management
+ -------------------------------------------------------------------------- */
+@@ -531,7 +559,6 @@
+
+ static void acpi_video_device_find_cap(struct acpi_video_device *device)
+ {
+- acpi_integer status;
+ acpi_handle h_dummy1;
+ int i;
+ u32 max_level = 0;
+@@ -565,9 +592,9 @@
+ device->cap._DSS = 1;
+ }
+
+- status = acpi_video_device_lcd_query_levels(device, &obj);
++ if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) {
+
+- if (obj && obj->type == ACPI_TYPE_PACKAGE && obj->package.count >= 2) {
++ if (obj->package.count >= 2) {
+ int count = 0;
+ union acpi_object *o;
+
+@@ -588,6 +615,7 @@
+ continue;
+ }
+ br->levels[count] = (u32) o->integer.value;
++
+ if (br->levels[count] > max_level)
+ max_level = br->levels[count];
+ count++;
+@@ -606,9 +634,13 @@
+ }
+ }
+
++ } else {
++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n"));
++ }
++
+ kfree(obj);
+
+- if (device->cap._BCL && device->cap._BCM && device->cap._BQC){
++ if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0) {
+ unsigned long tmp;
+ static int count = 0;
+ char *name;
+@@ -626,6 +658,17 @@
+
+ kfree(name);
+ }
++ if (device->cap._DCS && device->cap._DSS) {
++ static int count = 0;
++ char *name;
++ name = kzalloc(MAX_NAME_LEN, GFP_KERNEL);
++ if (!name)
++ return;
++ sprintf(name, "acpi_video%d", count++);
++ device->output_dev = video_output_register(name,
++ NULL, device, &acpi_output_properties);
++ kfree(name);
++ }
+ return;
+ }
+
+@@ -1669,6 +1712,7 @@
+ ACPI_DEVICE_NOTIFY,
+ acpi_video_device_notify);
+ backlight_device_unregister(device->backlight);
++ video_output_unregister(device->output_dev);
+ return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/atm/idt77252.c linux-2.6.22-591/drivers/atm/idt77252.c
+--- linux-2.6.22-570/drivers/atm/idt77252.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/atm/idt77252.c 2007-12-21 15:36:14.000000000 -0500
+@@ -3576,7 +3576,7 @@
+ * XXX: <hack>
+ */
+ sprintf(tname, "eth%d", card->index);
+- tmp = dev_get_by_name(tname); /* jhs: was "tmp = dev_get(tname);" */
++ tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */
+ if (tmp) {
+ memcpy(card->atmdev->esi, tmp->dev_addr, 6);
+
+diff -Nurb linux-2.6.22-570/drivers/base/bus.c linux-2.6.22-591/drivers/base/bus.c
+--- linux-2.6.22-570/drivers/base/bus.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/base/bus.c 2007-12-21 15:36:11.000000000 -0500
+@@ -562,7 +562,6 @@
+
+ bus->drivers_probe_attr.attr.name = "drivers_probe";
+ bus->drivers_probe_attr.attr.mode = S_IWUSR;
+- bus->drivers_probe_attr.attr.owner = bus->owner;
+ bus->drivers_probe_attr.store = store_drivers_probe;
+ retval = bus_create_file(bus, &bus->drivers_probe_attr);
+ if (retval)
+@@ -570,7 +569,6 @@
+
+ bus->drivers_autoprobe_attr.attr.name = "drivers_autoprobe";
+ bus->drivers_autoprobe_attr.attr.mode = S_IWUSR | S_IRUGO;
+- bus->drivers_autoprobe_attr.attr.owner = bus->owner;
+ bus->drivers_autoprobe_attr.show = show_drivers_autoprobe;
+ bus->drivers_autoprobe_attr.store = store_drivers_autoprobe;
+ retval = bus_create_file(bus, &bus->drivers_autoprobe_attr);
+diff -Nurb linux-2.6.22-570/drivers/base/class.c linux-2.6.22-591/drivers/base/class.c
+--- linux-2.6.22-570/drivers/base/class.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/base/class.c 2007-12-21 15:36:14.000000000 -0500
+@@ -134,6 +134,17 @@
+ }
+ }
+
++static int class_setup_shadowing(struct class *cls)
++{
++ const struct shadow_dir_operations *shadow_ops;
++
++ shadow_ops = cls->shadow_ops;
++ if (!shadow_ops)
++ return 0;
++
++ return sysfs_enable_shadowing(&cls->subsys.kobj, shadow_ops);
++}
++
+ int class_register(struct class * cls)
+ {
+ int error;
+@@ -152,11 +163,22 @@
+ subsys_set_kset(cls, class_subsys);
+
+ error = subsystem_register(&cls->subsys);
+- if (!error) {
+- error = add_class_attrs(class_get(cls));
+- class_put(cls);
+- }
++ if (error)
++ goto out;
++
++ error = class_setup_shadowing(cls);
++ if (error)
++ goto out_unregister;
++
++ error = add_class_attrs(cls);
++ if (error)
++ goto out_unregister;
++
++out:
+ return error;
++out_unregister:
++ subsystem_unregister(&cls->subsys);
++ goto out;
+ }
+
+ void class_unregister(struct class * cls)
+@@ -312,9 +334,6 @@
+
+ pr_debug("device class '%s': release.\n", cd->class_id);
+
+- kfree(cd->devt_attr);
+- cd->devt_attr = NULL;
+-
+ if (cd->release)
+ cd->release(cd);
+ else if (cls->release)
+@@ -547,6 +566,9 @@
+ return print_dev_t(buf, class_dev->devt);
+ }
+
++static struct class_device_attribute class_devt_attr =
++ __ATTR(dev, S_IRUGO, show_dev, NULL);
++
+ static ssize_t store_uevent(struct class_device *class_dev,
+ const char *buf, size_t count)
+ {
+@@ -554,6 +576,9 @@
+ return count;
+ }
+
++static struct class_device_attribute class_uevent_attr =
++ __ATTR(uevent, S_IWUSR, NULL, store_uevent);
++
+ void class_device_initialize(struct class_device *class_dev)
+ {
+ kobj_set_kset_s(class_dev, class_obj_subsys);
+@@ -603,34 +628,17 @@
+ &parent_class->subsys.kobj, "subsystem");
+ if (error)
+ goto out3;
+- class_dev->uevent_attr.attr.name = "uevent";
+- class_dev->uevent_attr.attr.mode = S_IWUSR;
+- class_dev->uevent_attr.attr.owner = parent_class->owner;
+- class_dev->uevent_attr.store = store_uevent;
+- error = class_device_create_file(class_dev, &class_dev->uevent_attr);
++
++ error = class_device_create_file(class_dev, &class_uevent_attr);
+ if (error)
+ goto out3;
+
+ if (MAJOR(class_dev->devt)) {
+- struct class_device_attribute *attr;
+- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+- if (!attr) {
+- error = -ENOMEM;
+- goto out4;
+- }
+- attr->attr.name = "dev";
+- attr->attr.mode = S_IRUGO;
+- attr->attr.owner = parent_class->owner;
+- attr->show = show_dev;
+- error = class_device_create_file(class_dev, attr);
+- if (error) {
+- kfree(attr);
++ error = class_device_create_file(class_dev, &class_devt_attr);
++ if (error)
+ goto out4;
+ }
+
+- class_dev->devt_attr = attr;
+- }
+-
+ error = class_device_add_attrs(class_dev);
+ if (error)
+ goto out5;
+@@ -671,10 +679,10 @@
+ out6:
+ class_device_remove_attrs(class_dev);
+ out5:
+- if (class_dev->devt_attr)
+- class_device_remove_file(class_dev, class_dev->devt_attr);
++ if (MAJOR(class_dev->devt))
++ class_device_remove_file(class_dev, &class_devt_attr);
+ out4:
+- class_device_remove_file(class_dev, &class_dev->uevent_attr);
++ class_device_remove_file(class_dev, &class_uevent_attr);
+ out3:
+ kobject_del(&class_dev->kobj);
+ out2:
+@@ -774,9 +782,9 @@
+ sysfs_remove_link(&class_dev->kobj, "device");
+ }
+ sysfs_remove_link(&class_dev->kobj, "subsystem");
+- class_device_remove_file(class_dev, &class_dev->uevent_attr);
+- if (class_dev->devt_attr)
+- class_device_remove_file(class_dev, class_dev->devt_attr);
++ class_device_remove_file(class_dev, &class_uevent_attr);
++ if (MAJOR(class_dev->devt))
++ class_device_remove_file(class_dev, &class_devt_attr);
+ class_device_remove_attrs(class_dev);
+ class_device_remove_groups(class_dev);
+
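The class.c conversion drops the per-device kzalloc()ed "dev" and "uevent" attributes in favour of single static ones built with __ATTR(). Roughly, the macro expands to a designated initializer like the sketch below (once this series removes the .owner field), which is why one shared object now suffices:

	#include <linux/stat.h>
	#include <linux/sysfs.h>

	/* What __ATTR(dev, S_IRUGO, show_dev, NULL) boils down to,
	 * with show_dev as defined above: */
	static struct class_device_attribute class_devt_attr_sketch = {
		.attr  = { .name = "dev", .mode = S_IRUGO },
		.show  = show_dev,
		.store = NULL,
	};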
+diff -Nurb linux-2.6.22-570/drivers/base/core.c linux-2.6.22-591/drivers/base/core.c
+--- linux-2.6.22-570/drivers/base/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/base/core.c 2007-12-21 15:36:14.000000000 -0500
+@@ -310,6 +310,9 @@
+ return count;
+ }
+
++static struct device_attribute uevent_attr =
++ __ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent);
++
+ static int device_add_attributes(struct device *dev,
+ struct device_attribute *attrs)
+ {
+@@ -423,6 +426,9 @@
+ return print_dev_t(buf, dev->devt);
+ }
+
++static struct device_attribute devt_attr =
++ __ATTR(dev, S_IRUGO, show_dev, NULL);
++
+ /*
+ * devices_subsys - structure to be registered with kobject core.
+ */
+@@ -616,8 +622,14 @@
+ return kobj;
+
+ /* or create a new class-directory at the parent device */
+- return kobject_kset_add_dir(&dev->class->class_dirs,
++ kobj = kobject_kset_add_dir(&dev->class->class_dirs,
+ parent_kobj, dev->class->name);
++
++ /* If we created a new class-directory setup shadowing */
++ if (kobj && dev->class->shadow_ops)
++ sysfs_enable_shadowing(kobj, dev->class->shadow_ops);
++
++ return kobj;
+ }
+
+ if (parent)
+@@ -637,6 +649,82 @@
+ return 0;
+ }
+
++static int device_add_class_symlinks(struct device *dev)
++{
++ int error;
++
++ if (!dev->class)
++ return 0;
++ error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj,
++ "subsystem");
++ if (error)
++ goto out;
++ /*
++ * If this is not a "fake" compatible device, then create the
++ * symlink from the class to the device.
++ */
++ if (dev->kobj.parent != &dev->class->subsys.kobj) {
++ error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
++ dev->bus_id);
++ if (error)
++ goto out_subsys;
++ }
++ /* only bus-device parents get a "device"-link */
++ if (dev->parent && dev->parent->bus) {
++ error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
++ "device");
++ if (error)
++ goto out_busid;
++#ifdef CONFIG_SYSFS_DEPRECATED
++ {
++ char * class_name = make_class_name(dev->class->name,
++ &dev->kobj);
++ if (class_name)
++ error = sysfs_create_link(&dev->parent->kobj,
++ &dev->kobj, class_name);
++ kfree(class_name);
++ if (error)
++ goto out_device;
++ }
++#endif
++ }
++ return 0;
++
++#ifdef CONFIG_SYSFS_DEPRECATED
++out_device:
++ if (dev->parent)
++ sysfs_remove_link(&dev->kobj, "device");
++#endif
++out_busid:
++ if (dev->kobj.parent != &dev->class->subsys.kobj)
++ sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
++out_subsys:
++ sysfs_remove_link(&dev->kobj, "subsystem");
++out:
++ return error;
++}
++
++static void device_remove_class_symlinks(struct device *dev)
++{
++ if (!dev->class)
++ return;
++ if (dev->parent) {
++#ifdef CONFIG_SYSFS_DEPRECATED
++ char *class_name;
++
++ class_name = make_class_name(dev->class->name, &dev->kobj);
++ if (class_name) {
++ sysfs_remove_link(&dev->parent->kobj, class_name);
++ kfree(class_name);
++ }
++#endif
++ sysfs_remove_link(&dev->kobj, "device");
++ }
++ if (dev->kobj.parent != &dev->class->subsys.kobj)
++ sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
++ sysfs_remove_link(&dev->kobj, "subsystem");
++}
++
+ /**
+ * device_add - add device to device hierarchy.
+ * @dev: device.
+@@ -651,7 +739,6 @@
+ int device_add(struct device *dev)
+ {
+ struct device *parent = NULL;
+- char *class_name = NULL;
+ struct class_interface *class_intf;
+ int error = -EINVAL;
+
+@@ -681,58 +768,17 @@
+ blocking_notifier_call_chain(&dev->bus->bus_notifier,
+ BUS_NOTIFY_ADD_DEVICE, dev);
+
+- dev->uevent_attr.attr.name = "uevent";
+- dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR;
+- if (dev->driver)
+- dev->uevent_attr.attr.owner = dev->driver->owner;
+- dev->uevent_attr.store = store_uevent;
+- dev->uevent_attr.show = show_uevent;
+- error = device_create_file(dev, &dev->uevent_attr);
++ error = device_create_file(dev, &uevent_attr);
+ if (error)
+ goto attrError;
+
+ if (MAJOR(dev->devt)) {
+- struct device_attribute *attr;
+- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+- if (!attr) {
+- error = -ENOMEM;
+- goto ueventattrError;
+- }
+- attr->attr.name = "dev";
+- attr->attr.mode = S_IRUGO;
+- if (dev->driver)
+- attr->attr.owner = dev->driver->owner;
+- attr->show = show_dev;
+- error = device_create_file(dev, attr);
+- if (error) {
+- kfree(attr);
++ error = device_create_file(dev, &devt_attr);
++ if (error)
+ goto ueventattrError;
+ }
+-
+- dev->devt_attr = attr;
+- }
+-
+- if (dev->class) {
+- sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj,
+- "subsystem");
+- /* If this is not a "fake" compatible device, then create the
+- * symlink from the class to the device. */
+- if (dev->kobj.parent != &dev->class->subsys.kobj)
+- sysfs_create_link(&dev->class->subsys.kobj,
+- &dev->kobj, dev->bus_id);
+- if (parent) {
+- sysfs_create_link(&dev->kobj, &dev->parent->kobj,
+- "device");
+-#ifdef CONFIG_SYSFS_DEPRECATED
+- class_name = make_class_name(dev->class->name,
+- &dev->kobj);
+- if (class_name)
+- sysfs_create_link(&dev->parent->kobj,
+- &dev->kobj, class_name);
+-#endif
+- }
+- }
+-
++ if ((error = device_add_class_symlinks(dev)))
++ goto SymlinkError;
+ if ((error = device_add_attrs(dev)))
+ goto AttrsError;
+ if ((error = device_pm_add(dev)))
+@@ -756,7 +802,6 @@
+ up(&dev->class->sem);
+ }
+ Done:
+- kfree(class_name);
+ put_device(dev);
+ return error;
+ BusError:
+@@ -767,10 +812,10 @@
+ BUS_NOTIFY_DEL_DEVICE, dev);
+ device_remove_attrs(dev);
+ AttrsError:
+- if (dev->devt_attr) {
+- device_remove_file(dev, dev->devt_attr);
+- kfree(dev->devt_attr);
+- }
++ device_remove_class_symlinks(dev);
++ SymlinkError:
++ if (MAJOR(dev->devt))
++ device_remove_file(dev, &devt_attr);
+
+ if (dev->class) {
+ sysfs_remove_link(&dev->kobj, "subsystem");
+@@ -792,7 +837,7 @@
+ }
+ }
+ ueventattrError:
+- device_remove_file(dev, &dev->uevent_attr);
++ device_remove_file(dev, &uevent_attr);
+ attrError:
+ kobject_uevent(&dev->kobj, KOBJ_REMOVE);
+ kobject_del(&dev->kobj);
+@@ -869,17 +914,15 @@
+
+ if (parent)
+ klist_del(&dev->knode_parent);
+- if (dev->devt_attr) {
+- device_remove_file(dev, dev->devt_attr);
+- kfree(dev->devt_attr);
+- }
++ if (MAJOR(dev->devt))
++ device_remove_file(dev, &devt_attr);
+ if (dev->class) {
+ sysfs_remove_link(&dev->kobj, "subsystem");
+ /* If this is not a "fake" compatible device, remove the
+ * symlink from the class to the device. */
+ if (dev->kobj.parent != &dev->class->subsys.kobj)
+- sysfs_remove_link(&dev->class->subsys.kobj,
+- dev->bus_id);
++ sysfs_delete_link(&dev->class->subsys.kobj,
++ &dev->kobj, dev->bus_id);
+ if (parent) {
+ #ifdef CONFIG_SYSFS_DEPRECATED
+ char *class_name = make_class_name(dev->class->name,
+@@ -926,7 +969,7 @@
+ up(&dev->class->sem);
+ }
+ }
+- device_remove_file(dev, &dev->uevent_attr);
++ device_remove_file(dev, &uevent_attr);
+ device_remove_attrs(dev);
+ bus_remove_device(dev);
+
+@@ -1155,7 +1198,7 @@
+ {
+ char *old_class_name = NULL;
+ char *new_class_name = NULL;
+- char *old_symlink_name = NULL;
++ char *old_device_name = NULL;
+ int error;
+
+ dev = get_device(dev);
+@@ -1169,42 +1212,46 @@
+ old_class_name = make_class_name(dev->class->name, &dev->kobj);
+ #endif
+
+- if (dev->class) {
+- old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
+- if (!old_symlink_name) {
++ old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
++ if (!old_device_name) {
+ error = -ENOMEM;
+- goto out_free_old_class;
+- }
+- strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE);
++ goto out;
+ }
+-
++ strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
+ strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+
++ if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) {
++ error = sysfs_rename_link(&dev->class->subsys.kobj,
++ &dev->kobj, old_device_name, new_name);
++ if (error)
++ goto out;
++ }
++
+ error = kobject_rename(&dev->kobj, new_name);
++ if (error) {
++ strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
++ goto out;
++ }
+
+ #ifdef CONFIG_SYSFS_DEPRECATED
+ if (old_class_name) {
++ error = -ENOMEM;
+ new_class_name = make_class_name(dev->class->name, &dev->kobj);
+- if (new_class_name) {
+- sysfs_create_link(&dev->parent->kobj, &dev->kobj,
+- new_class_name);
+- sysfs_remove_link(&dev->parent->kobj, old_class_name);
+- }
+- }
+-#endif
++ if (!new_class_name)
++ goto out;
+
+- if (dev->class) {
+- sysfs_remove_link(&dev->class->subsys.kobj,
+- old_symlink_name);
+- sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
+- dev->bus_id);
++ error = sysfs_rename_link(&dev->parent->kobj, &dev->kobj,
++ old_class_name, new_class_name);
++ if (error)
++ goto out;
+ }
++#endif
++out:
+ put_device(dev);
+
+ kfree(new_class_name);
+- kfree(old_symlink_name);
+- out_free_old_class:
+ kfree(old_class_name);
++ kfree(old_device_name);
+
+ return error;
+ }
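device_add_class_symlinks()/device_remove_class_symlinks() above follow the standard kernel unwind idiom: each failure label undoes exactly the steps that already succeeded, in reverse order. A stripped-down sketch of the shape (do_a/do_b/do_c and the undo helpers are hypothetical):

	int do_a(void); int do_b(void); int do_c(void);
	void undo_a(void); void undo_b(void);

	static int setup_three_things(void)
	{
		int error;

		error = do_a();
		if (error)
			goto out;
		error = do_b();
		if (error)
			goto out_a;
		error = do_c();
		if (error)
			goto out_b;
		return 0;

	out_b:
		undo_b();
	out_a:
		undo_a();
	out:
		return error;
	}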
+diff -Nurb linux-2.6.22-570/drivers/base/dd.c linux-2.6.22-591/drivers/base/dd.c
+--- linux-2.6.22-570/drivers/base/dd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/base/dd.c 2007-12-23 01:39:35.000000000 -0500
+@@ -296,9 +296,8 @@
+ {
+ struct device_driver * drv;
+
+- drv = dev->driver;
++ drv = get_driver(dev->driver);
+ if (drv) {
+- get_driver(drv);
+ driver_sysfs_remove(dev);
+ sysfs_remove_link(&dev->kobj, "driver");
+ klist_remove(&dev->knode_driver);
+diff -Nurb linux-2.6.22-570/drivers/base/dd.c.orig linux-2.6.22-591/drivers/base/dd.c.orig
+--- linux-2.6.22-570/drivers/base/dd.c.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/base/dd.c.orig 2007-12-22 21:18:39.000000000 -0500
+@@ -0,0 +1,369 @@
++/*
++ * drivers/base/dd.c - The core device/driver interactions.
++ *
++ * This file contains the (sometimes tricky) code that controls the
++ * interactions between devices and drivers, which primarily includes
++ * driver binding and unbinding.
++ *
++ * All of this code used to exist in drivers/base/bus.c, but was
++ * relocated here in the name of compartmentalization (since it wasn't
++ * strictly code just for the 'struct bus_type').
++ *
++ * Copyright (c) 2002-5 Patrick Mochel
++ * Copyright (c) 2002-3 Open Source Development Labs
++ *
++ * This file is released under the GPLv2
++ */
++
++#include <linux/device.h>
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include <linux/wait.h>
++
++#include "base.h"
++#include "power/power.h"
++
++#define to_drv(node) container_of(node, struct device_driver, kobj.entry)
++
++
++static void driver_bound(struct device *dev)
++{
++ if (klist_node_attached(&dev->knode_driver)) {
++ printk(KERN_WARNING "%s: device %s already bound\n",
++ __FUNCTION__, kobject_name(&dev->kobj));
++ return;
++ }
++
++ pr_debug("bound device '%s' to driver '%s'\n",
++ dev->bus_id, dev->driver->name);
++
++ if (dev->bus)
++ blocking_notifier_call_chain(&dev->bus->bus_notifier,
++ BUS_NOTIFY_BOUND_DRIVER, dev);
++
++ klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices);
++}
++
++static int driver_sysfs_add(struct device *dev)
++{
++ int ret;
++
++ ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj,
++ kobject_name(&dev->kobj));
++ if (ret == 0) {
++ ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj,
++ "driver");
++ if (ret)
++ sysfs_remove_link(&dev->driver->kobj,
++ kobject_name(&dev->kobj));
++ }
++ return ret;
++}
++
++static void driver_sysfs_remove(struct device *dev)
++{
++ struct device_driver *drv = dev->driver;
++
++ if (drv) {
++ sysfs_remove_link(&drv->kobj, kobject_name(&dev->kobj));
++ sysfs_remove_link(&dev->kobj, "driver");
++ }
++}
++
++/**
++ * device_bind_driver - bind a driver to one device.
++ * @dev: device.
++ *
++ * Allow manual attachment of a driver to a device.
++ * Caller must have already set @dev->driver.
++ *
++ * Note that this does not modify the bus reference count
++ * nor take the bus's rwsem. Please verify those are accounted
++ * for before calling this. (It is ok to call with no other effort
++ * from a driver's probe() method.)
++ *
++ * This function must be called with @dev->sem held.
++ */
++int device_bind_driver(struct device *dev)
++{
++ int ret;
++
++ ret = driver_sysfs_add(dev);
++ if (!ret)
++ driver_bound(dev);
++ return ret;
++}
++
++static atomic_t probe_count = ATOMIC_INIT(0);
++static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue);
++
++static int really_probe(struct device *dev, struct device_driver *drv)
++{
++ int ret = 0;
++
++ atomic_inc(&probe_count);
++ pr_debug("%s: Probing driver %s with device %s\n",
++ drv->bus->name, drv->name, dev->bus_id);
++ WARN_ON(!list_empty(&dev->devres_head));
++
++ dev->driver = drv;
++ if (driver_sysfs_add(dev)) {
++ printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
++ __FUNCTION__, dev->bus_id);
++ goto probe_failed;
++ }
++
++ if (dev->bus->probe) {
++ ret = dev->bus->probe(dev);
++ if (ret)
++ goto probe_failed;
++ } else if (drv->probe) {
++ ret = drv->probe(dev);
++ if (ret)
++ goto probe_failed;
++ }
++
++ driver_bound(dev);
++ ret = 1;
++ pr_debug("%s: Bound Device %s to Driver %s\n",
++ drv->bus->name, dev->bus_id, drv->name);
++ goto done;
++
++probe_failed:
++ devres_release_all(dev);
++ driver_sysfs_remove(dev);
++ dev->driver = NULL;
++
++ if (ret != -ENODEV && ret != -ENXIO) {
++ /* driver matched but the probe failed */
++ printk(KERN_WARNING
++ "%s: probe of %s failed with error %d\n",
++ drv->name, dev->bus_id, ret);
++ }
++ /*
++ * Ignore errors returned by ->probe so that the next driver can try
++ * its luck.
++ */
++ ret = 0;
++done:
++ atomic_dec(&probe_count);
++ wake_up(&probe_waitqueue);
++ return ret;
++}
++
++/**
++ * driver_probe_done
++ * Determine if the probe sequence is finished or not.
++ *
++ * Should somehow figure out how to use a semaphore, not an atomic variable...
++ */
++int driver_probe_done(void)
++{
++ pr_debug("%s: probe_count = %d\n", __FUNCTION__,
++ atomic_read(&probe_count));
++ if (atomic_read(&probe_count))
++ return -EBUSY;
++ return 0;
++}
++
++/**
++ * driver_probe_device - attempt to bind device & driver together
++ * @drv: driver to bind a device to
++ * @dev: device to try to bind to the driver
++ *
++ * First, we call the bus's match function, if one is present, which should
++ * compare the device IDs the driver supports with the device IDs of the
++ * device. Note we don't do this ourselves because we don't know the
++ * format of the ID structures, nor what is to be considered a match and
++ * what is not.
++ *
++ * This function returns 1 if a match is found, -ENODEV if the device is
++ * not registered, and 0 otherwise.
++ *
++ * This function must be called with @dev->sem held. When called for a
++ * USB interface, @dev->parent->sem must be held as well.
++ */
++int driver_probe_device(struct device_driver * drv, struct device * dev)
++{
++ int ret = 0;
++
++ if (!device_is_registered(dev))
++ return -ENODEV;
++ if (drv->bus->match && !drv->bus->match(dev, drv))
++ goto done;
++
++ pr_debug("%s: Matched Device %s with Driver %s\n",
++ drv->bus->name, dev->bus_id, drv->name);
++
++ ret = really_probe(dev, drv);
++
++done:
++ return ret;
++}
++
++static int __device_attach(struct device_driver * drv, void * data)
++{
++ struct device * dev = data;
++ return driver_probe_device(drv, dev);
++}
++
++/**
++ * device_attach - try to attach device to a driver.
++ * @dev: device.
++ *
++ * Walk the list of drivers that the bus has and call
++ * driver_probe_device() for each pair. If a compatible
++ * pair is found, break out and return.
++ *
++ * Returns 1 if the device was bound to a driver;
++ * 0 if no matching device was found;
++ * -ENODEV if the device is not registered.
++ *
++ * When called for a USB interface, @dev->parent->sem must be held.
++ */
++int device_attach(struct device * dev)
++{
++ int ret = 0;
++
++ down(&dev->sem);
++ if (dev->driver) {
++ ret = device_bind_driver(dev);
++ if (ret == 0)
++ ret = 1;
++ else {
++ dev->driver = NULL;
++ ret = 0;
++ }
++ } else {
++ ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
++ }
++ up(&dev->sem);
++ return ret;
++}
++
++static int __driver_attach(struct device * dev, void * data)
++{
++ struct device_driver * drv = data;
++
++ /*
++ * Lock device and try to bind to it. We drop the error
++ * here and always return 0, because we need to keep trying
++ * to bind to devices, and some drivers will return an error
++ * simply because they do not support the device.
++ *
++ * driver_probe_device() will spit a warning if there
++ * is an error.
++ */
++
++ if (dev->parent) /* Needed for USB */
++ down(&dev->parent->sem);
++ down(&dev->sem);
++ if (!dev->driver)
++ driver_probe_device(drv, dev);
++ up(&dev->sem);
++ if (dev->parent)
++ up(&dev->parent->sem);
++
++ return 0;
++}
++
++/**
++ * driver_attach - try to bind driver to devices.
++ * @drv: driver.
++ *
++ * Walk the list of devices that the bus has on it and try to
++ * match the driver with each one. If driver_probe_device()
++ * returns 0 and the @dev->driver is set, we've found a
++ * compatible pair.
++ */
++int driver_attach(struct device_driver * drv)
++{
++ return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
++}
++
++/**
++ * device_release_driver - manually detach device from driver.
++ * @dev: device.
++ *
++ * Manually detach device from driver.
++ *
++ * __device_release_driver() must be called with @dev->sem held.
++ * When called for a USB interface, @dev->parent->sem must be held
++ * as well.
++ */
++
++static void __device_release_driver(struct device * dev)
++{
++ struct device_driver * drv;
++
++ drv = dev->driver;
++ if (drv) {
++ get_driver(drv);
++ driver_sysfs_remove(dev);
++ sysfs_remove_link(&dev->kobj, "driver");
++ klist_remove(&dev->knode_driver);
++
++ if (dev->bus)
++ blocking_notifier_call_chain(&dev->bus->bus_notifier,
++ BUS_NOTIFY_UNBIND_DRIVER,
++ dev);
++
++ if (dev->bus && dev->bus->remove)
++ dev->bus->remove(dev);
++ else if (drv->remove)
++ drv->remove(dev);
++ devres_release_all(dev);
++ dev->driver = NULL;
++ put_driver(drv);
++ }
++}
++
++void device_release_driver(struct device * dev)
++{
++ /*
++ * If anyone calls device_release_driver() recursively from
++ * within their ->remove callback for the same device, they
++ * will deadlock right here.
++ */
++ down(&dev->sem);
++ __device_release_driver(dev);
++ up(&dev->sem);
++}
++
++
++/**
++ * driver_detach - detach driver from all devices it controls.
++ * @drv: driver.
++ */
++void driver_detach(struct device_driver * drv)
++{
++ struct device * dev;
++
++ for (;;) {
++ spin_lock(&drv->klist_devices.k_lock);
++ if (list_empty(&drv->klist_devices.k_list)) {
++ spin_unlock(&drv->klist_devices.k_lock);
++ break;
++ }
++ dev = list_entry(drv->klist_devices.k_list.prev,
++ struct device, knode_driver.n_node);
++ get_device(dev);
++ spin_unlock(&drv->klist_devices.k_lock);
++
++ if (dev->parent) /* Needed for USB */
++ down(&dev->parent->sem);
++ down(&dev->sem);
++ if (dev->driver == drv)
++ __device_release_driver(dev);
++ up(&dev->sem);
++ if (dev->parent)
++ up(&dev->parent->sem);
++ put_device(dev);
++ }
++}
++
++EXPORT_SYMBOL_GPL(device_bind_driver);
++EXPORT_SYMBOL_GPL(device_release_driver);
++EXPORT_SYMBOL_GPL(device_attach);
++EXPORT_SYMBOL_GPL(driver_attach);
++
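driver_probe_device() above codifies the match-then-probe contract: the bus's match hook only says "these two could go together"; probing decides for real. A hypothetical bus illustrating the hook's shape (all demo names are illustrative):

	#include <linux/device.h>
	#include <linux/string.h>

	/* non-zero return == "worth probing" */
	static int demo_bus_match(struct device *dev,
				  struct device_driver *drv)
	{
		return strcmp(dev->bus_id, drv->name) == 0;
	}

	static struct bus_type demo_bus = {
		.name  = "demo",
		.match = demo_bus_match,
	};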
+diff -Nurb linux-2.6.22-570/drivers/base/firmware_class.c linux-2.6.22-591/drivers/base/firmware_class.c
+--- linux-2.6.22-570/drivers/base/firmware_class.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/base/firmware_class.c 2007-12-21 15:36:11.000000000 -0500
+@@ -175,7 +175,7 @@
+ static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store);
+
+ static ssize_t
+-firmware_data_read(struct kobject *kobj,
++firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct device *dev = to_dev(kobj);
+@@ -240,7 +240,7 @@
+ * the driver as a firmware image.
+ **/
+ static ssize_t
+-firmware_data_write(struct kobject *kobj,
++firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct device *dev = to_dev(kobj);
+@@ -271,7 +271,7 @@
+ }
+
+ static struct bin_attribute firmware_attr_data_tmpl = {
+- .attr = {.name = "data", .mode = 0644, .owner = THIS_MODULE},
++ .attr = {.name = "data", .mode = 0644},
+ .size = 0,
+ .read = firmware_data_read,
+ .write = firmware_data_write,
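The firmware hunks adapt to the sysfs change that passes the bin_attribute itself into binary read/write callbacks, so a handler no longer has to recover the attribute from the kobject. A minimal read hook in the new shape (names are illustrative):

	#include <linux/sysfs.h>

	static ssize_t demo_bin_read(struct kobject *kobj,
				     struct bin_attribute *bin_attr,
				     char *buf, loff_t off, size_t count)
	{
		/* bin_attr->attr.name, bin_attr->size etc. are now
		 * directly at hand */
		return 0;
	}

	static struct bin_attribute demo_bin = {
		.attr = { .name = "demo", .mode = 0444 },
		.read = demo_bin_read,
	};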
+diff -Nurb linux-2.6.22-570/drivers/block/acsi_slm.c linux-2.6.22-591/drivers/block/acsi_slm.c
+--- linux-2.6.22-570/drivers/block/acsi_slm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/block/acsi_slm.c 2007-12-21 15:36:11.000000000 -0500
+@@ -367,7 +367,7 @@
+ int length;
+ int end;
+
+- if (!(page = __get_free_page( GFP_KERNEL )))
++ if (!(page = __get_free_page(GFP_TEMPORARY)))
+ return( -ENOMEM );
+
+ length = slm_getstats( (char *)page, iminor(node) );
+diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoecmd.c linux-2.6.22-591/drivers/block/aoe/aoecmd.c
+--- linux-2.6.22-570/drivers/block/aoe/aoecmd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/block/aoe/aoecmd.c 2007-12-21 15:36:14.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netdevice.h>
+ #include <linux/genhd.h>
++#include <net/net_namespace.h>
+ #include <asm/unaligned.h>
+ #include "aoe.h"
+
+@@ -194,7 +195,7 @@
+ sl = sl_tail = NULL;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(ifp) {
++ for_each_netdev(&init_net, ifp) {
+ dev_hold(ifp);
+ if (!is_aoe_netif(ifp))
+ goto cont;
+diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoenet.c linux-2.6.22-591/drivers/block/aoe/aoenet.c
+--- linux-2.6.22-570/drivers/block/aoe/aoenet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/block/aoe/aoenet.c 2007-12-21 15:36:14.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/netdevice.h>
+ #include <linux/moduleparam.h>
++#include <net/net_namespace.h>
+ #include <asm/unaligned.h>
+ #include "aoe.h"
+
+@@ -114,6 +115,9 @@
+ struct aoe_hdr *h;
+ u32 n;
+
++ if (ifp->nd_net != &init_net)
++ goto exit;
++
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return 0;
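Both aoe hunks are network-namespace fallout: device iteration takes an explicit namespace, and the packet handler must ignore devices outside init_net. The receive-side check, sketched to mirror the test added above:

	#include <linux/netdevice.h>
	#include <net/net_namespace.h>

	static int demo_rcv(struct sk_buff *skb, struct net_device *dev,
			    struct packet_type *pt,
			    struct net_device *orig_dev)
	{
		if (dev->nd_net != &init_net)
			goto drop;
		/* ... normal processing ... */
		return 0;
	drop:
		kfree_skb(skb);
		return 0;
	}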
+diff -Nurb linux-2.6.22-570/drivers/block/cciss_scsi.c linux-2.6.22-591/drivers/block/cciss_scsi.c
+--- linux-2.6.22-570/drivers/block/cciss_scsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/block/cciss_scsi.c 2007-12-21 15:36:11.000000000 -0500
+@@ -555,7 +555,6 @@
+ {
+ struct scsi_cmnd *cmd;
+ ctlr_info_t *ctlr;
+- u64bit addr64;
+ ErrorInfo_struct *ei;
+
+ ei = cp->err_info;
+@@ -569,20 +568,7 @@
+ cmd = (struct scsi_cmnd *) cp->scsi_cmd;
+ ctlr = hba[cp->ctlr];
+
+- /* undo the DMA mappings */
+-
+- if (cmd->use_sg) {
+- pci_unmap_sg(ctlr->pdev,
+- cmd->request_buffer, cmd->use_sg,
+- cmd->sc_data_direction);
+- }
+- else if (cmd->request_bufflen) {
+- addr64.val32.lower = cp->SG[0].Addr.lower;
+- addr64.val32.upper = cp->SG[0].Addr.upper;
+- pci_unmap_single(ctlr->pdev, (dma_addr_t) addr64.val,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+
+ cmd->result = (DID_OK << 16); /* host byte */
+ cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
+@@ -597,7 +583,7 @@
+ ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
+ SCSI_SENSE_BUFFERSIZE :
+ ei->SenseLen);
+- cmd->resid = ei->ResidualCnt;
++ scsi_set_resid(cmd, ei->ResidualCnt);
+
+ if(ei->CommandStatus != 0)
+ { /* an error has occurred */
+@@ -1204,46 +1190,29 @@
+ CommandList_struct *cp,
+ struct scsi_cmnd *cmd)
+ {
+- unsigned int use_sg, nsegs=0, len;
+- struct scatterlist *scatter = (struct scatterlist *) cmd->request_buffer;
++ unsigned int len;
++ struct scatterlist *sg;
+ __u64 addr64;
++ int use_sg, i;
+
+- /* is it just one virtual address? */
+- if (!cmd->use_sg) {
+- if (cmd->request_bufflen) { /* anything to xfer? */
+-
+- addr64 = (__u64) pci_map_single(pdev,
+- cmd->request_buffer,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
++ BUG_ON(scsi_sg_count(cmd) > MAXSGENTRIES);
+
+- cp->SG[0].Addr.lower =
++ use_sg = scsi_dma_map(cmd);
++ if (use_sg) { /* not too many addrs? */
++ scsi_for_each_sg(cmd, sg, use_sg, i) {
++ addr64 = (__u64) sg_dma_address(sg);
++ len = sg_dma_len(sg);
++ cp->SG[i].Addr.lower =
+ (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
+- cp->SG[0].Addr.upper =
++ cp->SG[i].Addr.upper =
+ (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
+- cp->SG[0].Len = cmd->request_bufflen;
+- nsegs=1;
++ cp->SG[i].Len = len;
++ cp->SG[i].Ext = 0; // we are not chaining
+ }
+- } /* else, must be a list of virtual addresses.... */
+- else if (cmd->use_sg <= MAXSGENTRIES) { /* not too many addrs? */
+-
+- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg,
+- cmd->sc_data_direction);
+-
+- for (nsegs=0; nsegs < use_sg; nsegs++) {
+- addr64 = (__u64) sg_dma_address(&scatter[nsegs]);
+- len = sg_dma_len(&scatter[nsegs]);
+- cp->SG[nsegs].Addr.lower =
+- (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
+- cp->SG[nsegs].Addr.upper =
+- (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
+- cp->SG[nsegs].Len = len;
+- cp->SG[nsegs].Ext = 0; // we are not chaining
+ }
+- } else BUG();
+
+- cp->Header.SGList = (__u8) nsegs; /* no. SGs contig in this cmd */
+- cp->Header.SGTotal = (__u16) nsegs; /* total sgs in this cmd list */
++ cp->Header.SGList = (__u8) use_sg; /* no. SGs contig in this cmd */
++ cp->Header.SGTotal = (__u16) use_sg; /* total sgs in this cmd list */
+ return;
+ }
+
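The cciss_scsi rewrite above replaces the old use_sg/request_buffer dichotomy with the scsi_dma_map() accessors, which treat every command as a scatterlist. The resulting driver pattern, sketched:

	#include <scsi/scsi_cmnd.h>

	static int demo_map_command(struct scsi_cmnd *cmd)
	{
		struct scatterlist *sg;
		int nseg, i;

		nseg = scsi_dma_map(cmd); /* <0 error, 0 means no data */
		if (nseg < 0)
			return -EIO;

		scsi_for_each_sg(cmd, sg, nseg, i) {
			dma_addr_t addr = sg_dma_address(sg);
			unsigned int len = sg_dma_len(sg);
			/* program addr/len into the adapter SG table */
			(void)addr; (void)len;
		}
		return 0;
	}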
+diff -Nurb linux-2.6.22-570/drivers/block/loop.c linux-2.6.22-591/drivers/block/loop.c
+--- linux-2.6.22-570/drivers/block/loop.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/drivers/block/loop.c 2007-12-21 15:36:11.000000000 -0500
+@@ -68,6 +68,7 @@
+ #include <linux/loop.h>
+ #include <linux/compat.h>
+ #include <linux/suspend.h>
++#include <linux/freezer.h>
+ #include <linux/writeback.h>
+ #include <linux/buffer_head.h> /* for invalidate_bdev() */
+ #include <linux/completion.h>
+@@ -577,13 +578,6 @@
+ struct loop_device *lo = data;
+ struct bio *bio;
+
+- /*
+- * loop can be used in an encrypted device,
+- * hence, it mustn't be stopped at all
+- * because it could be indirectly used during suspension
+- */
+- current->flags |= PF_NOFREEZE;
+-
+ set_user_nice(current, -20);
+
+ while (!kthread_should_stop() || lo->lo_bio) {
+diff -Nurb linux-2.6.22-570/drivers/block/pktcdvd.c linux-2.6.22-591/drivers/block/pktcdvd.c
+--- linux-2.6.22-570/drivers/block/pktcdvd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/block/pktcdvd.c 2007-12-21 15:36:11.000000000 -0500
+@@ -146,8 +146,7 @@
+ **********************************************************/
+
+ #define DEF_ATTR(_obj,_name,_mode) \
+- static struct attribute _obj = { \
+- .name = _name, .owner = THIS_MODULE, .mode = _mode }
++ static struct attribute _obj = { .name = _name, .mode = _mode }
+
+ /**********************************************************
+ /sys/class/pktcdvd/pktcdvd[0-7]/
+@@ -1594,6 +1593,7 @@
+ long min_sleep_time, residue;
+
+ set_user_nice(current, -20);
++ set_freezable();
+
+ for (;;) {
+ DECLARE_WAITQUEUE(wait, current);
+diff -Nurb linux-2.6.22-570/drivers/char/apm-emulation.c linux-2.6.22-591/drivers/char/apm-emulation.c
+--- linux-2.6.22-570/drivers/char/apm-emulation.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/char/apm-emulation.c 2007-12-21 15:36:11.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/sched.h>
+ #include <linux/pm.h>
+ #include <linux/apm-emulation.h>
++#include <linux/freezer.h>
+ #include <linux/device.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
+@@ -329,13 +330,8 @@
+ /*
+ * Wait for the suspend/resume to complete. If there
+ * are pending acknowledges, we wait here for them.
+- *
+- * Note: we need to ensure that the PM subsystem does
+- * not kick us out of the wait when it suspends the
+- * threads.
+ */
+ flags = current->flags;
+- current->flags |= PF_NOFREEZE;
+
+ wait_event(apm_suspend_waitqueue,
+ as->suspend_state == SUSPEND_DONE);
+@@ -365,13 +361,8 @@
+ /*
+ * Wait for the suspend/resume to complete. If there
+ * are pending acknowledges, we wait here for them.
+- *
+- * Note: we need to ensure that the PM subsystem does
+- * not kick us out of the wait when it suspends the
+- * threads.
+ */
+ flags = current->flags;
+- current->flags |= PF_NOFREEZE;
+
+ wait_event_interruptible(apm_suspend_waitqueue,
+ as->suspend_state == SUSPEND_DONE);
+@@ -598,7 +589,6 @@
+ kapmd_tsk = NULL;
+ return ret;
+ }
+- kapmd_tsk->flags |= PF_NOFREEZE;
+ wake_up_process(kapmd_tsk);
+
+ #ifdef CONFIG_PROC_FS
+diff -Nurb linux-2.6.22-570/drivers/char/hvc_console.c linux-2.6.22-591/drivers/char/hvc_console.c
+--- linux-2.6.22-570/drivers/char/hvc_console.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/char/hvc_console.c 2007-12-21 15:36:11.000000000 -0500
+@@ -674,11 +674,12 @@
+ * calling hvc_poll() who determines whether a console adapter support
+ * interrupts.
+ */
+-int khvcd(void *unused)
++static int khvcd(void *unused)
+ {
+ int poll_mask;
+ struct hvc_struct *hp;
+
++ set_freezable();
+ __set_current_state(TASK_RUNNING);
+ do {
+ poll_mask = 0;
+diff -Nurb linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c
+--- linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c 2007-12-21 15:36:11.000000000 -0500
+@@ -2171,52 +2171,42 @@
+ int err;
+
+ bmc->device_id_attr.attr.name = "device_id";
+- bmc->device_id_attr.attr.owner = THIS_MODULE;
+ bmc->device_id_attr.attr.mode = S_IRUGO;
+ bmc->device_id_attr.show = device_id_show;
+
+ bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs";
+- bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE;
+ bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO;
+ bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show;
+
+ bmc->revision_attr.attr.name = "revision";
+- bmc->revision_attr.attr.owner = THIS_MODULE;
+ bmc->revision_attr.attr.mode = S_IRUGO;
+ bmc->revision_attr.show = revision_show;
+
+ bmc->firmware_rev_attr.attr.name = "firmware_revision";
+- bmc->firmware_rev_attr.attr.owner = THIS_MODULE;
+ bmc->firmware_rev_attr.attr.mode = S_IRUGO;
+ bmc->firmware_rev_attr.show = firmware_rev_show;
+
+ bmc->version_attr.attr.name = "ipmi_version";
+- bmc->version_attr.attr.owner = THIS_MODULE;
+ bmc->version_attr.attr.mode = S_IRUGO;
+ bmc->version_attr.show = ipmi_version_show;
+
+ bmc->add_dev_support_attr.attr.name = "additional_device_support";
+- bmc->add_dev_support_attr.attr.owner = THIS_MODULE;
+ bmc->add_dev_support_attr.attr.mode = S_IRUGO;
+ bmc->add_dev_support_attr.show = add_dev_support_show;
+
+ bmc->manufacturer_id_attr.attr.name = "manufacturer_id";
+- bmc->manufacturer_id_attr.attr.owner = THIS_MODULE;
+ bmc->manufacturer_id_attr.attr.mode = S_IRUGO;
+ bmc->manufacturer_id_attr.show = manufacturer_id_show;
+
+ bmc->product_id_attr.attr.name = "product_id";
+- bmc->product_id_attr.attr.owner = THIS_MODULE;
+ bmc->product_id_attr.attr.mode = S_IRUGO;
+ bmc->product_id_attr.show = product_id_show;
+
+ bmc->guid_attr.attr.name = "guid";
+- bmc->guid_attr.attr.owner = THIS_MODULE;
+ bmc->guid_attr.attr.mode = S_IRUGO;
+ bmc->guid_attr.show = guid_show;
+
+ bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision";
+- bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE;
+ bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO;
+ bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show;
+
+diff -Nurb linux-2.6.22-570/drivers/char/keyboard.c linux-2.6.22-591/drivers/char/keyboard.c
+--- linux-2.6.22-570/drivers/char/keyboard.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/char/keyboard.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1150,6 +1150,7 @@
+ sysrq_down = 0;
+ if (sysrq_down && down && !rep) {
+ handle_sysrq(kbd_sysrq_xlate[keycode], tty);
++ sysrq_down = 0; /* In case we miss the 'up' event. */
+ return;
+ }
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/connector/connector.c linux-2.6.22-591/drivers/connector/connector.c
+--- linux-2.6.22-570/drivers/connector/connector.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/connector/connector.c 2007-12-21 15:36:14.000000000 -0500
+@@ -446,7 +446,7 @@
+ dev->id.idx = cn_idx;
+ dev->id.val = cn_val;
+
+- dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
++ dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
+ CN_NETLINK_USERS + 0xf,
+ dev->input, NULL, THIS_MODULE);
+ if (!dev->nls)
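netlink_kernel_create() now takes the owning namespace as its first argument; kernel-side sockets that predate namespaces pass &init_net, as the connector hunk does. A minimal sketch (NETLINK_USERSOCK stands in for a real protocol number):

	#include <linux/module.h>
	#include <linux/netlink.h>
	#include <net/net_namespace.h>

	static struct sock *demo_nls;

	static int demo_nl_init(void)
	{
		demo_nls = netlink_kernel_create(&init_net,
						 NETLINK_USERSOCK, 0,
						 NULL, NULL, THIS_MODULE);
		return demo_nls ? 0 : -ENOMEM;
	}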
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c
+--- linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,8 +25,7 @@
+
+ #define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \
+ static struct freq_attr _attr_##_name = {\
+- .attr = {.name = __stringify(_name), .owner = THIS_MODULE, \
+- .mode = _mode, }, \
++ .attr = {.name = __stringify(_name), .mode = _mode, }, \
+ .show = _show,\
+ };
+
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c
+--- linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c 2007-12-21 15:36:11.000000000 -0500
+@@ -120,7 +120,7 @@
+
+ static struct freq_attr freq_attr_scaling_setspeed =
+ {
+- .attr = { .name = "scaling_setspeed", .mode = 0644, .owner = THIS_MODULE },
++ .attr = { .name = "scaling_setspeed", .mode = 0644 },
+ .show = show_speed,
+ .store = store_speed,
+ };
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/freq_table.c linux-2.6.22-591/drivers/cpufreq/freq_table.c
+--- linux-2.6.22-570/drivers/cpufreq/freq_table.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/cpufreq/freq_table.c 2007-12-21 15:36:11.000000000 -0500
+@@ -199,7 +199,6 @@
+ struct freq_attr cpufreq_freq_attr_scaling_available_freqs = {
+ .attr = { .name = "scaling_available_frequencies",
+ .mode = 0444,
+- .owner=THIS_MODULE
+ },
+ .show = show_available_freqs,
+ };
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/Kconfig linux-2.6.22-591/drivers/cpuidle/Kconfig
+--- linux-2.6.22-570/drivers/cpuidle/Kconfig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,39 @@
++menu "CPU idle PM support"
++
++config CPU_IDLE
++ bool "CPU idle PM support"
++ help
++ CPU idle is a generic framework for supporting software-controlled
++ idle processor power management. It includes modular cross-platform
++ governors that can be swapped during runtime.
++
++ If you're using a mobile platform that supports CPU idle PM (e.g.
++ an ACPI-capable notebook), you should say Y here.
++
++if CPU_IDLE
++
++comment "Governors"
++
++config CPU_IDLE_GOV_LADDER
++ tristate "'ladder' governor"
++ depends on CPU_IDLE
++ default y
++ help
++ This cpuidle governor promotes and demotes through the supported idle
++ states using residency time and bus master activity as metrics. This
++ algorithm was originally introduced in the old ACPI processor driver.
++
++config CPU_IDLE_GOV_MENU
++ tristate "'menu' governor"
++ depends on CPU_IDLE && NO_HZ
++ default y
++ help
++ This cpuidle governor evaluates all available states and chooses the
++ deepest state that meets all of the following constraints: BM activity,
++ expected time until next timer interrupt, and last break event time
++ delta. It is designed to minimize power consumption. Currently
++ dynticks is required.
++
++endif # CPU_IDLE
++
++endmenu
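The governor hooks visible in this series are select() (return the index of the next state), plus optional reflect() and scan(). A toy ladder-flavoured governor as a sketch; the .name field and registration details are assumptions, and the shared static index ignores SMP for brevity:

	#include <linux/cpuidle.h>

	static int toy_select(struct cpuidle_device *dev)
	{
		static int cur;	/* toy: one index shared by all CPUs */

		/* promote after long residencies, demote after short */
		if (dev->last_residency > 1000 &&
		    cur < dev->state_count - 1)
			cur++;
		else if (dev->last_residency < 100 && cur > 0)
			cur--;
		return cur;
	}

	static struct cpuidle_governor toy_governor = {
		.name   = "toy",
		.select = toy_select,
	};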
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/Makefile linux-2.6.22-591/drivers/cpuidle/Makefile
+--- linux-2.6.22-570/drivers/cpuidle/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,5 @@
++#
++# Makefile for cpuidle.
++#
++
++obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.c linux-2.6.22-591/drivers/cpuidle/cpuidle.c
+--- linux-2.6.22-570/drivers/cpuidle/cpuidle.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/cpuidle.c 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,306 @@
++/*
++ * cpuidle.c - core cpuidle infrastructure
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Shaohua Li <shaohua.li@intel.com>
++ * Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/mutex.h>
++#include <linux/sched.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <linux/latency.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
++EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices);
++
++DEFINE_MUTEX(cpuidle_lock);
++LIST_HEAD(cpuidle_detected_devices);
++static void (*pm_idle_old)(void);
++
++/**
++ * cpuidle_idle_call - the main idle loop
++ *
++ * NOTE: no locks or semaphores should be used here
++ */
++static void cpuidle_idle_call(void)
++{
++ struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
++ struct cpuidle_state *target_state;
++ int next_state;
++
++ /* check if the device is ready */
++ if (!dev || dev->status != CPUIDLE_STATUS_DOIDLE) {
++ if (pm_idle_old)
++ pm_idle_old();
++ else
++ local_irq_enable();
++ return;
++ }
++
++ /* ask the governor for the next state */
++ next_state = cpuidle_curr_governor->select(dev);
++ if (need_resched())
++ return;
++ target_state = &dev->states[next_state];
++
++ /* enter the state and update stats */
++ dev->last_residency = target_state->enter(dev, target_state);
++ dev->last_state = target_state;
++ target_state->time += dev->last_residency;
++ target_state->usage++;
++
++ /* give the governor an opportunity to reflect on the outcome */
++ if (cpuidle_curr_governor->reflect)
++ cpuidle_curr_governor->reflect(dev);
++}
++
++/**
++ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
++ */
++void cpuidle_install_idle_handler(void)
++{
++ if (pm_idle != cpuidle_idle_call) {
++ /* Make sure all changes are finished before we switch to the new idle handler */
++ smp_wmb();
++ pm_idle = cpuidle_idle_call;
++ }
++}
++
++/**
++ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
++ */
++void cpuidle_uninstall_idle_handler(void)
++{
++ if (pm_idle != pm_idle_old) {
++ pm_idle = pm_idle_old;
++ cpu_idle_wait();
++ }
++}
++
++/**
++ * cpuidle_rescan_device - prepares for a new state configuration
++ * @dev: the target device
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_rescan_device(struct cpuidle_device *dev)
++{
++ int i;
++
++ if (cpuidle_curr_governor->scan)
++ cpuidle_curr_governor->scan(dev);
++
++ for (i = 0; i < dev->state_count; i++) {
++ dev->states[i].usage = 0;
++ dev->states[i].time = 0;
++ }
++}
++
++/**
++ * cpuidle_add_device - attaches the driver to a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ */
++static int cpuidle_add_device(struct sys_device *sys_dev)
++{
++ int cpu = sys_dev->id;
++ struct cpuidle_device *dev;
++
++ dev = per_cpu(cpuidle_devices, cpu);
++
++ mutex_lock(&cpuidle_lock);
++ if (cpu_is_offline(cpu)) {
++ mutex_unlock(&cpuidle_lock);
++ return 0;
++ }
++
++ if (!dev) {
++ dev = kzalloc(sizeof(struct cpuidle_device), GFP_KERNEL);
++ if (!dev) {
++ mutex_unlock(&cpuidle_lock);
++ return -ENOMEM;
++ }
++ init_completion(&dev->kobj_unregister);
++ per_cpu(cpuidle_devices, cpu) = dev;
++ }
++ dev->cpu = cpu;
++
++ if (dev->status & CPUIDLE_STATUS_DETECTED) {
++ mutex_unlock(&cpuidle_lock);
++ return 0;
++ }
++
++ cpuidle_add_sysfs(sys_dev);
++
++ if (cpuidle_curr_driver) {
++ if (cpuidle_attach_driver(dev))
++ goto err_ret;
++ }
++
++ if (cpuidle_curr_governor) {
++ if (cpuidle_attach_governor(dev)) {
++ cpuidle_detach_driver(dev);
++ goto err_ret;
++ }
++ }
++
++ if (cpuidle_device_can_idle(dev))
++ cpuidle_install_idle_handler();
++
++ list_add(&dev->device_list, &cpuidle_detected_devices);
++ dev->status |= CPUIDLE_STATUS_DETECTED;
++
++err_ret:
++ mutex_unlock(&cpuidle_lock);
++
++ return 0;
++}
++
++/**
++ * __cpuidle_remove_device - detaches the driver from a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static int __cpuidle_remove_device(struct sys_device *sys_dev)
++{
++ struct cpuidle_device *dev;
++
++ dev = per_cpu(cpuidle_devices, sys_dev->id);
++
++ if (!(dev->status & CPUIDLE_STATUS_DETECTED)) {
++ return 0;
++ }
++ dev->status &= ~CPUIDLE_STATUS_DETECTED;
++ /* NOTE: we don't wait because the cpu is already offline */
++ if (cpuidle_curr_governor)
++ cpuidle_detach_governor(dev);
++ if (cpuidle_curr_driver)
++ cpuidle_detach_driver(dev);
++ cpuidle_remove_sysfs(sys_dev);
++ list_del(&dev->device_list);
++ wait_for_completion(&dev->kobj_unregister);
++ per_cpu(cpuidle_devices, sys_dev->id) = NULL;
++ kfree(dev);
++
++ return 0;
++}
++
++/**
++ * cpuidle_remove_device - detaches the driver from a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ */
++static int cpuidle_remove_device(struct sys_device *sys_dev)
++{
++ int ret;
++ mutex_lock(&cpuidle_lock);
++ ret = __cpuidle_remove_device(sys_dev);
++ mutex_unlock(&cpuidle_lock);
++
++ return ret;
++}
++
++static struct sysdev_driver cpuidle_sysdev_driver = {
++ .add = cpuidle_add_device,
++ .remove = cpuidle_remove_device,
++};
++
++static int cpuidle_cpu_callback(struct notifier_block *nfb,
++ unsigned long action, void *hcpu)
++{
++ struct sys_device *sys_dev;
++
++ sys_dev = get_cpu_sysdev((unsigned long)hcpu);
++
++ switch (action) {
++ case CPU_ONLINE:
++ cpuidle_add_device(sys_dev);
++ break;
++ case CPU_DOWN_PREPARE:
++ mutex_lock(&cpuidle_lock);
++ break;
++ case CPU_DEAD:
++ __cpuidle_remove_device(sys_dev);
++ mutex_unlock(&cpuidle_lock);
++ break;
++ case CPU_DOWN_FAILED:
++ mutex_unlock(&cpuidle_lock);
++ break;
++ }
++
++ return NOTIFY_OK;
++}
++
++static struct notifier_block __cpuinitdata cpuidle_cpu_notifier =
++{
++ .notifier_call = cpuidle_cpu_callback,
++};
++
++#ifdef CONFIG_SMP
++
++static void smp_callback(void *v)
++{
++ /* we already woke the CPU up, nothing more to do */
++}
++
++/*
++ * This function gets called when a part of the kernel has a new latency
++ * requirement. This means we need to get all processors out of their C-state,
++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
++ * wakes them all right up.
++ */
++static int cpuidle_latency_notify(struct notifier_block *b,
++ unsigned long l, void *v)
++{
++ smp_call_function(smp_callback, NULL, 0, 1);
++ return NOTIFY_OK;
++}
++
++static struct notifier_block cpuidle_latency_notifier = {
++ .notifier_call = cpuidle_latency_notify,
++};
++
++#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
++
++#else /* CONFIG_SMP */
++
++#define latency_notifier_init(x) do { } while (0)
++
++#endif /* CONFIG_SMP */
++
++/**
++ * cpuidle_init - core initializer
++ */
++static int __init cpuidle_init(void)
++{
++ int ret;
++
++ pm_idle_old = pm_idle;
++
++ ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
++ if (ret)
++ return ret;
++
++ register_hotcpu_notifier(&cpuidle_cpu_notifier);
++
++ ret = sysdev_driver_register(&cpu_sysdev_class, &cpuidle_sysdev_driver);
++
++ if (ret) {
++ cpuidle_remove_class_sysfs(&cpu_sysdev_class);
++ printk(KERN_ERR "cpuidle: failed to initialize\n");
++ return ret;
++ }
++
++ latency_notifier_init(&cpuidle_latency_notifier);
++
++ return 0;
++}
++
++core_initcall(cpuidle_init);
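For context, the pm_idle hook this file installs is sampled by the architecture idle loop on every iteration, which is why installation only needs an smp_wmb() and removal needs cpu_idle_wait() to flush every CPU out of a stale handler. Roughly (x86 of this era; a sketch, not verbatim):

	extern void (*pm_idle)(void);
	extern void default_idle(void);

	static void cpu_idle_sketch(void)
	{
		while (1) {
			void (*idle)(void) = pm_idle;

			if (!idle)
				idle = default_idle;
			idle();
		}
	}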
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.h linux-2.6.22-591/drivers/cpuidle/cpuidle.h
+--- linux-2.6.22-570/drivers/cpuidle/cpuidle.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/cpuidle.h 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,50 @@
++/*
++ * cpuidle.h - The internal header file
++ */
++
++#ifndef __DRIVER_CPUIDLE_H
++#define __DRIVER_CPUIDLE_H
++
++#include <linux/sysdev.h>
++
++/* For internal use only */
++extern struct cpuidle_governor *cpuidle_curr_governor;
++extern struct cpuidle_driver *cpuidle_curr_driver;
++extern struct list_head cpuidle_drivers;
++extern struct list_head cpuidle_governors;
++extern struct list_head cpuidle_detected_devices;
++extern struct mutex cpuidle_lock;
++
++/* idle loop */
++extern void cpuidle_install_idle_handler(void);
++extern void cpuidle_uninstall_idle_handler(void);
++extern void cpuidle_rescan_device(struct cpuidle_device *dev);
++
++/* drivers */
++extern int cpuidle_attach_driver(struct cpuidle_device *dev);
++extern void cpuidle_detach_driver(struct cpuidle_device *dev);
++extern int cpuidle_switch_driver(struct cpuidle_driver *drv);
++
++/* governors */
++extern int cpuidle_attach_governor(struct cpuidle_device *dev);
++extern void cpuidle_detach_governor(struct cpuidle_device *dev);
++extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
++
++/* sysfs */
++extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
++extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
++extern int cpuidle_add_driver_sysfs(struct cpuidle_device *device);
++extern void cpuidle_remove_driver_sysfs(struct cpuidle_device *device);
++extern int cpuidle_add_sysfs(struct sys_device *sysdev);
++extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
++
++/**
++ * cpuidle_device_can_idle - determines if a CPU can utilize the idle loop
++ * @dev: the target CPU
++ */
++static inline int cpuidle_device_can_idle(struct cpuidle_device *dev)
++{
++ return (dev->status == CPUIDLE_STATUS_DOIDLE);
++}
++
++#endif /* __DRIVER_CPUIDLE_H */
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/driver.c linux-2.6.22-591/drivers/cpuidle/driver.c
+--- linux-2.6.22-570/drivers/cpuidle/driver.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/driver.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,276 @@
++/*
++ * driver.c - driver support
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Shaohua Li <shaohua.li@intel.com>
++ * Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++LIST_HEAD(cpuidle_drivers);
++struct cpuidle_driver *cpuidle_curr_driver;
++
++
++/**
++ * cpuidle_attach_driver - attaches a driver to a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_attach_driver(struct cpuidle_device *dev)
++{
++ int ret;
++
++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
++ return -EIO;
++
++ if (!try_module_get(cpuidle_curr_driver->owner))
++ return -EINVAL;
++
++ ret = cpuidle_curr_driver->init(dev);
++ if (ret) {
++ module_put(cpuidle_curr_driver->owner);
++ printk(KERN_INFO "cpuidle: driver %s failed to attach to "
++ "cpu %d\n", cpuidle_curr_driver->name, dev->cpu);
++ } else {
++ if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++ cpuidle_rescan_device(dev);
++ smp_wmb();
++ dev->status |= CPUIDLE_STATUS_DRIVER_ATTACHED;
++ cpuidle_add_driver_sysfs(dev);
++ }
++
++ return ret;
++}
++
++/**
++ * cpuidle_detach_driver - detaches a driver from a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_detach_driver(struct cpuidle_device *dev)
++{
++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) {
++ cpuidle_remove_driver_sysfs(dev);
++ dev->status &= ~CPUIDLE_STATUS_DRIVER_ATTACHED;
++ if (cpuidle_curr_driver->exit)
++ cpuidle_curr_driver->exit(dev);
++ module_put(cpuidle_curr_driver->owner);
++ }
++}
++
++/**
++ * __cpuidle_find_driver - finds a driver of the specified name
++ * @str: the name
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static struct cpuidle_driver * __cpuidle_find_driver(const char *str)
++{
++ struct cpuidle_driver *drv;
++
++ list_for_each_entry(drv, &cpuidle_drivers, driver_list)
++ if (!strnicmp(str, drv->name, CPUIDLE_NAME_LEN))
++ return drv;
++
++ return NULL;
++}
++
++/**
++ * cpuidle_switch_driver - changes the driver
++ * @drv: the new target driver
++ *
++ * NOTE: "drv" can be NULL to specify disabled
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_switch_driver(struct cpuidle_driver *drv)
++{
++ struct cpuidle_device *dev;
++
++ if (drv == cpuidle_curr_driver)
++ return -EINVAL;
++
++ cpuidle_uninstall_idle_handler();
++
++ if (cpuidle_curr_driver)
++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++ cpuidle_detach_driver(dev);
++
++ cpuidle_curr_driver = drv;
++
++ if (drv) {
++ int ret = 1;
++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++ if (cpuidle_attach_driver(dev) == 0)
++ ret = 0;
++
++ /* If attach fails on all devices, fall back to the NULL driver */
++ if (ret)
++ cpuidle_curr_driver = NULL;
++
++ if (cpuidle_curr_driver && cpuidle_curr_governor) {
++ printk(KERN_INFO "cpuidle: using driver %s\n",
++ drv->name);
++ cpuidle_install_idle_handler();
++ }
++ }
++
++ return 0;
++}
++
++/**
++ * cpuidle_register_driver - registers a driver
++ * @drv: the driver
++ */
++int cpuidle_register_driver(struct cpuidle_driver *drv)
++{
++ int ret = -EEXIST;
++
++ if (!drv || !drv->init)
++ return -EINVAL;
++
++ mutex_lock(&cpuidle_lock);
++ if (__cpuidle_find_driver(drv->name) == NULL) {
++ ret = 0;
++ list_add_tail(&drv->driver_list, &cpuidle_drivers);
++ if (!cpuidle_curr_driver)
++ cpuidle_switch_driver(drv);
++ }
++ mutex_unlock(&cpuidle_lock);
++
++ return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_register_driver);
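++
++/*
++ * Registration sketch for a hypothetical platform driver (names are
++ * invented for illustration; only .init is mandatory, as checked above,
++ * and it is expected to fill in dev->states[] and dev->state_count):
++ *
++ *	static struct cpuidle_driver my_idle_driver = {
++ *		.name	= "my_idle",
++ *		.init	= my_idle_init,
++ *		.owner	= THIS_MODULE,
++ *	};
++ *
++ *	ret = cpuidle_register_driver(&my_idle_driver);
++ *
++ * Returns -EINVAL on bad arguments and -EEXIST if the name is already
++ * registered.
++ */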
++
++/**
++ * cpuidle_unregister_driver - unregisters a driver
++ * @drv: the driver
++ */
++void cpuidle_unregister_driver(struct cpuidle_driver *drv)
++{
++ if (!drv)
++ return;
++
++ mutex_lock(&cpuidle_lock);
++ if (drv == cpuidle_curr_driver)
++ cpuidle_switch_driver(NULL);
++ list_del(&drv->driver_list);
++ mutex_unlock(&cpuidle_lock);
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
++
++static void __cpuidle_force_redetect(struct cpuidle_device *dev)
++{
++ cpuidle_remove_driver_sysfs(dev);
++ cpuidle_curr_driver->redetect(dev);
++ cpuidle_add_driver_sysfs(dev);
++}
++
++/**
++ * cpuidle_force_redetect - redetects the idle states of a CPU
++ *
++ * @dev: the CPU to redetect
++ * @drv: the target driver
++ *
++ * Generally, the driver will call this when the supported states set has
++ * changed. (e.g. as the result of an ACPI transition to battery power)
++ */
++int cpuidle_force_redetect(struct cpuidle_device *dev,
++ struct cpuidle_driver *drv)
++{
++ int uninstalled = 0;
++
++ mutex_lock(&cpuidle_lock);
++
++ if (drv != cpuidle_curr_driver) {
++ mutex_unlock(&cpuidle_lock);
++ return 0;
++ }
++
++ if (!(dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) ||
++ !cpuidle_curr_driver->redetect) {
++ mutex_unlock(&cpuidle_lock);
++ return -EIO;
++ }
++
++ if (cpuidle_device_can_idle(dev)) {
++ uninstalled = 1;
++ cpuidle_uninstall_idle_handler();
++ }
++
++ __cpuidle_force_redetect(dev);
++
++ if (cpuidle_device_can_idle(dev)) {
++ cpuidle_rescan_device(dev);
++ cpuidle_install_idle_handler();
++ }
++
++ /* other devices are still ok */
++ if (uninstalled)
++ cpuidle_install_idle_handler();
++
++ mutex_unlock(&cpuidle_lock);
++
++ return 0;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_force_redetect);
++
++/**
++ * cpuidle_force_redetect_devices - redetects the idle states of all CPUs
++ *
++ * @drv: the target driver
++ *
++ * Generally, the driver will call this when the supported states set has
++ * changed. (e.g. as the result of an ACPI transition to battery power)
++ */
++int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
++{
++ struct cpuidle_device *dev;
++ int ret = 0;
++
++ mutex_lock(&cpuidle_lock);
++
++ if (drv != cpuidle_curr_driver)
++ goto out;
++
++ if (!cpuidle_curr_driver->redetect) {
++ ret = -EIO;
++ goto out;
++ }
++
++ cpuidle_uninstall_idle_handler();
++
++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++ __cpuidle_force_redetect(dev);
++
++ cpuidle_install_idle_handler();
++out:
++ mutex_unlock(&cpuidle_lock);
++ return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_force_redetect_devices);
++
++/**
++ * cpuidle_get_bm_activity - determines if BM activity has occurred
++ */
++int cpuidle_get_bm_activity(void)
++{
++ if (cpuidle_curr_driver->bm_check)
++ return cpuidle_curr_driver->bm_check();
++ else
++ return 0;
++}
++EXPORT_SYMBOL_GPL(cpuidle_get_bm_activity);
++
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governor.c linux-2.6.22-591/drivers/cpuidle/governor.c
+--- linux-2.6.22-570/drivers/cpuidle/governor.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/governor.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,160 @@
++/*
++ * governor.c - governor support
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Shaohua Li <shaohua.li@intel.com>
++ * Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++LIST_HEAD(cpuidle_governors);
++struct cpuidle_governor *cpuidle_curr_governor;
++
++
++/**
++ * cpuidle_attach_governor - attaches a governor to a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_attach_governor(struct cpuidle_device *dev)
++{
++ int ret = 0;
++
++ if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++ return -EIO;
++
++ if (!try_module_get(cpuidle_curr_governor->owner))
++ return -EINVAL;
++
++ if (cpuidle_curr_governor->init)
++ ret = cpuidle_curr_governor->init(dev);
++ if (ret) {
++ module_put(cpuidle_curr_governor->owner);
++ printk(KERN_ERR "cpuidle: governor %s failed to attach to cpu %d\n",
++ cpuidle_curr_governor->name, dev->cpu);
++ } else {
++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
++ cpuidle_rescan_device(dev);
++ smp_wmb();
++ dev->status |= CPUIDLE_STATUS_GOVERNOR_ATTACHED;
++ }
++
++ return ret;
++}
++
++/**
++ * cpuidle_detach_governor - detaches a governor from a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_detach_governor(struct cpuidle_device *dev)
++{
++ if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) {
++ dev->status &= ~CPUIDLE_STATUS_GOVERNOR_ATTACHED;
++ if (cpuidle_curr_governor->exit)
++ cpuidle_curr_governor->exit(dev);
++ module_put(cpuidle_curr_governor->owner);
++ }
++}
++
++/**
++ * __cpuidle_find_governor - finds a governor of the specified name
++ * @str: the name
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
++{
++ struct cpuidle_governor *gov;
++
++ list_for_each_entry(gov, &cpuidle_governors, governor_list)
++ if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
++ return gov;
++
++ return NULL;
++}
++
++/**
++ * cpuidle_switch_governor - changes the governor
++ * @gov: the new target governor
++ *
++ * NOTE: "gov" can be NULL to specify disabled
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_switch_governor(struct cpuidle_governor *gov)
++{
++ struct cpuidle_device *dev;
++
++ if (gov == cpuidle_curr_governor)
++ return -EINVAL;
++
++ cpuidle_uninstall_idle_handler();
++
++ if (cpuidle_curr_governor)
++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++ cpuidle_detach_governor(dev);
++
++ cpuidle_curr_governor = gov;
++
++ if (gov) {
++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++ cpuidle_attach_governor(dev);
++ if (cpuidle_curr_driver)
++ cpuidle_install_idle_handler();
++ printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
++ }
++
++ return 0;
++}
++
++/**
++ * cpuidle_register_governor - registers a governor
++ * @gov: the governor
++ */
++int cpuidle_register_governor(struct cpuidle_governor *gov)
++{
++ int ret = -EEXIST;
++
++ if (!gov || !gov->select)
++ return -EINVAL;
++
++ mutex_lock(&cpuidle_lock);
++ if (__cpuidle_find_governor(gov->name) == NULL) {
++ ret = 0;
++ list_add_tail(&gov->governor_list, &cpuidle_governors);
++ if (!cpuidle_curr_governor)
++ cpuidle_switch_governor(gov);
++ }
++ mutex_unlock(&cpuidle_lock);
++
++ return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_register_governor);
++
++/**
++ * cpuidle_unregister_governor - unregisters a governor
++ * @gov: the governor
++ */
++void cpuidle_unregister_governor(struct cpuidle_governor *gov)
++{
++ if (!gov)
++ return;
++
++ mutex_lock(&cpuidle_lock);
++ if (gov == cpuidle_curr_governor)
++ cpuidle_switch_governor(NULL);
++ list_del(&gov->governor_list);
++ mutex_unlock(&cpuidle_lock);
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_unregister_governor);
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/Makefile linux-2.6.22-591/drivers/cpuidle/governors/Makefile
+--- linux-2.6.22-570/drivers/cpuidle/governors/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/governors/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,6 @@
++#
++# Makefile for cpuidle governors.
++#
++
++obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
++obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/ladder.c linux-2.6.22-591/drivers/cpuidle/governors/ladder.c
+--- linux-2.6.22-570/drivers/cpuidle/governors/ladder.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/governors/ladder.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,227 @@
++/*
++ * ladder.c - the residency ladder algorithm
++ *
++ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
++ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
++ * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Shaohua Li <shaohua.li@intel.com>
++ * Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/latency.h>
++#include <linux/moduleparam.h>
++#include <linux/jiffies.h>
++
++#include <asm/io.h>
++#include <asm/uaccess.h>
++
++#define PROMOTION_COUNT 4
++#define DEMOTION_COUNT 1
++
++/*
++ * bm_history -- bit-mask with a bit per jiffy of bus-master activity
++ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
++ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
++ * 100 HZ: 0x0000000F: 4 jiffies = 40ms
++ * reduce history for more aggressive entry into C3
++ */
++static unsigned int bm_history __read_mostly =
++ (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
++module_param(bm_history, uint, 0644);
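++/*
++ * e.g. at HZ=250 (not listed above): (1U << (250 / 25)) - 1 = 0x3FF,
++ * i.e. 10 jiffies = 40ms of tracked bus-master history.
++ */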
++
++struct ladder_device_state {
++ struct {
++ u32 promotion_count;
++ u32 demotion_count;
++ u32 promotion_time;
++ u32 demotion_time;
++ u32 bm;
++ } threshold;
++ struct {
++ int promotion_count;
++ int demotion_count;
++ } stats;
++};
++
++struct ladder_device {
++ struct ladder_device_state states[CPUIDLE_STATE_MAX];
++ unsigned int bm_check:1;
++ unsigned long bm_check_timestamp;
++ unsigned long bm_activity; /* FIXME: bm activity should be global */
++ int last_state_idx;
++};
++
++/**
++ * ladder_do_selection - prepares private data for a state change
++ * @ldev: the ladder device
++ * @old_idx: the current state index
++ * @new_idx: the new target state index
++ */
++static inline void ladder_do_selection(struct ladder_device *ldev,
++ int old_idx, int new_idx)
++{
++ ldev->states[old_idx].stats.promotion_count = 0;
++ ldev->states[old_idx].stats.demotion_count = 0;
++ ldev->last_state_idx = new_idx;
++}
++
++/**
++ * ladder_select_state - selects the next state to enter
++ * @dev: the CPU
++ */
++static int ladder_select_state(struct cpuidle_device *dev)
++{
++ struct ladder_device *ldev = dev->governor_data;
++ struct ladder_device_state *last_state;
++ int last_residency, last_idx;
++
++ if (unlikely(!ldev))
++ return 0;
++
++ last_idx = ldev->last_state_idx;
++ last_state = &ldev->states[last_idx];
++
++ /* demote if within BM threshold */
++ if (ldev->bm_check) {
++ unsigned long diff;
++
++ diff = jiffies - ldev->bm_check_timestamp;
++ if (diff > 31)
++ diff = 31;
++
++ ldev->bm_activity <<= diff;
++ if (cpuidle_get_bm_activity())
++ ldev->bm_activity |= ((1 << diff) - 1);
++
++ ldev->bm_check_timestamp = jiffies;
++ if ((last_idx > 0) &&
++ (last_state->threshold.bm & ldev->bm_activity)) {
++ ladder_do_selection(ldev, last_idx, last_idx - 1);
++ return last_idx - 1;
++ }
++ }
++
++ if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
++ last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
++ else
++ last_residency = last_state->threshold.promotion_time + 1;
++
++ /* consider promotion */
++ if (last_idx < dev->state_count - 1 &&
++ last_residency > last_state->threshold.promotion_time &&
++ dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
++ last_state->stats.promotion_count++;
++ last_state->stats.demotion_count = 0;
++ if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
++ ladder_do_selection(ldev, last_idx, last_idx + 1);
++ return last_idx + 1;
++ }
++ }
++
++ /* consider demotion */
++ if (last_idx > 0 &&
++ last_residency < last_state->threshold.demotion_time) {
++ last_state->stats.demotion_count++;
++ last_state->stats.promotion_count = 0;
++ if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
++ ladder_do_selection(ldev, last_idx, last_idx - 1);
++ return last_idx - 1;
++ }
++ }
++
++ /* otherwise remain at the current state */
++ return last_idx;
++}
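++
++/*
++ * Worked example of the ladder walk above, using the default thresholds
++ * (PROMOTION_COUNT == 4, DEMOTION_COUNT == 1): four consecutive wakeups
++ * whose measured residency exceeds promotion_time climb one rung deeper,
++ * while a single residency below demotion_time immediately drops one
++ * rung back. Bus-master activity matching threshold.bm also forces a
++ * demotion, regardless of residency.
++ */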
++
++/**
++ * ladder_scan_device - scans a CPU's states and does setup
++ * @dev: the CPU
++ */
++static void ladder_scan_device(struct cpuidle_device *dev)
++{
++ int i, bm_check = 0;
++ struct ladder_device *ldev = dev->governor_data;
++ struct ladder_device_state *lstate;
++ struct cpuidle_state *state;
++
++ ldev->last_state_idx = 0;
++ ldev->bm_check_timestamp = 0;
++ ldev->bm_activity = 0;
++
++ for (i = 0; i < dev->state_count; i++) {
++ state = &dev->states[i];
++ lstate = &ldev->states[i];
++
++ lstate->stats.promotion_count = 0;
++ lstate->stats.demotion_count = 0;
++
++ lstate->threshold.promotion_count = PROMOTION_COUNT;
++ lstate->threshold.demotion_count = DEMOTION_COUNT;
++
++ if (i < dev->state_count - 1)
++ lstate->threshold.promotion_time = state->exit_latency;
++ if (i > 0)
++ lstate->threshold.demotion_time = state->exit_latency;
++ if (state->flags & CPUIDLE_FLAG_CHECK_BM) {
++ lstate->threshold.bm = bm_history;
++ bm_check = 1;
++ } else
++ lstate->threshold.bm = 0;
++ }
++
++ ldev->bm_check = bm_check;
++}
++
++/**
++ * ladder_init_device - initializes a per-CPU instance
++ * @dev: the CPU
++ */
++static int ladder_init_device(struct cpuidle_device *dev)
++{
++ dev->governor_data = kmalloc(sizeof(struct ladder_device), GFP_KERNEL);
++
++ return dev->governor_data ? 0 : -ENOMEM;
++}
++
++/**
++ * ladder_exit_device - tears down a per-CPU instance
++ * @dev: the CPU
++ */
++static void ladder_exit_device(struct cpuidle_device *dev)
++{
++ kfree(dev->governor_data);
++}
++
++static struct cpuidle_governor ladder_governor = {
++ .name = "ladder",
++ .init = ladder_init_device,
++ .exit = ladder_exit_device,
++ .scan = ladder_scan_device,
++ .select = ladder_select_state,
++ .owner = THIS_MODULE,
++};
++
++/**
++ * init_ladder - initializes the governor
++ */
++static int __init init_ladder(void)
++{
++ return cpuidle_register_governor(&ladder_governor);
++}
++
++/**
++ * exit_ladder - exits the governor
++ */
++static void __exit exit_ladder(void)
++{
++ cpuidle_unregister_governor(&ladder_governor);
++}
++
++MODULE_LICENSE("GPL");
++module_init(init_ladder);
++module_exit(exit_ladder);
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/menu.c linux-2.6.22-591/drivers/cpuidle/governors/menu.c
+--- linux-2.6.22-570/drivers/cpuidle/governors/menu.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/governors/menu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,152 @@
++/*
++ * menu.c - the menu idle governor
++ *
++ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/latency.h>
++#include <linux/time.h>
++#include <linux/ktime.h>
++#include <linux/tick.h>
++#include <linux/hrtimer.h>
++
++#define BM_HOLDOFF 20000 /* 20 ms */
++
++struct menu_device {
++ int last_state_idx;
++ int deepest_bm_state;
++
++ int break_last_us;
++ int break_elapsed_us;
++
++ int bm_elapsed_us;
++ int bm_holdoff_us;
++
++ unsigned long idle_jiffies;
++};
++
++static DEFINE_PER_CPU(struct menu_device, menu_devices);
++
++/**
++ * menu_select - selects the next idle state to enter
++ * @dev: the CPU
++ */
++static int menu_select(struct cpuidle_device *dev)
++{
++ struct menu_device *data = &__get_cpu_var(menu_devices);
++ int i, expected_us, max_state = dev->state_count;
++
++ /* discard BM history because it is sticky */
++ cpuidle_get_bm_activity();
++
++ /* determine the expected residency time */
++ expected_us = (s32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
++ expected_us = min(expected_us, data->break_last_us);
++
++ /* determine the maximum state compatible with current BM status */
++ if (cpuidle_get_bm_activity())
++ data->bm_elapsed_us = 0;
++ if (data->bm_elapsed_us <= data->bm_holdoff_us)
++ max_state = data->deepest_bm_state + 1;
++
++ /* find the deepest idle state that satisfies our constraints */
++ for (i = 1; i < max_state; i++) {
++ struct cpuidle_state *s = &dev->states[i];
++ if (s->target_residency > expected_us)
++ break;
++ if (s->exit_latency > system_latency_constraint())
++ break;
++ }
++
++ data->last_state_idx = i - 1;
++ data->idle_jiffies = tick_nohz_get_idle_jiffies();
++ return i - 1;
++}
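++
++/*
++ * Example of the selection above: if the nohz code predicts 300us until
++ * the next timer but the last observed non-timer break interval was
++ * 150us, expected_us becomes 150us, so a state with a target_residency
++ * of 200us is rejected even when its exit_latency would satisfy
++ * system_latency_constraint().
++ */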
++
++/**
++ * menu_reflect - attempts to guess what happened after entry
++ * @dev: the CPU
++ *
++ * NOTE: it's important to be fast here because this operation will add to
++ * the overall exit latency.
++ */
++static void menu_reflect(struct cpuidle_device *dev)
++{
++ struct menu_device *data = &__get_cpu_var(menu_devices);
++ int last_idx = data->last_state_idx;
++ int measured_us = cpuidle_get_last_residency(dev);
++ struct cpuidle_state *target = &dev->states[last_idx];
++
++ /*
++ * Ugh, this idle state doesn't support residency measurements, so we
++ * are basically lost in the dark. As a compromise, assume we slept
++ * for one full standard timer tick. However, be aware that this
++ * could potentially result in a suboptimal state transition.
++ */
++ if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
++ measured_us = USEC_PER_SEC / HZ;
++
++ data->bm_elapsed_us += measured_us;
++ data->break_elapsed_us += measured_us;
++
++ /*
++ * Did something other than the timer interrupt cause the break event?
++ * If the idle jiffies count is unchanged, no tick fired while we were
++ * idle, so the wakeup came from some other interrupt; record the
++ * accumulated interval as the new break-event prediction.
++ */
++ if (tick_nohz_get_idle_jiffies() == data->idle_jiffies) {
++ data->break_last_us = data->break_elapsed_us;
++ data->break_elapsed_us = 0;
++ }
++}
++
++/**
++ * menu_scan_device - scans a CPU's states and does setup
++ * @dev: the CPU
++ */
++static void menu_scan_device(struct cpuidle_device *dev)
++{
++ struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
++ int i;
++
++ data->last_state_idx = 0;
++ data->break_last_us = 0;
++ data->break_elapsed_us = 0;
++ data->bm_elapsed_us = 0;
++ data->bm_holdoff_us = BM_HOLDOFF;
++
++ for (i = 1; i < dev->state_count; i++)
++ if (dev->states[i].flags & CPUIDLE_FLAG_CHECK_BM)
++ break;
++ data->deepest_bm_state = i - 1;
++}
++
++struct cpuidle_governor menu_governor = {
++ .name = "menu",
++ .scan = menu_scan_device,
++ .select = menu_select,
++ .reflect = menu_reflect,
++ .owner = THIS_MODULE,
++};
++
++/**
++ * init_menu - initializes the governor
++ */
++static int __init init_menu(void)
++{
++ return cpuidle_register_governor(&menu_governor);
++}
++
++/**
++ * exit_menu - exits the governor
++ */
++static void __exit exit_menu(void)
++{
++ cpuidle_unregister_governor(&menu_governor);
++}
++
++MODULE_LICENSE("GPL");
++module_init(init_menu);
++module_exit(exit_menu);
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/sysfs.c linux-2.6.22-591/drivers/cpuidle/sysfs.c
+--- linux-2.6.22-570/drivers/cpuidle/sysfs.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/cpuidle/sysfs.c 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,393 @@
++/*
++ * sysfs.c - sysfs support
++ *
++ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/sysfs.h>
++#include <linux/cpu.h>
++
++#include "cpuidle.h"
++
++static unsigned int sysfs_switch;
++static int __init cpuidle_sysfs_setup(char *unused)
++{
++ sysfs_switch = 1;
++ return 1;
++}
++__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
++
++static ssize_t show_available_drivers(struct sys_device *dev, char *buf)
++{
++ ssize_t i = 0;
++ struct cpuidle_driver *tmp;
++
++ mutex_lock(&cpuidle_lock);
++ list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
++ if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
++ goto out;
++ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
++ }
++out:
++ i += sprintf(&buf[i], "\n");
++ mutex_unlock(&cpuidle_lock);
++ return i;
++}
++
++static ssize_t show_available_governors(struct sys_device *dev, char *buf)
++{
++ ssize_t i = 0;
++ struct cpuidle_governor *tmp;
++
++ mutex_lock(&cpuidle_lock);
++ list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
++ if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
++ goto out;
++ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
++ }
++ if (list_empty(&cpuidle_governors))
++ i += sprintf(&buf[i], "no governors");
++out:
++ i += sprintf(&buf[i], "\n");
++ mutex_unlock(&cpuidle_lock);
++ return i;
++}
++
++static ssize_t show_current_driver(struct sys_device *dev, char *buf)
++{
++ ssize_t ret;
++
++ mutex_lock(&cpuidle_lock);
++ ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name);
++ mutex_unlock(&cpuidle_lock);
++ return ret;
++}
++
++static ssize_t store_current_driver(struct sys_device *dev,
++ const char *buf, size_t count)
++{
++ char str[CPUIDLE_NAME_LEN];
++ int len = count;
++ struct cpuidle_driver *tmp, *found = NULL;
++
++ if (len > CPUIDLE_NAME_LEN)
++ len = CPUIDLE_NAME_LEN;
++
++ if (sscanf(buf, "%s", str) != 1)
++ return -EINVAL;
++
++ mutex_lock(&cpuidle_lock);
++ list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
++ if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
++ found = tmp;
++ break;
++ }
++ }
++ if (found)
++ cpuidle_switch_driver(found);
++ mutex_unlock(&cpuidle_lock);
++
++ return count;
++}
++
++static ssize_t show_current_governor(struct sys_device *dev, char *buf)
++{
++ ssize_t i;
++
++ mutex_lock(&cpuidle_lock);
++ if (cpuidle_curr_governor)
++ i = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
++ else
++ i = sprintf(buf, "no governor\n");
++ mutex_unlock(&cpuidle_lock);
++
++ return i;
++}
++
++static ssize_t store_current_governor(struct sys_device *dev,
++ const char *buf, size_t count)
++{
++ char str[CPUIDLE_NAME_LEN];
++ int len = count;
++ struct cpuidle_governor *tmp, *found = NULL;
++
++ if (len > CPUIDLE_NAME_LEN)
++ len = CPUIDLE_NAME_LEN;
++
++ if (sscanf(buf, "%s", str) != 1)
++ return -EINVAL;
++
++ mutex_lock(&cpuidle_lock);
++ list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
++ if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
++ found = tmp;
++ break;
++ }
++ }
++ if (found)
++ cpuidle_switch_governor(found);
++ mutex_unlock(&cpuidle_lock);
++
++ return count;
++}
++
++static SYSDEV_ATTR(current_driver_ro, 0444, show_current_driver, NULL);
++static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
++
++static struct attribute *cpuclass_default_attrs[] = {
++ &attr_current_driver_ro.attr,
++ &attr_current_governor_ro.attr,
++ NULL
++};
++
++static SYSDEV_ATTR(available_drivers, 0444, show_available_drivers, NULL);
++static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL);
++static SYSDEV_ATTR(current_driver, 0644, show_current_driver,
++ store_current_driver);
++static SYSDEV_ATTR(current_governor, 0644, show_current_governor,
++ store_current_governor);
++
++static struct attribute *cpuclass_switch_attrs[] = {
++ &attr_available_drivers.attr,
++ &attr_available_governors.attr,
++ &attr_current_driver.attr,
++ &attr_current_governor.attr,
++ NULL
++};
++
++static struct attribute_group cpuclass_attr_group = {
++ .attrs = cpuclass_default_attrs,
++ .name = "cpuidle",
++};
++
++/**
++ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
++ */
++int cpuidle_add_class_sysfs(struct sysdev_class *cls)
++{
++ if (sysfs_switch)
++ cpuclass_attr_group.attrs = cpuclass_switch_attrs;
++
++ return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
++}
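++
++/*
++ * With cpu_sysdev_class this group typically appears as
++ * /sys/devices/system/cpu/cpuidle/, containing current_driver_ro and
++ * current_governor_ro by default, or the writable available_drivers,
++ * available_governors, current_driver and current_governor files when
++ * booted with "cpuidle_sysfs_switch".
++ */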
++
++/**
++ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
++ */
++void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
++{
++ sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
++}
++
++struct cpuidle_attr {
++ struct attribute attr;
++ ssize_t (*show)(struct cpuidle_device *, char *);
++ ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
++};
++
++#define define_one_ro(_name, show) \
++ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
++#define define_one_rw(_name, show, store) \
++ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
++
++#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
++#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
++static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, char *buf)
++{
++ int ret = -EIO;
++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++ struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
++
++ if (cattr->show) {
++ mutex_lock(&cpuidle_lock);
++ ret = cattr->show(dev, buf);
++ mutex_unlock(&cpuidle_lock);
++ }
++ return ret;
++}
++
++static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
++ const char * buf, size_t count)
++{
++ int ret = -EIO;
++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++ struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
++
++ if (cattr->store) {
++ mutex_lock(&cpuidle_lock);
++ ret = cattr->store(dev, buf, count);
++ mutex_unlock(&cpuidle_lock);
++ }
++ return ret;
++}
++
++static struct sysfs_ops cpuidle_sysfs_ops = {
++ .show = cpuidle_show,
++ .store = cpuidle_store,
++};
++
++static void cpuidle_sysfs_release(struct kobject *kobj)
++{
++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++
++ complete(&dev->kobj_unregister);
++}
++
++static struct kobj_type ktype_cpuidle = {
++ .sysfs_ops = &cpuidle_sysfs_ops,
++ .release = cpuidle_sysfs_release,
++};
++
++struct cpuidle_state_attr {
++ struct attribute attr;
++ ssize_t (*show)(struct cpuidle_state *, char *);
++ ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
++};
++
++#define define_one_state_ro(_name, show) \
++static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
++
++#define define_show_state_function(_name) \
++static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
++{ \
++ return sprintf(buf, "%d\n", state->_name);\
++}
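++
++/*
++ * For instance, define_show_state_function(usage) below expands to:
++ *
++ *	static ssize_t show_state_usage(struct cpuidle_state *state,
++ *					char *buf)
++ *	{
++ *		return sprintf(buf, "%d\n", state->usage);
++ *	}
++ */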
++
++define_show_state_function(exit_latency)
++define_show_state_function(power_usage)
++define_show_state_function(usage)
++define_show_state_function(time)
++define_one_state_ro(latency, show_state_exit_latency);
++define_one_state_ro(power, show_state_power_usage);
++define_one_state_ro(usage, show_state_usage);
++define_one_state_ro(time, show_state_time);
++
++static struct attribute *cpuidle_state_default_attrs[] = {
++ &attr_latency.attr,
++ &attr_power.attr,
++ &attr_usage.attr,
++ &attr_time.attr,
++ NULL
++};
++
++#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
++#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
++#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
++static ssize_t cpuidle_state_show(struct kobject *kobj,
++ struct attribute *attr, char *buf)
++{
++ int ret = -EIO;
++ struct cpuidle_state *state = kobj_to_state(kobj);
++ struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
++
++ if (cattr->show)
++ ret = cattr->show(state, buf);
++
++ return ret;
++}
++
++static struct sysfs_ops cpuidle_state_sysfs_ops = {
++ .show = cpuidle_state_show,
++};
++
++static void cpuidle_state_sysfs_release(struct kobject *kobj)
++{
++ struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
++
++ complete(&state_obj->kobj_unregister);
++}
++
++static struct kobj_type ktype_state_cpuidle = {
++ .sysfs_ops = &cpuidle_state_sysfs_ops,
++ .default_attrs = cpuidle_state_default_attrs,
++ .release = cpuidle_state_sysfs_release,
++};
++
++static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
++{
++ kobject_unregister(&device->kobjs[i]->kobj);
++ wait_for_completion(&device->kobjs[i]->kobj_unregister);
++ kfree(device->kobjs[i]);
++ device->kobjs[i] = NULL;
++}
++
++/**
++ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
++ * @device: the target device
++ */
++int cpuidle_add_driver_sysfs(struct cpuidle_device *device)
++{
++ int i, ret = -ENOMEM;
++ struct cpuidle_state_kobj *kobj;
++
++ /* state statistics */
++ for (i = 0; i < device->state_count; i++) {
++ kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
++ if (!kobj)
++ goto error_state;
++ kobj->state = &device->states[i];
++ init_completion(&kobj->kobj_unregister);
++
++ kobj->kobj.parent = &device->kobj;
++ kobj->kobj.ktype = &ktype_state_cpuidle;
++ kobject_set_name(&kobj->kobj, "state%d", i);
++ ret = kobject_register(&kobj->kobj);
++ if (ret) {
++ kfree(kobj);
++ goto error_state;
++ }
++ device->kobjs[i] = kobj;
++ }
++
++ return 0;
++
++error_state:
++ for (i = i - 1; i >= 0; i--)
++ cpuidle_free_state_kobj(device, i);
++ return ret;
++}
++
++/**
++ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
++ * @device: the target device
++ */
++void cpuidle_remove_driver_sysfs(struct cpuidle_device *device)
++{
++ int i;
++
++ for (i = 0; i < device->state_count; i++)
++ cpuidle_free_state_kobj(device, i);
++}
++
++/**
++ * cpuidle_add_sysfs - creates a sysfs instance for the target device
++ * @sysdev: the target device
++ */
++int cpuidle_add_sysfs(struct sys_device *sysdev)
++{
++ int cpu = sysdev->id;
++ struct cpuidle_device *dev;
++
++ dev = per_cpu(cpuidle_devices, cpu);
++ dev->kobj.parent = &sysdev->kobj;
++ dev->kobj.ktype = &ktype_cpuidle;
++ kobject_set_name(&dev->kobj, "%s", "cpuidle");
++ return kobject_register(&dev->kobj);
++}
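++
++/*
++ * Together with cpuidle_add_driver_sysfs() this yields a per-CPU tree
++ * such as (assuming the usual sysdev path for CPU N):
++ *
++ *	/sys/devices/system/cpu/cpuN/cpuidle/stateM/latency
++ *	/sys/devices/system/cpu/cpuN/cpuidle/stateM/power
++ *	/sys/devices/system/cpu/cpuN/cpuidle/stateM/usage
++ *	/sys/devices/system/cpu/cpuN/cpuidle/stateM/time
++ */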
++
++/**
++ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
++ * @sysdev: the target device
++ */
++void cpuidle_remove_sysfs(struct sys_device *sysdev)
++{
++ int cpu = sysdev->id;
++ struct cpuidle_device *dev;
++
++ dev = per_cpu(cpuidle_devices, cpu);
++ kobject_unregister(&dev->kobj);
++}
+diff -Nurb linux-2.6.22-570/drivers/dma/Kconfig linux-2.6.22-591/drivers/dma/Kconfig
+--- linux-2.6.22-570/drivers/dma/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/dma/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -8,8 +8,8 @@
+ config DMA_ENGINE
+ bool "Support for DMA engines"
+ ---help---
+- DMA engines offload copy operations from the CPU to dedicated
+- hardware, allowing the copies to happen asynchronously.
++ DMA engines offload bulk memory operations from the CPU to dedicated
++ hardware, allowing the operations to happen asynchronously.
+
+ comment "DMA Clients"
+
+@@ -32,4 +32,11 @@
+ ---help---
+ Enable support for the Intel(R) I/OAT DMA engine.
+
++config INTEL_IOP_ADMA
++ tristate "Intel IOP ADMA support"
++ depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
++ default m
++ ---help---
++ Enable support for the Intel(R) IOP Series RAID engines.
++
+ endmenu
+diff -Nurb linux-2.6.22-570/drivers/dma/Makefile linux-2.6.22-591/drivers/dma/Makefile
+--- linux-2.6.22-570/drivers/dma/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/dma/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -1,3 +1,4 @@
+ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+ obj-$(CONFIG_NET_DMA) += iovlock.o
+ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
++obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+diff -Nurb linux-2.6.22-570/drivers/dma/dmaengine.c linux-2.6.22-591/drivers/dma/dmaengine.c
+--- linux-2.6.22-570/drivers/dma/dmaengine.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/dma/dmaengine.c 2007-12-21 15:36:11.000000000 -0500
+@@ -37,11 +37,11 @@
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+- * Each client has a channels list, it's only modified under the client->lock
+- * and in an RCU callback, so it's safe to read under rcu_read_lock().
++ * Each client is responsible for keeping track of the channels it uses. See
++ * the definition of dma_event_callback in dmaengine.h.
+ *
+ * Each device has a kref, which is initialized to 1 when the device is
+- * registered. A kref_put is done for each class_device registered. When the
++ * registered. A kref_get is done for each class_device registered. When the
+ * class_device is released, the coresponding kref_put is done in the release
+ * method. Every time one of the device's channels is allocated to a client,
+ * a kref_get occurs. When the channel is freed, the coresponding kref_put
+@@ -51,14 +51,17 @@
+ * references to finish.
+ *
+ * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
+- * with a kref and a per_cpu local_t. A single reference is set when on an
+- * ADDED event, and removed with a REMOVE event. Net DMA client takes an
+- * extra reference per outstanding transaction. The relase function does a
+- * kref_put on the device. -ChrisL
++ * with a kref and a per_cpu local_t. A dma_chan_get is called when a client
++ * signals that it wants to use a channel, and dma_chan_put is called when
++ * a channel is removed or a client using it is unregistered. A client can
++ * take extra references per outstanding transaction, as is the case with
++ * the NET DMA client. The release function does a kref_put on the device.
++ * -ChrisL, DanW
+ */
+
+ #include <linux/init.h>
+ #include <linux/module.h>
++#include <linux/mm.h>
+ #include <linux/device.h>
+ #include <linux/dmaengine.h>
+ #include <linux/hardirq.h>
+@@ -66,6 +69,7 @@
+ #include <linux/percpu.h>
+ #include <linux/rcupdate.h>
+ #include <linux/mutex.h>
++#include <linux/jiffies.h>
+
+ static DEFINE_MUTEX(dma_list_mutex);
+ static LIST_HEAD(dma_device_list);
+@@ -100,8 +104,19 @@
+ static ssize_t show_in_use(struct class_device *cd, char *buf)
+ {
+ struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
++ int in_use = 0;
+
+- return sprintf(buf, "%d\n", (chan->client ? 1 : 0));
++ if (unlikely(chan->slow_ref) &&
++ atomic_read(&chan->refcount.refcount) > 1)
++ in_use = 1;
++ else {
++ if (local_read(&(per_cpu_ptr(chan->local,
++ get_cpu())->refcount)) > 0)
++ in_use = 1;
++ put_cpu();
++ }
++
++ return sprintf(buf, "%d\n", in_use);
+ }
+
+ static struct class_device_attribute dma_class_attrs[] = {
+@@ -127,43 +142,72 @@
+
+ /* --- client and device registration --- */
+
++#define dma_chan_satisfies_mask(chan, mask) \
++ __dma_chan_satisfies_mask((chan), &(mask))
++static int
++__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
++{
++ dma_cap_mask_t has;
++
++ bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
++ DMA_TX_TYPE_END);
++ return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
++}
++
+ /**
+- * dma_client_chan_alloc - try to allocate a channel to a client
++ * dma_client_chan_alloc - try to allocate channels to a client
+ * @client: &dma_client
+ *
+ * Called with dma_list_mutex held.
+ */
+-static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
++static void dma_client_chan_alloc(struct dma_client *client)
+ {
+ struct dma_device *device;
+ struct dma_chan *chan;
+- unsigned long flags;
+ int desc; /* allocated descriptor count */
++ enum dma_state_client ack;
+
+- /* Find a channel, any DMA engine will do */
+- list_for_each_entry(device, &dma_device_list, global_node) {
++ /* Find a channel */
++ list_for_each_entry(device, &dma_device_list, global_node)
+ list_for_each_entry(chan, &device->channels, device_node) {
+- if (chan->client)
++ if (!dma_chan_satisfies_mask(chan, client->cap_mask))
+ continue;
+
+ desc = chan->device->device_alloc_chan_resources(chan);
+ if (desc >= 0) {
++ ack = client->event_callback(client,
++ chan,
++ DMA_RESOURCE_AVAILABLE);
++
++ /* we are done once this client rejects
++ * an available resource
++ */
++ if (ack == DMA_ACK) {
++ dma_chan_get(chan);
+ kref_get(&device->refcount);
+- kref_init(&chan->refcount);
+- chan->slow_ref = 0;
+- INIT_RCU_HEAD(&chan->rcu);
+- chan->client = client;
+- spin_lock_irqsave(&client->lock, flags);
+- list_add_tail_rcu(&chan->client_node,
+- &client->channels);
+- spin_unlock_irqrestore(&client->lock, flags);
+- return chan;
++ } else if (ack == DMA_NAK)
++ return;
+ }
+ }
++}
++
++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
++{
++ enum dma_status status;
++ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
++
++ dma_async_issue_pending(chan);
++ do {
++ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
++ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
++ printk(KERN_ERR "dma_sync_wait_timeout!\n");
++ return DMA_ERROR;
+ }
++ } while (status == DMA_IN_PROGRESS);
+
+- return NULL;
++ return status;
+ }
++EXPORT_SYMBOL(dma_sync_wait);
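++
++/*
++ * Typical polling use (sketch only): submit a transaction, then spin
++ * until it completes or the 5 second timeout above trips:
++ *
++ *	cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
++ *	if (cookie < 0 || dma_sync_wait(chan, cookie) != DMA_SUCCESS)
++ *		fall back to a plain memcpy()
++ */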
+
+ /**
+ * dma_chan_cleanup - release a DMA channel's resources
+@@ -173,7 +217,6 @@
+ {
+ struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
+ chan->device->device_free_chan_resources(chan);
+- chan->client = NULL;
+ kref_put(&chan->device->refcount, dma_async_device_cleanup);
+ }
+ EXPORT_SYMBOL(dma_chan_cleanup);
+@@ -189,7 +232,7 @@
+ kref_put(&chan->refcount, dma_chan_cleanup);
+ }
+
+-static void dma_client_chan_free(struct dma_chan *chan)
++static void dma_chan_release(struct dma_chan *chan)
+ {
+ atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
+ chan->slow_ref = 1;
+@@ -197,70 +240,57 @@
+ }
+
+ /**
+- * dma_chans_rebalance - reallocate channels to clients
+- *
+- * When the number of DMA channel in the system changes,
+- * channels need to be rebalanced among clients.
++ * dma_clients_notify_available - broadcast available channels to the clients
+ */
+-static void dma_chans_rebalance(void)
++static void dma_clients_notify_available(void)
+ {
+ struct dma_client *client;
+- struct dma_chan *chan;
+- unsigned long flags;
+
+ mutex_lock(&dma_list_mutex);
+
+- list_for_each_entry(client, &dma_client_list, global_node) {
+- while (client->chans_desired > client->chan_count) {
+- chan = dma_client_chan_alloc(client);
+- if (!chan)
+- break;
+- client->chan_count++;
+- client->event_callback(client,
+- chan,
+- DMA_RESOURCE_ADDED);
+- }
+- while (client->chans_desired < client->chan_count) {
+- spin_lock_irqsave(&client->lock, flags);
+- chan = list_entry(client->channels.next,
+- struct dma_chan,
+- client_node);
+- list_del_rcu(&chan->client_node);
+- spin_unlock_irqrestore(&client->lock, flags);
+- client->chan_count--;
+- client->event_callback(client,
+- chan,
+- DMA_RESOURCE_REMOVED);
+- dma_client_chan_free(chan);
+- }
+- }
++ list_for_each_entry(client, &dma_client_list, global_node)
++ dma_client_chan_alloc(client);
+
+ mutex_unlock(&dma_list_mutex);
+ }
+
+ /**
+- * dma_async_client_register - allocate and register a &dma_client
+- * @event_callback: callback for notification of channel addition/removal
++ * dma_clients_notify_removed - tell the clients that a channel is going away
++ * @chan: channel on its way out
+ */
+-struct dma_client *dma_async_client_register(dma_event_callback event_callback)
++static void dma_clients_notify_removed(struct dma_chan *chan)
+ {
+ struct dma_client *client;
++ enum dma_state_client ack;
+
+- client = kzalloc(sizeof(*client), GFP_KERNEL);
+- if (!client)
+- return NULL;
++ mutex_lock(&dma_list_mutex);
+
+- INIT_LIST_HEAD(&client->channels);
+- spin_lock_init(&client->lock);
+- client->chans_desired = 0;
+- client->chan_count = 0;
+- client->event_callback = event_callback;
++ list_for_each_entry(client, &dma_client_list, global_node) {
++ ack = client->event_callback(client, chan,
++ DMA_RESOURCE_REMOVED);
+
++ /* client was holding resources for this channel so
++ * free it
++ */
++ if (ack == DMA_ACK) {
++ dma_chan_put(chan);
++ kref_put(&chan->device->refcount,
++ dma_async_device_cleanup);
++ }
++ }
++
++ mutex_unlock(&dma_list_mutex);
++}
++
++/**
++ * dma_async_client_register - register a &dma_client
++ * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
++ */
++void dma_async_client_register(struct dma_client *client)
++{
+ mutex_lock(&dma_list_mutex);
+ list_add_tail(&client->global_node, &dma_client_list);
+ mutex_unlock(&dma_list_mutex);
+-
+- return client;
+ }
+ EXPORT_SYMBOL(dma_async_client_register);
+
+@@ -272,40 +302,42 @@
+ */
+ void dma_async_client_unregister(struct dma_client *client)
+ {
++ struct dma_device *device;
+ struct dma_chan *chan;
++ enum dma_state_client ack;
+
+ if (!client)
+ return;
+
+- rcu_read_lock();
+- list_for_each_entry_rcu(chan, &client->channels, client_node)
+- dma_client_chan_free(chan);
+- rcu_read_unlock();
+-
+ mutex_lock(&dma_list_mutex);
++ /* free all channels the client is holding */
++ list_for_each_entry(device, &dma_device_list, global_node)
++ list_for_each_entry(chan, &device->channels, device_node) {
++ ack = client->event_callback(client, chan,
++ DMA_RESOURCE_REMOVED);
++
++ if (ack == DMA_ACK) {
++ dma_chan_put(chan);
++ kref_put(&chan->device->refcount,
++ dma_async_device_cleanup);
++ }
++ }
++
+ list_del(&client->global_node);
+ mutex_unlock(&dma_list_mutex);
+-
+- kfree(client);
+- dma_chans_rebalance();
+ }
+ EXPORT_SYMBOL(dma_async_client_unregister);
+
+ /**
+- * dma_async_client_chan_request - request DMA channels
+- * @client: &dma_client
+- * @number: count of DMA channels requested
+- *
+- * Clients call dma_async_client_chan_request() to specify how many
+- * DMA channels they need, 0 to free all currently allocated.
+- * The resulting allocations/frees are indicated to the client via the
+- * event callback.
++ * dma_async_client_chan_request - send all available channels satisfying
++ * the client's capability mask to the client
++ * @client: requester
+ */
+-void dma_async_client_chan_request(struct dma_client *client,
+- unsigned int number)
++void dma_async_client_chan_request(struct dma_client *client)
+ {
+- client->chans_desired = number;
+- dma_chans_rebalance();
++ mutex_lock(&dma_list_mutex);
++ dma_client_chan_alloc(client);
++ mutex_unlock(&dma_list_mutex);
+ }
+ EXPORT_SYMBOL(dma_async_client_chan_request);
+
+@@ -322,6 +354,25 @@
+ if (!device)
+ return -ENODEV;
+
++ /* validate device routines */
++ BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
++ !device->device_prep_dma_memcpy);
++ BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
++ !device->device_prep_dma_xor);
++ BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
++ !device->device_prep_dma_zero_sum);
++ BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
++ !device->device_prep_dma_memset);
++ BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
++ !device->device_prep_dma_interrupt);
++
++ BUG_ON(!device->device_alloc_chan_resources);
++ BUG_ON(!device->device_free_chan_resources);
++ BUG_ON(!device->device_dependency_added);
++ BUG_ON(!device->device_is_tx_complete);
++ BUG_ON(!device->device_issue_pending);
++ BUG_ON(!device->dev);
++
+ init_completion(&device->done);
+ kref_init(&device->refcount);
+ device->dev_id = id++;
+@@ -339,6 +390,9 @@
+ device->dev_id, chan->chan_id);
+
+ kref_get(&device->refcount);
++ kref_init(&chan->refcount);
++ chan->slow_ref = 0;
++ INIT_RCU_HEAD(&chan->rcu);
+ class_device_register(&chan->class_dev);
+ }
+
+@@ -346,7 +400,7 @@
+ list_add_tail(&device->global_node, &dma_device_list);
+ mutex_unlock(&dma_list_mutex);
+
+- dma_chans_rebalance();
++ dma_clients_notify_available();
+
+ return 0;
+ }
+@@ -371,32 +425,165 @@
+ void dma_async_device_unregister(struct dma_device *device)
+ {
+ struct dma_chan *chan;
+- unsigned long flags;
+
+ mutex_lock(&dma_list_mutex);
+ list_del(&device->global_node);
+ mutex_unlock(&dma_list_mutex);
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+- if (chan->client) {
+- spin_lock_irqsave(&chan->client->lock, flags);
+- list_del(&chan->client_node);
+- chan->client->chan_count--;
+- spin_unlock_irqrestore(&chan->client->lock, flags);
+- chan->client->event_callback(chan->client,
+- chan,
+- DMA_RESOURCE_REMOVED);
+- dma_client_chan_free(chan);
+- }
++ dma_clients_notify_removed(chan);
+ class_device_unregister(&chan->class_dev);
++ dma_chan_release(chan);
+ }
+- dma_chans_rebalance();
+
+ kref_put(&device->refcount, dma_async_device_cleanup);
+ wait_for_completion(&device->done);
+ }
+ EXPORT_SYMBOL(dma_async_device_unregister);
+
++/**
++ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
++ * @chan: DMA channel to offload copy to
++ * @dest: destination address (virtual)
++ * @src: source address (virtual)
++ * @len: length
++ *
++ * Both @dest and @src must be mappable to a bus address according to the
++ * DMA mapping API rules for streaming mappings.
++ * Both @dest and @src must stay memory resident (kernel memory or locked
++ * user space pages).
++ */
++dma_cookie_t
++dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
++ void *src, size_t len)
++{
++ struct dma_device *dev = chan->device;
++ struct dma_async_tx_descriptor *tx;
++ dma_addr_t addr;
++ dma_cookie_t cookie;
++ int cpu;
++
++ tx = dev->device_prep_dma_memcpy(chan, len, 0);
++ if (!tx)
++ return -ENOMEM;
++
++ tx->ack = 1;
++ tx->callback = NULL;
++ addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
++ tx->tx_set_src(addr, tx, 0);
++ addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
++ tx->tx_set_dest(addr, tx, 0);
++ cookie = tx->tx_submit(tx);
++
++ cpu = get_cpu();
++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++ put_cpu();
++
++ return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
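++
++/*
++ * The body above also shows the canonical order of the new descriptor
++ * API: device_prep_dma_memcpy() reserves a descriptor, tx_set_src() and
++ * tx_set_dest() attach the mapped bus addresses, and tx_submit() queues
++ * the operation and returns the cookie later passed to
++ * dma_async_is_tx_complete() or dma_sync_wait().
++ */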
++
++/**
++ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
++ * @chan: DMA channel to offload copy to
++ * @page: destination page
++ * @offset: offset in page to copy to
++ * @kdata: source address (virtual)
++ * @len: length
++ *
++ * Both @page/@offset and @kdata must be mappable to a bus address according
++ * to the DMA mapping API rules for streaming mappings.
++ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
++ * locked user space pages)
++ */
++dma_cookie_t
++dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
++ unsigned int offset, void *kdata, size_t len)
++{
++ struct dma_device *dev = chan->device;
++ struct dma_async_tx_descriptor *tx;
++ dma_addr_t addr;
++ dma_cookie_t cookie;
++ int cpu;
++
++ tx = dev->device_prep_dma_memcpy(chan, len, 0);
++ if (!tx)
++ return -ENOMEM;
++
++ tx->ack = 1;
++ tx->callback = NULL;
++ addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
++ tx->tx_set_src(addr, tx, 0);
++ addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
++ tx->tx_set_dest(addr, tx, 0);
++ cookie = tx->tx_submit(tx);
++
++ cpu = get_cpu();
++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++ put_cpu();
++
++ return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
++
++/**
++ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
++ * @chan: DMA channel to offload copy to
++ * @dest_pg: destination page
++ * @dest_off: offset in page to copy to
++ * @src_pg: source page
++ * @src_off: offset in page to copy from
++ * @len: length
++ *
++ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
++ * address according to the DMA mapping API rules for streaming mappings.
++ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
++ * (kernel memory or locked user space pages).
++ */
++dma_cookie_t
++dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
++ unsigned int dest_off, struct page *src_pg, unsigned int src_off,
++ size_t len)
++{
++ struct dma_device *dev = chan->device;
++ struct dma_async_tx_descriptor *tx;
++ dma_addr_t addr;
++ dma_cookie_t cookie;
++ int cpu;
++
++ tx = dev->device_prep_dma_memcpy(chan, len, 0);
++ if (!tx)
++ return -ENOMEM;
++
++ tx->ack = 1;
++ tx->callback = NULL;
++ addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
++ tx->tx_set_src(addr, tx, 0);
++ addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
++ tx->tx_set_dest(addr, tx, 0);
++ cookie = tx->tx_submit(tx);
++
++ cpu = get_cpu();
++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++ put_cpu();
++
++ return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
++
++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
++ struct dma_chan *chan)
++{
++ tx->chan = chan;
++ spin_lock_init(&tx->lock);
++ INIT_LIST_HEAD(&tx->depend_node);
++ INIT_LIST_HEAD(&tx->depend_list);
++}
++EXPORT_SYMBOL(dma_async_tx_descriptor_init);
++
+ static int __init dma_bus_init(void)
+ {
+ mutex_init(&dma_list_mutex);
+diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.c linux-2.6.22-591/drivers/dma/ioatdma.c
+--- linux-2.6.22-570/drivers/dma/ioatdma.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/dma/ioatdma.c 2007-12-21 15:36:11.000000000 -0500
+@@ -39,6 +39,7 @@
+ #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
+ #define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
+ #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
++#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
+
+ /* internal functions */
+ static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+@@ -71,13 +72,76 @@
+ INIT_LIST_HEAD(&ioat_chan->used_desc);
+ /* This should be made common somewhere in dmaengine.c */
+ ioat_chan->common.device = &device->common;
+- ioat_chan->common.client = NULL;
+ list_add_tail(&ioat_chan->common.device_node,
+ &device->common.channels);
+ }
+ return device->common.chancnt;
+ }
+
++static void
++ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
++{
++ struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++
++ pci_unmap_addr_set(desc, src, addr);
++
++ list_for_each_entry(iter, &desc->group_list, node) {
++ iter->hw->src_addr = addr;
++ addr += ioat_chan->xfercap;
++ }
++
++}
++
++static void
++ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
++{
++ struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++
++ pci_unmap_addr_set(desc, dst, addr);
++
++ list_for_each_entry(iter, &desc->group_list, node) {
++ iter->hw->dst_addr = addr;
++ addr += ioat_chan->xfercap;
++ }
++}
++
++static dma_cookie_t
++ioat_tx_submit(struct dma_async_tx_descriptor *tx)
++{
++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++ struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
++ struct ioat_desc_sw *group_start = list_entry(desc->group_list.next,
++ struct ioat_desc_sw, node);
++ int append = 0;
++ dma_cookie_t cookie;
++
++ spin_lock_bh(&ioat_chan->desc_lock);
++ /* cookie incr and addition to used_list must be atomic */
++ cookie = ioat_chan->common.cookie;
++ cookie++;
++ if (cookie < 0)
++ cookie = 1;
++ ioat_chan->common.cookie = desc->async_tx.cookie = cookie;
++
++ /* write address into NextDescriptor field of last desc in chain */
++ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = group_start->phys;
++ list_splice_init(&desc->group_list, ioat_chan->used_desc.prev);
++
++ ioat_chan->pending += desc->group_count;
++ if (ioat_chan->pending >= 4) {
++ append = 1;
++ ioat_chan->pending = 0;
++ }
++ spin_unlock_bh(&ioat_chan->desc_lock);
++
++ if (append)
++ ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET,
++ IOAT_CHANCMD_APPEND);
++ return cookie;
++}
++
+ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
+ struct ioat_dma_chan *ioat_chan,
+ gfp_t flags)
+@@ -99,6 +163,11 @@
+ }
+
+ memset(desc, 0, sizeof(*desc));
++ dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
++ desc_sw->async_tx.tx_set_src = ioat_set_src;
++ desc_sw->async_tx.tx_set_dest = ioat_set_dest;
++ desc_sw->async_tx.tx_submit = ioat_tx_submit;
++ INIT_LIST_HEAD(&desc_sw->group_list);
+ desc_sw->hw = desc;
+ desc_sw->phys = phys;
+
+@@ -215,45 +284,25 @@
+ ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+ }
+
+-/**
+- * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction
+- * @ioat_chan: IOAT DMA channel handle
+- * @dest: DMA destination address
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
+- dma_addr_t dest,
+- dma_addr_t src,
+- size_t len)
+-{
+- struct ioat_desc_sw *first;
+- struct ioat_desc_sw *prev;
+- struct ioat_desc_sw *new;
+- dma_cookie_t cookie;
++static struct dma_async_tx_descriptor *
++ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
++{
++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
++ struct ioat_desc_sw *first, *prev, *new;
+ LIST_HEAD(new_chain);
+ u32 copy;
+ size_t orig_len;
+- dma_addr_t orig_src, orig_dst;
+- unsigned int desc_count = 0;
+- unsigned int append = 0;
+-
+- if (!ioat_chan || !dest || !src)
+- return -EFAULT;
++ int desc_count = 0;
+
+ if (!len)
+- return ioat_chan->common.cookie;
++ return NULL;
+
+ orig_len = len;
+- orig_src = src;
+- orig_dst = dest;
+
+ first = NULL;
+ prev = NULL;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+-
+ while (len) {
+ if (!list_empty(&ioat_chan->free_desc)) {
+ new = to_ioat_desc(ioat_chan->free_desc.next);
+@@ -270,9 +319,8 @@
+
+ new->hw->size = copy;
+ new->hw->ctl = 0;
+- new->hw->src_addr = src;
+- new->hw->dst_addr = dest;
+- new->cookie = 0;
++ new->async_tx.cookie = 0;
++ new->async_tx.ack = 1;
+
+ /* chain together the physical address list for the HW */
+ if (!first)
+@@ -281,130 +329,26 @@
+ prev->hw->next = (u64) new->phys;
+
+ prev = new;
+-
+ len -= copy;
+- dest += copy;
+- src += copy;
+-
+ list_add_tail(&new->node, &new_chain);
+ desc_count++;
+ }
+- new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+- new->hw->next = 0;
+
+- /* cookie incr and addition to used_list must be atomic */
++ list_splice(&new_chain, &new->group_list);
+
+- cookie = ioat_chan->common.cookie;
+- cookie++;
+- if (cookie < 0)
+- cookie = 1;
+- ioat_chan->common.cookie = new->cookie = cookie;
++ new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
++ new->hw->next = 0;
++ new->group_count = desc_count;
++ new->async_tx.ack = 0; /* client is in control of this ack */
++ new->async_tx.cookie = -EBUSY;
+
+- pci_unmap_addr_set(new, src, orig_src);
+- pci_unmap_addr_set(new, dst, orig_dst);
+ pci_unmap_len_set(new, src_len, orig_len);
+ pci_unmap_len_set(new, dst_len, orig_len);
+-
+- /* write address into NextDescriptor field of last desc in chain */
+- to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys;
+- list_splice_init(&new_chain, ioat_chan->used_desc.prev);
+-
+- ioat_chan->pending += desc_count;
+- if (ioat_chan->pending >= 20) {
+- append = 1;
+- ioat_chan->pending = 0;
+- }
+-
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+- if (append)
+- ioatdma_chan_write8(ioat_chan,
+- IOAT_CHANCMD_OFFSET,
+- IOAT_CHANCMD_APPEND);
+- return cookie;
+-}
+-
+-/**
+- * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs
+- * @chan: IOAT DMA channel handle
+- * @dest: DMA destination address
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan,
+- void *dest,
+- void *src,
+- size_t len)
+-{
+- dma_addr_t dest_addr;
+- dma_addr_t src_addr;
+- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+- dest_addr = pci_map_single(ioat_chan->device->pdev,
+- dest, len, PCI_DMA_FROMDEVICE);
+- src_addr = pci_map_single(ioat_chan->device->pdev,
+- src, len, PCI_DMA_TODEVICE);
+-
+- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+-}
+-
+-/**
+- * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page
+- * @chan: IOAT DMA channel handle
+- * @page: pointer to the page to copy to
+- * @offset: offset into that page
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
+- struct page *page,
+- unsigned int offset,
+- void *src,
+- size_t len)
+-{
+- dma_addr_t dest_addr;
+- dma_addr_t src_addr;
+- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+- dest_addr = pci_map_page(ioat_chan->device->pdev,
+- page, offset, len, PCI_DMA_FROMDEVICE);
+- src_addr = pci_map_single(ioat_chan->device->pdev,
+- src, len, PCI_DMA_TODEVICE);
+-
+- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
++ return new ? &new->async_tx : NULL;
+ }
+
+-/**
+- * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages
+- * @chan: IOAT DMA channel handle
+- * @dest_pg: pointer to the page to copy to
+- * @dest_off: offset into that page
+- * @src_pg: pointer to the page to copy from
+- * @src_off: offset into that page
+- * @len: transaction length in bytes. This is guaranteed not to make a copy
+- * across a page boundary.
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
+- struct page *dest_pg,
+- unsigned int dest_off,
+- struct page *src_pg,
+- unsigned int src_off,
+- size_t len)
+-{
+- dma_addr_t dest_addr;
+- dma_addr_t src_addr;
+- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+- dest_addr = pci_map_page(ioat_chan->device->pdev,
+- dest_pg, dest_off, len, PCI_DMA_FROMDEVICE);
+- src_addr = pci_map_page(ioat_chan->device->pdev,
+- src_pg, src_off, len, PCI_DMA_TODEVICE);
+-
+- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+-}
+
+ /**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
+@@ -467,8 +411,8 @@
+ * exceeding xfercap, perhaps. If so, only the last one will
+ * have a cookie, and require unmapping.
+ */
+- if (desc->cookie) {
+- cookie = desc->cookie;
++ if (desc->async_tx.cookie) {
++ cookie = desc->async_tx.cookie;
+
+ /* yes we are unmapping both _page and _single alloc'd
+ regions with unmap_page. Is this *really* that bad?
+@@ -484,13 +428,18 @@
+ }
+
+ if (desc->phys != phys_complete) {
+- /* a completed entry, but not the last, so cleanup */
++ /* a completed entry, but not the last, so cleanup
++ * if the client is done with the descriptor
++ */
++ if (desc->async_tx.ack) {
+ list_del(&desc->node);
+ list_add_tail(&desc->node, &chan->free_desc);
++ } else
++ desc->async_tx.cookie = 0;
+ } else {
+ /* last used desc. Do not remove, so we can append from
+ it, but don't look at it next time, either */
+- desc->cookie = 0;
++ desc->async_tx.cookie = 0;
+
+ /* TODO check status bits? */
+ break;
+@@ -506,6 +455,17 @@
+ spin_unlock(&chan->cleanup_lock);
+ }
+
++static void ioat_dma_dependency_added(struct dma_chan *chan)
++{
++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
++ spin_lock_bh(&ioat_chan->desc_lock);
++ if (ioat_chan->pending == 0) {
++ spin_unlock_bh(&ioat_chan->desc_lock);
++ ioat_dma_memcpy_cleanup(ioat_chan);
++ } else
++ spin_unlock_bh(&ioat_chan->desc_lock);
++}
++
+ /**
+ * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
+ * @chan: IOAT DMA channel handle
+@@ -607,6 +567,7 @@
+
+ desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+ desc->hw->next = 0;
++ desc->async_tx.ack = 1;
+
+ list_add_tail(&desc->node, &ioat_chan->used_desc);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+@@ -633,6 +594,8 @@
+ u8 *src;
+ u8 *dest;
+ struct dma_chan *dma_chan;
++ struct dma_async_tx_descriptor *tx;
++ dma_addr_t addr;
+ dma_cookie_t cookie;
+ int err = 0;
+
+@@ -658,7 +621,15 @@
+ goto out;
+ }
+
+- cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE);
++ tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
++ async_tx_ack(tx);
++ addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
++ DMA_TO_DEVICE);
++ ioat_set_src(addr, tx, 0);
++ addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
++ DMA_FROM_DEVICE);
++ ioat_set_dest(addr, tx, 0);
++ cookie = ioat_tx_submit(tx);
+ ioat_dma_memcpy_issue_pending(dma_chan);
+ msleep(1);
+
+@@ -754,13 +725,14 @@
+ INIT_LIST_HEAD(&device->common.channels);
+ enumerate_dma_channels(device);
+
++ dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
+ device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
+ device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
+- device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf;
+- device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg;
+- device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg;
+- device->common.device_memcpy_complete = ioat_dma_is_complete;
+- device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending;
++ device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
++ device->common.device_is_tx_complete = ioat_dma_is_complete;
++ device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
++ device->common.device_dependency_added = ioat_dma_dependency_added;
++ device->common.dev = &pdev->dev;
+ printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
+ device->common.chancnt);
+
+diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.h linux-2.6.22-591/drivers/dma/ioatdma.h
+--- linux-2.6.22-570/drivers/dma/ioatdma.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/dma/ioatdma.h 2007-12-21 15:36:11.000000000 -0500
+@@ -30,9 +30,6 @@
+
+ #define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+
+-extern struct list_head dma_device_list;
+-extern struct list_head dma_client_list;
+-
+ /**
+ * struct ioat_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+@@ -105,15 +102,20 @@
+ /**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor
++ * @async_tx: the generic software descriptor handed to dmaengine clients
+ * @node:
++ * @group_list: list of the descriptors that make up this transaction
++ * @group_count: number of descriptors in the group
+ * @cookie:
+ * @phys:
+ */
+
+ struct ioat_desc_sw {
+ struct ioat_dma_descriptor *hw;
++ struct dma_async_tx_descriptor async_tx;
+ struct list_head node;
+- dma_cookie_t cookie;
++ struct list_head group_list;
++ int group_count;
+ dma_addr_t phys;
+ DECLARE_PCI_UNMAP_ADDR(src)
+ DECLARE_PCI_UNMAP_LEN(src_len)
+@@ -122,4 +124,3 @@
+ };
+
+ #endif /* IOATDMA_H */
+-
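The ioatdma hunks above replace the driver-private memcpy entry points with the
generic async_tx descriptor flow: prepare, set addresses, submit, issue pending.
A minimal client-side sketch of that flow, modeled on the driver's own
self-test; illustrative only, not part of the patch, and the helper name
example_async_memcpy is hypothetical:

static dma_cookie_t example_async_memcpy(struct dma_chan *chan,
                                         void *dst, void *src, size_t len)
{
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx;
        dma_addr_t dma_src, dma_dst;
        dma_cookie_t cookie;

        /* build a descriptor group covering 'len' bytes, no completion irq */
        tx = dev->device_prep_dma_memcpy(chan, len, 0);
        if (!tx)
                return -ENOMEM;

        /* map the buffers and program the hardware addresses */
        dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
        tx->tx_set_src(dma_src, tx, 0);
        dma_dst = dma_map_single(dev->dev, dst, len, DMA_FROM_DEVICE);
        tx->tx_set_dest(dma_dst, tx, 0);

        async_tx_ack(tx);               /* no dependent operations follow */
        cookie = tx->tx_submit(tx);     /* chain onto the channel */
        dev->device_issue_pending(chan);/* kick the engine immediately */
        return cookie;
}

The caller then polls dev->device_is_tx_complete(chan, cookie, NULL, NULL) and
unmaps the buffers once it returns DMA_SUCCESS, as the self-test above does.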
+diff -Nurb linux-2.6.22-570/drivers/dma/iop-adma.c linux-2.6.22-591/drivers/dma/iop-adma.c
+--- linux-2.6.22-570/drivers/dma/iop-adma.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/dma/iop-adma.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,1465 @@
++/*
++ * offload engine driver for the Intel Xscale series of i/o processors
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++
++/*
++ * This driver supports the asynchronous DMA copy and RAID engines available
++ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 13xx)
++ */
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/async_tx.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++#include <linux/platform_device.h>
++#include <linux/memory.h>
++#include <linux/ioport.h>
++
++#include <asm/arch/adma.h>
++
++#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
++#define to_iop_adma_device(dev) \
++ container_of(dev, struct iop_adma_device, common)
++#define tx_to_iop_adma_slot(tx) \
++ container_of(tx, struct iop_adma_desc_slot, async_tx)
++
++/**
++ * iop_adma_free_slots - flags descriptor slots for reuse
++ * @slot: Slot to free
++ * Caller must hold &iop_chan->lock while calling this function
++ */
++static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
++{
++ int stride = slot->slots_per_op;
++
++ while (stride--) {
++ slot->slots_per_op = 0;
++ slot = list_entry(slot->slot_node.next,
++ struct iop_adma_desc_slot,
++ slot_node);
++ }
++}
++
++static dma_cookie_t
++iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
++{
++ BUG_ON(desc->async_tx.cookie < 0);
++ spin_lock_bh(&desc->async_tx.lock);
++ if (desc->async_tx.cookie > 0) {
++ cookie = desc->async_tx.cookie;
++ desc->async_tx.cookie = 0;
++
++ /* call the callback (must not sleep or submit new
++ * operations to this channel)
++ */
++ if (desc->async_tx.callback)
++ desc->async_tx.callback(
++ desc->async_tx.callback_param);
++
++ /* unmap dma addresses
++ * (unmap_single vs unmap_page?)
++ */
++ if (desc->group_head && desc->unmap_len) {
++ struct iop_adma_desc_slot *unmap = desc->group_head;
++ struct device *dev =
++ &iop_chan->device->pdev->dev;
++ u32 len = unmap->unmap_len;
++ u32 src_cnt = unmap->unmap_src_cnt;
++ dma_addr_t addr = iop_desc_get_dest_addr(unmap,
++ iop_chan);
++
++ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
++ while (src_cnt--) {
++ addr = iop_desc_get_src_addr(unmap,
++ iop_chan,
++ src_cnt);
++ dma_unmap_page(dev, addr, len,
++ DMA_TO_DEVICE);
++ }
++ desc->group_head = NULL;
++ }
++ }
++
++ /* run dependent operations */
++ async_tx_run_dependencies(&desc->async_tx);
++ spin_unlock_bh(&desc->async_tx.lock);
++
++ return cookie;
++}
++
++static int
++iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *iop_chan)
++{
++ /* the client is allowed to attach dependent operations
++ * until 'ack' is set
++ */
++ if (!desc->async_tx.ack)
++ return 0;
++
++ /* leave the last descriptor in the chain
++ * so we can append to it
++ */
++ if (desc->chain_node.next == &iop_chan->chain)
++ return 1;
++
++ dev_dbg(iop_chan->device->common.dev,
++ "\tfree slot: %d slots_per_op: %d\n",
++ desc->idx, desc->slots_per_op);
++
++ list_del(&desc->chain_node);
++ iop_adma_free_slots(desc);
++
++ return 0;
++}
++
++static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
++{
++ struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
++ dma_cookie_t cookie = 0;
++ u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
++ int busy = iop_chan_is_busy(iop_chan);
++ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
++
++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++ /* free completed slots from the chain starting with
++ * the oldest descriptor
++ */
++ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
++ chain_node) {
++ pr_debug("\tcookie: %d slot: %d busy: %d "
++ "this_desc: %#x next_desc: %#x ack: %d\n",
++ iter->async_tx.cookie, iter->idx, busy, iter->phys,
++ iop_desc_get_next_desc(iter),
++ iter->async_tx.ack);
++ prefetch(_iter);
++ prefetch(&_iter->async_tx);
++
++ /* do not advance past the current descriptor loaded into the
++ * hardware channel, subsequent descriptors are either in
++ * process or have not been submitted
++ */
++ if (seen_current)
++ break;
++
++ /* stop the search if we reach the current descriptor and the
++ * channel is busy, or if it appears that the current descriptor
++ * needs to be re-read (i.e. has been appended to)
++ */
++ if (iter->phys == current_desc) {
++ BUG_ON(seen_current++);
++ if (busy || iop_desc_get_next_desc(iter))
++ break;
++ }
++
++ /* detect the start of a group transaction */
++ if (!slot_cnt && !slots_per_op) {
++ slot_cnt = iter->slot_cnt;
++ slots_per_op = iter->slots_per_op;
++ if (slot_cnt <= slots_per_op) {
++ slot_cnt = 0;
++ slots_per_op = 0;
++ }
++ }
++
++ if (slot_cnt) {
++ pr_debug("\tgroup++\n");
++ if (!grp_start)
++ grp_start = iter;
++ slot_cnt -= slots_per_op;
++ }
++
++ /* all the members of a group are complete */
++ if (slots_per_op != 0 && slot_cnt == 0) {
++ struct iop_adma_desc_slot *grp_iter, *_grp_iter;
++ int end_of_chain = 0;
++ pr_debug("\tgroup end\n");
++
++ /* collect the total results */
++ if (grp_start->xor_check_result) {
++ u32 zero_sum_result = 0;
++ slot_cnt = grp_start->slot_cnt;
++ grp_iter = grp_start;
++
++ list_for_each_entry_from(grp_iter,
++ &iop_chan->chain, chain_node) {
++ zero_sum_result |=
++ iop_desc_get_zero_result(grp_iter);
++ pr_debug("\titer%d result: %d\n",
++ grp_iter->idx, zero_sum_result);
++ slot_cnt -= slots_per_op;
++ if (slot_cnt == 0)
++ break;
++ }
++ pr_debug("\tgrp_start->xor_check_result: %p\n",
++ grp_start->xor_check_result);
++ *grp_start->xor_check_result = zero_sum_result;
++ }
++
++ /* clean up the group */
++ slot_cnt = grp_start->slot_cnt;
++ grp_iter = grp_start;
++ list_for_each_entry_safe_from(grp_iter, _grp_iter,
++ &iop_chan->chain, chain_node) {
++ cookie = iop_adma_run_tx_complete_actions(
++ grp_iter, iop_chan, cookie);
++
++ slot_cnt -= slots_per_op;
++ end_of_chain = iop_adma_clean_slot(grp_iter,
++ iop_chan);
++
++ if (slot_cnt == 0 || end_of_chain)
++ break;
++ }
++
++ /* the group should be complete at this point */
++ BUG_ON(slot_cnt);
++
++ slots_per_op = 0;
++ grp_start = NULL;
++ if (end_of_chain)
++ break;
++ else
++ continue;
++ } else if (slots_per_op) /* wait for group completion */
++ continue;
++
++ /* write back zero sum results (single descriptor case) */
++ if (iter->xor_check_result && iter->async_tx.cookie)
++ *iter->xor_check_result =
++ iop_desc_get_zero_result(iter);
++
++ cookie = iop_adma_run_tx_complete_actions(
++ iter, iop_chan, cookie);
++
++ if (iop_adma_clean_slot(iter, iop_chan))
++ break;
++ }
++
++ BUG_ON(!seen_current);
++
++ iop_chan_idle(busy, iop_chan);
++
++ if (cookie > 0) {
++ iop_chan->completed_cookie = cookie;
++ pr_debug("\tcompleted cookie %d\n", cookie);
++ }
++}
++
++static void
++iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
++{
++ spin_lock_bh(&iop_chan->lock);
++ __iop_adma_slot_cleanup(iop_chan);
++ spin_unlock_bh(&iop_chan->lock);
++}
++
++static void iop_adma_tasklet(unsigned long data)
++{
++ struct iop_adma_chan *chan = (struct iop_adma_chan *) data;
++ __iop_adma_slot_cleanup(chan);
++}
++
++static struct iop_adma_desc_slot *
++iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
++ int slots_per_op)
++{
++ struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
++ struct list_head chain = LIST_HEAD_INIT(chain);
++ int slots_found, retry = 0;
++
++ /* start the search from the last allocated descriptor;
++ * if a contiguous allocation cannot be found, start searching
++ * from the beginning of the list
++ */
++retry:
++ slots_found = 0;
++ if (retry == 0)
++ iter = iop_chan->last_used;
++ else
++ iter = list_entry(&iop_chan->all_slots,
++ struct iop_adma_desc_slot,
++ slot_node);
++
++ list_for_each_entry_safe_continue(
++ iter, _iter, &iop_chan->all_slots, slot_node) {
++ prefetch(_iter);
++ prefetch(&_iter->async_tx);
++ if (iter->slots_per_op) {
++ /* give up after finding the first busy slot
++ * on the second pass through the list
++ */
++ if (retry)
++ break;
++
++ slots_found = 0;
++ continue;
++ }
++
++ /* start the allocation if the slot is correctly aligned */
++ if (!slots_found++) {
++ if (iop_desc_is_aligned(iter, slots_per_op))
++ alloc_start = iter;
++ else {
++ slots_found = 0;
++ continue;
++ }
++ }
++
++ if (slots_found == num_slots) {
++ struct iop_adma_desc_slot *alloc_tail = NULL;
++ struct iop_adma_desc_slot *last_used = NULL;
++ iter = alloc_start;
++ while (num_slots) {
++ int i;
++ dev_dbg(iop_chan->device->common.dev,
++ "allocated slot: %d "
++ "(desc %p phys: %#x) slots_per_op %d\n",
++ iter->idx, iter->hw_desc, iter->phys,
++ slots_per_op);
++
++ /* pre-ack all but the last descriptor */
++ if (num_slots != slots_per_op)
++ iter->async_tx.ack = 1;
++ else
++ iter->async_tx.ack = 0;
++
++ list_add_tail(&iter->chain_node, &chain);
++ alloc_tail = iter;
++ iter->async_tx.cookie = 0;
++ iter->slot_cnt = num_slots;
++ iter->xor_check_result = NULL;
++ for (i = 0; i < slots_per_op; i++) {
++ iter->slots_per_op = slots_per_op - i;
++ last_used = iter;
++ iter = list_entry(iter->slot_node.next,
++ struct iop_adma_desc_slot,
++ slot_node);
++ }
++ num_slots -= slots_per_op;
++ }
++ alloc_tail->group_head = alloc_start;
++ alloc_tail->async_tx.cookie = -EBUSY;
++ list_splice(&chain, &alloc_tail->group_list);
++ iop_chan->last_used = last_used;
++ iop_desc_clear_next_desc(alloc_start);
++ iop_desc_clear_next_desc(alloc_tail);
++ return alloc_tail;
++ }
++ }
++ if (!retry++)
++ goto retry;
++
++ /* try to free some slots if the allocation fails */
++ tasklet_schedule(&iop_chan->irq_tasklet);
++
++ return NULL;
++}
++
++static dma_cookie_t
++iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
++ struct iop_adma_desc_slot *desc)
++{
++ dma_cookie_t cookie = iop_chan->common.cookie;
++ cookie++;
++ if (cookie < 0)
++ cookie = 1;
++ iop_chan->common.cookie = desc->async_tx.cookie = cookie;
++ return cookie;
++}
++
++static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
++{
++ dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
++ iop_chan->pending);
++
++ if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
++ iop_chan->pending = 0;
++ iop_chan_append(iop_chan);
++ }
++}
++
++static dma_cookie_t
++iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
++{
++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
++ struct iop_adma_desc_slot *grp_start, *old_chain_tail;
++ int slot_cnt;
++ int slots_per_op;
++ dma_cookie_t cookie;
++
++ grp_start = sw_desc->group_head;
++ slot_cnt = grp_start->slot_cnt;
++ slots_per_op = grp_start->slots_per_op;
++
++ spin_lock_bh(&iop_chan->lock);
++ cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
++
++ old_chain_tail = list_entry(iop_chan->chain.prev,
++ struct iop_adma_desc_slot, chain_node);
++ list_splice_init(&sw_desc->group_list, &old_chain_tail->chain_node);
++
++ /* fix up the hardware chain */
++ iop_desc_set_next_desc(old_chain_tail, grp_start->phys);
++
++ /* 1/ don't add pre-chained descriptors
++ * 2/ dummy read to flush next_desc write
++ */
++ BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++ /* increment the pending count by the number of slots
++ * memcpy operations have a 1:1 (slot:operation) relation
++ * other operations are heavier and will pop the threshold
++ * more often.
++ */
++ iop_chan->pending += slot_cnt;
++ iop_adma_check_threshold(iop_chan);
++ spin_unlock_bh(&iop_chan->lock);
++
++ dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
++ __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx);
++
++ return cookie;
++}
++
++static void
++iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++ int index)
++{
++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
++
++ /* to do: support transfer lengths > IOP_ADMA_MAX_BYTE_COUNT */
++ iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr);
++}
++
++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
++
++/* returns the number of allocated descriptors */
++static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
++{
++ char *hw_desc;
++ int idx;
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *slot = NULL;
++ int init = iop_chan->slots_allocated ? 0 : 1;
++ struct iop_adma_platform_data *plat_data =
++ iop_chan->device->pdev->dev.platform_data;
++ int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
++
++ /* Allocate descriptor slots */
++ do {
++ idx = iop_chan->slots_allocated;
++ if (idx == num_descs_in_pool)
++ break;
++
++ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
++ if (!slot) {
++ printk(KERN_INFO "IOP ADMA Channel only initialized"
++ " %d descriptor slots", idx);
++ break;
++ }
++ hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
++ slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
++
++ dma_async_tx_descriptor_init(&slot->async_tx, chan);
++ slot->async_tx.tx_submit = iop_adma_tx_submit;
++ slot->async_tx.tx_set_dest = iop_adma_set_dest;
++ INIT_LIST_HEAD(&slot->chain_node);
++ INIT_LIST_HEAD(&slot->slot_node);
++ INIT_LIST_HEAD(&slot->group_list);
++ hw_desc = (char *) iop_chan->device->dma_desc_pool;
++ slot->phys = (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
++ slot->idx = idx;
++
++ spin_lock_bh(&iop_chan->lock);
++ iop_chan->slots_allocated++;
++ list_add_tail(&slot->slot_node, &iop_chan->all_slots);
++ spin_unlock_bh(&iop_chan->lock);
++ } while (iop_chan->slots_allocated < num_descs_in_pool);
++
++ if (idx && !iop_chan->last_used)
++ iop_chan->last_used = list_entry(iop_chan->all_slots.next,
++ struct iop_adma_desc_slot,
++ slot_node);
++
++ dev_dbg(iop_chan->device->common.dev,
++ "allocated %d descriptor slots last_used: %p\n",
++ iop_chan->slots_allocated, iop_chan->last_used);
++
++ /* initialize the channel and the chain with a null operation */
++ if (init) {
++ if (dma_has_cap(DMA_MEMCPY,
++ iop_chan->device->common.cap_mask))
++ iop_chan_start_null_memcpy(iop_chan);
++ else if (dma_has_cap(DMA_XOR,
++ iop_chan->device->common.cap_mask))
++ iop_chan_start_null_xor(iop_chan);
++ else
++ BUG();
++ }
++
++ return (idx > 0) ? idx : -ENOMEM;
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_interrupt(struct dma_chan *chan)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ int slot_cnt, slots_per_op;
++
++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ iop_desc_init_interrupt(grp_start, iop_chan);
++ grp_start->unmap_len = 0;
++ }
++ spin_unlock_bh(&iop_chan->lock);
++
++ return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++ int index)
++{
++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++ iop_desc_set_memcpy_src_addr(grp_start, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ int slot_cnt, slots_per_op;
++
++ if (unlikely(!len))
++ return NULL;
++ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
++
++ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
++ __FUNCTION__, len);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ iop_desc_init_memcpy(grp_start, int_en);
++ iop_desc_set_byte_count(grp_start, iop_chan, len);
++ sw_desc->unmap_src_cnt = 1;
++ sw_desc->unmap_len = len;
++ sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src;
++ }
++ spin_unlock_bh(&iop_chan->lock);
++
++ return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
++ int int_en)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ int slot_cnt, slots_per_op;
++
++ if (unlikely(!len))
++ return NULL;
++ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
++
++ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
++ __FUNCTION__, len);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ iop_desc_init_memset(grp_start, int_en);
++ iop_desc_set_byte_count(grp_start, iop_chan, len);
++ iop_desc_set_block_fill_val(grp_start, value);
++ sw_desc->unmap_src_cnt = 1;
++ sw_desc->unmap_len = len;
++ }
++ spin_unlock_bh(&iop_chan->lock);
++
++ return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++ int index)
++{
++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++ iop_desc_set_xor_src_addr(grp_start, index, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
++ int int_en)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ int slot_cnt, slots_per_op;
++
++ if (unlikely(!len))
++ return NULL;
++ BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
++
++ dev_dbg(iop_chan->device->common.dev,
++ "%s src_cnt: %d len: %u int_en: %d\n",
++ __FUNCTION__, src_cnt, len, int_en);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ iop_desc_init_xor(grp_start, src_cnt, int_en);
++ iop_desc_set_byte_count(grp_start, iop_chan, len);
++ sw_desc->unmap_src_cnt = src_cnt;
++ sw_desc->unmap_len = len;
++ sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src;
++ }
++ spin_unlock_bh(&iop_chan->lock);
++
++ return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_xor_zero_sum_set_src(dma_addr_t addr,
++ struct dma_async_tx_descriptor *tx,
++ int index)
++{
++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++ iop_desc_set_zero_sum_src_addr(grp_start, index, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
++ size_t len, u32 *result, int int_en)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ int slot_cnt, slots_per_op;
++
++ if (unlikely(!len))
++ return NULL;
++
++ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
++ __FUNCTION__, src_cnt, len);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
++ iop_desc_set_zero_sum_byte_count(grp_start, len);
++ grp_start->xor_check_result = result;
++ pr_debug("\t%s: grp_start->xor_check_result: %p\n",
++ __FUNCTION__, grp_start->xor_check_result);
++ sw_desc->unmap_src_cnt = src_cnt;
++ sw_desc->unmap_len = len;
++ sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src;
++ }
++ spin_unlock_bh(&iop_chan->lock);
++
++ return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void iop_adma_dependency_added(struct dma_chan *chan)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ tasklet_schedule(&iop_chan->irq_tasklet);
++}
++
++static void iop_adma_free_chan_resources(struct dma_chan *chan)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ struct iop_adma_desc_slot *iter, *_iter;
++ int in_use_descs = 0;
++
++ iop_adma_slot_cleanup(iop_chan);
++
++ spin_lock_bh(&iop_chan->lock);
++ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
++ chain_node) {
++ in_use_descs++;
++ list_del(&iter->chain_node);
++ }
++ list_for_each_entry_safe_reverse(
++ iter, _iter, &iop_chan->all_slots, slot_node) {
++ list_del(&iter->slot_node);
++ kfree(iter);
++ iop_chan->slots_allocated--;
++ }
++ iop_chan->last_used = NULL;
++
++ dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
++ __FUNCTION__, iop_chan->slots_allocated);
++ spin_unlock_bh(&iop_chan->lock);
++
++ /* one is ok since we left it there on purpose */
++ if (in_use_descs > 1)
++ printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
++ in_use_descs - 1);
++}
++
++/**
++ * iop_adma_is_complete - poll the status of an ADMA transaction
++ * @chan: ADMA channel handle
++ * @cookie: ADMA transaction identifier
++ */
++static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
++ dma_cookie_t cookie,
++ dma_cookie_t *done,
++ dma_cookie_t *used)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++ dma_cookie_t last_used;
++ dma_cookie_t last_complete;
++ enum dma_status ret;
++
++ last_used = chan->cookie;
++ last_complete = iop_chan->completed_cookie;
++
++ if (done)
++ *done = last_complete;
++ if (used)
++ *used = last_used;
++
++ ret = dma_async_is_complete(cookie, last_complete, last_used);
++ if (ret == DMA_SUCCESS)
++ return ret;
++
++ iop_adma_slot_cleanup(iop_chan);
++
++ last_used = chan->cookie;
++ last_complete = iop_chan->completed_cookie;
++
++ if (done)
++ *done = last_complete;
++ if (used)
++ *used = last_used;
++
++ return dma_async_is_complete(cookie, last_complete, last_used);
++}
++
++static irqreturn_t iop_adma_eot_handler(int irq, void *data)
++{
++ struct iop_adma_chan *chan = data;
++
++ dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
++
++ tasklet_schedule(&chan->irq_tasklet);
++
++ iop_adma_device_clear_eot_status(chan);
++
++ return IRQ_HANDLED;
++}
++
++static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
++{
++ struct iop_adma_chan *chan = data;
++
++ dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
++
++ tasklet_schedule(&chan->irq_tasklet);
++
++ iop_adma_device_clear_eoc_status(chan);
++
++ return IRQ_HANDLED;
++}
++
++static irqreturn_t iop_adma_err_handler(int irq, void *data)
++{
++ struct iop_adma_chan *chan = data;
++ unsigned long status = iop_chan_get_status(chan);
++
++ dev_printk(KERN_ERR, chan->device->common.dev,
++ "error ( %s%s%s%s%s%s%s)\n",
++ iop_is_err_int_parity(status, chan) ? "int_parity " : "",
++ iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
++ iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
++ iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
++ iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
++ iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
++ iop_is_err_split_tx(status, chan) ? "split_tx " : "");
++
++ iop_adma_device_clear_err_status(chan);
++
++ BUG();
++
++ return IRQ_HANDLED;
++}
++
++static void iop_adma_issue_pending(struct dma_chan *chan)
++{
++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++
++ if (iop_chan->pending) {
++ iop_chan->pending = 0;
++ iop_chan_append(iop_chan);
++ }
++}
++
++/*
++ * Perform a transaction to verify the HW works.
++ */
++#define IOP_ADMA_TEST_SIZE 2000
++
++static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
++{
++ int i;
++ void *src, *dest;
++ dma_addr_t src_dma, dest_dma;
++ struct dma_chan *dma_chan;
++ dma_cookie_t cookie;
++ struct dma_async_tx_descriptor *tx;
++ int err = 0;
++ struct iop_adma_chan *iop_chan;
++
++ dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
++
++ src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
++ if (!src)
++ return -ENOMEM;
++ dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
++ if (!dest) {
++ kfree(src);
++ return -ENOMEM;
++ }
++
++ /* Fill in src buffer */
++ for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
++ ((u8 *) src)[i] = (u8)i;
++
++ memset(dest, 0, IOP_ADMA_TEST_SIZE);
++
++ /* Start copy, using first DMA channel */
++ dma_chan = container_of(device->common.channels.next,
++ struct dma_chan,
++ device_node);
++ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
++ err = -ENODEV;
++ goto out;
++ }
++
++ tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1);
++ dest_dma = dma_map_single(dma_chan->device->dev, dest,
++ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
++ iop_adma_set_dest(dest_dma, tx, 0);
++ src_dma = dma_map_single(dma_chan->device->dev, src,
++ IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
++ iop_adma_memcpy_set_src(src_dma, tx, 0);
++
++ cookie = iop_adma_tx_submit(tx);
++ iop_adma_issue_pending(dma_chan);
++ async_tx_ack(tx);
++ msleep(1);
++
++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
++ DMA_SUCCESS) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test copy timed out, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ iop_chan = to_iop_adma_chan(dma_chan);
++ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
++ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
++ if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test copy failed compare, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++free_resources:
++ iop_adma_free_chan_resources(dma_chan);
++out:
++ kfree(src);
++ kfree(dest);
++ return err;
++}
++
++#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
++static int __devinit
++iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
++{
++ int i, src_idx;
++ struct page *dest;
++ struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
++ struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
++ dma_addr_t dma_addr, dest_dma;
++ struct dma_async_tx_descriptor *tx;
++ struct dma_chan *dma_chan;
++ dma_cookie_t cookie;
++ u8 cmp_byte = 0;
++ u32 cmp_word;
++ u32 zero_sum_result;
++ int err = 0;
++ struct iop_adma_chan *iop_chan;
++
++ dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
++
++ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
++ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
++ if (!xor_srcs[src_idx]) {
++ while (src_idx--)
++ __free_page(xor_srcs[src_idx]);
++ return -ENOMEM;
++ }
++ }
++
++ dest = alloc_page(GFP_KERNEL);
++ if (!dest) {
++ while (src_idx--)
++ __free_page(xor_srcs[src_idx]);
++ return -ENOMEM;
++ }
++
++ /* Fill in src buffers */
++ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
++ u8 *ptr = page_address(xor_srcs[src_idx]);
++ for (i = 0; i < PAGE_SIZE; i++)
++ ptr[i] = (1 << src_idx);
++ }
++
++ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
++ cmp_byte ^= (u8) (1 << src_idx);
++
++ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
++ (cmp_byte << 8) | cmp_byte;
++
++ memset(page_address(dest), 0, PAGE_SIZE);
++
++ dma_chan = container_of(device->common.channels.next,
++ struct dma_chan,
++ device_node);
++ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
++ err = -ENODEV;
++ goto out;
++ }
++
++ /* test xor */
++ tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST,
++ PAGE_SIZE, 1);
++ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
++ PAGE_SIZE, DMA_FROM_DEVICE);
++ iop_adma_set_dest(dest_dma, tx, 0);
++
++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
++ dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0,
++ PAGE_SIZE, DMA_TO_DEVICE);
++ iop_adma_xor_set_src(dma_addr, tx, i);
++ }
++
++ cookie = iop_adma_tx_submit(tx);
++ iop_adma_issue_pending(dma_chan);
++ async_tx_ack(tx);
++ msleep(8);
++
++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
++ DMA_SUCCESS) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test xor timed out, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ iop_chan = to_iop_adma_chan(dma_chan);
++ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
++ PAGE_SIZE, DMA_FROM_DEVICE);
++ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
++ u32 *ptr = page_address(dest);
++ if (ptr[i] != cmp_word) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test xor failed compare, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++ }
++ dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
++ PAGE_SIZE, DMA_TO_DEVICE);
++
++ /* skip zero sum if the capability is not present */
++ if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
++ goto free_resources;
++
++ /* zero sum the sources with the destination page */
++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
++ zero_sum_srcs[i] = xor_srcs[i];
++ zero_sum_srcs[i] = dest;
++
++ zero_sum_result = 1;
++
++ tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
++ PAGE_SIZE, &zero_sum_result, 1);
++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
++ dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
++ 0, PAGE_SIZE, DMA_TO_DEVICE);
++ iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
++ }
++
++ cookie = iop_adma_tx_submit(tx);
++ iop_adma_issue_pending(dma_chan);
++ async_tx_ack(tx);
++ msleep(8);
++
++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test zero sum timed out, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ if (zero_sum_result != 0) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test zero sum failed compare, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ /* test memset */
++ tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1);
++ dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
++ PAGE_SIZE, DMA_FROM_DEVICE);
++ iop_adma_set_dest(dma_addr, tx, 0);
++
++ cookie = iop_adma_tx_submit(tx);
++ iop_adma_issue_pending(dma_chan);
++ async_tx_ack(tx);
++ msleep(8);
++
++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test memset timed out, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
++ u32 *ptr = page_address(dest);
++ if (ptr[i]) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test memset failed compare, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++ }
++
++ /* test for non-zero parity sum */
++ zero_sum_result = 0;
++ tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
++ PAGE_SIZE, &zero_sum_result, 1);
++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
++ dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
++ 0, PAGE_SIZE, DMA_TO_DEVICE);
++ iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
++ }
++
++ cookie = iop_adma_tx_submit(tx);
++ iop_adma_issue_pending(dma_chan);
++ async_tx_ack(tx);
++ msleep(8);
++
++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test non-zero sum timed out, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++ if (zero_sum_result != 1) {
++ dev_printk(KERN_ERR, dma_chan->device->dev,
++ "Self-test non-zero sum failed compare, disabling\n");
++ err = -ENODEV;
++ goto free_resources;
++ }
++
++free_resources:
++ iop_adma_free_chan_resources(dma_chan);
++out:
++ src_idx = IOP_ADMA_NUM_SRC_TEST;
++ while (src_idx--)
++ __free_page(xor_srcs[src_idx]);
++ __free_page(dest);
++ return err;
++}
++
++static int __devexit iop_adma_remove(struct platform_device *dev)
++{
++ struct iop_adma_device *device = platform_get_drvdata(dev);
++ struct dma_chan *chan, *_chan;
++ struct iop_adma_chan *iop_chan;
++ int i;
++ struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
++
++ dma_async_device_unregister(&device->common);
++
++ for (i = 0; i < 3; i++) {
++ unsigned int irq;
++ irq = platform_get_irq(dev, i);
++ free_irq(irq, device);
++ }
++
++ dma_free_coherent(&dev->dev, plat_data->pool_size,
++ device->dma_desc_pool_virt, device->dma_desc_pool);
++
++ do {
++ struct resource *res;
++ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
++ release_mem_region(res->start, res->end - res->start);
++ } while (0);
++
++ list_for_each_entry_safe(chan, _chan, &device->common.channels,
++ device_node) {
++ iop_chan = to_iop_adma_chan(chan);
++ list_del(&chan->device_node);
++ kfree(iop_chan);
++ }
++ kfree(device);
++
++ return 0;
++}
++
++static int __devinit iop_adma_probe(struct platform_device *pdev)
++{
++ struct resource *res;
++ int ret = 0, i;
++ struct iop_adma_device *adev;
++ struct iop_adma_chan *iop_chan;
++ struct dma_device *dma_dev;
++ struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
++
++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ if (!res)
++ return -ENODEV;
++
++ if (!devm_request_mem_region(&pdev->dev, res->start,
++ res->end - res->start, pdev->name))
++ return -EBUSY;
++
++ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
++ if (!adev)
++ return -ENOMEM;
++ dma_dev = &adev->common;
++
++ /* allocate coherent memory for hardware descriptors
++ * note: writecombine gives slightly better performance, but
++ * requires that we explicitly flush the writes
++ */
++ if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
++ plat_data->pool_size,
++ &adev->dma_desc_pool,
++ GFP_KERNEL)) == NULL) {
++ ret = -ENOMEM;
++ goto err_free_adev;
++ }
++
++ dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n",
++ __FUNCTION__, adev->dma_desc_pool_virt,
++ (void *) adev->dma_desc_pool);
++
++ adev->id = plat_data->hw_id;
++
++ /* discover transaction capabilities from the platform data */
++ dma_dev->cap_mask = plat_data->cap_mask;
++
++ adev->pdev = pdev;
++ platform_set_drvdata(pdev, adev);
++
++ INIT_LIST_HEAD(&dma_dev->channels);
++
++ /* set base routines */
++ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
++ dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
++ dma_dev->device_is_tx_complete = iop_adma_is_complete;
++ dma_dev->device_issue_pending = iop_adma_issue_pending;
++ dma_dev->device_dependency_added = iop_adma_dependency_added;
++ dma_dev->dev = &pdev->dev;
++
++ /* set prep routines based on capability */
++ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
++ dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
++ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
++ dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
++ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
++ dma_dev->max_xor = iop_adma_get_max_xor();
++ dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
++ }
++ if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
++ dma_dev->device_prep_dma_zero_sum =
++ iop_adma_prep_dma_zero_sum;
++ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
++ dma_dev->device_prep_dma_interrupt =
++ iop_adma_prep_dma_interrupt;
++
++ iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
++ if (!iop_chan) {
++ ret = -ENOMEM;
++ goto err_free_dma;
++ }
++ iop_chan->device = adev;
++
++ iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
++ res->end - res->start);
++ if (!iop_chan->mmr_base) {
++ ret = -ENOMEM;
++ goto err_free_iop_chan;
++ }
++ tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
++ iop_chan);
++
++ /* clear errors before enabling interrupts */
++ iop_adma_device_clear_err_status(iop_chan);
++
++ for (i = 0; i < 3; i++) {
++ irq_handler_t handler[] = { iop_adma_eot_handler,
++ iop_adma_eoc_handler,
++ iop_adma_err_handler };
++ int irq = platform_get_irq(pdev, i);
++ if (irq < 0) {
++ ret = -ENXIO;
++ goto err_free_iop_chan;
++ } else {
++ ret = devm_request_irq(&pdev->dev, irq,
++ handler[i], 0, pdev->name, iop_chan);
++ if (ret)
++ goto err_free_iop_chan;
++ }
++ }
++
++ spin_lock_init(&iop_chan->lock);
++ init_timer(&iop_chan->cleanup_watchdog);
++ iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan;
++ iop_chan->cleanup_watchdog.function = iop_adma_tasklet;
++ INIT_LIST_HEAD(&iop_chan->chain);
++ INIT_LIST_HEAD(&iop_chan->all_slots);
++ INIT_RCU_HEAD(&iop_chan->common.rcu);
++ iop_chan->common.device = dma_dev;
++ list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
++
++ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
++ ret = iop_adma_memcpy_self_test(adev);
++ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
++ if (ret)
++ goto err_free_iop_chan;
++ }
++
++ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
++ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
++ ret = iop_adma_xor_zero_sum_self_test(adev);
++ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
++ if (ret)
++ goto err_free_iop_chan;
++ }
++
++ dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
++ "( %s%s%s%s%s%s%s%s%s%s)\n",
++ dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
++ dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
++ dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
++ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
++ dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
++ dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
++ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
++ dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
++ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
++ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
++
++ dma_async_device_register(dma_dev);
++ goto out;
++
++ err_free_iop_chan:
++ kfree(iop_chan);
++ err_free_dma:
++ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
++ adev->dma_desc_pool_virt, adev->dma_desc_pool);
++ err_free_adev:
++ kfree(adev);
++ out:
++ return ret;
++}
++
++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
++{
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ dma_cookie_t cookie;
++ int slot_cnt, slots_per_op;
++
++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++
++ list_splice_init(&sw_desc->group_list, &iop_chan->chain);
++ sw_desc->async_tx.ack = 1;
++ iop_desc_init_memcpy(grp_start, 0);
++ iop_desc_set_byte_count(grp_start, iop_chan, 0);
++ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
++ iop_desc_set_memcpy_src_addr(grp_start, 0);
++
++ cookie = iop_chan->common.cookie;
++ cookie++;
++ if (cookie <= 1)
++ cookie = 2;
++
++ /* initialize the completed cookie to be less than
++ * the most recently used cookie
++ */
++ iop_chan->completed_cookie = cookie - 1;
++ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
++
++ /* channel should not be busy */
++ BUG_ON(iop_chan_is_busy(iop_chan));
++
++ /* clear any prior error-status bits */
++ iop_adma_device_clear_err_status(iop_chan);
++
++ /* disable operation */
++ iop_chan_disable(iop_chan);
++
++ /* set the descriptor address */
++ iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
++
++ /* 1/ don't add pre-chained descriptors
++ * 2/ dummy read to flush next_desc write
++ */
++ BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++ /* run the descriptor */
++ iop_chan_enable(iop_chan);
++ } else
++ dev_printk(KERN_ERR, iop_chan->device->common.dev,
++ "failed to allocate null descriptor\n");
++ spin_unlock_bh(&iop_chan->lock);
++}
++
++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
++{
++ struct iop_adma_desc_slot *sw_desc, *grp_start;
++ dma_cookie_t cookie;
++ int slot_cnt, slots_per_op;
++
++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++ spin_lock_bh(&iop_chan->lock);
++ slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++ if (sw_desc) {
++ grp_start = sw_desc->group_head;
++ list_splice_init(&sw_desc->group_list, &iop_chan->chain);
++ sw_desc->async_tx.ack = 1;
++ iop_desc_init_null_xor(grp_start, 2, 0);
++ iop_desc_set_byte_count(grp_start, iop_chan, 0);
++ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
++ iop_desc_set_xor_src_addr(grp_start, 0, 0);
++ iop_desc_set_xor_src_addr(grp_start, 1, 0);
++
++ cookie = iop_chan->common.cookie;
++ cookie++;
++ if (cookie <= 1)
++ cookie = 2;
++
++ /* initialize the completed cookie to be less than
++ * the most recently used cookie
++ */
++ iop_chan->completed_cookie = cookie - 1;
++ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
++
++ /* channel should not be busy */
++ BUG_ON(iop_chan_is_busy(iop_chan));
++
++ /* clear any prior error-status bits */
++ iop_adma_device_clear_err_status(iop_chan);
++
++ /* disable operation */
++ iop_chan_disable(iop_chan);
++
++ /* set the descriptor address */
++ iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
++
++ /* 1/ don't add pre-chained descriptors
++ * 2/ dummy read to flush next_desc write
++ */
++ BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++ /* run the descriptor */
++ iop_chan_enable(iop_chan);
++ } else
++ dev_printk(KERN_ERR, iop_chan->device->common.dev,
++ "failed to allocate null descriptor\n");
++ spin_unlock_bh(&iop_chan->lock);
++}
++
++static struct platform_driver iop_adma_driver = {
++ .probe = iop_adma_probe,
++ .remove = iop_adma_remove,
++ .driver = {
++ .owner = THIS_MODULE,
++ .name = "iop-adma",
++ },
++};
++
++static int __init iop_adma_init(void)
++{
++ /* it's currently unsafe to unload this module */
++ /* if forced, worst case is that rmmod hangs */
++ __unsafe(THIS_MODULE);
++
++ return platform_driver_register(&iop_adma_driver);
++}
++
++static void __exit iop_adma_exit(void)
++{
++ platform_driver_unregister(&iop_adma_driver);
++ return;
++}
++
++module_init(iop_adma_init);
++module_exit(iop_adma_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("IOP ADMA Engine Driver");
++MODULE_LICENSE("GPL");
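iop-adma registers the heavier operations through the same generic interface.
A sketch of driving its XOR path the way the self-test above does; illustrative
only, not part of the patch, and example_adma_xor is a hypothetical name:

static dma_cookie_t example_adma_xor(struct dma_chan *chan,
                                     struct page *dest, struct page **srcs,
                                     unsigned int src_cnt)
{
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx;
        dma_addr_t addr;
        dma_cookie_t cookie;
        int i;

        /* request a completion interrupt (int_en = 1) */
        tx = dev->device_prep_dma_xor(chan, src_cnt, PAGE_SIZE, 1);
        if (!tx)
                return -ENOMEM;

        addr = dma_map_page(dev->dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
        tx->tx_set_dest(addr, tx, 0);
        for (i = 0; i < src_cnt; i++) {
                addr = dma_map_page(dev->dev, srcs[i], 0, PAGE_SIZE,
                                    DMA_TO_DEVICE);
                tx->tx_set_src(addr, tx, i);    /* one source per index */
        }

        async_tx_ack(tx);
        cookie = tx->tx_submit(tx);
        dev->device_issue_pending(chan);        /* or rely on the threshold */
        return cookie;
}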
+diff -Nurb linux-2.6.22-570/drivers/edac/edac_mc.c linux-2.6.22-591/drivers/edac/edac_mc.c
+--- linux-2.6.22-570/drivers/edac/edac_mc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/edac/edac_mc.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1906,6 +1906,7 @@
+
+ static int edac_kernel_thread(void *arg)
+ {
++ set_freezable();
+ while (!kthread_should_stop()) {
+ do_edac_check();
+
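The one-line edac change opts the polling kthread into the freezer; in this
tree kthreads are nonfreezable until they call set_freezable(). A generic
sketch of the pattern (hypothetical helper, not edac's actual loop):

static int example_freezable_thread(void *arg)
{
        set_freezable();        /* kthreads skip the freezer by default */
        while (!kthread_should_stop()) {
                try_to_freeze();        /* park here across suspend/resume */
                /* ... periodic work ... */
                schedule_timeout_interruptible(HZ);
        }
        return 0;
}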
+diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.c linux-2.6.22-591/drivers/firmware/dcdbas.c
+--- linux-2.6.22-570/drivers/firmware/dcdbas.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/firmware/dcdbas.c 2007-12-21 15:36:11.000000000 -0500
+@@ -149,8 +149,9 @@
+ return count;
+ }
+
+-static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos,
+- size_t count)
++static ssize_t smi_data_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t count)
+ {
+ size_t max_read;
+ ssize_t ret;
+@@ -170,8 +171,9 @@
+ return ret;
+ }
+
+-static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos,
+- size_t count)
++static ssize_t smi_data_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t count)
+ {
+ ssize_t ret;
+
+diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.h linux-2.6.22-591/drivers/firmware/dcdbas.h
+--- linux-2.6.22-570/drivers/firmware/dcdbas.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/firmware/dcdbas.h 2007-12-21 15:36:11.000000000 -0500
+@@ -67,8 +67,7 @@
+ #define DCDBAS_BIN_ATTR_RW(_name) \
+ struct bin_attribute bin_attr_##_name = { \
+ .attr = { .name = __stringify(_name), \
+- .mode = 0600, \
+- .owner = THIS_MODULE }, \
++ .mode = 0600 }, \
+ .read = _name##_read, \
+ .write = _name##_write, \
+ }
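The dcdbas, dell_rbu, edd, efivars, eeprom, and max6875 hunks all track the
same sysfs API change: binary-attribute handlers gain a struct bin_attribute
argument, and struct attribute loses its .owner field. A sketch of a handler
in the new style (names and sizes hypothetical, not from the patch):

static ssize_t example_bin_read(struct kobject *kobj,
                                struct bin_attribute *bin_attr,
                                char *buf, loff_t pos, size_t count)
{
        /* per-attribute state now arrives via bin_attr, not via kobj */
        if (pos >= bin_attr->size)
                return 0;
        if (count > bin_attr->size - pos)
                count = bin_attr->size - pos;
        /* ... copy 'count' bytes at offset 'pos' into 'buf' ... */
        return count;
}

static struct bin_attribute example_attr = {
        .attr = { .name = "example", .mode = 0444 },    /* no .owner */
        .size = 256,
        .read = example_bin_read,
};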
+diff -Nurb linux-2.6.22-570/drivers/firmware/dell_rbu.c linux-2.6.22-591/drivers/firmware/dell_rbu.c
+--- linux-2.6.22-570/drivers/firmware/dell_rbu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/firmware/dell_rbu.c 2007-12-21 15:36:11.000000000 -0500
+@@ -543,8 +543,9 @@
+ return ret_count;
+ }
+
+-static ssize_t read_rbu_data(struct kobject *kobj, char *buffer,
+- loff_t pos, size_t count)
++static ssize_t read_rbu_data(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buffer, loff_t pos, size_t count)
+ {
+ ssize_t ret_count = 0;
+
+@@ -591,8 +592,9 @@
+ spin_unlock(&rbu_data.lock);
+ }
+
+-static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer,
+- loff_t pos, size_t count)
++static ssize_t read_rbu_image_type(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buffer, loff_t pos, size_t count)
+ {
+ int size = 0;
+ if (!pos)
+@@ -600,8 +602,9 @@
+ return size;
+ }
+
+-static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer,
+- loff_t pos, size_t count)
++static ssize_t write_rbu_image_type(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buffer, loff_t pos, size_t count)
+ {
+ int rc = count;
+ int req_firm_rc = 0;
+@@ -660,8 +663,9 @@
+ return rc;
+ }
+
+-static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer,
+- loff_t pos, size_t count)
++static ssize_t read_rbu_packet_size(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buffer, loff_t pos, size_t count)
+ {
+ int size = 0;
+ if (!pos) {
+@@ -672,8 +676,9 @@
+ return size;
+ }
+
+-static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer,
+- loff_t pos, size_t count)
++static ssize_t write_rbu_packet_size(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buffer, loff_t pos, size_t count)
+ {
+ unsigned long temp;
+ spin_lock(&rbu_data.lock);
+@@ -687,18 +692,18 @@
+ }
+
+ static struct bin_attribute rbu_data_attr = {
+- .attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444},
++ .attr = {.name = "data", .mode = 0444},
+ .read = read_rbu_data,
+ };
+
+ static struct bin_attribute rbu_image_type_attr = {
+- .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644},
++ .attr = {.name = "image_type", .mode = 0644},
+ .read = read_rbu_image_type,
+ .write = write_rbu_image_type,
+ };
+
+ static struct bin_attribute rbu_packet_size_attr = {
+- .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644},
++ .attr = {.name = "packet_size", .mode = 0644},
+ .read = read_rbu_packet_size,
+ .write = write_rbu_packet_size,
+ };
+diff -Nurb linux-2.6.22-570/drivers/firmware/edd.c linux-2.6.22-591/drivers/firmware/edd.c
+--- linux-2.6.22-570/drivers/firmware/edd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/firmware/edd.c 2007-12-21 15:36:11.000000000 -0500
+@@ -74,7 +74,7 @@
+
+ #define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \
+ struct edd_attribute edd_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \
++ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .test = _test, \
+ };
+diff -Nurb linux-2.6.22-570/drivers/firmware/efivars.c linux-2.6.22-591/drivers/firmware/efivars.c
+--- linux-2.6.22-570/drivers/firmware/efivars.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/firmware/efivars.c 2007-12-21 15:36:11.000000000 -0500
+@@ -131,21 +131,21 @@
+
+ #define EFI_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute efi_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++ .attr = {.name = __stringify(_name), .mode = _mode}, \
+ .show = _show, \
+ .store = _store, \
+ };
+
+ #define EFIVAR_ATTR(_name, _mode, _show, _store) \
+ struct efivar_attribute efivar_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++ .attr = {.name = __stringify(_name), .mode = _mode}, \
+ .show = _show, \
+ .store = _store, \
+ };
+
+ #define VAR_SUBSYS_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute var_subsys_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++ .attr = {.name = __stringify(_name), .mode = _mode}, \
+ .show = _show, \
+ .store = _store, \
+ };
+diff -Nurb linux-2.6.22-570/drivers/i2c/chips/eeprom.c linux-2.6.22-591/drivers/i2c/chips/eeprom.c
+--- linux-2.6.22-570/drivers/i2c/chips/eeprom.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/i2c/chips/eeprom.c 2007-12-21 15:36:11.000000000 -0500
+@@ -110,7 +110,8 @@
+ mutex_unlock(&data->update_lock);
+ }
+
+-static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
+ struct eeprom_data *data = i2c_get_clientdata(client);
+@@ -150,7 +151,6 @@
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = EEPROM_SIZE,
+ .read = eeprom_read,
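+Note how eeprom_read climbs from the kobject back to its i2c_client with a
+nested container_of; spelled out, that one-liner is equivalent to this
+sketch:
+
+#include <linux/i2c.h>
+
+static struct i2c_client *example_kobj_to_client(struct kobject *kobj)
+{
+        /* sysfs hands us the device's embedded kobject; walk
+         * kobject -> struct device, then struct device -> i2c_client */
+        struct device *dev = container_of(kobj, struct device, kobj);
+
+        return to_i2c_client(dev);
+}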
+diff -Nurb linux-2.6.22-570/drivers/i2c/chips/max6875.c linux-2.6.22-591/drivers/i2c/chips/max6875.c
+--- linux-2.6.22-570/drivers/i2c/chips/max6875.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/i2c/chips/max6875.c 2007-12-21 15:36:11.000000000 -0500
+@@ -125,8 +125,9 @@
+ mutex_unlock(&data->update_lock);
+ }
+
+-static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++static ssize_t max6875_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct i2c_client *client = kobj_to_i2c_client(kobj);
+ struct max6875_data *data = i2c_get_clientdata(client);
+@@ -152,7 +153,6 @@
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = USER_EEPROM_SIZE,
+ .read = max6875_read,
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c
+--- linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/bitops.h>
+ #include <linux/kdev_t.h>
++#include <linux/freezer.h>
+ #include <linux/suspend.h>
+ #include <linux/kthread.h>
+ #include <linux/preempt.h>
+@@ -1133,8 +1134,6 @@
+ struct list_head tmp;
+ int may_schedule;
+
+- current->flags |= PF_NOFREEZE;
+-
+ while (!kthread_should_stop()) {
+
+ INIT_LIST_HEAD(&tmp);
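+Dropping PF_NOFREEZE follows from the 2.6.23 freezer rework: kernel threads
+are now non-freezable by default, so the flag is redundant, and threads that
+should park across suspend opt in with set_freezable() -- hence the
+additions to nodemgr, gameport, serio, ucb1400_ts, therm_adt746x and
+windfarm below. The usual loop shape, as a sketch rather than any one of
+these drivers:
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+
+static int example_thread(void *data)
+{
+        set_freezable();                /* opt in; default is non-freezable */
+        while (!kthread_should_stop()) {
+                try_to_freeze();        /* park here during suspend/resume */
+                /* ... one unit of work, then sleep ... */
+        }
+        return 0;
+}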
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/nodemgr.c linux-2.6.22-591/drivers/ieee1394/nodemgr.c
+--- linux-2.6.22-570/drivers/ieee1394/nodemgr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/ieee1394/nodemgr.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1669,6 +1669,7 @@
+ unsigned int g, generation = 0;
+ int i, reset_cycles = 0;
+
++ set_freezable();
+ /* Setup our device-model entries */
+ nodemgr_create_host_dev_files(host);
+
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/sbp2.c linux-2.6.22-591/drivers/ieee1394/sbp2.c
+--- linux-2.6.22-570/drivers/ieee1394/sbp2.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/ieee1394/sbp2.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1505,69 +1505,6 @@
+ }
+ }
+
+-static void sbp2_prep_command_orb_no_sg(struct sbp2_command_orb *orb,
+- struct sbp2_fwhost_info *hi,
+- struct sbp2_command_info *cmd,
+- struct scatterlist *sgpnt,
+- u32 orb_direction,
+- unsigned int scsi_request_bufflen,
+- void *scsi_request_buffer,
+- enum dma_data_direction dma_dir)
+-{
+- cmd->dma_dir = dma_dir;
+- cmd->dma_size = scsi_request_bufflen;
+- cmd->dma_type = CMD_DMA_SINGLE;
+- cmd->cmd_dma = dma_map_single(hi->host->device.parent,
+- scsi_request_buffer,
+- cmd->dma_size, cmd->dma_dir);
+- orb->data_descriptor_hi = ORB_SET_NODE_ID(hi->host->node_id);
+- orb->misc |= ORB_SET_DIRECTION(orb_direction);
+-
+- /* handle case where we get a command w/o s/g enabled
+- * (but check for transfers larger than 64K) */
+- if (scsi_request_bufflen <= SBP2_MAX_SG_ELEMENT_LENGTH) {
+-
+- orb->data_descriptor_lo = cmd->cmd_dma;
+- orb->misc |= ORB_SET_DATA_SIZE(scsi_request_bufflen);
+-
+- } else {
+- /* The buffer is too large. Turn this into page tables. */
+-
+- struct sbp2_unrestricted_page_table *sg_element =
+- &cmd->scatter_gather_element[0];
+- u32 sg_count, sg_len;
+- dma_addr_t sg_addr;
+-
+- orb->data_descriptor_lo = cmd->sge_dma;
+- orb->misc |= ORB_SET_PAGE_TABLE_PRESENT(0x1);
+-
+- /* fill out our SBP-2 page tables; split up the large buffer */
+- sg_count = 0;
+- sg_len = scsi_request_bufflen;
+- sg_addr = cmd->cmd_dma;
+- while (sg_len) {
+- sg_element[sg_count].segment_base_lo = sg_addr;
+- if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) {
+- sg_element[sg_count].length_segment_base_hi =
+- PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH);
+- sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH;
+- sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH;
+- } else {
+- sg_element[sg_count].length_segment_base_hi =
+- PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len);
+- sg_len = 0;
+- }
+- sg_count++;
+- }
+-
+- orb->misc |= ORB_SET_DATA_SIZE(sg_count);
+-
+- sbp2util_cpu_to_be32_buffer(sg_element,
+- (sizeof(struct sbp2_unrestricted_page_table)) *
+- sg_count);
+- }
+-}
+-
+ static void sbp2_create_command_orb(struct sbp2_lu *lu,
+ struct sbp2_command_info *cmd,
+ unchar *scsi_cmd,
+@@ -1611,13 +1548,9 @@
+ orb->data_descriptor_hi = 0x0;
+ orb->data_descriptor_lo = 0x0;
+ orb->misc |= ORB_SET_DIRECTION(1);
+- } else if (scsi_use_sg)
++ } else
+ sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sgpnt,
+ orb_direction, dma_dir);
+- else
+- sbp2_prep_command_orb_no_sg(orb, hi, cmd, sgpnt, orb_direction,
+- scsi_request_bufflen,
+- scsi_request_buffer, dma_dir);
+
+ sbp2util_cpu_to_be32_buffer(orb, sizeof(*orb));
+
+@@ -1706,15 +1639,15 @@
+ void (*done)(struct scsi_cmnd *))
+ {
+ unchar *scsi_cmd = (unchar *)SCpnt->cmnd;
+- unsigned int request_bufflen = SCpnt->request_bufflen;
++ unsigned int request_bufflen = scsi_bufflen(SCpnt);
+ struct sbp2_command_info *cmd;
+
+ cmd = sbp2util_allocate_command_orb(lu, SCpnt, done);
+ if (!cmd)
+ return -EIO;
+
+- sbp2_create_command_orb(lu, cmd, scsi_cmd, SCpnt->use_sg,
+- request_bufflen, SCpnt->request_buffer,
++ sbp2_create_command_orb(lu, cmd, scsi_cmd, scsi_sg_count(SCpnt),
++ request_bufflen, scsi_sglist(SCpnt),
+ SCpnt->sc_data_direction);
+ sbp2_link_orb_command(lu, cmd);
+
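+The sbp2 changes are part of the 2.6.23 SCSI data-buffer accessor
+conversion: use_sg/request_buffer/request_bufflen are read through
+scsi_sg_count(), scsi_sglist() and scsi_bufflen(), and since the midlayer
+now always presents a scatterlist, the whole no-sg ORB path can simply go.
+The field-to-accessor mapping, as a sketch:
+
+#include <scsi/scsi_cmnd.h>
+
+static unsigned int example_total_len(struct scsi_cmnd *sc)
+{
+        struct scatterlist *sg; /* scsi_sglist(sc): was sc->request_buffer */
+        unsigned int total = 0;
+        int i;
+
+        /* scsi_sg_count(sc) was sc->use_sg; scsi_bufflen(sc) was
+         * sc->request_bufflen and should equal the sum computed here */
+        scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
+                total += sg->length;
+        return total;
+}
+
+The same conversion explains the ib_srp and iser hunks further down,
+including the removal of their fake single-entry scatterlists.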
+diff -Nurb linux-2.6.22-570/drivers/infiniband/core/addr.c linux-2.6.22-591/drivers/infiniband/core/addr.c
+--- linux-2.6.22-570/drivers/infiniband/core/addr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/core/addr.c 2007-12-21 15:36:14.000000000 -0500
+@@ -110,7 +110,7 @@
+ __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+ int ret;
+
+- dev = ip_dev_find(ip);
++ dev = ip_dev_find(&init_net, ip);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+
+@@ -157,6 +157,7 @@
+ u32 dst_ip = dst_in->sin_addr.s_addr;
+
+ memset(&fl, 0, sizeof fl);
++ fl.fl_net = &init_net;
+ fl.nl_u.ip4_u.daddr = dst_ip;
+ if (ip_route_output_key(&rt, &fl))
+ return;
+@@ -178,6 +179,7 @@
+ int ret;
+
+ memset(&fl, 0, sizeof fl);
++ fl.fl_net = &init_net;
+ fl.nl_u.ip4_u.daddr = dst_ip;
+ fl.nl_u.ip4_u.saddr = src_ip;
+ ret = ip_route_output_key(&rt, &fl);
+@@ -262,7 +264,7 @@
+ __be32 dst_ip = dst_in->sin_addr.s_addr;
+ int ret;
+
+- dev = ip_dev_find(dst_ip);
++ dev = ip_dev_find(&init_net, dst_ip);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+
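+These hunks thread the network-namespace work through the IB address
+resolver: ip_dev_find() takes the namespace to search, and a route lookup
+names its namespace through the fl_net member this patch series adds to
+struct flowi; both callers simply pin init_net. The lookup pattern, as a
+sketch under those assumptions:
+
+#include <linux/string.h>
+#include <net/route.h>
+#include <net/net_namespace.h>
+
+static int example_route_lookup(__be32 dst_ip, struct rtable **rt)
+{
+        struct flowi fl;
+
+        memset(&fl, 0, sizeof fl);
+        fl.fl_net = &init_net;          /* field added by this series */
+        fl.nl_u.ip4_u.daddr = dst_ip;
+        return ip_route_output_key(rt, &fl);
+}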
+diff -Nurb linux-2.6.22-570/drivers/infiniband/core/cma.c linux-2.6.22-591/drivers/infiniband/core/cma.c
+--- linux-2.6.22-570/drivers/infiniband/core/cma.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/core/cma.c 2007-12-21 15:36:14.000000000 -0500
+@@ -1267,7 +1267,7 @@
+ atomic_inc(&conn_id->dev_remove);
+ conn_id->state = CMA_CONNECT;
+
+- dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
++ dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
+ if (!dev) {
+ ret = -EADDRNOTAVAIL;
+ cma_enable_remove(conn_id);
+@@ -1880,18 +1880,18 @@
+ if (ret)
+ goto err1;
+
+- if (port > sysctl_local_port_range[1]) {
+- if (next_port != sysctl_local_port_range[0]) {
++ if (port > init_net.sysctl_local_port_range[1]) {
++ if (next_port != init_net.sysctl_local_port_range[0]) {
+ idr_remove(ps, port);
+- next_port = sysctl_local_port_range[0];
++ next_port = init_net.sysctl_local_port_range[0];
+ goto retry;
+ }
+ ret = -EADDRNOTAVAIL;
+ goto err2;
+ }
+
+- if (port == sysctl_local_port_range[1])
+- next_port = sysctl_local_port_range[0];
++ if (port == init_net.sysctl_local_port_range[1])
++ next_port = init_net.sysctl_local_port_range[0];
+ else
+ next_port = port + 1;
+
+@@ -2774,8 +2774,9 @@
+
+ get_random_bytes(&next_port, sizeof next_port);
+ next_port = ((unsigned int) next_port %
+- (sysctl_local_port_range[1] - sysctl_local_port_range[0])) +
+- sysctl_local_port_range[0];
++ (init_net.sysctl_local_port_range[1] -
++ init_net.sysctl_local_port_range[0])) +
++ init_net.sysctl_local_port_range[0];
+ cma_wq = create_singlethread_workqueue("rdma_cm");
+ if (!cma_wq)
+ return -ENOMEM;
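+sysctl_local_port_range likewise moves into struct net in this series, so
+the RDMA CM draws its ephemeral ports from init_net's window. The seeding
+above is just a uniform pick inside [low, high), sketched here:
+
+#include <linux/random.h>
+#include <net/net_namespace.h>
+
+static unsigned int example_pick_start_port(void)
+{
+        unsigned int port;
+        int low  = init_net.sysctl_local_port_range[0];
+        int high = init_net.sysctl_local_port_range[1];
+
+        get_random_bytes(&port, sizeof(port));
+        return (port % (high - low)) + low;
+}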
+diff -Nurb linux-2.6.22-570/drivers/infiniband/core/sysfs.c linux-2.6.22-591/drivers/infiniband/core/sysfs.c
+--- linux-2.6.22-570/drivers/infiniband/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/core/sysfs.c 2007-12-21 15:36:11.000000000 -0500
+@@ -479,7 +479,6 @@
+
+ element->attr.attr.name = element->name;
+ element->attr.attr.mode = S_IRUGO;
+- element->attr.attr.owner = THIS_MODULE;
+ element->attr.show = show;
+ element->index = i;
+
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-12-21 15:36:11.000000000 -0500
+@@ -134,19 +134,9 @@
+ {
+ struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data;
+ struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
+- struct scsi_cmnd *sc = ctask->sc;
+
+ iser_ctask->command_sent = 0;
+ iser_ctask->iser_conn = iser_conn;
+-
+- if (sc->sc_data_direction == DMA_TO_DEVICE) {
+- BUG_ON(ctask->total_length == 0);
+-
+- debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n",
+- ctask->itt, ctask->total_length, ctask->imm_count,
+- ctask->unsol_count);
+- }
+-
+ iser_ctask_rdma_init(iser_ctask);
+ }
+
+@@ -219,6 +209,14 @@
+ struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
+ int error = 0;
+
++ if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) {
++ BUG_ON(scsi_bufflen(ctask->sc) == 0);
++
++ debug_scsi("cmd [itt %x total %d imm %d unsol_data %d]\n",
++ ctask->itt, scsi_bufflen(ctask->sc),
++ ctask->imm_count, ctask->unsol_count);
++ }
++
+ debug_scsi("ctask deq [cid %d itt 0x%x]\n",
+ conn->id, ctask->itt);
+
+@@ -375,6 +373,7 @@
+ static struct iscsi_cls_session *
+ iscsi_iser_session_create(struct iscsi_transport *iscsit,
+ struct scsi_transport_template *scsit,
++ uint16_t cmds_max, uint16_t qdepth,
+ uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+ struct iscsi_cls_session *cls_session;
+@@ -386,7 +385,13 @@
+ struct iscsi_iser_cmd_task *iser_ctask;
+ struct iser_desc *desc;
+
++ /*
++ * we do not support setting can_queue or cmd_per_lun from userspace yet
++ * because we preallocate so many resources
++ */
+ cls_session = iscsi_session_setup(iscsit, scsit,
++ ISCSI_DEF_XMIT_CMDS_MAX,
++ ISCSI_MAX_CMD_PER_LUN,
+ sizeof(struct iscsi_iser_cmd_task),
+ sizeof(struct iser_desc),
+ initial_cmdsn, &hn);
+@@ -545,7 +550,7 @@
+ static struct scsi_host_template iscsi_iser_sht = {
+ .name = "iSCSI Initiator over iSER, v." DRV_VER,
+ .queuecommand = iscsi_queuecommand,
+- .can_queue = ISCSI_XMIT_CMDS_MAX - 1,
++ .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1,
+ .sg_tablesize = ISCSI_ISER_SG_TABLESIZE,
+ .max_sectors = 1024,
+ .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN,
+@@ -574,8 +579,12 @@
+ ISCSI_EXP_STATSN |
+ ISCSI_PERSISTENT_PORT |
+ ISCSI_PERSISTENT_ADDRESS |
+- ISCSI_TARGET_NAME |
+- ISCSI_TPGT,
++ ISCSI_TARGET_NAME | ISCSI_TPGT |
++ ISCSI_USERNAME | ISCSI_PASSWORD |
++ ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN,
++ .host_param_mask = ISCSI_HOST_HWADDRESS |
++ ISCSI_HOST_NETDEV_NAME |
++ ISCSI_HOST_INITIATOR_NAME,
+ .host_template = &iscsi_iser_sht,
+ .conndata_size = sizeof(struct iscsi_conn),
+ .max_lun = ISCSI_ISER_MAX_LUN,
+@@ -592,6 +601,9 @@
+ .get_session_param = iscsi_session_get_param,
+ .start_conn = iscsi_iser_conn_start,
+ .stop_conn = iscsi_conn_stop,
++ /* iscsi host params */
++ .get_host_param = iscsi_host_get_param,
++ .set_host_param = iscsi_host_set_param,
+ /* IO */
+ .send_pdu = iscsi_conn_send_pdu,
+ .get_stats = iscsi_iser_conn_get_stats,
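+Two libiscsi interface changes surface here: iscsi_session_setup() grows
+cmds_max/qdepth so a transport can size its queues at session creation
+(iser pins both to compile-time constants because its buffers are
+preallocated, per the comment above), and the transport template gains host
+parameter get/set hooks. The new call shape, with the added arguments
+labelled -- a fragment of the call above, not new code:
+
+cls_session = iscsi_session_setup(iscsit, scsit,
+                                  ISCSI_DEF_XMIT_CMDS_MAX, /* cmds_max */
+                                  ISCSI_MAX_CMD_PER_LUN,   /* qdepth   */
+                                  sizeof(struct iscsi_iser_cmd_task),
+                                  sizeof(struct iser_desc),
+                                  initial_cmdsn, &hn);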
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-12-21 15:36:11.000000000 -0500
+@@ -98,7 +98,7 @@
+ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
+ * SCSI_TMFUNC(2), LOGOUT(1) */
+
+-#define ISER_QP_MAX_RECV_DTOS (ISCSI_XMIT_CMDS_MAX + \
++#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \
+ ISER_MAX_RX_MISC_PDUS + \
+ ISER_MAX_TX_MISC_PDUS)
+
+@@ -110,7 +110,7 @@
+
+ #define ISER_INFLIGHT_DATAOUTS 8
+
+-#define ISER_QP_MAX_REQ_DTOS (ISCSI_XMIT_CMDS_MAX * \
++#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+ (1 + ISER_INFLIGHT_DATAOUTS) + \
+ ISER_MAX_TX_MISC_PDUS + \
+ ISER_MAX_RX_MISC_PDUS)
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c 2007-12-21 15:36:11.000000000 -0500
+@@ -351,18 +351,12 @@
+ else
+ data_buf = &iser_ctask->data[ISER_DIR_OUT];
+
+- if (sc->use_sg) { /* using a scatter list */
+- data_buf->buf = sc->request_buffer;
+- data_buf->size = sc->use_sg;
+- } else if (sc->request_bufflen) {
+- /* using a single buffer - convert it into one entry SG */
+- sg_init_one(&data_buf->sg_single,
+- sc->request_buffer, sc->request_bufflen);
+- data_buf->buf = &data_buf->sg_single;
+- data_buf->size = 1;
++ if (scsi_sg_count(sc)) { /* using a scatter list */
++ data_buf->buf = scsi_sglist(sc);
++ data_buf->size = scsi_sg_count(sc);
+ }
+
+- data_buf->data_len = sc->request_bufflen;
++ data_buf->data_len = scsi_bufflen(sc);
+
+ if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+ err = iser_prepare_read_cmd(ctask, edtl);
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c 2007-12-21 15:36:11.000000000 -0500
+@@ -155,8 +155,8 @@
+ params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
+ /* make the pool size twice the max number of SCSI commands *
+ * the ML is expected to queue, watermark for unmap at 50% */
+- params.pool_size = ISCSI_XMIT_CMDS_MAX * 2;
+- params.dirty_watermark = ISCSI_XMIT_CMDS_MAX;
++ params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
++ params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
+ params.cache = 0;
+ params.flush_function = NULL;
+ params.access = (IB_ACCESS_LOCAL_WRITE |
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c 2007-12-21 15:36:11.000000000 -0500
+@@ -455,10 +455,7 @@
+ struct srp_target_port *target,
+ struct srp_request *req)
+ {
+- struct scatterlist *scat;
+- int nents;
+-
+- if (!scmnd->request_buffer ||
++ if (!scsi_sglist(scmnd) ||
+ (scmnd->sc_data_direction != DMA_TO_DEVICE &&
+ scmnd->sc_data_direction != DMA_FROM_DEVICE))
+ return;
+@@ -468,20 +465,8 @@
+ req->fmr = NULL;
+ }
+
+- /*
+- * This handling of non-SG commands can be killed when the
+- * SCSI midlayer no longer generates non-SG commands.
+- */
+- if (likely(scmnd->use_sg)) {
+- nents = scmnd->use_sg;
+- scat = scmnd->request_buffer;
+- } else {
+- nents = 1;
+- scat = &req->fake_sg;
+- }
+-
+- ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents,
+- scmnd->sc_data_direction);
++ ib_dma_unmap_sg(target->srp_host->dev->dev, scsi_sglist(scmnd),
++ scsi_sg_count(scmnd), scmnd->sc_data_direction);
+ }
+
+ static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
+@@ -595,6 +580,7 @@
+ int ret;
+ struct srp_device *dev = target->srp_host->dev;
+ struct ib_device *ibdev = dev->dev;
++ struct scatterlist *sg;
+
+ if (!dev->fmr_pool)
+ return -ENODEV;
+@@ -604,16 +590,16 @@
+ return -EINVAL;
+
+ len = page_cnt = 0;
+- for (i = 0; i < sg_cnt; ++i) {
+- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++ scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+
+- if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) {
++ if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) {
+ if (i > 0)
+ return -EINVAL;
+ else
+ ++page_cnt;
+ }
+- if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) &
++ if ((ib_sg_dma_address(ibdev, sg) + dma_len) &
+ ~dev->fmr_page_mask) {
+ if (i < sg_cnt - 1)
+ return -EINVAL;
+@@ -633,12 +619,12 @@
+ return -ENOMEM;
+
+ page_cnt = 0;
+- for (i = 0; i < sg_cnt; ++i) {
+- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++ scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+
+ for (j = 0; j < dma_len; j += dev->fmr_page_size)
+ dma_pages[page_cnt++] =
+- (ib_sg_dma_address(ibdev, &scat[i]) &
++ (ib_sg_dma_address(ibdev, sg) &
+ dev->fmr_page_mask) + j;
+ }
+
+@@ -673,7 +659,7 @@
+ struct srp_device *dev;
+ struct ib_device *ibdev;
+
+- if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
++ if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
+ return sizeof (struct srp_cmd);
+
+ if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
+@@ -683,18 +669,8 @@
+ return -EINVAL;
+ }
+
+- /*
+- * This handling of non-SG commands can be killed when the
+- * SCSI midlayer no longer generates non-SG commands.
+- */
+- if (likely(scmnd->use_sg)) {
+- nents = scmnd->use_sg;
+- scat = scmnd->request_buffer;
+- } else {
+- nents = 1;
+- scat = &req->fake_sg;
+- sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
+- }
++ nents = scsi_sg_count(scmnd);
++ scat = scsi_sglist(scmnd);
+
+ dev = target->srp_host->dev;
+ ibdev = dev->dev;
+@@ -724,6 +700,7 @@
+ * descriptor.
+ */
+ struct srp_indirect_buf *buf = (void *) cmd->add_data;
++ struct scatterlist *sg;
+ u32 datalen = 0;
+ int i;
+
+@@ -732,11 +709,11 @@
+ sizeof (struct srp_indirect_buf) +
+ count * sizeof (struct srp_direct_buf);
+
+- for (i = 0; i < count; ++i) {
+- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++ scsi_for_each_sg(scmnd, sg, count, i) {
++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+
+ buf->desc_list[i].va =
+- cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i]));
++ cpu_to_be64(ib_sg_dma_address(ibdev, sg));
+ buf->desc_list[i].key =
+ cpu_to_be32(dev->mr->rkey);
+ buf->desc_list[i].len = cpu_to_be32(dma_len);
+@@ -802,9 +779,9 @@
+ }
+
+ if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
+- scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt);
++ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+ else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+- scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt);
++ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+
+ if (!req->tsk_mgmt) {
+ scmnd->host_scribble = (void *) -1L;
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h
+--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h 2007-12-21 15:36:11.000000000 -0500
+@@ -106,11 +106,6 @@
+ struct srp_iu *cmd;
+ struct srp_iu *tsk_mgmt;
+ struct ib_pool_fmr *fmr;
+- /*
+- * Fake scatterlist used when scmnd->use_sg==0. Can be killed
+- * when the SCSI midlayer no longer generates non-SG commands.
+- */
+- struct scatterlist fake_sg;
+ struct completion done;
+ short index;
+ u8 cmd_done;
+diff -Nurb linux-2.6.22-570/drivers/input/gameport/gameport.c linux-2.6.22-591/drivers/input/gameport/gameport.c
+--- linux-2.6.22-570/drivers/input/gameport/gameport.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/input/gameport/gameport.c 2007-12-21 15:36:11.000000000 -0500
+@@ -445,6 +445,7 @@
+
+ static int gameport_thread(void *nothing)
+ {
++ set_freezable();
+ do {
+ gameport_handle_event();
+ wait_event_interruptible(gameport_wait,
+diff -Nurb linux-2.6.22-570/drivers/input/mouse/psmouse.h linux-2.6.22-591/drivers/input/mouse/psmouse.h
+--- linux-2.6.22-570/drivers/input/mouse/psmouse.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/input/mouse/psmouse.h 2007-12-21 15:36:11.000000000 -0500
+@@ -118,7 +118,6 @@
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = _mode, \
+- .owner = THIS_MODULE, \
+ }, \
+ .show = psmouse_attr_show_helper, \
+ .store = psmouse_attr_set_helper, \
+diff -Nurb linux-2.6.22-570/drivers/input/serio/serio.c linux-2.6.22-591/drivers/input/serio/serio.c
+--- linux-2.6.22-570/drivers/input/serio/serio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/input/serio/serio.c 2007-12-21 15:36:11.000000000 -0500
+@@ -384,6 +384,7 @@
+
+ static int serio_thread(void *nothing)
+ {
++ set_freezable();
+ do {
+ serio_handle_event();
+ wait_event_interruptible(serio_wait,
+diff -Nurb linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c
+--- linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c 2007-12-21 15:36:11.000000000 -0500
+@@ -292,6 +292,7 @@
+
+ sched_setscheduler(tsk, SCHED_FIFO, &param);
+
++ set_freezable();
+ while (!kthread_should_stop()) {
+ unsigned int x, y, p;
+ long timeout;
+diff -Nurb linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c
+--- linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c 2007-12-21 15:36:14.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/fs.h>
+ #endif
+ #include <linux/isdnif.h>
++#include <net/net_namespace.h>
+ #include "isdn_divert.h"
+
+
+@@ -284,12 +285,12 @@
+ init_waitqueue_head(&rd_queue);
+
+ #ifdef CONFIG_PROC_FS
+- isdn_proc_entry = proc_mkdir("net/isdn", NULL);
++ isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net);
+ if (!isdn_proc_entry)
+ return (-1);
+ isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry);
+ if (!isdn_divert_entry) {
+- remove_proc_entry("net/isdn", NULL);
++ remove_proc_entry("isdn", init_net.proc_net);
+ return (-1);
+ }
+ isdn_divert_entry->proc_fops = &isdn_fops;
+@@ -309,7 +310,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ remove_proc_entry("divert", isdn_proc_entry);
+- remove_proc_entry("net/isdn", NULL);
++ remove_proc_entry("isdn", init_net.proc_net);
+ #endif /* CONFIG_PROC_FS */
+
+ return (0);
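+With /proc/net turning per-namespace, proc_mkdir("net/isdn", NULL) can no
+longer resolve; entries are created and removed relative to
+init_net.proc_net instead (the eicon and hysdn hunks below repeat the same
+conversion). The pattern, as a sketch:
+
+#include <linux/proc_fs.h>
+#include <net/net_namespace.h>
+
+static struct proc_dir_entry *example_dir;
+
+static int example_proc_init(void)
+{
+        /* creates /proc/net/example in the initial namespace */
+        example_dir = proc_mkdir("example", init_net.proc_net);
+        return example_dir ? 0 : -ENOMEM;
+}
+
+static void example_proc_exit(void)
+{
+        remove_proc_entry("example", init_net.proc_net);
+}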
+diff -Nurb linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c
+--- linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c 2007-12-21 15:36:14.000000000 -0500
+@@ -15,6 +15,7 @@
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+
+ #include "platform.h"
+ #include "di_defs.h"
+@@ -86,7 +87,7 @@
+
+ static int DIVA_INIT_FUNCTION create_proc(void)
+ {
+- proc_net_eicon = proc_mkdir("net/eicon", NULL);
++ proc_net_eicon = proc_mkdir("eicon", init_net.proc_net);
+
+ if (proc_net_eicon) {
+ if ((proc_didd =
+@@ -102,7 +103,7 @@
+ static void remove_proc(void)
+ {
+ remove_proc_entry(DRIVERLNAME, proc_net_eicon);
+- remove_proc_entry("net/eicon", NULL);
++ remove_proc_entry("eicon", init_net.proc_net);
+ }
+
+ static int DIVA_INIT_FUNCTION divadidd_init(void)
+diff -Nurb linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c
+--- linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c 2007-12-21 15:36:14.000000000 -0500
+@@ -392,7 +392,7 @@
+ hysdn_card *card;
+ unsigned char conf_name[20];
+
+- hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net);
++ hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net);
+ if (!hysdn_proc_entry) {
+ printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n");
+ return (-1);
+@@ -437,5 +437,5 @@
+ card = card->next; /* point to next card */
+ }
+
+- remove_proc_entry(PROC_SUBDIR_NAME, proc_net);
++ remove_proc_entry(PROC_SUBDIR_NAME, init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_adt746x.c linux-2.6.22-591/drivers/macintosh/therm_adt746x.c
+--- linux-2.6.22-570/drivers/macintosh/therm_adt746x.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/macintosh/therm_adt746x.c 2007-12-21 15:36:11.000000000 -0500
+@@ -335,6 +335,7 @@
+ {
+ struct thermostat* th = arg;
+
++ set_freezable();
+ while(!kthread_should_stop()) {
+ try_to_freeze();
+ msleep_interruptible(2000);
+diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_pm72.c linux-2.6.22-591/drivers/macintosh/therm_pm72.c
+--- linux-2.6.22-570/drivers/macintosh/therm_pm72.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/macintosh/therm_pm72.c 2007-12-21 15:36:11.000000000 -0500
+@@ -1770,7 +1770,8 @@
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL };
+
+- return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
++ return call_usermodehelper(critical_overtemp_path,
++ argv, envp, UMH_WAIT_EXEC);
+ }
+
+
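+call_usermodehelper()'s bare integer wait flag became the enum umh_wait in
+2.6.23; UMH_WAIT_EXEC keeps the old wait=0 behaviour of blocking only until
+the exec is attempted, while UMH_WAIT_PROC would wait for the helper to
+exit. Sketch:
+
+#include <linux/kmod.h>
+
+static int example_run_helper(char *path)
+{
+        char *argv[] = { path, NULL };
+        char *envp[] = { "HOME=/",
+                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                         NULL };
+
+        /* returns once the exec has been attempted, not when it exits */
+        return call_usermodehelper(path, argv, envp, UMH_WAIT_EXEC);
+}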
+diff -Nurb linux-2.6.22-570/drivers/macintosh/windfarm_core.c linux-2.6.22-591/drivers/macintosh/windfarm_core.c
+--- linux-2.6.22-570/drivers/macintosh/windfarm_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/macintosh/windfarm_core.c 2007-12-21 15:36:11.000000000 -0500
+@@ -80,7 +80,8 @@
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL };
+
+- return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
++ return call_usermodehelper(critical_overtemp_path,
++ argv, envp, UMH_WAIT_EXEC);
+ }
+ EXPORT_SYMBOL_GPL(wf_critical_overtemp);
+
+@@ -92,6 +93,7 @@
+
+ DBG("wf: thread started\n");
+
++ set_freezable();
+ while(!kthread_should_stop()) {
+ if (time_after_eq(jiffies, next)) {
+ wf_notify(WF_EVENT_TICK, NULL);
+@@ -212,7 +214,6 @@
+ list_add(&new_ct->link, &wf_controls);
+
+ new_ct->attr.attr.name = new_ct->name;
+- new_ct->attr.attr.owner = THIS_MODULE;
+ new_ct->attr.attr.mode = 0644;
+ new_ct->attr.show = wf_show_control;
+ new_ct->attr.store = wf_store_control;
+@@ -325,7 +326,6 @@
+ list_add(&new_sr->link, &wf_sensors);
+
+ new_sr->attr.attr.name = new_sr->name;
+- new_sr->attr.attr.owner = THIS_MODULE;
+ new_sr->attr.attr.mode = 0444;
+ new_sr->attr.show = wf_show_sensor;
+ new_sr->attr.store = NULL;
+diff -Nurb linux-2.6.22-570/drivers/md/Kconfig linux-2.6.22-591/drivers/md/Kconfig
+--- linux-2.6.22-570/drivers/md/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -109,6 +109,8 @@
+ config MD_RAID456
+ tristate "RAID-4/RAID-5/RAID-6 mode"
+ depends on BLK_DEV_MD
++ select ASYNC_MEMCPY
++ select ASYNC_XOR
+ ---help---
+ A RAID-5 set of N drives with a capacity of C MB per drive provides
+ the capacity of C * (N - 1) MB, and protects against a failure
+@@ -271,6 +273,11 @@
+
+ If unsure, say N.
+
++config DM_NETLINK
++ bool "DM netlink events (EXPERIMENTAL)"
++ depends on BLK_DEV_DM && EXPERIMENTAL
++ ---help---
++ Generate netlink notifications for device-mapper events.
+ endmenu
+
+ endif
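+MD_RAID456 now selects the async_tx helpers because the raid5.c rework
+later in this patch routes copies and parity through that API (and the
+Makefile hunk below drops the private xor.o in its favour). The call shape
+raid5 adopts -- xor a set of source pages into a destination with a
+completion callback, falling back to synchronous xor when no offload engine
+is present -- as a sketch:
+
+#include <linux/async_tx.h>
+
+static struct dma_async_tx_descriptor *
+example_xor(struct page *dest, struct page **srcs, int count,
+            dma_async_tx_callback cb, void *cb_arg)
+{
+        /* zero the destination first, then xor 'count' full pages in */
+        return async_xor(dest, srcs, 0, count, PAGE_SIZE,
+                         ASYNC_TX_XOR_ZERO_DST, NULL, cb, cb_arg);
+}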
+diff -Nurb linux-2.6.22-570/drivers/md/Makefile linux-2.6.22-591/drivers/md/Makefile
+--- linux-2.6.22-570/drivers/md/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -17,7 +17,7 @@
+ hostprogs-y := mktables
+
+ # Note: link order is important. All raid personalities
+-# and xor.o must come before md.o, as they each initialise
++# must come before md.o, as they each initialise
+ # themselves, and md.o may use the personalities when it
+ # auto-initialised.
+
+@@ -25,7 +25,7 @@
+ obj-$(CONFIG_MD_RAID0) += raid0.o
+ obj-$(CONFIG_MD_RAID1) += raid1.o
+ obj-$(CONFIG_MD_RAID10) += raid10.o
+-obj-$(CONFIG_MD_RAID456) += raid456.o xor.o
++obj-$(CONFIG_MD_RAID456) += raid456.o
+ obj-$(CONFIG_MD_MULTIPATH) += multipath.o
+ obj-$(CONFIG_MD_FAULTY) += faulty.o
+ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
+@@ -46,6 +46,10 @@
+ altivec_flags := -maltivec -mabi=altivec
+ endif
+
++ifeq ($(CONFIG_DM_NETLINK),y)
++dm-mod-objs += dm-netlink.o
++endif
++
+ targets += raid6int1.c
+ $(obj)/raid6int1.c: UNROLL := 1
+ $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
+diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.c linux-2.6.22-591/drivers/md/dm-netlink.c
+--- linux-2.6.22-570/drivers/md/dm-netlink.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/md/dm-netlink.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,103 @@
++/*
++ * Device Mapper Netlink Support (dm-netlink)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright IBM Corporation, 2005, 2006
++ * Author: Mike Anderson <andmike@us.ibm.com>
++ */
++#include <linux/module.h>
++#include <linux/mempool.h>
++#include <linux/time.h>
++#include <linux/jiffies.h>
++#include <linux/security.h>
++#include <net/sock.h>
++#include <net/netlink.h>
++
++#include "dm.h"
++#include "dm-netlink.h"
++
++#define DM_MSG_PREFIX "netlink"
++
++#define DM_EVENT_SKB_SIZE NLMSG_GOODSIZE
++
++struct dm_event_cache {
++ struct kmem_cache *cache;
++ unsigned skb_size;
++};
++
++static struct dm_event_cache _dme_cache;
++
++static int dme_cache_init(struct dm_event_cache *dc, unsigned skb_size)
++{
++ dc->skb_size = skb_size;
++
++ dc->cache = KMEM_CACHE(dm_event, 0);
++ if (!dc->cache)
++ return -ENOMEM;
++
++ return 0;
++}
++
++static void dme_cache_destroy(struct dm_event_cache *dc)
++{
++ kmem_cache_destroy(dc->cache);
++}
++
++static void dme_cache_event_put(struct dm_event *evt)
++{
++ struct dm_event_cache *dc = evt->cdata;
++
++ kmem_cache_free(dc->cache, evt);
++}
++
++static struct dm_event *dme_cache_event_get(struct dm_event_cache *dc,
++ struct mapped_device *md)
++{
++ struct dm_event *evt;
++
++ evt = kmem_cache_alloc(dc->cache, GFP_ATOMIC);
++ if (!evt)
++ return NULL;
++
++ INIT_LIST_HEAD(&evt->elist);
++ evt->cdata = dc;
++ evt->md = md;
++ evt->skb = alloc_skb(dc->skb_size, GFP_ATOMIC);
++ if (!evt->skb)
++ goto cache_err;
++
++ return evt;
++
++cache_err:
++ dme_cache_event_put(evt);
++ return NULL;
++}
++
++int __init dm_netlink_init(void)
++{
++ int r;
++
++ r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE);
++ if (!r)
++ DMINFO("version 1.0.0 loaded");
++
++ return r;
++}
++
++void dm_netlink_exit(void)
++{
++ dme_cache_destroy(&_dme_cache);
++}
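+The cache above pairs a kmem_cache for event bookkeeping with a GFP_ATOMIC
+skb per event, so events can be raised from contexts that cannot sleep; a
+failed skb allocation just returns the object to the cache. Intended usage
+of those helpers, as a sketch -- the actual netlink send path is not part
+of this hunk, so the send step is only hinted at:
+
+static int example_raise_event(struct mapped_device *md)
+{
+        struct dm_event *evt;
+
+        evt = dme_cache_event_get(&_dme_cache, md); /* GFP_ATOMIC inside */
+        if (!evt)
+                return -ENOMEM;
+        /* ... fill evt->skb and hand it to netlink here ... */
+        kfree_skb(evt->skb);      /* put() frees only the event object */
+        dme_cache_event_put(evt);
+        return 0;
+}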
+diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.h linux-2.6.22-591/drivers/md/dm-netlink.h
+--- linux-2.6.22-570/drivers/md/dm-netlink.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/md/dm-netlink.h 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,50 @@
++/*
++ * Device Mapper Netlink Support
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright IBM Corporation, 2005, 2006
++ * Author: Mike Anderson <andmike@us.ibm.com>
++ */
++#ifndef DM_NETLINK_H
++#define DM_NETLINK_H
++
++struct dm_event_cache;
++struct mapped_device;
++struct dm_event {
++ struct dm_event_cache *cdata;
++ struct mapped_device *md;
++ struct sk_buff *skb;
++ struct list_head elist;
++};
++
++#ifdef CONFIG_DM_NETLINK
++
++int dm_netlink_init(void);
++void dm_netlink_exit(void);
++
++#else /* CONFIG_DM_NETLINK */
++
++static inline int __init dm_netlink_init(void)
++{
++ return 0;
++}
++static inline void dm_netlink_exit(void)
++{
++}
++
++#endif /* CONFIG_DM_NETLINK */
++
++#endif /* DM_NETLINK_H */
+diff -Nurb linux-2.6.22-570/drivers/md/dm.c linux-2.6.22-591/drivers/md/dm.c
+--- linux-2.6.22-570/drivers/md/dm.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/drivers/md/dm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -7,6 +7,7 @@
+
+ #include "dm.h"
+ #include "dm-bio-list.h"
++#include "dm-netlink.h"
+
+ #include <linux/init.h>
+ #include <linux/module.h>
+@@ -180,6 +181,7 @@
+ dm_linear_init,
+ dm_stripe_init,
+ dm_interface_init,
++ dm_netlink_init,
+ };
+
+ void (*_exits[])(void) = {
+@@ -188,6 +190,7 @@
+ dm_linear_exit,
+ dm_stripe_exit,
+ dm_interface_exit,
++ dm_netlink_exit,
+ };
+
+ static int __init dm_init(void)
+diff -Nurb linux-2.6.22-570/drivers/md/md.c linux-2.6.22-591/drivers/md/md.c
+--- linux-2.6.22-570/drivers/md/md.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/md.c 2007-12-21 15:36:12.000000000 -0500
+@@ -4642,7 +4642,6 @@
+ * many dirty RAID5 blocks.
+ */
+
+- current->flags |= PF_NOFREEZE;
+ allow_signal(SIGKILL);
+ while (!kthread_should_stop()) {
+
+@@ -5814,7 +5813,7 @@
+ }
+ }
+
+-module_init(md_init)
++subsys_initcall(md_init);
+ module_exit(md_exit)
+
+ static int get_ro(char *buffer, struct kernel_param *kp)
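+Besides dropping PF_NOFREEZE, md.c moves md_init from module_init() to
+subsys_initcall(), which only matters when built in: init now runs at an
+earlier initcall level. The raid5.c diff that follows does two things: it
+retires the driver-local RAID5_DEBUG/PRINTK scheme in favour of pr_debug(),
+and it splits stripe handling into explicitly tracked operations (the
+sh->ops.pending/ack/complete bitmaps) whose data movement is handed to the
+async_tx API. One operation bit's life cycle, as get_stripe_work() and the
+ops_complete_* callbacks below manage it, sketched:
+
+/* requested by the stripe state machine */
+set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+/* dispatched exactly once, via get_stripe_work() */
+set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+/* finished, from the async_tx callback (ops_complete_compute5) */
+set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);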
+diff -Nurb linux-2.6.22-570/drivers/md/raid5.c linux-2.6.22-591/drivers/md/raid5.c
+--- linux-2.6.22-570/drivers/md/raid5.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/raid5.c 2007-12-21 15:36:12.000000000 -0500
+@@ -52,6 +52,7 @@
+ #include "raid6.h"
+
+ #include <linux/raid/bitmap.h>
++#include <linux/async_tx.h>
+
+ /*
+ * Stripe cache
+@@ -80,7 +81,6 @@
+ /*
+ * The following can be used to debug the driver
+ */
+-#define RAID5_DEBUG 0
+ #define RAID5_PARANOIA 1
+ #if RAID5_PARANOIA && defined(CONFIG_SMP)
+ # define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock)
+@@ -88,8 +88,7 @@
+ # define CHECK_DEVLOCK()
+ #endif
+
+-#define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x)))
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ #define inline
+ #define __inline__
+ #endif
+@@ -125,6 +124,7 @@
+ }
+ md_wakeup_thread(conf->mddev->thread);
+ } else {
++ BUG_ON(sh->ops.pending);
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+@@ -152,7 +152,8 @@
+
+ static inline void remove_hash(struct stripe_head *sh)
+ {
+- PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
++ pr_debug("remove_hash(), stripe %llu\n",
++ (unsigned long long)sh->sector);
+
+ hlist_del_init(&sh->hash);
+ }
+@@ -161,7 +162,8 @@
+ {
+ struct hlist_head *hp = stripe_hash(conf, sh->sector);
+
+- PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
++ pr_debug("insert_hash(), stripe %llu\n",
++ (unsigned long long)sh->sector);
+
+ CHECK_DEVLOCK();
+ hlist_add_head(&sh->hash, hp);
+@@ -224,9 +226,10 @@
+
+ BUG_ON(atomic_read(&sh->count) != 0);
+ BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
++ BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
+
+ CHECK_DEVLOCK();
+- PRINTK("init_stripe called, stripe %llu\n",
++ pr_debug("init_stripe called, stripe %llu\n",
+ (unsigned long long)sh->sector);
+
+ remove_hash(sh);
+@@ -240,11 +243,11 @@
+ for (i = sh->disks; i--; ) {
+ struct r5dev *dev = &sh->dev[i];
+
+- if (dev->toread || dev->towrite || dev->written ||
++ if (dev->toread || dev->read || dev->towrite || dev->written ||
+ test_bit(R5_LOCKED, &dev->flags)) {
+- printk("sector=%llx i=%d %p %p %p %d\n",
++ printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
+ (unsigned long long)sh->sector, i, dev->toread,
+- dev->towrite, dev->written,
++ dev->read, dev->towrite, dev->written,
+ test_bit(R5_LOCKED, &dev->flags));
+ BUG();
+ }
+@@ -260,11 +263,11 @@
+ struct hlist_node *hn;
+
+ CHECK_DEVLOCK();
+- PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
++ pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
+ hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
+ if (sh->sector == sector && sh->disks == disks)
+ return sh;
+- PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
++ pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
+ return NULL;
+ }
+
+@@ -276,7 +279,7 @@
+ {
+ struct stripe_head *sh;
+
+- PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector);
++ pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
+
+ spin_lock_irq(&conf->device_lock);
+
+@@ -324,179 +327,762 @@
+ return sh;
+ }
+
+-static int grow_one_stripe(raid5_conf_t *conf)
++/* test_and_ack_op() ensures that we only dequeue an operation once */
++#define test_and_ack_op(op, pend) \
++do { \
++ if (test_bit(op, &sh->ops.pending) && \
++ !test_bit(op, &sh->ops.complete)) { \
++ if (test_and_set_bit(op, &sh->ops.ack)) \
++ clear_bit(op, &pend); \
++ else \
++ ack++; \
++ } else \
++ clear_bit(op, &pend); \
++} while (0)
++
++/* find new work to run, do not resubmit work that is already
++ * in flight
++ */
++static unsigned long get_stripe_work(struct stripe_head *sh)
++{
++ unsigned long pending;
++ int ack = 0;
++
++ pending = sh->ops.pending;
++
++ test_and_ack_op(STRIPE_OP_BIOFILL, pending);
++ test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
++ test_and_ack_op(STRIPE_OP_PREXOR, pending);
++ test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
++ test_and_ack_op(STRIPE_OP_POSTXOR, pending);
++ test_and_ack_op(STRIPE_OP_CHECK, pending);
++ if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending))
++ ack++;
++
++ sh->ops.count -= ack;
++ BUG_ON(sh->ops.count < 0);
++
++ return pending;
++}
++
++static int
++raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
++static int
++raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
++
++static void ops_run_io(struct stripe_head *sh)
+ {
+- struct stripe_head *sh;
+- sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+- if (!sh)
+- return 0;
+- memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
+- sh->raid_conf = conf;
+- spin_lock_init(&sh->lock);
++ raid5_conf_t *conf = sh->raid_conf;
++ int i, disks = sh->disks;
+
+- if (grow_buffers(sh, conf->raid_disks)) {
+- shrink_buffers(sh, conf->raid_disks);
+- kmem_cache_free(conf->slab_cache, sh);
+- return 0;
+- }
+- sh->disks = conf->raid_disks;
+- /* we just created an active stripe so... */
+- atomic_set(&sh->count, 1);
+- atomic_inc(&conf->active_stripes);
+- INIT_LIST_HEAD(&sh->lru);
+- release_stripe(sh);
+- return 1;
+-}
++ might_sleep();
+
+-static int grow_stripes(raid5_conf_t *conf, int num)
+-{
+- struct kmem_cache *sc;
+- int devs = conf->raid_disks;
++ for (i = disks; i--; ) {
++ int rw;
++ struct bio *bi;
++ mdk_rdev_t *rdev;
++ if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
++ rw = WRITE;
++ else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
++ rw = READ;
++ else
++ continue;
+
+- sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
+- sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+- conf->active_name = 0;
+- sc = kmem_cache_create(conf->cache_name[conf->active_name],
+- sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
+- 0, 0, NULL, NULL);
+- if (!sc)
+- return 1;
+- conf->slab_cache = sc;
+- conf->pool_size = devs;
+- while (num--)
+- if (!grow_one_stripe(conf))
+- return 1;
+- return 0;
++ bi = &sh->dev[i].req;
++
++ bi->bi_rw = rw;
++ if (rw == WRITE)
++ bi->bi_end_io = raid5_end_write_request;
++ else
++ bi->bi_end_io = raid5_end_read_request;
++
++ rcu_read_lock();
++ rdev = rcu_dereference(conf->disks[i].rdev);
++ if (rdev && test_bit(Faulty, &rdev->flags))
++ rdev = NULL;
++ if (rdev)
++ atomic_inc(&rdev->nr_pending);
++ rcu_read_unlock();
++
++ if (rdev) {
++ if (test_bit(STRIPE_SYNCING, &sh->state) ||
++ test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
++ test_bit(STRIPE_EXPAND_READY, &sh->state))
++ md_sync_acct(rdev->bdev, STRIPE_SECTORS);
++
++ bi->bi_bdev = rdev->bdev;
++ pr_debug("%s: for %llu schedule op %ld on disc %d\n",
++ __FUNCTION__, (unsigned long long)sh->sector,
++ bi->bi_rw, i);
++ atomic_inc(&sh->count);
++ bi->bi_sector = sh->sector + rdev->data_offset;
++ bi->bi_flags = 1 << BIO_UPTODATE;
++ bi->bi_vcnt = 1;
++ bi->bi_max_vecs = 1;
++ bi->bi_idx = 0;
++ bi->bi_io_vec = &sh->dev[i].vec;
++ bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
++ bi->bi_io_vec[0].bv_offset = 0;
++ bi->bi_size = STRIPE_SIZE;
++ bi->bi_next = NULL;
++ if (rw == WRITE &&
++ test_bit(R5_ReWrite, &sh->dev[i].flags))
++ atomic_add(STRIPE_SECTORS,
++ &rdev->corrected_errors);
++ generic_make_request(bi);
++ } else {
++ if (rw == WRITE)
++ set_bit(STRIPE_DEGRADED, &sh->state);
++ pr_debug("skip op %ld on disc %d for sector %llu\n",
++ bi->bi_rw, i, (unsigned long long)sh->sector);
++ clear_bit(R5_LOCKED, &sh->dev[i].flags);
++ set_bit(STRIPE_HANDLE, &sh->state);
++ }
++ }
+ }
+
+-#ifdef CONFIG_MD_RAID5_RESHAPE
+-static int resize_stripes(raid5_conf_t *conf, int newsize)
++static struct dma_async_tx_descriptor *
++async_copy_data(int frombio, struct bio *bio, struct page *page,
++ sector_t sector, struct dma_async_tx_descriptor *tx)
+ {
+- /* Make all the stripes able to hold 'newsize' devices.
+- * New slots in each stripe get 'page' set to a new page.
+- *
+- * This happens in stages:
+- * 1/ create a new kmem_cache and allocate the required number of
+- * stripe_heads.
+- * 2/ gather all the old stripe_heads and tranfer the pages across
+- * to the new stripe_heads. This will have the side effect of
+- * freezing the array as once all stripe_heads have been collected,
+- * no IO will be possible. Old stripe heads are freed once their
+- * pages have been transferred over, and the old kmem_cache is
+- * freed when all stripes are done.
+- * 3/ reallocate conf->disks to be suitable bigger. If this fails,
+- * we simple return a failre status - no need to clean anything up.
+- * 4/ allocate new pages for the new slots in the new stripe_heads.
+- * If this fails, we don't bother trying the shrink the
+- * stripe_heads down again, we just leave them as they are.
+- * As each stripe_head is processed the new one is released into
+- * active service.
+- *
+- * Once step2 is started, we cannot afford to wait for a write,
+- * so we use GFP_NOIO allocations.
+- */
+- struct stripe_head *osh, *nsh;
+- LIST_HEAD(newstripes);
+- struct disk_info *ndisks;
+- int err = 0;
+- struct kmem_cache *sc;
++ struct bio_vec *bvl;
++ struct page *bio_page;
+ int i;
++ int page_offset;
+
+- if (newsize <= conf->pool_size)
+- return 0; /* never bother to shrink */
++ if (bio->bi_sector >= sector)
++ page_offset = (signed)(bio->bi_sector - sector) * 512;
++ else
++ page_offset = (signed)(sector - bio->bi_sector) * -512;
++ bio_for_each_segment(bvl, bio, i) {
++ int len = bio_iovec_idx(bio, i)->bv_len;
++ int clen;
++ int b_offset = 0;
+
+- md_allow_write(conf->mddev);
++ if (page_offset < 0) {
++ b_offset = -page_offset;
++ page_offset += b_offset;
++ len -= b_offset;
++ }
+
+- /* Step 1 */
+- sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+- sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+- 0, 0, NULL, NULL);
+- if (!sc)
+- return -ENOMEM;
++ if (len > 0 && page_offset + len > STRIPE_SIZE)
++ clen = STRIPE_SIZE - page_offset;
++ else
++ clen = len;
+
+- for (i = conf->max_nr_stripes; i; i--) {
+- nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+- if (!nsh)
++ if (clen > 0) {
++ b_offset += bio_iovec_idx(bio, i)->bv_offset;
++ bio_page = bio_iovec_idx(bio, i)->bv_page;
++ if (frombio)
++ tx = async_memcpy(page, bio_page, page_offset,
++ b_offset, clen,
++ ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC,
++ tx, NULL, NULL);
++ else
++ tx = async_memcpy(bio_page, page, b_offset,
++ page_offset, clen,
++ ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST,
++ tx, NULL, NULL);
++ }
++ if (clen < len) /* hit end of page */
+ break;
++ page_offset += len;
++ }
+
+- memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
++ return tx;
++}
+
+- nsh->raid_conf = conf;
+- spin_lock_init(&nsh->lock);
++static void ops_complete_biofill(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++ struct bio *return_bi = NULL, *bi;
++ raid5_conf_t *conf = sh->raid_conf;
++ int i, more_to_read = 0;
+
+- list_add(&nsh->lru, &newstripes);
+- }
+- if (i) {
+- /* didn't get enough, give up */
+- while (!list_empty(&newstripes)) {
+- nsh = list_entry(newstripes.next, struct stripe_head, lru);
+- list_del(&nsh->lru);
+- kmem_cache_free(sc, nsh);
+- }
+- kmem_cache_destroy(sc);
+- return -ENOMEM;
+- }
+- /* Step 2 - Must use GFP_NOIO now.
+- * OK, we have enough stripes, start collecting inactive
+- * stripes and copying them over
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ /* clear completed biofills */
++ for (i = sh->disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ /* check if this stripe has new incoming reads */
++ if (dev->toread)
++ more_to_read++;
++
++ /* acknowledge completion of a biofill operation */
++ /* and check if we need to reply to a read request
+ */
+- list_for_each_entry(nsh, &newstripes, lru) {
++ if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) {
++ struct bio *rbi, *rbi2;
++ clear_bit(R5_Wantfill, &dev->flags);
++
++ /* The access to dev->read is outside of the
++ * spin_lock_irq(&conf->device_lock), but is protected
++ * by the STRIPE_OP_BIOFILL pending bit
++ */
++ BUG_ON(!dev->read);
++ rbi = dev->read;
++ dev->read = NULL;
++ while (rbi && rbi->bi_sector <
++ dev->sector + STRIPE_SECTORS) {
++ rbi2 = r5_next_bio(rbi, dev->sector);
+ spin_lock_irq(&conf->device_lock);
+- wait_event_lock_irq(conf->wait_for_stripe,
+- !list_empty(&conf->inactive_list),
+- conf->device_lock,
+- unplug_slaves(conf->mddev)
+- );
+- osh = get_free_stripe(conf);
++ if (--rbi->bi_phys_segments == 0) {
++ rbi->bi_next = return_bi;
++ return_bi = rbi;
++ }
+ spin_unlock_irq(&conf->device_lock);
+- atomic_set(&nsh->count, 1);
+- for(i=0; i<conf->pool_size; i++)
+- nsh->dev[i].page = osh->dev[i].page;
+- for( ; i<newsize; i++)
+- nsh->dev[i].page = NULL;
+- kmem_cache_free(conf->slab_cache, osh);
++ rbi = rbi2;
+ }
+- kmem_cache_destroy(conf->slab_cache);
++ }
++ }
++ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
++ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+
+- /* Step 3.
+- * At this point, we are holding all the stripes so the array
+- * is completely stalled, so now is a good time to resize
+- * conf->disks.
+- */
+- ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+- if (ndisks) {
+- for (i=0; i<conf->raid_disks; i++)
+- ndisks[i] = conf->disks[i];
+- kfree(conf->disks);
+- conf->disks = ndisks;
+- } else
+- err = -ENOMEM;
++ bi = return_bi;
++ while (bi) {
++ int bytes = bi->bi_size;
+
+- /* Step 4, return new stripes to service */
+- while(!list_empty(&newstripes)) {
+- nsh = list_entry(newstripes.next, struct stripe_head, lru);
+- list_del_init(&nsh->lru);
+- for (i=conf->raid_disks; i < newsize; i++)
+- if (nsh->dev[i].page == NULL) {
+- struct page *p = alloc_page(GFP_NOIO);
+- nsh->dev[i].page = p;
+- if (!p)
+- err = -ENOMEM;
+- }
+- release_stripe(nsh);
++ return_bi = bi->bi_next;
++ bi->bi_next = NULL;
++ bi->bi_size = 0;
++ bi->bi_end_io(bi, bytes,
++ test_bit(BIO_UPTODATE, &bi->bi_flags) ? 0 : -EIO);
++ bi = return_bi;
+ }
+- /* critical section pass, GFP_NOIO no longer needed */
+
+- conf->slab_cache = sc;
+- conf->active_name = 1-conf->active_name;
+- conf->pool_size = newsize;
+- return err;
++ if (more_to_read)
++ set_bit(STRIPE_HANDLE, &sh->state);
++ release_stripe(sh);
+ }
+-#endif
+
+-static int drop_one_stripe(raid5_conf_t *conf)
++static void ops_run_biofill(struct stripe_head *sh)
+ {
+- struct stripe_head *sh;
++ struct dma_async_tx_descriptor *tx = NULL;
++ raid5_conf_t *conf = sh->raid_conf;
++ int i;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ for (i = sh->disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (test_bit(R5_Wantfill, &dev->flags)) {
++ struct bio *rbi;
++ spin_lock_irq(&conf->device_lock);
++ dev->read = rbi = dev->toread;
++ dev->toread = NULL;
++ spin_unlock_irq(&conf->device_lock);
++ while (rbi && rbi->bi_sector <
++ dev->sector + STRIPE_SECTORS) {
++ tx = async_copy_data(0, rbi, dev->page,
++ dev->sector, tx);
++ rbi = r5_next_bio(rbi, dev->sector);
++ }
++ }
++ }
++
++ atomic_inc(&sh->count);
++ async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
++ ops_complete_biofill, sh);
++}
++
++static void ops_complete_compute5(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++ int target = sh->ops.target;
++ struct r5dev *tgt = &sh->dev[target];
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ set_bit(R5_UPTODATE, &tgt->flags);
++ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
++ clear_bit(R5_Wantcompute, &tgt->flags);
++ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++ set_bit(STRIPE_HANDLE, &sh->state);
++ release_stripe(sh);
++}
++
++static struct dma_async_tx_descriptor *
++ops_run_compute5(struct stripe_head *sh, unsigned long pending)
++{
++ /* kernel stack size limits the total number of disks */
++ int disks = sh->disks;
++ struct page *xor_srcs[disks];
++ int target = sh->ops.target;
++ struct r5dev *tgt = &sh->dev[target];
++ struct page *xor_dest = tgt->page;
++ int count = 0;
++ struct dma_async_tx_descriptor *tx;
++ int i;
++
++ pr_debug("%s: stripe %llu block: %d\n",
++ __FUNCTION__, (unsigned long long)sh->sector, target);
++ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
++
++ for (i = disks; i--; )
++ if (i != target)
++ xor_srcs[count++] = sh->dev[i].page;
++
++ atomic_inc(&sh->count);
++
++ if (unlikely(count == 1))
++ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
++ 0, NULL, ops_complete_compute5, sh);
++ else
++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++ ASYNC_TX_XOR_ZERO_DST, NULL,
++ ops_complete_compute5, sh);
++
++ /* ack now if postxor is not set to be run */
++ if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
++ async_tx_ack(tx);
++
++ return tx;
++}
++
++static void ops_complete_prexor(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
++}
++
++static struct dma_async_tx_descriptor *
++ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++ /* kernel stack size limits the total number of disks */
++ int disks = sh->disks;
++ struct page *xor_srcs[disks];
++ int count = 0, pd_idx = sh->pd_idx, i;
++
++ /* existing parity data subtracted */
++ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ /* Only process blocks that are known to be uptodate */
++ if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags))
++ xor_srcs[count++] = dev->page;
++ }
++
++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++ ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
++ ops_complete_prexor, sh);
++
++ return tx;
++}
++
++static struct dma_async_tx_descriptor *
++ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++ int disks = sh->disks;
++ int pd_idx = sh->pd_idx, i;
++
++ /* check if prexor is active which means only process blocks
++ * that are part of a read-modify-write (Wantprexor)
++ */
++ int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ struct bio *chosen;
++ int towrite;
++
++ towrite = 0;
++ if (prexor) { /* rmw */
++ if (dev->towrite &&
++ test_bit(R5_Wantprexor, &dev->flags))
++ towrite = 1;
++ } else { /* rcw */
++ if (i != pd_idx && dev->towrite &&
++ test_bit(R5_LOCKED, &dev->flags))
++ towrite = 1;
++ }
++
++ if (towrite) {
++ struct bio *wbi;
++
++ spin_lock(&sh->lock);
++ chosen = dev->towrite;
++ dev->towrite = NULL;
++ BUG_ON(dev->written);
++ wbi = dev->written = chosen;
++ spin_unlock(&sh->lock);
++
++ while (wbi && wbi->bi_sector <
++ dev->sector + STRIPE_SECTORS) {
++ tx = async_copy_data(1, wbi, dev->page,
++ dev->sector, tx);
++ wbi = r5_next_bio(wbi, dev->sector);
++ }
++ }
++ }
++
++ return tx;
++}
++
++static void ops_complete_postxor(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++ set_bit(STRIPE_HANDLE, &sh->state);
++ release_stripe(sh);
++}
++
++static void ops_complete_write(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++ int disks = sh->disks, i, pd_idx = sh->pd_idx;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (dev->written || i == pd_idx)
++ set_bit(R5_UPTODATE, &dev->flags);
++ }
++
++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++
++ set_bit(STRIPE_HANDLE, &sh->state);
++ release_stripe(sh);
++}
++
++static void
++ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++ /* kernel stack size limits the total number of disks */
++ int disks = sh->disks;
++ struct page *xor_srcs[disks];
++
++ int count = 0, pd_idx = sh->pd_idx, i;
++ struct page *xor_dest;
++ int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++ unsigned long flags;
++ dma_async_tx_callback callback;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ /* if prexor is active, only process blocks that are
++ * part of a read-modify-write (written)
++ */
++ if (prexor) {
++ xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (dev->written)
++ xor_srcs[count++] = dev->page;
++ }
++ } else {
++ xor_dest = sh->dev[pd_idx].page;
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (i != pd_idx)
++ xor_srcs[count++] = dev->page;
++ }
++ }
++
++ /* check whether this postxor is part of a write */
++ callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
++ ops_complete_write : ops_complete_postxor;
++
++ /* 1/ if we prexor'd then the dest is reused as a source
++ * 2/ if we did not prexor then we are redoing the parity
++ * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
++ * for the synchronous xor case
++ */
++ flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
++ (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
++
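++ /* hold a stripe reference until the completion callback releases it */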
++ atomic_inc(&sh->count);
++
++ if (unlikely(count == 1)) {
++ flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
++ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
++ flags, tx, callback, sh);
++ } else
++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++ flags, tx, callback, sh);
++}
++
++static void ops_complete_check(void *stripe_head_ref)
++{
++ struct stripe_head *sh = stripe_head_ref;
++ int pd_idx = sh->pd_idx;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
++ sh->ops.zero_sum_result == 0)
++ set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
++
++ set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
++ set_bit(STRIPE_HANDLE, &sh->state);
++ release_stripe(sh);
++}
++
++static void ops_run_check(struct stripe_head *sh)
++{
++ /* kernel stack size limits the total number of disks */
++ int disks = sh->disks;
++ struct page *xor_srcs[disks];
++ struct dma_async_tx_descriptor *tx;
++
++ int count = 0, pd_idx = sh->pd_idx, i;
++ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (i != pd_idx)
++ xor_srcs[count++] = dev->page;
++ }
++
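++ /* a zero sum means the parity block is consistent with the data */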
++ tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++ &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
++
++ if (tx)
++ set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
++ else
++ clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
++
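++ /* reference dropped in ops_complete_check() via release_stripe() */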
++ atomic_inc(&sh->count);
++ tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
++ ops_complete_check, sh);
++}
++
++static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
++{
++ int overlap_clear = 0, i, disks = sh->disks;
++ struct dma_async_tx_descriptor *tx = NULL;
++
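++ /* run the requested operations in dependency order; the compute,
++ * prexor, biodrain and postxor stages chain their descriptors via 'tx'
++ */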
++ if (test_bit(STRIPE_OP_BIOFILL, &pending)) {
++ ops_run_biofill(sh);
++ overlap_clear++;
++ }
++
++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
++ tx = ops_run_compute5(sh, pending);
++
++ if (test_bit(STRIPE_OP_PREXOR, &pending))
++ tx = ops_run_prexor(sh, tx);
++
++ if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
++ tx = ops_run_biodrain(sh, tx);
++ overlap_clear++;
++ }
++
++ if (test_bit(STRIPE_OP_POSTXOR, &pending))
++ ops_run_postxor(sh, tx);
++
++ if (test_bit(STRIPE_OP_CHECK, &pending))
++ ops_run_check(sh);
++
++ if (test_bit(STRIPE_OP_IO, &pending))
++ ops_run_io(sh);
++
++ if (overlap_clear)
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (test_and_clear_bit(R5_Overlap, &dev->flags))
++ wake_up(&sh->raid_conf->wait_for_overlap);
++ }
++}
++
++static int grow_one_stripe(raid5_conf_t *conf)
++{
++ struct stripe_head *sh;
++ sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
++ if (!sh)
++ return 0;
++ memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
++ sh->raid_conf = conf;
++ spin_lock_init(&sh->lock);
++
++ if (grow_buffers(sh, conf->raid_disks)) {
++ shrink_buffers(sh, conf->raid_disks);
++ kmem_cache_free(conf->slab_cache, sh);
++ return 0;
++ }
++ sh->disks = conf->raid_disks;
++ /* we just created an active stripe so... */
++ atomic_set(&sh->count, 1);
++ atomic_inc(&conf->active_stripes);
++ INIT_LIST_HEAD(&sh->lru);
++ release_stripe(sh);
++ return 1;
++}
++
++static int grow_stripes(raid5_conf_t *conf, int num)
++{
++ struct kmem_cache *sc;
++ int devs = conf->raid_disks;
++
++ sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
++ sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
++ conf->active_name = 0;
++ sc = kmem_cache_create(conf->cache_name[conf->active_name],
++ sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
++ 0, 0, NULL, NULL);
++ if (!sc)
++ return 1;
++ conf->slab_cache = sc;
++ conf->pool_size = devs;
++ while (num--)
++ if (!grow_one_stripe(conf))
++ return 1;
++ return 0;
++}
++
++#ifdef CONFIG_MD_RAID5_RESHAPE
++static int resize_stripes(raid5_conf_t *conf, int newsize)
++{
++ /* Make all the stripes able to hold 'newsize' devices.
++ * New slots in each stripe get 'page' set to a new page.
++ *
++ * This happens in stages:
++ * 1/ create a new kmem_cache and allocate the required number of
++ * stripe_heads.
++ * 2/ gather all the old stripe_heads and transfer the pages across
++ * to the new stripe_heads. This will have the side effect of
++ * freezing the array as once all stripe_heads have been collected,
++ * no IO will be possible. Old stripe heads are freed once their
++ * pages have been transferred over, and the old kmem_cache is
++ * freed when all stripes are done.
++ * 3/ reallocate conf->disks to be suitably bigger. If this fails,
++ * we simply return a failure status - no need to clean anything up.
++ * 4/ allocate new pages for the new slots in the new stripe_heads.
++ * If this fails, we don't bother trying to shrink the
++ * stripe_heads down again, we just leave them as they are.
++ * As each stripe_head is processed the new one is released into
++ * active service.
++ *
++ * Once step2 is started, we cannot afford to wait for a write,
++ * so we use GFP_NOIO allocations.
++ */
++ struct stripe_head *osh, *nsh;
++ LIST_HEAD(newstripes);
++ struct disk_info *ndisks;
++ int err = 0;
++ struct kmem_cache *sc;
++ int i;
++
++ if (newsize <= conf->pool_size)
++ return 0; /* never bother to shrink */
++
++ md_allow_write(conf->mddev);
++
++ /* Step 1 */
++ sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
++ sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
++ 0, 0, NULL, NULL);
++ if (!sc)
++ return -ENOMEM;
++
++ for (i = conf->max_nr_stripes; i; i--) {
++ nsh = kmem_cache_alloc(sc, GFP_KERNEL);
++ if (!nsh)
++ break;
++
++ memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
++
++ nsh->raid_conf = conf;
++ spin_lock_init(&nsh->lock);
++
++ list_add(&nsh->lru, &newstripes);
++ }
++ if (i) {
++ /* didn't get enough, give up */
++ while (!list_empty(&newstripes)) {
++ nsh = list_entry(newstripes.next, struct stripe_head, lru);
++ list_del(&nsh->lru);
++ kmem_cache_free(sc, nsh);
++ }
++ kmem_cache_destroy(sc);
++ return -ENOMEM;
++ }
++ /* Step 2 - Must use GFP_NOIO now.
++ * OK, we have enough stripes, start collecting inactive
++ * stripes and copying them over
++ */
++ list_for_each_entry(nsh, &newstripes, lru) {
++ spin_lock_irq(&conf->device_lock);
++ wait_event_lock_irq(conf->wait_for_stripe,
++ !list_empty(&conf->inactive_list),
++ conf->device_lock,
++ unplug_slaves(conf->mddev)
++ );
++ osh = get_free_stripe(conf);
++ spin_unlock_irq(&conf->device_lock);
++ atomic_set(&nsh->count, 1);
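++ /* take the pages from an inactive stripe; slots beyond the old
++ * pool_size stay NULL until Step 4 allocates fresh pages
++ */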
++ for(i=0; i<conf->pool_size; i++)
++ nsh->dev[i].page = osh->dev[i].page;
++ for( ; i<newsize; i++)
++ nsh->dev[i].page = NULL;
++ kmem_cache_free(conf->slab_cache, osh);
++ }
++ kmem_cache_destroy(conf->slab_cache);
++
++ /* Step 3.
++ * At this point, we are holding all the stripes so the array
++ * is completely stalled, so now is a good time to resize
++ * conf->disks.
++ */
++ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
++ if (ndisks) {
++ for (i=0; i<conf->raid_disks; i++)
++ ndisks[i] = conf->disks[i];
++ kfree(conf->disks);
++ conf->disks = ndisks;
++ } else
++ err = -ENOMEM;
++
++ /* Step 4, return new stripes to service */
++ while(!list_empty(&newstripes)) {
++ nsh = list_entry(newstripes.next, struct stripe_head, lru);
++ list_del_init(&nsh->lru);
++ for (i=conf->raid_disks; i < newsize; i++)
++ if (nsh->dev[i].page == NULL) {
++ struct page *p = alloc_page(GFP_NOIO);
++ nsh->dev[i].page = p;
++ if (!p)
++ err = -ENOMEM;
++ }
++ release_stripe(nsh);
++ }
++ /* critical section passed, GFP_NOIO no longer needed */
++
++ conf->slab_cache = sc;
++ conf->active_name = 1-conf->active_name;
++ conf->pool_size = newsize;
++ return err;
++}
++#endif
++
++static int drop_one_stripe(raid5_conf_t *conf)
++{
++ struct stripe_head *sh;
+
+ spin_lock_irq(&conf->device_lock);
+ sh = get_free_stripe(conf);
+@@ -537,7 +1123,7 @@
+ if (bi == &sh->dev[i].req)
+ break;
+
+- PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n",
++ pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+ (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ uptodate);
+ if (i == disks) {
+@@ -613,7 +1199,7 @@
+ if (bi == &sh->dev[i].req)
+ break;
+
+- PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n",
++ pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+ (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ uptodate);
+ if (i == disks) {
+@@ -658,7 +1244,7 @@
+ {
+ char b[BDEVNAME_SIZE];
+ raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
+- PRINTK("raid5: error called\n");
++ pr_debug("raid5: error called\n");
+
+ if (!test_bit(Faulty, &rdev->flags)) {
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+@@ -918,135 +1504,11 @@
+
+ #define check_xor() do { \
+ if (count == MAX_XOR_BLOCKS) { \
+- xor_block(count, STRIPE_SIZE, ptr); \
+- count = 1; \
++ xor_blocks(count, STRIPE_SIZE, dest, ptr);\
++ count = 0; \
+ } \
+ } while(0)
+
+-
+-static void compute_block(struct stripe_head *sh, int dd_idx)
+-{
+- int i, count, disks = sh->disks;
+- void *ptr[MAX_XOR_BLOCKS], *p;
+-
+- PRINTK("compute_block, stripe %llu, idx %d\n",
+- (unsigned long long)sh->sector, dd_idx);
+-
+- ptr[0] = page_address(sh->dev[dd_idx].page);
+- memset(ptr[0], 0, STRIPE_SIZE);
+- count = 1;
+- for (i = disks ; i--; ) {
+- if (i == dd_idx)
+- continue;
+- p = page_address(sh->dev[i].page);
+- if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
+- ptr[count++] = p;
+- else
+- printk(KERN_ERR "compute_block() %d, stripe %llu, %d"
+- " not present\n", dd_idx,
+- (unsigned long long)sh->sector, i);
+-
+- check_xor();
+- }
+- if (count != 1)
+- xor_block(count, STRIPE_SIZE, ptr);
+- set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+-}
+-
+-static void compute_parity5(struct stripe_head *sh, int method)
+-{
+- raid5_conf_t *conf = sh->raid_conf;
+- int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
+- void *ptr[MAX_XOR_BLOCKS];
+- struct bio *chosen;
+-
+- PRINTK("compute_parity5, stripe %llu, method %d\n",
+- (unsigned long long)sh->sector, method);
+-
+- count = 1;
+- ptr[0] = page_address(sh->dev[pd_idx].page);
+- switch(method) {
+- case READ_MODIFY_WRITE:
+- BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags));
+- for (i=disks ; i-- ;) {
+- if (i==pd_idx)
+- continue;
+- if (sh->dev[i].towrite &&
+- test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
+- ptr[count++] = page_address(sh->dev[i].page);
+- chosen = sh->dev[i].towrite;
+- sh->dev[i].towrite = NULL;
+-
+- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+- wake_up(&conf->wait_for_overlap);
+-
+- BUG_ON(sh->dev[i].written);
+- sh->dev[i].written = chosen;
+- check_xor();
+- }
+- }
+- break;
+- case RECONSTRUCT_WRITE:
+- memset(ptr[0], 0, STRIPE_SIZE);
+- for (i= disks; i-- ;)
+- if (i!=pd_idx && sh->dev[i].towrite) {
+- chosen = sh->dev[i].towrite;
+- sh->dev[i].towrite = NULL;
+-
+- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+- wake_up(&conf->wait_for_overlap);
+-
+- BUG_ON(sh->dev[i].written);
+- sh->dev[i].written = chosen;
+- }
+- break;
+- case CHECK_PARITY:
+- break;
+- }
+- if (count>1) {
+- xor_block(count, STRIPE_SIZE, ptr);
+- count = 1;
+- }
+-
+- for (i = disks; i--;)
+- if (sh->dev[i].written) {
+- sector_t sector = sh->dev[i].sector;
+- struct bio *wbi = sh->dev[i].written;
+- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+- copy_data(1, wbi, sh->dev[i].page, sector);
+- wbi = r5_next_bio(wbi, sector);
+- }
+-
+- set_bit(R5_LOCKED, &sh->dev[i].flags);
+- set_bit(R5_UPTODATE, &sh->dev[i].flags);
+- }
+-
+- switch(method) {
+- case RECONSTRUCT_WRITE:
+- case CHECK_PARITY:
+- for (i=disks; i--;)
+- if (i != pd_idx) {
+- ptr[count++] = page_address(sh->dev[i].page);
+- check_xor();
+- }
+- break;
+- case READ_MODIFY_WRITE:
+- for (i = disks; i--;)
+- if (sh->dev[i].written) {
+- ptr[count++] = page_address(sh->dev[i].page);
+- check_xor();
+- }
+- }
+- if (count != 1)
+- xor_block(count, STRIPE_SIZE, ptr);
+-
+- if (method != CHECK_PARITY) {
+- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+- set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
+- } else
+- clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+-}
+-
+ static void compute_parity6(struct stripe_head *sh, int method)
+ {
+ raid6_conf_t *conf = sh->raid_conf;
+@@ -1058,7 +1520,7 @@
+ qd_idx = raid6_next_disk(pd_idx, disks);
+ d0_idx = raid6_next_disk(qd_idx, disks);
+
+- PRINTK("compute_parity, stripe %llu, method %d\n",
++ pr_debug("compute_parity, stripe %llu, method %d\n",
+ (unsigned long long)sh->sector, method);
+
+ switch(method) {
+@@ -1132,20 +1594,20 @@
+ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
+ {
+ int i, count, disks = sh->disks;
+- void *ptr[MAX_XOR_BLOCKS], *p;
++ void *ptr[MAX_XOR_BLOCKS], *dest, *p;
+ int pd_idx = sh->pd_idx;
+ int qd_idx = raid6_next_disk(pd_idx, disks);
+
+- PRINTK("compute_block_1, stripe %llu, idx %d\n",
++ pr_debug("compute_block_1, stripe %llu, idx %d\n",
+ (unsigned long long)sh->sector, dd_idx);
+
+ if ( dd_idx == qd_idx ) {
+ /* We're actually computing the Q drive */
+ compute_parity6(sh, UPDATE_PARITY);
+ } else {
+- ptr[0] = page_address(sh->dev[dd_idx].page);
+- if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
+- count = 1;
++ dest = page_address(sh->dev[dd_idx].page);
++ if (!nozero) memset(dest, 0, STRIPE_SIZE);
++ count = 0;
+ for (i = disks ; i--; ) {
+ if (i == dd_idx || i == qd_idx)
+ continue;
+@@ -1159,8 +1621,8 @@
+
+ check_xor();
+ }
+- if (count != 1)
+- xor_block(count, STRIPE_SIZE, ptr);
++ if (count)
++ xor_blocks(count, STRIPE_SIZE, dest, ptr);
+ if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ }
+@@ -1183,7 +1645,7 @@
+ BUG_ON(faila == failb);
+ if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
+
+- PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
++ pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
+ (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
+
+ if ( failb == disks-1 ) {
+@@ -1229,7 +1691,79 @@
+ }
+ }
+
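++/* handle_write_operations5 - schedule the biodrain/postxor operations for
++ * a reconstruct-write, plus a prexor pass for a read-modify-write, and
++ * lock the blocks involved
++ */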
++static int
++handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
++{
++ int i, pd_idx = sh->pd_idx, disks = sh->disks;
++ int locked = 0;
++
++ if (rcw) {
++ /* if we are not expanding this is a proper write request, and
++ * there will be bios with new data to be drained into the
++ * stripe cache
++ */
++ if (!expand) {
++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++ sh->ops.count++;
++ }
++
++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++ sh->ops.count++;
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++
++ if (dev->towrite) {
++ set_bit(R5_LOCKED, &dev->flags);
++ if (!expand)
++ clear_bit(R5_UPTODATE, &dev->flags);
++ locked++;
++ }
++ }
++ } else {
++ BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
++ test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
++
++ set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++
++ sh->ops.count += 3;
++
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (i == pd_idx)
++ continue;
++
++ /* For a read-modify write there may be blocks that are
++ * locked for reading while others are ready to be
++ * written so we distinguish these blocks by the
++ * R5_Wantprexor bit
++ */
++ if (dev->towrite &&
++ (test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Wantcompute, &dev->flags))) {
++ set_bit(R5_Wantprexor, &dev->flags);
++ set_bit(R5_LOCKED, &dev->flags);
++ clear_bit(R5_UPTODATE, &dev->flags);
++ locked++;
++ }
++ }
++ }
++
++ /* keep the parity disk locked while asynchronous operations
++ * are in flight
++ */
++ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
++ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
++ locked++;
++
++ pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
++ __FUNCTION__, (unsigned long long)sh->sector,
++ locked, sh->ops.pending);
+
++ return locked;
++}
+
+ /*
+ * Each stripe/dev can have one or more bion attached.
+@@ -1242,7 +1776,7 @@
+ raid5_conf_t *conf = sh->raid_conf;
+ int firstwrite=0;
+
+- PRINTK("adding bh b#%llu to stripe s#%llu\n",
++ pr_debug("adding bh b#%llu to stripe s#%llu\n",
+ (unsigned long long)bi->bi_sector,
+ (unsigned long long)sh->sector);
+
+@@ -1271,7 +1805,7 @@
+ spin_unlock_irq(&conf->device_lock);
+ spin_unlock(&sh->lock);
+
+- PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n",
++ pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
+ (unsigned long long)bi->bi_sector,
+ (unsigned long long)sh->sector, dd_idx);
+
+@@ -1326,116 +1860,14 @@
+ return pd_idx;
+ }
+
+-
+-/*
+- * handle_stripe - do things to a stripe.
+- *
+- * We lock the stripe and then examine the state of various bits
+- * to see what needs to be done.
+- * Possible results:
+- * return some read request which now have data
+- * return some write requests which are safely on disc
+- * schedule a read on some buffers
+- * schedule a write of some buffers
+- * return confirmation of parity correctness
+- *
+- * Parity calculations are done inside the stripe lock
+- * buffers are taken off read_list or write_list, and bh_cache buffers
+- * get BH_Lock set before the stripe lock is released.
+- *
+- */
+-
+-static void handle_stripe5(struct stripe_head *sh)
++static void
++handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
++ struct stripe_head_state *s, int disks,
++ struct bio **return_bi)
+ {
+- raid5_conf_t *conf = sh->raid_conf;
+- int disks = sh->disks;
+- struct bio *return_bi= NULL;
+- struct bio *bi;
+ int i;
+- int syncing, expanding, expanded;
+- int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
+- int non_overwrite = 0;
+- int failed_num=0;
+- struct r5dev *dev;
+-
+- PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
+- (unsigned long long)sh->sector, atomic_read(&sh->count),
+- sh->pd_idx);
+-
+- spin_lock(&sh->lock);
+- clear_bit(STRIPE_HANDLE, &sh->state);
+- clear_bit(STRIPE_DELAYED, &sh->state);
+-
+- syncing = test_bit(STRIPE_SYNCING, &sh->state);
+- expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+- /* Now to look around and see what can be done */
+-
+- rcu_read_lock();
+- for (i=disks; i--; ) {
+- mdk_rdev_t *rdev;
+- dev = &sh->dev[i];
+- clear_bit(R5_Insync, &dev->flags);
+-
+- PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
+- i, dev->flags, dev->toread, dev->towrite, dev->written);
+- /* maybe we can reply to a read */
+- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
+- struct bio *rbi, *rbi2;
+- PRINTK("Return read for disc %d\n", i);
+- spin_lock_irq(&conf->device_lock);
+- rbi = dev->toread;
+- dev->toread = NULL;
+- if (test_and_clear_bit(R5_Overlap, &dev->flags))
+- wake_up(&conf->wait_for_overlap);
+- spin_unlock_irq(&conf->device_lock);
+- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+- copy_data(0, rbi, dev->page, dev->sector);
+- rbi2 = r5_next_bio(rbi, dev->sector);
+- spin_lock_irq(&conf->device_lock);
+- if (--rbi->bi_phys_segments == 0) {
+- rbi->bi_next = return_bi;
+- return_bi = rbi;
+- }
+- spin_unlock_irq(&conf->device_lock);
+- rbi = rbi2;
+- }
+- }
+-
+- /* now count some things */
+- if (test_bit(R5_LOCKED, &dev->flags)) locked++;
+- if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+-
+-
+- if (dev->toread) to_read++;
+- if (dev->towrite) {
+- to_write++;
+- if (!test_bit(R5_OVERWRITE, &dev->flags))
+- non_overwrite++;
+- }
+- if (dev->written) written++;
+- rdev = rcu_dereference(conf->disks[i].rdev);
+- if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+- /* The ReadError flag will just be confusing now */
+- clear_bit(R5_ReadError, &dev->flags);
+- clear_bit(R5_ReWrite, &dev->flags);
+- }
+- if (!rdev || !test_bit(In_sync, &rdev->flags)
+- || test_bit(R5_ReadError, &dev->flags)) {
+- failed++;
+- failed_num = i;
+- } else
+- set_bit(R5_Insync, &dev->flags);
+- }
+- rcu_read_unlock();
+- PRINTK("locked=%d uptodate=%d to_read=%d"
+- " to_write=%d failed=%d failed_num=%d\n",
+- locked, uptodate, to_read, to_write, failed, failed_num);
+- /* check if the array has lost two devices and, if so, some requests might
+- * need to be failed
+- */
+- if (failed > 1 && to_read+to_write+written) {
+- for (i=disks; i--; ) {
++ for (i = disks; i--; ) {
++ struct bio *bi;
+ int bitmap_end = 0;
+
+ if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+@@ -1447,23 +1879,26 @@
+ md_error(conf->mddev, rdev);
+ rcu_read_unlock();
+ }
+-
+ spin_lock_irq(&conf->device_lock);
+ /* fail all writes first */
+ bi = sh->dev[i].towrite;
+ sh->dev[i].towrite = NULL;
+- if (bi) { to_write--; bitmap_end = 1; }
++ if (bi) {
++ s->to_write--;
++ bitmap_end = 1;
++ }
+
+ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ wake_up(&conf->wait_for_overlap);
+
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
++ while (bi && bi->bi_sector <
++ sh->dev[i].sector + STRIPE_SECTORS) {
+ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+ clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ if (--bi->bi_phys_segments == 0) {
+ md_write_end(conf->mddev);
+- bi->bi_next = return_bi;
+- return_bi = bi;
++ bi->bi_next = *return_bi;
++ *return_bi = bi;
+ }
+ bi = nextbi;
+ }
+@@ -1471,78 +1906,235 @@
+ bi = sh->dev[i].written;
+ sh->dev[i].written = NULL;
+ if (bi) bitmap_end = 1;
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
++ while (bi && bi->bi_sector <
++ sh->dev[i].sector + STRIPE_SECTORS) {
+ struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+ clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ if (--bi->bi_phys_segments == 0) {
+ md_write_end(conf->mddev);
+- bi->bi_next = return_bi;
+- return_bi = bi;
++ bi->bi_next = *return_bi;
++ *return_bi = bi;
++ }
++ bi = bi2;
++ }
++
++ /* fail any reads if this device is non-operational and
++ * the data has not reached the cache yet.
++ */
++ if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
++ (!test_bit(R5_Insync, &sh->dev[i].flags) ||
++ test_bit(R5_ReadError, &sh->dev[i].flags))) {
++ bi = sh->dev[i].toread;
++ sh->dev[i].toread = NULL;
++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
++ wake_up(&conf->wait_for_overlap);
++ if (bi) s->to_read--;
++ while (bi && bi->bi_sector <
++ sh->dev[i].sector + STRIPE_SECTORS) {
++ struct bio *nextbi =
++ r5_next_bio(bi, sh->dev[i].sector);
++ clear_bit(BIO_UPTODATE, &bi->bi_flags);
++ if (--bi->bi_phys_segments == 0) {
++ bi->bi_next = *return_bi;
++ *return_bi = bi;
++ }
++ bi = nextbi;
++ }
++ }
++ spin_unlock_irq(&conf->device_lock);
++ if (bitmap_end)
++ bitmap_endwrite(conf->mddev->bitmap, sh->sector,
++ STRIPE_SECTORS, 0, 0);
++ }
++
++}
++
++/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
++ * to process
++ */
++static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
++ struct stripe_head_state *s, int disk_idx, int disks)
++{
++ struct r5dev *dev = &sh->dev[disk_idx];
++ struct r5dev *failed_dev = &sh->dev[s->failed_num];
++
++ /* don't schedule compute operations or reads on the parity block while
++ * a check is in flight
++ */
++ if ((disk_idx == sh->pd_idx) &&
++ test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
++ return ~0;
++
++ /* is the data in this block needed, and can we get it? */
++ if (!test_bit(R5_LOCKED, &dev->flags) &&
++ !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
++ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
++ s->syncing || s->expanding || (s->failed &&
++ (failed_dev->toread || (failed_dev->towrite &&
++ !test_bit(R5_OVERWRITE, &failed_dev->flags)
++ ))))) {
++ /* 1/ We would like to get this block, possibly by computing it,
++ * but we might not be able to.
++ *
++ * 2/ Since parity check operations potentially make the parity
++ * block !uptodate it will need to be refreshed before any
++ * compute operations on data disks are scheduled.
++ *
++ * 3/ We hold off parity block re-reads until check operations
++ * have quiesced.
++ */
++ if ((s->uptodate == disks - 1) &&
++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
++ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
++ set_bit(R5_Wantcompute, &dev->flags);
++ sh->ops.target = disk_idx;
++ s->req_compute = 1;
++ sh->ops.count++;
++ /* Careful: from this point on 'uptodate' is in the eye
++ * of raid5_run_ops which services 'compute' operations
++ * before writes. R5_Wantcompute flags a block that will
++ * be R5_UPTODATE by the time it is needed for a
++ * subsequent operation.
++ */
++ s->uptodate++;
++ return 0; /* uptodate + compute == disks */
++ } else if ((s->uptodate < disks - 1) &&
++ test_bit(R5_Insync, &dev->flags)) {
++ /* Note: we hold off compute operations while checks are
++ * in flight, but we still prefer 'compute' over 'read'
++ * hence we only read if (uptodate < disks-1)
++ */
++ set_bit(R5_LOCKED, &dev->flags);
++ set_bit(R5_Wantread, &dev->flags);
++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ s->locked++;
++ pr_debug("Reading block %d (sync=%d)\n", disk_idx,
++ s->syncing);
+ }
+- bi = bi2;
+ }
+
+- /* fail any reads if this device is non-operational */
+- if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+- test_bit(R5_ReadError, &sh->dev[i].flags)) {
+- bi = sh->dev[i].toread;
+- sh->dev[i].toread = NULL;
+- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+- wake_up(&conf->wait_for_overlap);
+- if (bi) to_read--;
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+- clear_bit(BIO_UPTODATE, &bi->bi_flags);
+- if (--bi->bi_phys_segments == 0) {
+- bi->bi_next = return_bi;
+- return_bi = bi;
+- }
+- bi = nextbi;
++ return ~0;
++}
++
++static void handle_issuing_new_read_requests5(struct stripe_head *sh,
++ struct stripe_head_state *s, int disks)
++{
++ int i;
++
++ /* Clear completed compute operations. Parity recovery
++ * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
++ * later on in this routine
++ */
++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
++ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
++ }
++
++ /* look for blocks to read/compute, skip this if a compute
++ * is already in flight, or if the stripe contents are in the
++ * midst of changing due to a write
++ */
++ if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
++ !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++ for (i = disks; i--; )
++ if (__handle_issuing_new_read_requests5(
++ sh, s, i, disks) == 0)
++ break;
+ }
++ set_bit(STRIPE_HANDLE, &sh->state);
++}
++
++static void handle_issuing_new_read_requests6(struct stripe_head *sh,
++ struct stripe_head_state *s, struct r6_state *r6s,
++ int disks)
++{
++ int i;
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (!test_bit(R5_LOCKED, &dev->flags) &&
++ !test_bit(R5_UPTODATE, &dev->flags) &&
++ (dev->toread || (dev->towrite &&
++ !test_bit(R5_OVERWRITE, &dev->flags)) ||
++ s->syncing || s->expanding ||
++ (s->failed >= 1 &&
++ (sh->dev[r6s->failed_num[0]].toread ||
++ s->to_write)) ||
++ (s->failed >= 2 &&
++ (sh->dev[r6s->failed_num[1]].toread ||
++ s->to_write)))) {
++ /* we would like to get this block, possibly
++ * by computing it, but we might not be able to
++ */
++ if (s->uptodate == disks-1) {
++ pr_debug("Computing stripe %llu block %d\n",
++ (unsigned long long)sh->sector, i);
++ compute_block_1(sh, i, 0);
++ s->uptodate++;
++ } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
++ /* Computing 2-failure is *very* expensive; only
++ * do it if failed >= 2
++ */
++ int other;
++ for (other = disks; other--; ) {
++ if (other == i)
++ continue;
++ if (!test_bit(R5_UPTODATE,
++ &sh->dev[other].flags))
++ break;
+ }
+- spin_unlock_irq(&conf->device_lock);
+- if (bitmap_end)
+- bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+- STRIPE_SECTORS, 0, 0);
++ BUG_ON(other < 0);
++ pr_debug("Computing stripe %llu blocks %d,%d\n",
++ (unsigned long long)sh->sector,
++ i, other);
++ compute_block_2(sh, i, other);
++ s->uptodate += 2;
++ } else if (test_bit(R5_Insync, &dev->flags)) {
++ set_bit(R5_LOCKED, &dev->flags);
++ set_bit(R5_Wantread, &dev->flags);
++ s->locked++;
++ pr_debug("Reading block %d (sync=%d)\n",
++ i, s->syncing);
+ }
+ }
+- if (failed > 1 && syncing) {
+- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+- clear_bit(STRIPE_SYNCING, &sh->state);
+- syncing = 0;
+ }
++ set_bit(STRIPE_HANDLE, &sh->state);
++}
+
+- /* might be able to return some write requests if the parity block
+- * is safe, or on a failed drive
+- */
+- dev = &sh->dev[sh->pd_idx];
+- if ( written &&
+- ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
+- test_bit(R5_UPTODATE, &dev->flags))
+- || (failed == 1 && failed_num == sh->pd_idx))
+- ) {
+- /* any written block on an uptodate or failed drive can be returned.
++
++/* handle_completed_write_requests
++ * any written block on an uptodate or failed drive can be returned.
+ * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
+ * never LOCKED, so we don't need to test 'failed' directly.
+ */
+- for (i=disks; i--; )
++static void handle_completed_write_requests(raid5_conf_t *conf,
++ struct stripe_head *sh, int disks, struct bio **return_bi)
++{
++ int i;
++ struct r5dev *dev;
++
++ for (i = disks; i--; )
+ if (sh->dev[i].written) {
+ dev = &sh->dev[i];
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
+- test_bit(R5_UPTODATE, &dev->flags) ) {
++ test_bit(R5_UPTODATE, &dev->flags)) {
+ /* We can return any write requests */
+ struct bio *wbi, *wbi2;
+ int bitmap_end = 0;
+- PRINTK("Return write for disc %d\n", i);
++ pr_debug("Return write for disc %d\n", i);
+ spin_lock_irq(&conf->device_lock);
+ wbi = dev->written;
+ dev->written = NULL;
+- while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
++ while (wbi && wbi->bi_sector <
++ dev->sector + STRIPE_SECTORS) {
+ wbi2 = r5_next_bio(wbi, dev->sector);
+ if (--wbi->bi_phys_segments == 0) {
+ md_write_end(conf->mddev);
+- wbi->bi_next = return_bi;
+- return_bi = wbi;
++ wbi->bi_next = *return_bi;
++ *return_bi = wbi;
+ }
+ wbi = wbi2;
+ }
+@@ -1550,89 +2142,63 @@
+ bitmap_end = 1;
+ spin_unlock_irq(&conf->device_lock);
+ if (bitmap_end)
+- bitmap_endwrite(conf->mddev->bitmap, sh->sector,
++ bitmap_endwrite(conf->mddev->bitmap,
++ sh->sector,
+ STRIPE_SECTORS,
+- !test_bit(STRIPE_DEGRADED, &sh->state), 0);
+- }
+- }
+- }
+-
+- /* Now we might consider reading some blocks, either to check/generate
+- * parity, or to satisfy requests
+- * or to load a block that is being partially written.
+- */
+- if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) {
+- for (i=disks; i--;) {
+- dev = &sh->dev[i];
+- if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+- (dev->toread ||
+- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+- syncing ||
+- expanding ||
+- (failed && (sh->dev[failed_num].toread ||
+- (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
+- )
+- ) {
+- /* we would like to get this block, possibly
+- * by computing it, but we might not be able to
+- */
+- if (uptodate == disks-1) {
+- PRINTK("Computing block %d\n", i);
+- compute_block(sh, i);
+- uptodate++;
+- } else if (test_bit(R5_Insync, &dev->flags)) {
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantread, &dev->flags);
+- locked++;
+- PRINTK("Reading block %d (sync=%d)\n",
+- i, syncing);
+- }
++ !test_bit(STRIPE_DEGRADED, &sh->state),
++ 0);
+ }
+ }
+- set_bit(STRIPE_HANDLE, &sh->state);
+- }
++}
+
+- /* now to consider writing and what else, if anything should be read */
+- if (to_write) {
+- int rmw=0, rcw=0;
+- for (i=disks ; i--;) {
++static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
++ struct stripe_head *sh, struct stripe_head_state *s, int disks)
++{
++ int rmw = 0, rcw = 0, i;
++ for (i = disks; i--; ) {
+ /* would I have to read this buffer for read_modify_write */
+- dev = &sh->dev[i];
++ struct r5dev *dev = &sh->dev[i];
+ if ((dev->towrite || i == sh->pd_idx) &&
+- (!test_bit(R5_LOCKED, &dev->flags)
+- ) &&
+- !test_bit(R5_UPTODATE, &dev->flags)) {
+- if (test_bit(R5_Insync, &dev->flags)
+-/* && !(!mddev->insync && i == sh->pd_idx) */
+- )
++ !test_bit(R5_LOCKED, &dev->flags) &&
++ !(test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Wantcompute, &dev->flags))) {
++ if (test_bit(R5_Insync, &dev->flags))
+ rmw++;
+- else rmw += 2*disks; /* cannot read it */
++ else
++ rmw += 2*disks; /* cannot read it */
+ }
+ /* Would I have to read this buffer for reconstruct_write */
+ if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+- (!test_bit(R5_LOCKED, &dev->flags)
+- ) &&
+- !test_bit(R5_UPTODATE, &dev->flags)) {
++ !test_bit(R5_LOCKED, &dev->flags) &&
++ !(test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Wantcompute, &dev->flags))) {
+ if (test_bit(R5_Insync, &dev->flags)) rcw++;
+- else rcw += 2*disks;
++ else
++ rcw += 2*disks;
+ }
+ }
+- PRINTK("for sector %llu, rmw=%d rcw=%d\n",
++ pr_debug("for sector %llu, rmw=%d rcw=%d\n",
+ (unsigned long long)sh->sector, rmw, rcw);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ if (rmw < rcw && rmw > 0)
+ /* prefer read-modify-write, but need to get some data */
+- for (i=disks; i--;) {
+- dev = &sh->dev[i];
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
+ if ((dev->towrite || i == sh->pd_idx) &&
+- !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
++ !test_bit(R5_LOCKED, &dev->flags) &&
++ !(test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Wantcompute, &dev->flags)) &&
+ test_bit(R5_Insync, &dev->flags)) {
+- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+- {
+- PRINTK("Read_old block %d for r-m-w\n", i);
++ if (
++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++ pr_debug("Read_old block "
++ "%d for r-m-w\n", i);
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+- locked++;
++ if (!test_and_set_bit(
++ STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ s->locked++;
+ } else {
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
+@@ -1641,165 +2207,367 @@
+ }
+ if (rcw <= rmw && rcw > 0)
+ /* want reconstruct write, but need to get some data */
+- for (i=disks; i--;) {
+- dev = &sh->dev[i];
+- if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+- !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (!test_bit(R5_OVERWRITE, &dev->flags) &&
++ i != sh->pd_idx &&
++ !test_bit(R5_LOCKED, &dev->flags) &&
++ !(test_bit(R5_UPTODATE, &dev->flags) ||
++ test_bit(R5_Wantcompute, &dev->flags)) &&
+ test_bit(R5_Insync, &dev->flags)) {
+- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+- {
+- PRINTK("Read_old block %d for Reconstruct\n", i);
++ if (
++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++ pr_debug("Read_old block "
++ "%d for Reconstruct\n", i);
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantread, &dev->flags);
+- locked++;
++ if (!test_and_set_bit(
++ STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ s->locked++;
++ } else {
++ set_bit(STRIPE_DELAYED, &sh->state);
++ set_bit(STRIPE_HANDLE, &sh->state);
++ }
++ }
++ }
++ /* now if nothing is locked, and if we have enough data,
++ * we can start a write request
++ */
++ /* since handle_stripe can be called at any time we need to handle the
++ * case where a compute block operation has been submitted and then a
++ * subsequent call wants to start a write request. raid5_run_ops only
++ * handles the case where compute block and postxor are requested
++ * simultaneously. If this is not the case then new writes need to be
++ * held off until the compute completes.
++ */
++ if ((s->req_compute ||
++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
++ (s->locked == 0 && (rcw == 0 || rmw == 0) &&
++ !test_bit(STRIPE_BIT_DELAY, &sh->state)))
++ s->locked += handle_write_operations5(sh, rcw == 0, 0);
++}
++
++static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
++ struct stripe_head *sh, struct stripe_head_state *s,
++ struct r6_state *r6s, int disks)
++{
++ int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
++ int qd_idx = r6s->qd_idx;
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ /* Would I have to read this buffer for reconstruct_write */
++ if (!test_bit(R5_OVERWRITE, &dev->flags)
++ && i != pd_idx && i != qd_idx
++ && (!test_bit(R5_LOCKED, &dev->flags)
++ ) &&
++ !test_bit(R5_UPTODATE, &dev->flags)) {
++ if (test_bit(R5_Insync, &dev->flags)) rcw++;
++ else {
++ pr_debug("raid6: must_compute: "
++ "disk %d flags=%#lx\n", i, dev->flags);
++ must_compute++;
++ }
++ }
++ }
++ pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
++ (unsigned long long)sh->sector, rcw, must_compute);
++ set_bit(STRIPE_HANDLE, &sh->state);
++
++ if (rcw > 0)
++ /* want reconstruct write, but need to get some data */
++ for (i = disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ if (!test_bit(R5_OVERWRITE, &dev->flags)
++ && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
++ && !test_bit(R5_LOCKED, &dev->flags) &&
++ !test_bit(R5_UPTODATE, &dev->flags) &&
++ test_bit(R5_Insync, &dev->flags)) {
++ if (
++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++ pr_debug("Read_old stripe %llu "
++ "block %d for Reconstruct\n",
++ (unsigned long long)sh->sector, i);
++ set_bit(R5_LOCKED, &dev->flags);
++ set_bit(R5_Wantread, &dev->flags);
++ s->locked++;
+ } else {
++ pr_debug("Request delayed stripe %llu "
++ "block %d for Reconstruct\n",
++ (unsigned long long)sh->sector, i);
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ }
+ }
+ }
+- /* now if nothing is locked, and if we have enough data, we can start a write request */
+- if (locked == 0 && (rcw == 0 ||rmw == 0) &&
++ /* now if nothing is locked, and if we have enough data, we can start a
++ * write request
++ */
++ if (s->locked == 0 && rcw == 0 &&
+ !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
+- PRINTK("Computing parity...\n");
+- compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
++ if (must_compute > 0) {
++ /* We have failed blocks and need to compute them */
++ switch (s->failed) {
++ case 0:
++ BUG();
++ case 1:
++ compute_block_1(sh, r6s->failed_num[0], 0);
++ break;
++ case 2:
++ compute_block_2(sh, r6s->failed_num[0],
++ r6s->failed_num[1]);
++ break;
++ default: /* This request should have been failed? */
++ BUG();
++ }
++ }
++
++ pr_debug("Computing parity for stripe %llu\n",
++ (unsigned long long)sh->sector);
++ compute_parity6(sh, RECONSTRUCT_WRITE);
+ /* now every locked buffer is ready to be written */
+- for (i=disks; i--;)
++ for (i = disks; i--; )
+ if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
+- PRINTK("Writing block %d\n", i);
+- locked++;
++ pr_debug("Writing stripe %llu block %d\n",
++ (unsigned long long)sh->sector, i);
++ s->locked++;
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+- if (!test_bit(R5_Insync, &sh->dev[i].flags)
+- || (i==sh->pd_idx && failed == 0))
+- set_bit(STRIPE_INSYNC, &sh->state);
+ }
++ /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
++ set_bit(STRIPE_INSYNC, &sh->state);
++
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ atomic_dec(&conf->preread_active_stripes);
+- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
++ if (atomic_read(&conf->preread_active_stripes) <
++ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+- }
++}
+
+- /* maybe we need to check and possibly fix the parity for this stripe
+- * Any reads will already have been scheduled, so we just see if enough data
+- * is available
+- */
+- if (syncing && locked == 0 &&
+- !test_bit(STRIPE_INSYNC, &sh->state)) {
++static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
++ struct stripe_head_state *s, int disks)
++{
+ set_bit(STRIPE_HANDLE, &sh->state);
+- if (failed == 0) {
+- BUG_ON(uptodate != disks);
+- compute_parity5(sh, CHECK_PARITY);
+- uptodate--;
+- if (page_is_zero(sh->dev[sh->pd_idx].page)) {
+- /* parity is correct (on disc, not in buffer any more) */
++ /* Take one of the following actions:
++ * 1/ start a check parity operation if (uptodate == disks)
++ * 2/ finish a check parity operation and act on the result
++ * 3/ skip to the writeback section if we previously
++ * initiated a recovery operation
++ */
++ if (s->failed == 0 &&
++ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++ if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
++ BUG_ON(s->uptodate != disks);
++ clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
++ sh->ops.count++;
++ s->uptodate--;
++ } else if (
++ test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
++ clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
++ clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
++
++ if (sh->ops.zero_sum_result == 0)
++ /* parity is correct (on disc,
++ * not in buffer any more)
++ */
+ set_bit(STRIPE_INSYNC, &sh->state);
+- } else {
+- conf->mddev->resync_mismatches += STRIPE_SECTORS;
+- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
++ else {
++ conf->mddev->resync_mismatches +=
++ STRIPE_SECTORS;
++ if (test_bit(
++ MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+- compute_block(sh, sh->pd_idx);
+- uptodate++;
++ set_bit(STRIPE_OP_COMPUTE_BLK,
++ &sh->ops.pending);
++ set_bit(STRIPE_OP_MOD_REPAIR_PD,
++ &sh->ops.pending);
++ set_bit(R5_Wantcompute,
++ &sh->dev[sh->pd_idx].flags);
++ sh->ops.target = sh->pd_idx;
++ sh->ops.count++;
++ s->uptodate++;
++ }
++ }
+ }
+ }
++
++ /* check if we can clear a parity disk reconstruct */
++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
++ test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++
++ clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+ }
+- if (!test_bit(STRIPE_INSYNC, &sh->state)) {
++
++ /* Wait for check parity and compute block operations to complete
++ * before write-back
++ */
++ if (!test_bit(STRIPE_INSYNC, &sh->state) &&
++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
++ struct r5dev *dev;
+ /* either failed parity check, or recovery is happening */
+- if (failed==0)
+- failed_num = sh->pd_idx;
+- dev = &sh->dev[failed_num];
++ if (s->failed == 0)
++ s->failed_num = sh->pd_idx;
++ dev = &sh->dev[s->failed_num];
+ BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+- BUG_ON(uptodate != disks);
++ BUG_ON(s->uptodate != disks);
+
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++
+ clear_bit(STRIPE_DEGRADED, &sh->state);
+- locked++;
++ s->locked++;
+ set_bit(STRIPE_INSYNC, &sh->state);
+ }
++}
++
++
++static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
++ struct stripe_head_state *s,
++ struct r6_state *r6s, struct page *tmp_page,
++ int disks)
++{
++ int update_p = 0, update_q = 0;
++ struct r5dev *dev;
++ int pd_idx = sh->pd_idx;
++ int qd_idx = r6s->qd_idx;
++
++ set_bit(STRIPE_HANDLE, &sh->state);
++
++ BUG_ON(s->failed > 2);
++ BUG_ON(s->uptodate < disks);
++ /* Want to check and possibly repair P and Q.
++ * However there could be one 'failed' device, in which
++ * case we can only check one of them, possibly using the
++ * other to generate missing data
++ */
++
++ /* If !tmp_page, we cannot do the calculations,
++ * but as we have set STRIPE_HANDLE, we will soon be called
++ * by handle_stripe with a tmp_page - just wait until then.
++ */
++ if (tmp_page) {
++ if (s->failed == r6s->q_failed) {
++ /* The only possible failed device holds 'Q', so it
++ * makes sense to check P (If anything else were failed,
++ * we would have used P to recreate it).
++ */
++ compute_block_1(sh, pd_idx, 1);
++ if (!page_is_zero(sh->dev[pd_idx].page)) {
++ compute_block_1(sh, pd_idx, 0);
++ update_p = 1;
++ }
+ }
+- if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+- md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+- clear_bit(STRIPE_SYNCING, &sh->state);
++ if (!r6s->q_failed && s->failed < 2) {
++ /* q is not failed, and we didn't use it to generate
++ * anything, so it makes sense to check it
++ */
++ memcpy(page_address(tmp_page),
++ page_address(sh->dev[qd_idx].page),
++ STRIPE_SIZE);
++ compute_parity6(sh, UPDATE_PARITY);
++ if (memcmp(page_address(tmp_page),
++ page_address(sh->dev[qd_idx].page),
++ STRIPE_SIZE) != 0) {
++ clear_bit(STRIPE_INSYNC, &sh->state);
++ update_q = 1;
++ }
++ }
++ if (update_p || update_q) {
++ conf->mddev->resync_mismatches += STRIPE_SECTORS;
++ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
++ /* don't try to repair!! */
++ update_p = update_q = 0;
+ }
+
+- /* If the failed drive is just a ReadError, then we might need to progress
+- * the repair/check process
++ /* now write out any block on a failed drive,
++ * or P or Q if they need it
+ */
+- if (failed == 1 && ! conf->mddev->ro &&
+- test_bit(R5_ReadError, &sh->dev[failed_num].flags)
+- && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
+- && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
+- ) {
+- dev = &sh->dev[failed_num];
+- if (!test_bit(R5_ReWrite, &dev->flags)) {
++
++ if (s->failed == 2) {
++ dev = &sh->dev[r6s->failed_num[1]];
++ s->locked++;
++ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+- set_bit(R5_ReWrite, &dev->flags);
++ }
++ if (s->failed >= 1) {
++ dev = &sh->dev[r6s->failed_num[0]];
++ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+- locked++;
+- } else {
+- /* let's read it back */
+- set_bit(R5_Wantread, &dev->flags);
++ set_bit(R5_Wantwrite, &dev->flags);
++ }
++
++ if (update_p) {
++ dev = &sh->dev[pd_idx];
++ s->locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+- locked++;
++ set_bit(R5_Wantwrite, &dev->flags);
+ }
++ if (update_q) {
++ dev = &sh->dev[qd_idx];
++ s->locked++;
++ set_bit(R5_LOCKED, &dev->flags);
++ set_bit(R5_Wantwrite, &dev->flags);
+ }
++ clear_bit(STRIPE_DEGRADED, &sh->state);
+
+- if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+- /* Need to write out all blocks after computing parity */
+- sh->disks = conf->raid_disks;
+- sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
+- compute_parity5(sh, RECONSTRUCT_WRITE);
+- for (i= conf->raid_disks; i--;) {
+- set_bit(R5_LOCKED, &sh->dev[i].flags);
+- locked++;
+- set_bit(R5_Wantwrite, &sh->dev[i].flags);
+- }
+- clear_bit(STRIPE_EXPANDING, &sh->state);
+- } else if (expanded) {
+- clear_bit(STRIPE_EXPAND_READY, &sh->state);
+- atomic_dec(&conf->reshape_stripes);
+- wake_up(&conf->wait_for_overlap);
+- md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
++ set_bit(STRIPE_INSYNC, &sh->state);
+ }
++}
++
++static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
++ struct r6_state *r6s)
++{
++ int i;
+
+- if (expanding && locked == 0) {
+ /* We have read all the blocks in this stripe and now we need to
+ * copy some of them into a target stripe for expand.
+ */
++ struct dma_async_tx_descriptor *tx = NULL;
+ clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- for (i=0; i< sh->disks; i++)
+- if (i != sh->pd_idx) {
++ for (i = 0; i < sh->disks; i++)
++ if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) {
+ int dd_idx, pd_idx, j;
+ struct stripe_head *sh2;
+
+ sector_t bn = compute_blocknr(sh, i);
+ sector_t s = raid5_compute_sector(bn, conf->raid_disks,
+- conf->raid_disks-1,
+- &dd_idx, &pd_idx, conf);
+- sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1);
++ conf->raid_disks -
++ conf->max_degraded, &dd_idx,
++ &pd_idx, conf);
++ sh2 = get_active_stripe(conf, s, conf->raid_disks,
++ pd_idx, 1);
+ if (sh2 == NULL)
+ /* so far only the early blocks of this stripe
+ * have been requested. When later blocks
+ * get requested, we will try again
+ */
+ continue;
+- if(!test_bit(STRIPE_EXPANDING, &sh2->state) ||
++ if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+ test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
+ /* must have already done this block */
+ release_stripe(sh2);
+ continue;
+ }
+- memcpy(page_address(sh2->dev[dd_idx].page),
+- page_address(sh->dev[i].page),
+- STRIPE_SIZE);
++
++ /* place all the copies on one channel */
++ tx = async_memcpy(sh2->dev[dd_idx].page,
++ sh->dev[i].page, 0, 0, STRIPE_SIZE,
++ ASYNC_TX_DEP_ACK, tx, NULL, NULL);
++
+ set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
+ set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
+- for (j=0; j<conf->raid_disks; j++)
++ for (j = 0; j < conf->raid_disks; j++)
+ if (j != sh2->pd_idx &&
++ (r6s && j != r6s->qd_idx) &&
+ !test_bit(R5_Expanded, &sh2->dev[j].flags))
+ break;
+ if (j == conf->raid_disks) {
+@@ -1807,153 +2575,91 @@
+ set_bit(STRIPE_HANDLE, &sh2->state);
+ }
+ release_stripe(sh2);
+- }
+- }
+-
+- spin_unlock(&sh->lock);
+-
+- while ((bi=return_bi)) {
+- int bytes = bi->bi_size;
+-
+- return_bi = bi->bi_next;
+- bi->bi_next = NULL;
+- bi->bi_size = 0;
+- bi->bi_end_io(bi, bytes,
+- test_bit(BIO_UPTODATE, &bi->bi_flags)
+- ? 0 : -EIO);
+- }
+- for (i=disks; i-- ;) {
+- int rw;
+- struct bio *bi;
+- mdk_rdev_t *rdev;
+- if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
+- rw = WRITE;
+- else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
+- rw = READ;
+- else
+- continue;
+-
+- bi = &sh->dev[i].req;
+-
+- bi->bi_rw = rw;
+- if (rw == WRITE)
+- bi->bi_end_io = raid5_end_write_request;
+- else
+- bi->bi_end_io = raid5_end_read_request;
+-
+- rcu_read_lock();
+- rdev = rcu_dereference(conf->disks[i].rdev);
+- if (rdev && test_bit(Faulty, &rdev->flags))
+- rdev = NULL;
+- if (rdev)
+- atomic_inc(&rdev->nr_pending);
+- rcu_read_unlock();
+-
+- if (rdev) {
+- if (syncing || expanding || expanded)
+- md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+
+- bi->bi_bdev = rdev->bdev;
+- PRINTK("for %llu schedule op %ld on disc %d\n",
+- (unsigned long long)sh->sector, bi->bi_rw, i);
+- atomic_inc(&sh->count);
+- bi->bi_sector = sh->sector + rdev->data_offset;
+- bi->bi_flags = 1 << BIO_UPTODATE;
+- bi->bi_vcnt = 1;
+- bi->bi_max_vecs = 1;
+- bi->bi_idx = 0;
+- bi->bi_io_vec = &sh->dev[i].vec;
+- bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+- bi->bi_io_vec[0].bv_offset = 0;
+- bi->bi_size = STRIPE_SIZE;
+- bi->bi_next = NULL;
+- if (rw == WRITE &&
+- test_bit(R5_ReWrite, &sh->dev[i].flags))
+- atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+- generic_make_request(bi);
+- } else {
+- if (rw == WRITE)
+- set_bit(STRIPE_DEGRADED, &sh->state);
+- PRINTK("skip op %ld on disc %d for sector %llu\n",
+- bi->bi_rw, i, (unsigned long long)sh->sector);
+- clear_bit(R5_LOCKED, &sh->dev[i].flags);
+- set_bit(STRIPE_HANDLE, &sh->state);
++ /* done submitting copies, wait for them to complete */
++ if (i + 1 >= sh->disks) {
++ async_tx_ack(tx);
++ dma_wait_for_async_tx(tx);
+ }
+ }
+ }
+
+-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
++/*
++ * handle_stripe - do things to a stripe.
++ *
++ * We lock the stripe and then examine the state of various bits
++ * to see what needs to be done.
++ * Possible results:
++ * return some read requests which now have data
++ * return some write requests which are safely on disc
++ * schedule a read on some buffers
++ * schedule a write of some buffers
++ * return confirmation of parity correctness
++ *
++ * buffers are taken off read_list or write_list, and bh_cache buffers
++ * get BH_Lock set before the stripe lock is released.
++ *
++ */
++
++static void handle_stripe5(struct stripe_head *sh)
+ {
+- raid6_conf_t *conf = sh->raid_conf;
+- int disks = sh->disks;
+- struct bio *return_bi= NULL;
+- struct bio *bi;
+- int i;
+- int syncing, expanding, expanded;
+- int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
+- int non_overwrite = 0;
+- int failed_num[2] = {0, 0};
+- struct r5dev *dev, *pdev, *qdev;
+- int pd_idx = sh->pd_idx;
+- int qd_idx = raid6_next_disk(pd_idx, disks);
+- int p_failed, q_failed;
++ raid5_conf_t *conf = sh->raid_conf;
++ int disks = sh->disks, i;
++ struct bio *return_bi = NULL, *bi;
++ struct stripe_head_state s;
++ struct r5dev *dev;
++ unsigned long pending = 0;
+
+- PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
+- (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
+- pd_idx, qd_idx);
++ memset(&s, 0, sizeof(s));
++ pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
++ "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
++ atomic_read(&sh->count), sh->pd_idx,
++ sh->ops.pending, sh->ops.ack, sh->ops.complete);
+
+ spin_lock(&sh->lock);
+ clear_bit(STRIPE_HANDLE, &sh->state);
+ clear_bit(STRIPE_DELAYED, &sh->state);
+
+- syncing = test_bit(STRIPE_SYNCING, &sh->state);
+- expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
++ s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
++ s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
++ s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+ /* Now to look around and see what can be done */
+
+ rcu_read_lock();
+ for (i=disks; i--; ) {
+ mdk_rdev_t *rdev;
+- dev = &sh->dev[i];
++ struct r5dev *dev = &sh->dev[i];
+ clear_bit(R5_Insync, &dev->flags);
+
+- PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
+- i, dev->flags, dev->toread, dev->towrite, dev->written);
+- /* maybe we can reply to a read */
+- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
+- struct bio *rbi, *rbi2;
+- PRINTK("Return read for disc %d\n", i);
+- spin_lock_irq(&conf->device_lock);
+- rbi = dev->toread;
+- dev->toread = NULL;
+- if (test_and_clear_bit(R5_Overlap, &dev->flags))
+- wake_up(&conf->wait_for_overlap);
+- spin_unlock_irq(&conf->device_lock);
+- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+- copy_data(0, rbi, dev->page, dev->sector);
+- rbi2 = r5_next_bio(rbi, dev->sector);
+- spin_lock_irq(&conf->device_lock);
+- if (--rbi->bi_phys_segments == 0) {
+- rbi->bi_next = return_bi;
+- return_bi = rbi;
+- }
+- spin_unlock_irq(&conf->device_lock);
+- rbi = rbi2;
+- }
+- }
+-
+- /* now count some things */
+- if (test_bit(R5_LOCKED, &dev->flags)) locked++;
+- if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
++ pr_debug("check %d: state 0x%lx toread %p read %p write %p "
++ "written %p\n", i, dev->flags, dev->toread, dev->read,
++ dev->towrite, dev->written);
+
++ /* maybe we can request a biofill operation
++ *
++ * new wantfill requests are only permitted while
++ * STRIPE_OP_BIOFILL is clear
++ */
++ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
++ !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
++ set_bit(R5_Wantfill, &dev->flags);
+
+- if (dev->toread) to_read++;
++ /* now count some things */
++ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
++ if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
++ if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
++
++ if (test_bit(R5_Wantfill, &dev->flags))
++ s.to_fill++;
++ else if (dev->toread)
++ s.to_read++;
+ if (dev->towrite) {
+- to_write++;
++ s.to_write++;
+ if (!test_bit(R5_OVERWRITE, &dev->flags))
+- non_overwrite++;
++ s.non_overwrite++;
+ }
+- if (dev->written) written++;
++ if (dev->written)
++ s.written++;
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+ /* The ReadError flag will just be confusing now */
+@@ -1962,376 +2668,361 @@
+ }
+ if (!rdev || !test_bit(In_sync, &rdev->flags)
+ || test_bit(R5_ReadError, &dev->flags)) {
+- if ( failed < 2 )
+- failed_num[failed] = i;
+- failed++;
++ s.failed++;
++ s.failed_num = i;
+ } else
+ set_bit(R5_Insync, &dev->flags);
+ }
+ rcu_read_unlock();
+- PRINTK("locked=%d uptodate=%d to_read=%d"
+- " to_write=%d failed=%d failed_num=%d,%d\n",
+- locked, uptodate, to_read, to_write, failed,
+- failed_num[0], failed_num[1]);
+- /* check if the array has lost >2 devices and, if so, some requests might
+- * need to be failed
+- */
+- if (failed > 2 && to_read+to_write+written) {
+- for (i=disks; i--; ) {
+- int bitmap_end = 0;
+-
+- if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+- mdk_rdev_t *rdev;
+- rcu_read_lock();
+- rdev = rcu_dereference(conf->disks[i].rdev);
+- if (rdev && test_bit(In_sync, &rdev->flags))
+- /* multiple read failures in one stripe */
+- md_error(conf->mddev, rdev);
+- rcu_read_unlock();
+- }
+-
+- spin_lock_irq(&conf->device_lock);
+- /* fail all writes first */
+- bi = sh->dev[i].towrite;
+- sh->dev[i].towrite = NULL;
+- if (bi) { to_write--; bitmap_end = 1; }
+-
+- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+- wake_up(&conf->wait_for_overlap);
+
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+- clear_bit(BIO_UPTODATE, &bi->bi_flags);
+- if (--bi->bi_phys_segments == 0) {
+- md_write_end(conf->mddev);
+- bi->bi_next = return_bi;
+- return_bi = bi;
+- }
+- bi = nextbi;
+- }
+- /* and fail all 'written' */
+- bi = sh->dev[i].written;
+- sh->dev[i].written = NULL;
+- if (bi) bitmap_end = 1;
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
+- struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+- clear_bit(BIO_UPTODATE, &bi->bi_flags);
+- if (--bi->bi_phys_segments == 0) {
+- md_write_end(conf->mddev);
+- bi->bi_next = return_bi;
+- return_bi = bi;
+- }
+- bi = bi2;
+- }
++ if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
++ sh->ops.count++;
+
+- /* fail any reads if this device is non-operational */
+- if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+- test_bit(R5_ReadError, &sh->dev[i].flags)) {
+- bi = sh->dev[i].toread;
+- sh->dev[i].toread = NULL;
+- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+- wake_up(&conf->wait_for_overlap);
+- if (bi) to_read--;
+- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+- clear_bit(BIO_UPTODATE, &bi->bi_flags);
+- if (--bi->bi_phys_segments == 0) {
+- bi->bi_next = return_bi;
+- return_bi = bi;
+- }
+- bi = nextbi;
+- }
+- }
+- spin_unlock_irq(&conf->device_lock);
+- if (bitmap_end)
+- bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+- STRIPE_SECTORS, 0, 0);
+- }
+- }
+- if (failed > 2 && syncing) {
++ pr_debug("locked=%d uptodate=%d to_read=%d"
++ " to_write=%d failed=%d failed_num=%d\n",
++ s.locked, s.uptodate, s.to_read, s.to_write,
++ s.failed, s.failed_num);
++ /* check if the array has lost two devices and, if so, some requests might
++ * need to be failed
++ */
++ if (s.failed > 1 && s.to_read+s.to_write+s.written)
++ handle_requests_to_failed_array(conf, sh, &s, disks,
++ &return_bi);
++ if (s.failed > 1 && s.syncing) {
+ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+- syncing = 0;
++ s.syncing = 0;
+ }
+
+- /*
+- * might be able to return some write requests if the parity blocks
+- * are safe, or on a failed drive
+- */
+- pdev = &sh->dev[pd_idx];
+- p_failed = (failed >= 1 && failed_num[0] == pd_idx)
+- || (failed >= 2 && failed_num[1] == pd_idx);
+- qdev = &sh->dev[qd_idx];
+- q_failed = (failed >= 1 && failed_num[0] == qd_idx)
+- || (failed >= 2 && failed_num[1] == qd_idx);
+-
+- if ( written &&
+- ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
+- && !test_bit(R5_LOCKED, &pdev->flags)
+- && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
+- ( q_failed || ((test_bit(R5_Insync, &qdev->flags)
+- && !test_bit(R5_LOCKED, &qdev->flags)
+- && test_bit(R5_UPTODATE, &qdev->flags))) ) ) {
+- /* any written block on an uptodate or failed drive can be
+- * returned. Note that if we 'wrote' to a failed drive,
+- * it will be UPTODATE, but never LOCKED, so we don't need
+- * to test 'failed' directly.
++ /* might be able to return some write requests if the parity block
++ * is safe, or on a failed drive
+ */
+- for (i=disks; i--; )
+- if (sh->dev[i].written) {
+- dev = &sh->dev[i];
+- if (!test_bit(R5_LOCKED, &dev->flags) &&
+- test_bit(R5_UPTODATE, &dev->flags) ) {
+- /* We can return any write requests */
+- int bitmap_end = 0;
+- struct bio *wbi, *wbi2;
+- PRINTK("Return write for stripe %llu disc %d\n",
+- (unsigned long long)sh->sector, i);
+- spin_lock_irq(&conf->device_lock);
+- wbi = dev->written;
+- dev->written = NULL;
+- while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+- wbi2 = r5_next_bio(wbi, dev->sector);
+- if (--wbi->bi_phys_segments == 0) {
+- md_write_end(conf->mddev);
+- wbi->bi_next = return_bi;
+- return_bi = wbi;
+- }
+- wbi = wbi2;
+- }
+- if (dev->towrite == NULL)
+- bitmap_end = 1;
+- spin_unlock_irq(&conf->device_lock);
+- if (bitmap_end)
+- bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+- STRIPE_SECTORS,
+- !test_bit(STRIPE_DEGRADED, &sh->state), 0);
+- }
+- }
+- }
++ dev = &sh->dev[sh->pd_idx];
++ if ( s.written &&
++ ((test_bit(R5_Insync, &dev->flags) &&
++ !test_bit(R5_LOCKED, &dev->flags) &&
++ test_bit(R5_UPTODATE, &dev->flags)) ||
++ (s.failed == 1 && s.failed_num == sh->pd_idx)))
++ handle_completed_write_requests(conf, sh, disks, &return_bi);
+
+ /* Now we might consider reading some blocks, either to check/generate
+ * parity, or to satisfy requests
+ * or to load a block that is being partially written.
+ */
+- if (to_read || non_overwrite || (to_write && failed) ||
+- (syncing && (uptodate < disks)) || expanding) {
+- for (i=disks; i--;) {
+- dev = &sh->dev[i];
+- if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+- (dev->toread ||
+- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+- syncing ||
+- expanding ||
+- (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
+- (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
+- )
+- ) {
+- /* we would like to get this block, possibly
+- * by computing it, but we might not be able to
++ if (s.to_read || s.non_overwrite ||
++ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
++ test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
++ handle_issuing_new_read_requests5(sh, &s, disks);
++
++ /* Now we check to see if any write operations have recently
++ * completed
+ */
+- if (uptodate == disks-1) {
+- PRINTK("Computing stripe %llu block %d\n",
+- (unsigned long long)sh->sector, i);
+- compute_block_1(sh, i, 0);
+- uptodate++;
+- } else if ( uptodate == disks-2 && failed >= 2 ) {
+- /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
+- int other;
+- for (other=disks; other--;) {
+- if ( other == i )
+- continue;
+- if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) )
+- break;
+- }
+- BUG_ON(other < 0);
+- PRINTK("Computing stripe %llu blocks %d,%d\n",
+- (unsigned long long)sh->sector, i, other);
+- compute_block_2(sh, i, other);
+- uptodate += 2;
+- } else if (test_bit(R5_Insync, &dev->flags)) {
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantread, &dev->flags);
+- locked++;
+- PRINTK("Reading block %d (sync=%d)\n",
+- i, syncing);
+- }
+- }
+- }
+- set_bit(STRIPE_HANDLE, &sh->state);
++
++ /* leave prexor set until postxor is done, allows us to distinguish
++ * a rmw from a rcw during biodrain
++ */
++ if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
++ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
++
++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++
++ for (i = disks; i--; )
++ clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+ }
+
+- /* now to consider writing and what else, if anything should be read */
+- if (to_write) {
+- int rcw=0, must_compute=0;
+- for (i=disks ; i--;) {
++ /* if only POSTXOR is set then this is an 'expand' postxor */
++ if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
++ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
++
++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++
++ /* All the 'written' buffers and the parity block are ready to
++ * be written back to disk
++ */
++ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
++ for (i = disks; i--; ) {
+ dev = &sh->dev[i];
+- /* Would I have to read this buffer for reconstruct_write */
+- if (!test_bit(R5_OVERWRITE, &dev->flags)
+- && i != pd_idx && i != qd_idx
+- && (!test_bit(R5_LOCKED, &dev->flags)
+- ) &&
+- !test_bit(R5_UPTODATE, &dev->flags)) {
+- if (test_bit(R5_Insync, &dev->flags)) rcw++;
+- else {
+- PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags);
+- must_compute++;
++ if (test_bit(R5_LOCKED, &dev->flags) &&
++ (i == sh->pd_idx || dev->written)) {
++ pr_debug("Writing block %d\n", i);
++ set_bit(R5_Wantwrite, &dev->flags);
++ if (!test_and_set_bit(
++ STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ if (!test_bit(R5_Insync, &dev->flags) ||
++ (i == sh->pd_idx && s.failed == 0))
++ set_bit(STRIPE_INSYNC, &sh->state);
++ }
+ }
++ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++ atomic_dec(&conf->preread_active_stripes);
++ if (atomic_read(&conf->preread_active_stripes) <
++ IO_THRESHOLD)
++ md_wakeup_thread(conf->mddev->thread);
+ }
+ }
+- PRINTK("for sector %llu, rcw=%d, must_compute=%d\n",
+- (unsigned long long)sh->sector, rcw, must_compute);
+- set_bit(STRIPE_HANDLE, &sh->state);
+
+- if (rcw > 0)
+- /* want reconstruct write, but need to get some data */
+- for (i=disks; i--;) {
+- dev = &sh->dev[i];
+- if (!test_bit(R5_OVERWRITE, &dev->flags)
+- && !(failed == 0 && (i == pd_idx || i == qd_idx))
+- && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+- test_bit(R5_Insync, &dev->flags)) {
+- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+- {
+- PRINTK("Read_old stripe %llu block %d for Reconstruct\n",
+- (unsigned long long)sh->sector, i);
++ /* Now to consider new write requests and what else, if anything
++ * should be read. We do not handle new writes when:
++ * 1/ A 'write' operation (copy+xor) is already in flight.
++ * 2/ A 'check' operation is in flight, as it may clobber the parity
++ * block.
++ */
++ if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
++ handle_issuing_new_write_requests5(conf, sh, &s, disks);
++
++ /* maybe we need to check and possibly fix the parity for this stripe
++ * Any reads will already have been scheduled, so we just see if enough
++ * data is available. The parity check is held off while parity
++ * dependent operations are in flight.
++ */
++ if ((s.syncing && s.locked == 0 &&
++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
++ !test_bit(STRIPE_INSYNC, &sh->state)) ||
++ test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
++ test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending))
++ handle_parity_checks5(conf, sh, &s, disks);
++
++ if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
++ md_done_sync(conf->mddev, STRIPE_SECTORS,1);
++ clear_bit(STRIPE_SYNCING, &sh->state);
++ }
++
++ /* If the failed drive is just a ReadError, then we might need to progress
++ * the repair/check process
++ */
++ if (s.failed == 1 && !conf->mddev->ro &&
++ test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
++ && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
++ && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
++ ) {
++ dev = &sh->dev[s.failed_num];
++ if (!test_bit(R5_ReWrite, &dev->flags)) {
++ set_bit(R5_Wantwrite, &dev->flags);
++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ set_bit(R5_ReWrite, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantread, &dev->flags);
+- locked++;
++ s.locked++;
+ } else {
+- PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
+- (unsigned long long)sh->sector, i);
+- set_bit(STRIPE_DELAYED, &sh->state);
+- set_bit(STRIPE_HANDLE, &sh->state);
++ /* let's read it back */
++ set_bit(R5_Wantread, &dev->flags);
++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
++ set_bit(R5_LOCKED, &dev->flags);
++ s.locked++;
+ }
+ }
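The ReadError branch just above is a two-pass repair and is easy to misread in diff form: the first pass overwrites the bad sector with recomputed data, and only a later pass, after that write completed, reads it back to confirm the medium took it. A condensed sketch of the same state machine (illustrative only; the authoritative code is the hunk above):

	/* Two-pass ReadError repair; dev points at the flaky device and
	 * its recomputed data is already R5_UPTODATE.
	 */
	if (!test_bit(R5_ReWrite, &dev->flags)) {
		/* pass 1: rewrite the recomputed block over the bad sector */
		set_bit(R5_Wantwrite, &dev->flags);
		set_bit(R5_ReWrite, &dev->flags);
	} else {
		/* pass 2: read it back to verify the rewrite took */
		set_bit(R5_Wantread, &dev->flags);
	}
	set_bit(R5_LOCKED, &dev->flags);	/* in flight either way */
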
++
++ /* Finish postxor operations initiated by the expansion
++ * process
++ */
++ if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
++ !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
++
++ clear_bit(STRIPE_EXPANDING, &sh->state);
++
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++
++ for (i = conf->raid_disks; i--; ) {
++ set_bit(R5_Wantwrite, &sh->dev[i].flags);
++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++ sh->ops.count++;
+ }
+- /* now if nothing is locked, and if we have enough data, we can start a write request */
+- if (locked == 0 && rcw == 0 &&
+- !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
+- if ( must_compute > 0 ) {
+- /* We have failed blocks and need to compute them */
+- switch ( failed ) {
+- case 0: BUG();
+- case 1: compute_block_1(sh, failed_num[0], 0); break;
+- case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
+- default: BUG(); /* This request should have been failed? */
+ }
++
++ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++ /* Need to write out all blocks after computing parity */
++ sh->disks = conf->raid_disks;
++ sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
++ conf->raid_disks);
++ s.locked += handle_write_operations5(sh, 0, 1);
++ } else if (s.expanded &&
++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++ clear_bit(STRIPE_EXPAND_READY, &sh->state);
++ atomic_dec(&conf->reshape_stripes);
++ wake_up(&conf->wait_for_overlap);
++ md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ }
+
+- PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector);
+- compute_parity6(sh, RECONSTRUCT_WRITE);
+- /* now every locked buffer is ready to be written */
+- for (i=disks; i--;)
+- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
+- PRINTK("Writing stripe %llu block %d\n",
+- (unsigned long long)sh->sector, i);
+- locked++;
+- set_bit(R5_Wantwrite, &sh->dev[i].flags);
++ if (s.expanding && s.locked == 0)
++ handle_stripe_expansion(conf, sh, NULL);
++
++ if (sh->ops.count)
++ pending = get_stripe_work(sh);
++
++ spin_unlock(&sh->lock);
++
++ if (pending)
++ raid5_run_ops(sh, pending);
++
++ while ((bi=return_bi)) {
++ int bytes = bi->bi_size;
++
++ return_bi = bi->bi_next;
++ bi->bi_next = NULL;
++ bi->bi_size = 0;
++ bi->bi_end_io(bi, bytes,
++ test_bit(BIO_UPTODATE, &bi->bi_flags)
++ ? 0 : -EIO);
+ }
+- /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
+- set_bit(STRIPE_INSYNC, &sh->state);
++}
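handle_stripe5() now only marks work in sh->ops.pending while holding the stripe lock, then hands the latched bits to raid5_run_ops() after unlocking. Each operation moves through three bitmaps, pending, ack, complete, so a later pass can tell queued, issued, and finished work apart. A hedged sketch of the latching step (get_stripe_work() is defined elsewhere in this patch; STRIPE_OP_COUNT is an assumed bound used only for illustration):

	/* Sketch of the pending -> ack handshake performed under
	 * sh->lock just before the unlock above.
	 */
	static unsigned long get_stripe_work_sketch(struct stripe_head *sh)
	{
		unsigned long pending = 0;
		int op;

		for (op = 0; op < STRIPE_OP_COUNT; op++)	/* assumed bound */
			if (test_bit(op, &sh->ops.pending) &&
			    !test_and_set_bit(op, &sh->ops.ack))
				set_bit(op, &pending);	/* hand to raid5_run_ops */
		return pending;
	}
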
+
+- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+- atomic_dec(&conf->preread_active_stripes);
+- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+- md_wakeup_thread(conf->mddev->thread);
++static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
++{
++ raid6_conf_t *conf = sh->raid_conf;
++ int disks = sh->disks;
++ struct bio *return_bi = NULL;
++ struct bio *bi;
++ int i, pd_idx = sh->pd_idx;
++ struct stripe_head_state s;
++ struct r6_state r6s;
++ struct r5dev *dev, *pdev, *qdev;
++
++ r6s.qd_idx = raid6_next_disk(pd_idx, disks);
++ pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
++ "pd_idx=%d, qd_idx=%d\n",
++ (unsigned long long)sh->sector, sh->state,
++ atomic_read(&sh->count), pd_idx, r6s.qd_idx);
++ memset(&s, 0, sizeof(s));
++
++ spin_lock(&sh->lock);
++ clear_bit(STRIPE_HANDLE, &sh->state);
++ clear_bit(STRIPE_DELAYED, &sh->state);
++
++ s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
++ s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
++ s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
++ /* Now to look around and see what can be done */
++
++ rcu_read_lock();
++ for (i=disks; i--; ) {
++ mdk_rdev_t *rdev;
++ dev = &sh->dev[i];
++ clear_bit(R5_Insync, &dev->flags);
++
++ pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
++ i, dev->flags, dev->toread, dev->towrite, dev->written);
++ /* maybe we can reply to a read */
++ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
++ struct bio *rbi, *rbi2;
++ pr_debug("Return read for disc %d\n", i);
++ spin_lock_irq(&conf->device_lock);
++ rbi = dev->toread;
++ dev->toread = NULL;
++ if (test_and_clear_bit(R5_Overlap, &dev->flags))
++ wake_up(&conf->wait_for_overlap);
++ spin_unlock_irq(&conf->device_lock);
++ while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
++ copy_data(0, rbi, dev->page, dev->sector);
++ rbi2 = r5_next_bio(rbi, dev->sector);
++ spin_lock_irq(&conf->device_lock);
++ if (--rbi->bi_phys_segments == 0) {
++ rbi->bi_next = return_bi;
++ return_bi = rbi;
+ }
++ spin_unlock_irq(&conf->device_lock);
++ rbi = rbi2;
+ }
+ }
+
+- /* maybe we need to check and possibly fix the parity for this stripe
+- * Any reads will already have been scheduled, so we just see if enough data
+- * is available
+- */
+- if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+- int update_p = 0, update_q = 0;
+- struct r5dev *dev;
+-
+- set_bit(STRIPE_HANDLE, &sh->state);
++ /* now count some things */
++ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
++ if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+
+- BUG_ON(failed>2);
+- BUG_ON(uptodate < disks);
+- /* Want to check and possibly repair P and Q.
+- * However there could be one 'failed' device, in which
+- * case we can only check one of them, possibly using the
+- * other to generate missing data
+- */
+
+- /* If !tmp_page, we cannot do the calculations,
+- * but as we have set STRIPE_HANDLE, we will soon be called
+- * by stripe_handle with a tmp_page - just wait until then.
+- */
+- if (tmp_page) {
+- if (failed == q_failed) {
+- /* The only possible failed device holds 'Q', so it makes
+- * sense to check P (If anything else were failed, we would
+- * have used P to recreate it).
+- */
+- compute_block_1(sh, pd_idx, 1);
+- if (!page_is_zero(sh->dev[pd_idx].page)) {
+- compute_block_1(sh,pd_idx,0);
+- update_p = 1;
+- }
++ if (dev->toread)
++ s.to_read++;
++ if (dev->towrite) {
++ s.to_write++;
++ if (!test_bit(R5_OVERWRITE, &dev->flags))
++ s.non_overwrite++;
+ }
+- if (!q_failed && failed < 2) {
+- /* q is not failed, and we didn't use it to generate
+- * anything, so it makes sense to check it
+- */
+- memcpy(page_address(tmp_page),
+- page_address(sh->dev[qd_idx].page),
+- STRIPE_SIZE);
+- compute_parity6(sh, UPDATE_PARITY);
+- if (memcmp(page_address(tmp_page),
+- page_address(sh->dev[qd_idx].page),
+- STRIPE_SIZE)!= 0) {
+- clear_bit(STRIPE_INSYNC, &sh->state);
+- update_q = 1;
++ if (dev->written)
++ s.written++;
++ rdev = rcu_dereference(conf->disks[i].rdev);
++ if (!rdev || !test_bit(In_sync, &rdev->flags)) {
++ /* The ReadError flag will just be confusing now */
++ clear_bit(R5_ReadError, &dev->flags);
++ clear_bit(R5_ReWrite, &dev->flags);
+ }
++ if (!rdev || !test_bit(In_sync, &rdev->flags)
++ || test_bit(R5_ReadError, &dev->flags)) {
++ if (s.failed < 2)
++ r6s.failed_num[s.failed] = i;
++ s.failed++;
++ } else
++ set_bit(R5_Insync, &dev->flags);
+ }
+- if (update_p || update_q) {
+- conf->mddev->resync_mismatches += STRIPE_SECTORS;
+- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+- /* don't try to repair!! */
+- update_p = update_q = 0;
++ rcu_read_unlock();
++ pr_debug("locked=%d uptodate=%d to_read=%d"
++ " to_write=%d failed=%d failed_num=%d,%d\n",
++ s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
++ r6s.failed_num[0], r6s.failed_num[1]);
++ /* check if the array has lost >2 devices and, if so, some requests
++ * might need to be failed
++ */
++ if (s.failed > 2 && s.to_read+s.to_write+s.written)
++ handle_requests_to_failed_array(conf, sh, &s, disks,
++ &return_bi);
++ if (s.failed > 2 && s.syncing) {
++ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
++ clear_bit(STRIPE_SYNCING, &sh->state);
++ s.syncing = 0;
+ }
+
+- /* now write out any block on a failed drive,
+- * or P or Q if they need it
++ /*
++ * might be able to return some write requests if the parity blocks
++ * are safe, or on a failed drive
+ */
++ pdev = &sh->dev[pd_idx];
++ r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
++ || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
++ qdev = &sh->dev[r6s.qd_idx];
++ r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
++ || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
+
+- if (failed == 2) {
+- dev = &sh->dev[failed_num[1]];
+- locked++;
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantwrite, &dev->flags);
+- }
+- if (failed >= 1) {
+- dev = &sh->dev[failed_num[0]];
+- locked++;
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantwrite, &dev->flags);
+- }
++ if ( s.written &&
++ ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
++ && !test_bit(R5_LOCKED, &pdev->flags)
++ && test_bit(R5_UPTODATE, &pdev->flags)))) &&
++ ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
++ && !test_bit(R5_LOCKED, &qdev->flags)
++ && test_bit(R5_UPTODATE, &qdev->flags)))))
++ handle_completed_write_requests(conf, sh, disks, &return_bi);
+
+- if (update_p) {
+- dev = &sh->dev[pd_idx];
+- locked ++;
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantwrite, &dev->flags);
+- }
+- if (update_q) {
+- dev = &sh->dev[qd_idx];
+- locked++;
+- set_bit(R5_LOCKED, &dev->flags);
+- set_bit(R5_Wantwrite, &dev->flags);
+- }
+- clear_bit(STRIPE_DEGRADED, &sh->state);
++ /* Now we might consider reading some blocks, either to check/generate
++ * parity, or to satisfy requests
++ * or to load a block that is being partially written.
++ */
++ if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
++ (s.syncing && (s.uptodate < disks)) || s.expanding)
++ handle_issuing_new_read_requests6(sh, &s, &r6s, disks);
+
+- set_bit(STRIPE_INSYNC, &sh->state);
+- }
+- }
++ /* now to consider writing and what else, if anything should be read */
++ if (s.to_write)
++ handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks);
++
++ /* maybe we need to check and possibly fix the parity for this stripe
++ * Any reads will already have been scheduled, so we just see if enough
++ * data is available
++ */
++ if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
++ handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+
+- if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
++ if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+ md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ }
+@@ -2339,9 +3030,9 @@
+ /* If the failed drives are just a ReadError, then we might need
+ * to progress the repair/check process
+ */
+- if (failed <= 2 && ! conf->mddev->ro)
+- for (i=0; i<failed;i++) {
+- dev = &sh->dev[failed_num[i]];
++ if (s.failed <= 2 && !conf->mddev->ro)
++ for (i = 0; i < s.failed; i++) {
++ dev = &sh->dev[r6s.failed_num[i]];
+ if (test_bit(R5_ReadError, &dev->flags)
+ && !test_bit(R5_LOCKED, &dev->flags)
+ && test_bit(R5_UPTODATE, &dev->flags)
+@@ -2358,7 +3049,7 @@
+ }
+ }
+
+- if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
++ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ /* Need to write out all blocks after computing P&Q */
+ sh->disks = conf->raid_disks;
+ sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
+@@ -2366,69 +3057,19 @@
+ compute_parity6(sh, RECONSTRUCT_WRITE);
+ for (i = conf->raid_disks ; i-- ; ) {
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+- locked++;
++ s.locked++;
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ }
+ clear_bit(STRIPE_EXPANDING, &sh->state);
+- } else if (expanded) {
++ } else if (s.expanded) {
+ clear_bit(STRIPE_EXPAND_READY, &sh->state);
+ atomic_dec(&conf->reshape_stripes);
+ wake_up(&conf->wait_for_overlap);
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ }
+
+- if (expanding && locked == 0) {
+- /* We have read all the blocks in this stripe and now we need to
+- * copy some of them into a target stripe for expand.
+- */
+- clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- for (i = 0; i < sh->disks ; i++)
+- if (i != pd_idx && i != qd_idx) {
+- int dd_idx2, pd_idx2, j;
+- struct stripe_head *sh2;
+-
+- sector_t bn = compute_blocknr(sh, i);
+- sector_t s = raid5_compute_sector(
+- bn, conf->raid_disks,
+- conf->raid_disks - conf->max_degraded,
+- &dd_idx2, &pd_idx2, conf);
+- sh2 = get_active_stripe(conf, s,
+- conf->raid_disks,
+- pd_idx2, 1);
+- if (sh2 == NULL)
+-				/* so far only the early blocks of
+-				 * this stripe have been requested.
+-				 * When later blocks get requested, we
+-				 * will try again
+-				 */
+- continue;
+- if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+- test_bit(R5_Expanded,
+- &sh2->dev[dd_idx2].flags)) {
+- /* must have already done this block */
+- release_stripe(sh2);
+- continue;
+- }
+- memcpy(page_address(sh2->dev[dd_idx2].page),
+- page_address(sh->dev[i].page),
+- STRIPE_SIZE);
+- set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags);
+- set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags);
+- for (j = 0 ; j < conf->raid_disks ; j++)
+- if (j != sh2->pd_idx &&
+- j != raid6_next_disk(sh2->pd_idx,
+- sh2->disks) &&
+- !test_bit(R5_Expanded,
+- &sh2->dev[j].flags))
+- break;
+- if (j == conf->raid_disks) {
+- set_bit(STRIPE_EXPAND_READY,
+- &sh2->state);
+- set_bit(STRIPE_HANDLE, &sh2->state);
+- }
+- release_stripe(sh2);
+- }
+- }
++ if (s.expanding && s.locked == 0)
++ handle_stripe_expansion(conf, sh, &r6s);
+
+ spin_unlock(&sh->lock);
+
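Note that only the RAID-5 path is converted to the asynchronous-operations model in this series; handle_stripe6() above still computes parity synchronously via compute_parity6(). The two handlers are presumably selected by a small wrapper keyed on the array level, along these lines (the wrapper itself is outside this excerpt, so treat this as a sketch):

	/* Assumed dispatch between the two handlers; not shown in this hunk */
	static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
	{
		if (sh->raid_conf->level == 6)
			handle_stripe6(sh, tmp_page);	/* still synchronous */
		else
			handle_stripe5(sh);		/* async offload path */
	}
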
+@@ -2470,11 +3111,11 @@
+ rcu_read_unlock();
+
+ if (rdev) {
+- if (syncing || expanding || expanded)
++ if (s.syncing || s.expanding || s.expanded)
+ md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+
+ bi->bi_bdev = rdev->bdev;
+- PRINTK("for %llu schedule op %ld on disc %d\n",
++ pr_debug("for %llu schedule op %ld on disc %d\n",
+ (unsigned long long)sh->sector, bi->bi_rw, i);
+ atomic_inc(&sh->count);
+ bi->bi_sector = sh->sector + rdev->data_offset;
+@@ -2494,7 +3135,7 @@
+ } else {
+ if (rw == WRITE)
+ set_bit(STRIPE_DEGRADED, &sh->state);
+- PRINTK("skip op %ld on disc %d for sector %llu\n",
++ pr_debug("skip op %ld on disc %d for sector %llu\n",
+ bi->bi_rw, i, (unsigned long long)sh->sector);
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
+@@ -2738,7 +3379,7 @@
+ }
+
+
+- PRINTK("raid5_align_endio : io error...handing IO for a retry\n");
++ pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
+
+ add_bio_to_retry(raid_bi, conf);
+ return 0;
+@@ -2776,7 +3417,7 @@
+ mdk_rdev_t *rdev;
+
+ if (!in_chunk_boundary(mddev, raid_bio)) {
+- PRINTK("chunk_aligned_read : non aligned\n");
++ pr_debug("chunk_aligned_read : non aligned\n");
+ return 0;
+ }
+ /*
+@@ -2900,7 +3541,7 @@
+
+ new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
+ &dd_idx, &pd_idx, conf);
+- PRINTK("raid5: make_request, sector %llu logical %llu\n",
++ pr_debug("raid5: make_request, sector %llu logical %llu\n",
+ (unsigned long long)new_sector,
+ (unsigned long long)logical_sector);
+
+@@ -3273,7 +3914,7 @@
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+ int handled;
+
+- PRINTK("+++ raid5d active\n");
++ pr_debug("+++ raid5d active\n");
+
+ md_check_recovery(mddev);
+
+@@ -3308,8 +3949,10 @@
+ handled++;
+ }
+
+- if (list_empty(&conf->handle_list))
++ if (list_empty(&conf->handle_list)) {
++ async_tx_issue_pending_all();
+ break;
++ }
+
+ first = conf->handle_list.next;
+ sh = list_entry(first, struct stripe_head, lru);
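The async_tx_issue_pending_all() call added in the hunk above is the batching half of the offload model: raid5_run_ops() merely queues DMA descriptors, and the queues are only rung once raid5d runs out of stripes, so many stripes' worth of copy/XOR work can share one doorbell. A minimal sketch of that submit-then-flush pattern, assuming the async_tx API this series introduces:

	#include <linux/async_tx.h>

	/* async_xor() only *describes* the work when a dma channel is
	 * available; it does not necessarily start it.
	 */
	static struct dma_async_tx_descriptor *
	queue_one_xor(struct page *dest, struct page **srcs, int cnt)
	{
		return async_xor(dest, srcs, 0, cnt, PAGE_SIZE,
				 ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL);
	}

	static void flush_when_idle(void)
	{
		/* the doorbell: start everything queued on every channel */
		async_tx_issue_pending_all();
	}
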
+@@ -3325,13 +3968,13 @@
+
+ spin_lock_irq(&conf->device_lock);
+ }
+- PRINTK("%d stripes handled\n", handled);
++ pr_debug("%d stripes handled\n", handled);
+
+ spin_unlock_irq(&conf->device_lock);
+
+ unplug_slaves(mddev);
+
+- PRINTK("--- raid5d inactive\n");
++ pr_debug("--- raid5d inactive\n");
+ }
+
+ static ssize_t
+@@ -3507,7 +4150,7 @@
+ atomic_set(&conf->preread_active_stripes, 0);
+ atomic_set(&conf->active_aligned_reads, 0);
+
+- PRINTK("raid5: run(%s) called.\n", mdname(mddev));
++ pr_debug("raid5: run(%s) called.\n", mdname(mddev));
+
+ ITERATE_RDEV(mddev,rdev,tmp) {
+ raid_disk = rdev->raid_disk;
+@@ -3690,7 +4333,7 @@
+ return 0;
+ }
+
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
+ {
+ int i;
+@@ -3737,7 +4380,7 @@
+ conf->disks[i].rdev &&
+ test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
+ seq_printf (seq, "]");
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ seq_printf (seq, "\n");
+ printall(seq, conf);
+ #endif
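Replacing the driver-private RAID5_DEBUG/PRINTK pair with #ifdef DEBUG and pr_debug() aligns the file with the kernel-wide convention: built with -DDEBUG the calls expand to printk(KERN_DEBUG ...), otherwise they compile away. Simplified form of the <linux/kernel.h> definition being relied on:

	#ifdef DEBUG
	#define pr_debug(fmt, arg...) \
		printk(KERN_DEBUG fmt, ##arg)
	#else
	#define pr_debug(fmt, arg...) \
		do { } while (0)
	#endif
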
+diff -Nurb linux-2.6.22-570/drivers/md/xor.c linux-2.6.22-591/drivers/md/xor.c
+--- linux-2.6.22-570/drivers/md/xor.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/xor.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,154 +0,0 @@
+-/*
+- * xor.c : Multiple Devices driver for Linux
+- *
+- * Copyright (C) 1996, 1997, 1998, 1999, 2000,
+- * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
+- *
+- * Dispatch optimized RAID-5 checksumming functions.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2, or (at your option)
+- * any later version.
+- *
+- * You should have received a copy of the GNU General Public License
+- * (for example /usr/src/linux/COPYING); if not, write to the Free
+- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#define BH_TRACE 0
+-#include <linux/module.h>
+-#include <linux/raid/md.h>
+-#include <linux/raid/xor.h>
+-#include <asm/xor.h>
+-
+-/* The xor routines to use. */
+-static struct xor_block_template *active_template;
+-
+-void
+-xor_block(unsigned int count, unsigned int bytes, void **ptr)
+-{
+- unsigned long *p0, *p1, *p2, *p3, *p4;
+-
+- p0 = (unsigned long *) ptr[0];
+- p1 = (unsigned long *) ptr[1];
+- if (count == 2) {
+- active_template->do_2(bytes, p0, p1);
+- return;
+- }
+-
+- p2 = (unsigned long *) ptr[2];
+- if (count == 3) {
+- active_template->do_3(bytes, p0, p1, p2);
+- return;
+- }
+-
+- p3 = (unsigned long *) ptr[3];
+- if (count == 4) {
+- active_template->do_4(bytes, p0, p1, p2, p3);
+- return;
+- }
+-
+- p4 = (unsigned long *) ptr[4];
+- active_template->do_5(bytes, p0, p1, p2, p3, p4);
+-}
+-
+-/* Set of all registered templates. */
+-static struct xor_block_template *template_list;
+-
+-#define BENCH_SIZE (PAGE_SIZE)
+-
+-static void
+-do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
+-{
+- int speed;
+- unsigned long now;
+- int i, count, max;
+-
+- tmpl->next = template_list;
+- template_list = tmpl;
+-
+- /*
+- * Count the number of XORs done during a whole jiffy, and use
+- * this to calculate the speed of checksumming. We use a 2-page
+- * allocation to have guaranteed color L1-cache layout.
+- */
+- max = 0;
+- for (i = 0; i < 5; i++) {
+- now = jiffies;
+- count = 0;
+- while (jiffies == now) {
+- mb();
+- tmpl->do_2(BENCH_SIZE, b1, b2);
+- mb();
+- count++;
+- mb();
+- }
+- if (count > max)
+- max = count;
+- }
+-
+- speed = max * (HZ * BENCH_SIZE / 1024);
+- tmpl->speed = speed;
+-
+- printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name,
+- speed / 1000, speed % 1000);
+-}
+-
+-static int
+-calibrate_xor_block(void)
+-{
+- void *b1, *b2;
+- struct xor_block_template *f, *fastest;
+-
+- b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
+- if (! b1) {
+- printk("raid5: Yikes! No memory available.\n");
+- return -ENOMEM;
+- }
+- b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
+-
+- /*
+- * If this arch/cpu has a short-circuited selection, don't loop through all
+- * the possible functions, just test the best one
+- */
+-
+- fastest = NULL;
+-
+-#ifdef XOR_SELECT_TEMPLATE
+- fastest = XOR_SELECT_TEMPLATE(fastest);
+-#endif
+-
+-#define xor_speed(templ) do_xor_speed((templ), b1, b2)
+-
+- if (fastest) {
+- printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n",
+- fastest->name);
+- xor_speed(fastest);
+- } else {
+- printk(KERN_INFO "raid5: measuring checksumming speed\n");
+- XOR_TRY_TEMPLATES;
+- fastest = template_list;
+- for (f = fastest; f; f = f->next)
+- if (f->speed > fastest->speed)
+- fastest = f;
+- }
+-
+- printk("raid5: using function: %s (%d.%03d MB/sec)\n",
+- fastest->name, fastest->speed / 1000, fastest->speed % 1000);
+-
+-#undef xor_speed
+-
+- free_pages((unsigned long)b1, 2);
+-
+- active_template = fastest;
+- return 0;
+-}
+-
+-static __exit void xor_exit(void) { }
+-
+-EXPORT_SYMBOL(xor_block);
+-MODULE_LICENSE("GPL");
+-
+-module_init(calibrate_xor_block);
+-module_exit(xor_exit);
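drivers/md/xor.c is deleted outright here; in this series the calibration and dispatch code apparently moves under crypto/ so the async_tx layer can reuse it, with the entry point renamed to take an explicit destination buffer. A caller that passed the destination as ptr[0] would migrate roughly as follows (a sketch, assuming the xor_blocks() export from crypto/xor.c):

	/* Old md-private API (removed above): destination was ptr[0] */
	xor_block(count, bytes, ptr);

	/* New shared API: source count, then an explicit destination */
	xor_blocks(count - 1, bytes, ptr[0], &ptr[1]);
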
+diff -Nurb linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c
+--- linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-12-21 15:36:12.000000000 -0500
+@@ -523,6 +523,7 @@
+
+ dvb_frontend_init(fe);
+
++ set_freezable();
+ while (1) {
+ up(&fepriv->sem); /* is locked when we enter the thread... */
+ restart:
+diff -Nurb linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c
+--- linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c 2007-12-21 15:36:12.000000000 -0500
+@@ -906,6 +906,7 @@
+ u32 mode = 0;
+
+ dprintk("cx88: tvaudio thread started\n");
++ set_freezable();
+ for (;;) {
+ msleep_interruptible(1000);
+ if (kthread_should_stop())
+diff -Nurb linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c
+--- linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c 2007-12-21 15:36:12.000000000 -0500
+@@ -23,6 +23,7 @@
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/i2c.h>
++#include <linux/freezer.h>
+ #include <linux/videodev.h>
+ #include <linux/videodev2.h>
+ #include <media/v4l2-common.h>
+@@ -468,6 +469,7 @@
+
+
+ v4l_dbg(1, msp_debug, client, "msp3400 daemon started\n");
++ set_freezable();
+ for (;;) {
+ v4l_dbg(2, msp_debug, client, "msp3400 thread: sleep\n");
+ msp_sleep(state, -1);
+@@ -646,7 +648,7 @@
+ int val, i, std, count;
+
+ v4l_dbg(1, msp_debug, client, "msp3410 daemon started\n");
+-
++ set_freezable();
+ for (;;) {
+ v4l_dbg(2, msp_debug, client, "msp3410 thread: sleep\n");
+ msp_sleep(state,-1);
+@@ -940,7 +942,7 @@
+ int val, i;
+
+ v4l_dbg(1, msp_debug, client, "msp34xxg daemon started\n");
+-
++ set_freezable();
+ for (;;) {
+ v4l_dbg(2, msp_debug, client, "msp34xxg thread: sleep\n");
+ msp_sleep(state, -1);
+diff -Nurb linux-2.6.22-570/drivers/media/video/tvaudio.c linux-2.6.22-591/drivers/media/video/tvaudio.c
+--- linux-2.6.22-570/drivers/media/video/tvaudio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/video/tvaudio.c 2007-12-21 15:36:12.000000000 -0500
+@@ -271,7 +271,7 @@
+ struct CHIPDESC *desc = chiplist + chip->type;
+
+ v4l_dbg(1, debug, &chip->c, "%s: thread started\n", chip->c.name);
+-
++ set_freezable();
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!kthread_should_stop())
+diff -Nurb linux-2.6.22-570/drivers/media/video/video-buf-dvb.c linux-2.6.22-591/drivers/media/video/video-buf-dvb.c
+--- linux-2.6.22-570/drivers/media/video/video-buf-dvb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/video/video-buf-dvb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -47,6 +47,7 @@
+ int err;
+
+ dprintk("dvb thread started\n");
++ set_freezable();
+ videobuf_read_start(&dvb->dvbq);
+
+ for (;;) {
+diff -Nurb linux-2.6.22-570/drivers/media/video/vivi.c linux-2.6.22-591/drivers/media/video/vivi.c
+--- linux-2.6.22-570/drivers/media/video/vivi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/media/video/vivi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -573,6 +573,7 @@
+ dprintk(1,"thread started\n");
+
+ mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
++ set_freezable();
+
+ for (;;) {
+ vivi_sleep(dma_q);
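The set_freezable() calls added across the media threads above are all the same fix: kernel threads are non-freezable by default in this tree, so any long-running kthread must opt in or it will block suspend. The canonical loop these hunks converge on (example_thread and its work item are illustrative, not from the patch):

	#include <linux/kthread.h>
	#include <linux/freezer.h>

	static int example_thread(void *data)
	{
		set_freezable();		/* opt in to the freezer */
		while (!kthread_should_stop()) {
			try_to_freeze();	/* park here across suspend */
			/* ... do one unit of work, then sleep ... */
		}
		return 0;
	}
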
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/linux_compat.h linux-2.6.22-591/drivers/message/fusion/linux_compat.h
+--- linux-2.6.22-570/drivers/message/fusion/linux_compat.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/linux_compat.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,9 +0,0 @@
+-/* drivers/message/fusion/linux_compat.h */
+-
+-#ifndef FUSION_LINUX_COMPAT_H
+-#define FUSION_LINUX_COMPAT_H
+-
+-#include <linux/version.h>
+-#include <scsi/scsi_device.h>
+-
+-#endif /* _LINUX_COMPAT_H */
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- * Copyright (c) 2000-2006 LSI Logic Corporation.
++ * Copyright (c) 2000-2007 LSI Logic Corporation.
+ *
+ *
+ * Name: mpi.h
+ * Title: MPI Message independent structures and definitions
+ * Creation Date: July 27, 2000
+ *
+- * mpi.h Version: 01.05.12
++ * mpi.h Version: 01.05.13
+ *
+ * Version History
+ * ---------------
+@@ -78,6 +78,7 @@
+ * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target.
+ * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT.
+ * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT.
++ * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT.
+ * --------------------------------------------------------------------------
+ */
+
+@@ -108,7 +109,7 @@
+ /* Note: The major versions of 0xe0 through 0xff are reserved */
+
+ /* versioning for this MPI header set */
+-#define MPI_HEADER_VERSION_UNIT (0x0E)
++#define MPI_HEADER_VERSION_UNIT (0x10)
+ #define MPI_HEADER_VERSION_DEV (0x00)
+ #define MPI_HEADER_VERSION_UNIT_MASK (0xFF00)
+ #define MPI_HEADER_VERSION_UNIT_SHIFT (8)
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- * Copyright (c) 2000-2006 LSI Logic Corporation.
++ * Copyright (c) 2000-2007 LSI Logic Corporation.
+ *
+ *
+ * Name: mpi_cnfg.h
+ * Title: MPI Config message, structures, and Pages
+ * Creation Date: July 27, 2000
+ *
+- * mpi_cnfg.h Version: 01.05.13
++ * mpi_cnfg.h Version: 01.05.15
+ *
+ * Version History
+ * ---------------
+@@ -293,6 +293,21 @@
+ * Added more AccessStatus values for SAS Device Page 0.
+ * Added bit for SATA Asynchronous Notification Support in
+ * Flags field of SAS Device Page 0.
++ * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4.
++ * Added Disable SMART Polling for CapabilitiesFlags of
++ * IOC Page 6.
++ * Added Disable SMART Polling to DeviceSettings of BIOS
++ * Page 1.
++ * Added Multi-Port Domain bit for DiscoveryStatus field
++ * of SAS IO Unit Page.
++ * Added Multi-Port Domain Illegal flag for SAS IO Unit
++ * Page 1 AdditionalControlFlags field.
++ * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID
++ * Metadata bit to Manufacturing Page 4 ExtFlags field.
++ * Added Internal Connector to End Device Present bit to
++ * Expander Page 0 Flags field.
++ * Fixed define for
++ * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED.
+ * --------------------------------------------------------------------------
+ */
+
+@@ -639,7 +654,7 @@
+ U8 InfoSize1; /* 0Bh */
+ U8 InquirySize; /* 0Ch */
+ U8 Flags; /* 0Dh */
+- U16 Reserved2; /* 0Eh */
++ U16 ExtFlags; /* 0Eh */
+ U8 InquiryData[56]; /* 10h */
+ U32 ISVolumeSettings; /* 48h */
+ U32 IMEVolumeSettings; /* 4Ch */
+@@ -658,7 +673,7 @@
+ } CONFIG_PAGE_MANUFACTURING_4, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_4,
+ ManufacturingPage4_t, MPI_POINTER pManufacturingPage4_t;
+
+-#define MPI_MANUFACTURING4_PAGEVERSION (0x04)
++#define MPI_MANUFACTURING4_PAGEVERSION (0x05)
+
+ /* defines for the Flags field */
+ #define MPI_MANPAGE4_FORCE_BAD_BLOCK_TABLE (0x80)
+@@ -670,6 +685,12 @@
+ #define MPI_MANPAGE4_IM_RESYNC_CACHE_ENABLE (0x02)
+ #define MPI_MANPAGE4_IR_NO_MIX_SAS_SATA (0x01)
+
++/* defines for the ExtFlags field */
++#define MPI_MANPAGE4_EXTFLAGS_HIDE_NON_IR_METADATA (0x0008)
++#define MPI_MANPAGE4_EXTFLAGS_SAS_CACHE_DISABLE (0x0004)
++#define MPI_MANPAGE4_EXTFLAGS_SATA_CACHE_DISABLE (0x0002)
++#define MPI_MANPAGE4_EXTFLAGS_LEGACY_MODE (0x0001)
++
+
+ #ifndef MPI_MANPAGE5_NUM_FORCEWWID
+ #define MPI_MANPAGE5_NUM_FORCEWWID (1)
+@@ -781,7 +802,7 @@
+ } CONFIG_PAGE_MANUFACTURING_9, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_9,
+ ManufacturingPage9_t, MPI_POINTER pManufacturingPage9_t;
+
+-#define MPI_MANUFACTURING6_PAGEVERSION (0x00)
++#define MPI_MANUFACTURING9_PAGEVERSION (0x00)
+
+
+ typedef struct _CONFIG_PAGE_MANUFACTURING_10
+@@ -1138,6 +1159,8 @@
+
+ /* IOC Page 6 Capabilities Flags */
+
++#define MPI_IOCPAGE6_CAP_FLAGS_DISABLE_SMART_POLLING (0x00000008)
++
+ #define MPI_IOCPAGE6_CAP_FLAGS_MASK_METADATA_SIZE (0x00000006)
+ #define MPI_IOCPAGE6_CAP_FLAGS_64MB_METADATA_SIZE (0x00000000)
+ #define MPI_IOCPAGE6_CAP_FLAGS_512MB_METADATA_SIZE (0x00000002)
+@@ -1208,6 +1231,7 @@
+ #define MPI_BIOSPAGE1_IOCSET_ALTERNATE_CHS (0x00000008)
+
+ /* values for the DeviceSettings field */
++#define MPI_BIOSPAGE1_DEVSET_DISABLE_SMART_POLLING (0x00000010)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_SEQ_LUN (0x00000008)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_RM_LUN (0x00000004)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_NON_RM_LUN (0x00000002)
+@@ -2281,11 +2305,11 @@
+ typedef struct _CONFIG_PAGE_RAID_VOL_1
+ {
+ CONFIG_PAGE_HEADER Header; /* 00h */
+- U8 VolumeID; /* 01h */
+- U8 VolumeBus; /* 02h */
+- U8 VolumeIOC; /* 03h */
+- U8 Reserved0; /* 04h */
+- U8 GUID[24]; /* 05h */
++ U8 VolumeID; /* 04h */
++ U8 VolumeBus; /* 05h */
++ U8 VolumeIOC; /* 06h */
++ U8 Reserved0; /* 07h */
++ U8 GUID[24]; /* 08h */
+ U8 Name[32]; /* 20h */
+ U64 WWID; /* 40h */
+ U32 Reserved1; /* 48h */
+@@ -2340,7 +2364,7 @@
+ } RAID_PHYS_DISK0_STATUS, MPI_POINTER PTR_RAID_PHYS_DISK0_STATUS,
+ RaidPhysDiskStatus_t, MPI_POINTER pRaidPhysDiskStatus_t;
+
+-/* RAID Volume 2 IM Physical Disk DiskStatus flags */
++/* RAID Physical Disk PhysDiskStatus flags */
+
+ #define MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC (0x01)
+ #define MPI_PHYSDISK0_STATUS_FLAG_QUIESCED (0x02)
+@@ -2544,6 +2568,7 @@
+ #define MPI_SAS_IOUNIT0_DS_TABLE_LINK (0x00000400)
+ #define MPI_SAS_IOUNIT0_DS_UNSUPPORTED_DEVICE (0x00000800)
+ #define MPI_SAS_IOUNIT0_DS_MAX_SATA_TARGETS (0x00001000)
++#define MPI_SAS_IOUNIT0_DS_MULTI_PORT_DOMAIN (0x00002000)
+
+
+ typedef struct _MPI_SAS_IO_UNIT1_PHY_DATA
+@@ -2607,6 +2632,7 @@
+ #define MPI_SAS_IOUNIT1_CONTROL_CLEAR_AFFILIATION (0x0001)
+
+ /* values for SAS IO Unit Page 1 AdditionalControlFlags */
++#define MPI_SAS_IOUNIT1_ACONTROL_MULTI_PORT_DOMAIN_ILLEGAL (0x0080)
+ #define MPI_SAS_IOUNIT1_ACONTROL_SATA_ASYNCHROUNOUS_NOTIFICATION (0x0040)
+ #define MPI_SAS_IOUNIT1_ACONTROL_HIDE_NONZERO_ATTACHED_PHY_IDENT (0x0020)
+ #define MPI_SAS_IOUNIT1_ACONTROL_PORT_ENABLE_ONLY_SATA_LINK_RESET (0x0010)
+@@ -2734,6 +2760,7 @@
+ #define MPI_SAS_EXPANDER0_DS_UNSUPPORTED_DEVICE (0x00000800)
+
+ /* values for SAS Expander Page 0 Flags field */
++#define MPI_SAS_EXPANDER0_FLAGS_CONNECTOR_END_DEVICE (0x04)
+ #define MPI_SAS_EXPANDER0_FLAGS_ROUTE_TABLE_CONFIG (0x02)
+ #define MPI_SAS_EXPANDER0_FLAGS_CONFIG_IN_PROGRESS (0x01)
+
+@@ -2774,7 +2801,7 @@
+ /* see mpi_sas.h for values for SAS Expander Page 1 AttachedDeviceInfo values */
+
+ /* values for SAS Expander Page 1 DiscoveryInfo field */
+-#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY DISABLED (0x04)
++#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED (0x04)
+ #define MPI_SAS_EXPANDER1_DISCINFO_LINK_STATUS_CHANGE (0x02)
+ #define MPI_SAS_EXPANDER1_DISCINFO_NO_ROUTING_ENTRIES (0x01)
+
+@@ -2895,11 +2922,11 @@
+ U8 AttachedPhyIdentifier; /* 16h */
+ U8 Reserved2; /* 17h */
+ U32 AttachedDeviceInfo; /* 18h */
+- U8 ProgrammedLinkRate; /* 20h */
+- U8 HwLinkRate; /* 21h */
+- U8 ChangeCount; /* 22h */
+- U8 Flags; /* 23h */
+- U32 PhyInfo; /* 24h */
++ U8 ProgrammedLinkRate; /* 1Ch */
++ U8 HwLinkRate; /* 1Dh */
++ U8 ChangeCount; /* 1Eh */
++ U8 Flags; /* 1Fh */
++ U32 PhyInfo; /* 20h */
+ } CONFIG_PAGE_SAS_PHY_0, MPI_POINTER PTR_CONFIG_PAGE_SAS_PHY_0,
+ SasPhyPage0_t, MPI_POINTER pSasPhyPage0_t;
+
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt 2007-12-21 15:36:12.000000000 -0500
+@@ -3,28 +3,28 @@
+ MPI Header File Change History
+ ==============================
+
+- Copyright (c) 2000-2006 LSI Logic Corporation.
++ Copyright (c) 2000-2007 LSI Logic Corporation.
+
+ ---------------------------------------
+- Header Set Release Version: 01.05.14
+- Header Set Release Date: 10-11-06
++ Header Set Release Version: 01.05.16
++ Header Set Release Date: 05-24-07
+ ---------------------------------------
+
+ Filename Current version Prior version
+ ---------- --------------- -------------
+- mpi.h 01.05.12 01.05.11
+- mpi_ioc.h 01.05.12 01.05.11
+- mpi_cnfg.h 01.05.13 01.05.12
+- mpi_init.h 01.05.08 01.05.07
++ mpi.h 01.05.13 01.05.12
++ mpi_ioc.h 01.05.14 01.05.13
++ mpi_cnfg.h 01.05.15 01.05.14
++ mpi_init.h 01.05.09 01.05.09
+ mpi_targ.h 01.05.06 01.05.06
+ mpi_fc.h 01.05.01 01.05.01
+ mpi_lan.h 01.05.01 01.05.01
+- mpi_raid.h 01.05.02 01.05.02
++ mpi_raid.h 01.05.03 01.05.03
+ mpi_tool.h 01.05.03 01.05.03
+ mpi_inb.h 01.05.01 01.05.01
+- mpi_sas.h 01.05.04 01.05.03
++ mpi_sas.h 01.05.04 01.05.04
+ mpi_type.h 01.05.02 01.05.02
+- mpi_history.txt 01.05.14 01.05.13
++ mpi_history.txt 01.05.14 01.05.14
+
+
+ * Date Version Description
+@@ -95,6 +95,7 @@
+ * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target.
+ * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT.
+ * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT.
++ * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT.
+ * --------------------------------------------------------------------------
+
+ mpi_ioc.h
+@@ -191,6 +192,13 @@
+ * data structure.
+ * Added new ImageType values for FWDownload and FWUpload
+ * requests.
++ * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS
++ * Broadcast Event Data (replacing _RESERVED2).
++ * For Discovery Error Event Data DiscoveryStatus field,
++ * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and
++ * added _MULTI_PORT_DOMAIN.
++ * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request.
++ * Added Common Boot Block type to FWUpload Request.
+ * --------------------------------------------------------------------------
+
+ mpi_cnfg.h
+@@ -473,6 +481,21 @@
+ * Added more AccessStatus values for SAS Device Page 0.
+ * Added bit for SATA Asynchronous Notification Support in
+ * Flags field of SAS Device Page 0.
++ * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4.
++ * Added Disable SMART Polling for CapabilitiesFlags of
++ * IOC Page 6.
++ * Added Disable SMART Polling to DeviceSettings of BIOS
++ * Page 1.
++ * Added Multi-Port Domain bit for DiscoveryStatus field
++ * of SAS IO Unit Page.
++ * Added Multi-Port Domain Illegal flag for SAS IO Unit
++ * Page 1 AdditionalControlFlags field.
++ * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID
++ * Metadata bit to Manufacturing Page 4 ExtFlags field.
++ * Added Internal Connector to End Device Present bit to
++ * Expander Page 0 Flags field.
++ * Fixed define for
++ * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED.
+ * --------------------------------------------------------------------------
+
+ mpi_init.h
+@@ -517,6 +540,8 @@
+ * unique in the first 32 characters.
+ * 03-27-06 01.05.07 Added Task Management type of Clear ACA.
+ * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA.
++ * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management
++ * Request: Do Not Send Task IU and Soft Reset Option.
+ * --------------------------------------------------------------------------
+
+ mpi_targ.h
+@@ -571,7 +596,7 @@
+ * 11-02-00 01.01.01 Original release for post 1.0 work
+ * 12-04-00 01.01.02 Added messages for Common Transport Send and
+ * Primitive Send.
+ *    01-09-01  01.01.03  Modified some of the new flags to have an MPI prefix
+ * and modified the FcPrimitiveSend flags.
+ * 01-25-01 01.01.04 Move InitiatorIndex in LinkServiceRsp reply to a larger
+ * field.
+@@ -634,6 +659,8 @@
+ * 08-19-04 01.05.01 Original release for MPI v1.5.
+ * 01-15-05 01.05.02 Added defines for the two new RAID Actions for
+ * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE.
++ * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and
++ * associated defines.
+ * --------------------------------------------------------------------------
+
+ mpi_tool.h
+@@ -682,7 +709,22 @@
+
+ mpi_history.txt Parts list history
+
+-Filename 01.05.13 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09
++Filename 01.05.15 01.05.15
++---------- -------- --------
++mpi.h 01.05.12 01.05.13
++mpi_ioc.h 01.05.13 01.05.14
++mpi_cnfg.h 01.05.14 01.05.15
++mpi_init.h 01.05.09 01.05.09
++mpi_targ.h 01.05.06 01.05.06
++mpi_fc.h 01.05.01 01.05.01
++mpi_lan.h 01.05.01 01.05.01
++mpi_raid.h 01.05.03 01.05.03
++mpi_tool.h 01.05.03 01.05.03
++mpi_inb.h 01.05.01 01.05.01
++mpi_sas.h 01.05.04 01.05.04
++mpi_type.h 01.05.02 01.05.02
++
++Filename 01.05.14 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09
+ ---------- -------- -------- -------- -------- -------- --------
+ mpi.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.08 01.05.07
+ mpi_ioc.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.09 01.05.08
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,221 +0,0 @@
+-/*
+- * Copyright (c) 2003-2004 LSI Logic Corporation.
+- *
+- *
+- * Name: mpi_inb.h
+- * Title: MPI Inband structures and definitions
+- * Creation Date: September 30, 2003
+- *
+- * mpi_inb.h Version: 01.05.01
+- *
+- * Version History
+- * ---------------
+- *
+- * Date Version Description
+- * -------- -------- ------------------------------------------------------
+- * 05-11-04 01.03.01 Original release.
+- * 08-19-04 01.05.01 Original release for MPI v1.5.
+- * --------------------------------------------------------------------------
+- */
+-
+-#ifndef MPI_INB_H
+-#define MPI_INB_H
+-
+-/******************************************************************************
+-*
+-* I n b a n d M e s s a g e s
+-*
+-*******************************************************************************/
+-
+-
+-/****************************************************************************/
+-/* Inband Buffer Post Request */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_BUFFER_POST_REQUEST
+-{
+- U8 Reserved1; /* 00h */
+- U8 BufferCount; /* 01h */
+- U8 ChainOffset; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U32 Reserved4; /* 0Ch */
+- SGE_TRANS_SIMPLE_UNION SGL; /* 10h */
+-} MSG_INBAND_BUFFER_POST_REQUEST, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REQUEST,
+- MpiInbandBufferPostRequest_t , MPI_POINTER pMpiInbandBufferPostRequest_t;
+-
+-
+-typedef struct _WWN_FC_FORMAT
+-{
+- U64 NodeName; /* 00h */
+- U64 PortName; /* 08h */
+-} WWN_FC_FORMAT, MPI_POINTER PTR_WWN_FC_FORMAT,
+- WwnFcFormat_t, MPI_POINTER pWwnFcFormat_t;
+-
+-typedef struct _WWN_SAS_FORMAT
+-{
+- U64 WorldWideID; /* 00h */
+- U32 Reserved1; /* 08h */
+- U32 Reserved2; /* 0Ch */
+-} WWN_SAS_FORMAT, MPI_POINTER PTR_WWN_SAS_FORMAT,
+- WwnSasFormat_t, MPI_POINTER pWwnSasFormat_t;
+-
+-typedef union _WWN_INBAND_FORMAT
+-{
+- WWN_FC_FORMAT Fc;
+- WWN_SAS_FORMAT Sas;
+-} WWN_INBAND_FORMAT, MPI_POINTER PTR_WWN_INBAND_FORMAT,
+- WwnInbandFormat, MPI_POINTER pWwnInbandFormat;
+-
+-
+-/* Inband Buffer Post reply message */
+-
+-typedef struct _MSG_INBAND_BUFFER_POST_REPLY
+-{
+- U16 Reserved1; /* 00h */
+- U8 MsgLength; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U16 Reserved4; /* 0Ch */
+- U16 IOCStatus; /* 0Eh */
+- U32 IOCLogInfo; /* 10h */
+- U32 TransferLength; /* 14h */
+- U32 TransactionContext; /* 18h */
+- WWN_INBAND_FORMAT Wwn; /* 1Ch */
+- U32 IOCIdentifier[4]; /* 2Ch */
+-} MSG_INBAND_BUFFER_POST_REPLY, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REPLY,
+- MpiInbandBufferPostReply_t, MPI_POINTER pMpiInbandBufferPostReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Send Request */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_SEND_REQUEST
+-{
+- U16 Reserved1; /* 00h */
+- U8 ChainOffset; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U32 Reserved4; /* 0Ch */
+- WWN_INBAND_FORMAT Wwn; /* 10h */
+- U32 Reserved5; /* 20h */
+- SGE_IO_UNION SGL; /* 24h */
+-} MSG_INBAND_SEND_REQUEST, MPI_POINTER PTR_MSG_INBAND_SEND_REQUEST,
+- MpiInbandSendRequest_t , MPI_POINTER pMpiInbandSendRequest_t;
+-
+-
+-/* Inband Send reply message */
+-
+-typedef struct _MSG_INBAND_SEND_REPLY
+-{
+- U16 Reserved1; /* 00h */
+- U8 MsgLength; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U16 Reserved4; /* 0Ch */
+- U16 IOCStatus; /* 0Eh */
+- U32 IOCLogInfo; /* 10h */
+- U32 ResponseLength; /* 14h */
+-} MSG_INBAND_SEND_REPLY, MPI_POINTER PTR_MSG_INBAND_SEND_REPLY,
+- MpiInbandSendReply_t, MPI_POINTER pMpiInbandSendReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Response Request */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_RSP_REQUEST
+-{
+- U16 Reserved1; /* 00h */
+- U8 ChainOffset; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U32 Reserved4; /* 0Ch */
+- WWN_INBAND_FORMAT Wwn; /* 10h */
+- U32 IOCIdentifier[4]; /* 20h */
+- U32 ResponseLength; /* 30h */
+- SGE_IO_UNION SGL; /* 34h */
+-} MSG_INBAND_RSP_REQUEST, MPI_POINTER PTR_MSG_INBAND_RSP_REQUEST,
+- MpiInbandRspRequest_t , MPI_POINTER pMpiInbandRspRequest_t;
+-
+-
+-/* Inband Response reply message */
+-
+-typedef struct _MSG_INBAND_RSP_REPLY
+-{
+- U16 Reserved1; /* 00h */
+- U8 MsgLength; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U16 Reserved4; /* 0Ch */
+- U16 IOCStatus; /* 0Eh */
+- U32 IOCLogInfo; /* 10h */
+-} MSG_INBAND_RSP_REPLY, MPI_POINTER PTR_MSG_INBAND_RSP_REPLY,
+- MpiInbandRspReply_t, MPI_POINTER pMpiInbandRspReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Abort Request */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_ABORT_REQUEST
+-{
+- U8 Reserved1; /* 00h */
+- U8 AbortType; /* 01h */
+- U8 ChainOffset; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U32 Reserved4; /* 0Ch */
+- U32 ContextToAbort; /* 10h */
+-} MSG_INBAND_ABORT_REQUEST, MPI_POINTER PTR_MSG_INBAND_ABORT_REQUEST,
+- MpiInbandAbortRequest_t , MPI_POINTER pMpiInbandAbortRequest_t;
+-
+-#define MPI_INBAND_ABORT_TYPE_ALL_BUFFERS (0x00)
+-#define MPI_INBAND_ABORT_TYPE_EXACT_BUFFER (0x01)
+-#define MPI_INBAND_ABORT_TYPE_SEND_REQUEST (0x02)
+-#define MPI_INBAND_ABORT_TYPE_RESPONSE_REQUEST (0x03)
+-
+-
+-/* Inband Abort reply message */
+-
+-typedef struct _MSG_INBAND_ABORT_REPLY
+-{
+- U8 Reserved1; /* 00h */
+- U8 AbortType; /* 01h */
+- U8 MsgLength; /* 02h */
+- U8 Function; /* 03h */
+- U16 Reserved2; /* 04h */
+- U8 Reserved3; /* 06h */
+- U8 MsgFlags; /* 07h */
+- U32 MsgContext; /* 08h */
+- U16 Reserved4; /* 0Ch */
+- U16 IOCStatus; /* 0Eh */
+- U32 IOCLogInfo; /* 10h */
+-} MSG_INBAND_ABORT_REPLY, MPI_POINTER PTR_MSG_INBAND_ABORT_REPLY,
+- MpiInbandAbortReply_t, MPI_POINTER pMpiInbandAbortReply_t;
+-
+-
+-#endif
+-
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- * Copyright (c) 2000-2006 LSI Logic Corporation.
++ * Copyright (c) 2000-2007 LSI Logic Corporation.
+ *
+ *
+ * Name: mpi_init.h
+ * Title: MPI initiator mode messages and structures
+ * Creation Date: June 8, 2000
+ *
+- * mpi_init.h Version: 01.05.08
++ * mpi_init.h Version: 01.05.09
+ *
+ * Version History
+ * ---------------
+@@ -54,6 +54,8 @@
+ * unique in the first 32 characters.
+ * 03-27-06 01.05.07 Added Task Management type of Clear ACA.
+ * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA.
++ * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management
++ * Request: Do Not Send Task IU and Soft Reset Option.
+ * --------------------------------------------------------------------------
+ */
+
+@@ -432,10 +434,14 @@
+ #define MPI_SCSITASKMGMT_TASKTYPE_CLR_ACA (0x08)
+
+ /* MsgFlags bits */
++#define MPI_SCSITASKMGMT_MSGFLAGS_DO_NOT_SEND_TASK_IU (0x01)
++
+ #define MPI_SCSITASKMGMT_MSGFLAGS_TARGET_RESET_OPTION (0x00)
+ #define MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION (0x02)
+ #define MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION (0x04)
+
++#define MPI_SCSITASKMGMT_MSGFLAGS_SOFT_RESET_OPTION (0x08)
++
+ /* SCSI Task Management Reply */
+ typedef struct _MSG_SCSI_TASK_MGMT_REPLY
+ {
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- * Copyright (c) 2000-2006 LSI Logic Corporation.
++ * Copyright (c) 2000-2007 LSI Logic Corporation.
+ *
+ *
+ * Name: mpi_ioc.h
+ * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages
+ * Creation Date: August 11, 2000
+ *
+- * mpi_ioc.h Version: 01.05.12
++ * mpi_ioc.h Version: 01.05.14
+ *
+ * Version History
+ * ---------------
+@@ -106,6 +106,13 @@
+ * data structure.
+ * Added new ImageType values for FWDownload and FWUpload
+ * requests.
++ * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS
++ * Broadcast Event Data (replacing _RESERVED2).
++ * For Discovery Error Event Data DiscoveryStatus field,
++ * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and
++ * added _MULTI_PORT_DOMAIN.
++ * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request.
++ * Added Common Boot Block type to FWUpload Request.
+ * --------------------------------------------------------------------------
+ */
+
+@@ -792,7 +799,7 @@
+
+ #define MPI_EVENT_PRIMITIVE_CHANGE (0x01)
+ #define MPI_EVENT_PRIMITIVE_EXPANDER (0x03)
+-#define MPI_EVENT_PRIMITIVE_RESERVED2 (0x04)
++#define MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT (0x04)
+ #define MPI_EVENT_PRIMITIVE_RESERVED3 (0x05)
+ #define MPI_EVENT_PRIMITIVE_RESERVED4 (0x06)
+ #define MPI_EVENT_PRIMITIVE_CHANGE0_RESERVED (0x07)
+@@ -857,8 +864,9 @@
+ #define MPI_EVENT_DSCVRY_ERR_DS_SMP_CRC_ERROR (0x00000100)
+ #define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_SUBTRACTIVE (0x00000200)
+ #define MPI_EVENT_DSCVRY_ERR_DS_TABLE_TO_TABLE (0x00000400)
+-#define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_PATHS (0x00000800)
++#define MPI_EVENT_DSCVRY_ERR_DS_UNSUPPORTED_DEVICE (0x00000800)
+ #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS (0x00001000)
++#define MPI_EVENT_DSCVRY_ERR_DS_MULTI_PORT_DOMAIN (0x00002000)
+
+ /* SAS SMP Error Event data */
+
+@@ -990,6 +998,7 @@
+ #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_1 (0x07)
+ #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_2 (0x08)
+ #define MPI_FW_DOWNLOAD_ITYPE_MEGARAID (0x09)
++#define MPI_FW_DOWNLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B)
+
+
+ typedef struct _FWDownloadTCSGE
+@@ -1049,6 +1058,7 @@
+ #define MPI_FW_UPLOAD_ITYPE_CONFIG_2 (0x08)
+ #define MPI_FW_UPLOAD_ITYPE_MEGARAID (0x09)
+ #define MPI_FW_UPLOAD_ITYPE_COMPLETE (0x0A)
++#define MPI_FW_UPLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B)
+
+ typedef struct _FWUploadTCSGE
+ {
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- * Copyright (c) 2001-2005 LSI Logic Corporation.
++ * Copyright (c) 2001-2007 LSI Logic Corporation.
+ *
+ *
+ * Name: mpi_raid.h
+ * Title: MPI RAID message and structures
+ * Creation Date: February 27, 2001
+ *
+- * mpi_raid.h Version: 01.05.02
++ * mpi_raid.h Version: 01.05.03
+ *
+ * Version History
+ * ---------------
+@@ -32,6 +32,8 @@
+ * 08-19-04 01.05.01 Original release for MPI v1.5.
+ * 01-15-05 01.05.02 Added defines for the two new RAID Actions for
+ * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE.
++ * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and
++ * associated defines.
+ * --------------------------------------------------------------------------
+ */
+
+@@ -90,6 +92,7 @@
+ #define MPI_RAID_ACTION_INACTIVATE_VOLUME (0x12)
+ #define MPI_RAID_ACTION_SET_RESYNC_RATE (0x13)
+ #define MPI_RAID_ACTION_SET_DATA_SCRUB_RATE (0x14)
++#define MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE (0x15)
+
+ /* ActionDataWord defines for use with MPI_RAID_ACTION_CREATE_VOLUME action */
+ #define MPI_RAID_ACTION_ADATA_DO_NOT_SYNC (0x00000001)
+@@ -111,6 +114,10 @@
+ /* ActionDataWord defines for use with MPI_RAID_ACTION_SET_DATA_SCRUB_RATE action */
+ #define MPI_RAID_ACTION_ADATA_DATA_SCRUB_RATE_MASK (0x000000FF)
+
++/* ActionDataWord defines for use with MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE action */
++#define MPI_RAID_ACTION_ADATA_ENABLE_FW_UPDATE (0x00000001)
++#define MPI_RAID_ACTION_ADATA_MASK_FW_UPDATE_TIMEOUT (0x0000FF00)
++#define MPI_RAID_ACTION_ADATA_SHIFT_FW_UPDATE_TIMEOUT (8)
+
+
+ /* RAID Action reply message */
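The new Device FW Update Mode action packs an enable bit and an 8-bit timeout field into its ActionDataWord. A minimal sketch of composing that word from the defines above — illustrative only, with a hypothetical timeout value, since the header does not state the timeout's units:

        /* Sketch: build the ActionDataWord for
         * MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE. Not part of the patch. */
        u8  fw_update_timeout = 30;   /* hypothetical value */
        u32 action_data =
                MPI_RAID_ACTION_ADATA_ENABLE_FW_UPDATE |
                ((fw_update_timeout <<
                  MPI_RAID_ACTION_ADATA_SHIFT_FW_UPDATE_TIMEOUT) &
                 MPI_RAID_ACTION_ADATA_MASK_FW_UPDATE_TIMEOUT);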
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.c linux-2.6.22-591/drivers/message/fusion/mptbase.c
+--- linux-2.6.22-570/drivers/message/fusion/mptbase.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptbase.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,7 +6,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -64,6 +64,7 @@
+ #endif
+
+ #include "mptbase.h"
++#include "lsi/mpi_log_fc.h"
+
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+ #define my_NAME "Fusion MPT base driver"
+@@ -6349,14 +6350,37 @@
+ static void
+ mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info)
+ {
+- static char *subcl_str[8] = {
+- "FCP Initiator", "FCP Target", "LAN", "MPI Message Layer",
+- "FC Link", "Context Manager", "Invalid Field Offset", "State Change Info"
+- };
+- u8 subcl = (log_info >> 24) & 0x7;
++ char *desc = "unknown";
++
++ switch (log_info & 0xFF000000) {
++ case MPI_IOCLOGINFO_FC_INIT_BASE:
++ desc = "FCP Initiator";
++ break;
++ case MPI_IOCLOGINFO_FC_TARGET_BASE:
++ desc = "FCP Target";
++ break;
++ case MPI_IOCLOGINFO_FC_LAN_BASE:
++ desc = "LAN";
++ break;
++ case MPI_IOCLOGINFO_FC_MSG_BASE:
++ desc = "MPI Message Layer";
++ break;
++ case MPI_IOCLOGINFO_FC_LINK_BASE:
++ desc = "FC Link";
++ break;
++ case MPI_IOCLOGINFO_FC_CTX_BASE:
++ desc = "Context Manager";
++ break;
++ case MPI_IOCLOGINFO_FC_INVALID_FIELD_BYTE_OFFSET:
++ desc = "Invalid Field Offset";
++ break;
++ case MPI_IOCLOGINFO_FC_STATE_CHANGE:
++ desc = "State Change Info";
++ break;
++ }
+
+- printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubCl={%s}\n",
+- ioc->name, log_info, subcl_str[subcl]);
++ printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubClass={%s}, Value=(0x%06x)\n",
++ ioc->name, log_info, desc, (log_info & 0xFFFFFF));
+ }
+
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
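The rewritten mpt_fc_log_info() keys off the full top byte of the LogInfo word instead of indexing a fixed eight-entry table with bits 24-26, so unrecognized subclasses now fall through to "unknown" rather than aliasing a table slot, and the low 24-bit detail value is printed as well. The decode it relies on, as a standalone sketch with a hypothetical sample value:

        /* Sketch of the LogInfo split used above. Not part of the patch. */
        u32 log_info = 0x24000123;            /* hypothetical sample */
        u32 subclass = log_info & 0xFF000000; /* matches an MPI_IOCLOGINFO_FC_*_BASE */
        u32 value    = log_info & 0x00FFFFFF; /* subclass-specific detail */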
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.h linux-2.6.22-591/drivers/message/fusion/mptbase.h
+--- linux-2.6.22-570/drivers/message/fusion/mptbase.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptbase.h 2007-12-21 15:36:12.000000000 -0500
+@@ -6,7 +6,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -75,8 +75,8 @@
+ #define COPYRIGHT "Copyright (c) 1999-2007 " MODULEAUTHOR
+ #endif
+
+-#define MPT_LINUX_VERSION_COMMON "3.04.04"
+-#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.04"
++#define MPT_LINUX_VERSION_COMMON "3.04.05"
++#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.05"
+ #define WHAT_MAGIC_STRING "@" "(" "#" ")"
+
+ #define show_mptmod_ver(s,ver) \
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.c linux-2.6.22-591/drivers/message/fusion/mptctl.c
+--- linux-2.6.22-570/drivers/message/fusion/mptctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptctl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -5,7 +5,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.h linux-2.6.22-591/drivers/message/fusion/mptctl.h
+--- linux-2.6.22-570/drivers/message/fusion/mptctl.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptctl.h 2007-12-21 15:36:12.000000000 -0500
+@@ -6,7 +6,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptfc.c linux-2.6.22-591/drivers/message/fusion/mptfc.c
+--- linux-2.6.22-570/drivers/message/fusion/mptfc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptfc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -43,7 +43,6 @@
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+-#include "linux_compat.h" /* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.c linux-2.6.22-591/drivers/message/fusion/mptlan.c
+--- linux-2.6.22-570/drivers/message/fusion/mptlan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptlan.c 2007-12-21 15:36:12.000000000 -0500
+@@ -5,7 +5,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 2000-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.h linux-2.6.22-591/drivers/message/fusion/mptlan.h
+--- linux-2.6.22-570/drivers/message/fusion/mptlan.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptlan.h 2007-12-21 15:36:12.000000000 -0500
+@@ -5,7 +5,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 2000-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptsas.c linux-2.6.22-591/drivers/message/fusion/mptsas.c
+--- linux-2.6.22-570/drivers/message/fusion/mptsas.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptsas.c 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ * Copyright (c) 2005-2007 Dell
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.c linux-2.6.22-591/drivers/message/fusion/mptscsih.c
+--- linux-2.6.22-570/drivers/message/fusion/mptscsih.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptscsih.c 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -44,7 +44,6 @@
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+
+-#include "linux_compat.h" /* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+@@ -260,30 +259,13 @@
+ /* Map the data portion, if any.
+ * sges_left = 0 if no data transfer.
+ */
+- if ( (sges_left = SCpnt->use_sg) ) {
+- sges_left = pci_map_sg(ioc->pcidev,
+- (struct scatterlist *) SCpnt->request_buffer,
+- SCpnt->use_sg,
+- SCpnt->sc_data_direction);
+- if (sges_left == 0)
++ sges_left = scsi_dma_map(SCpnt);
++ if (sges_left < 0)
+ return FAILED;
+- } else if (SCpnt->request_bufflen) {
+- SCpnt->SCp.dma_handle = pci_map_single(ioc->pcidev,
+- SCpnt->request_buffer,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- dsgprintk((MYIOC_s_INFO_FMT "SG: non-SG for %p, len=%d\n",
+- ioc->name, SCpnt, SCpnt->request_bufflen));
+- mptscsih_add_sge((char *) &pReq->SGL,
+- 0xD1000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|SCpnt->request_bufflen,
+- SCpnt->SCp.dma_handle);
+-
+- return SUCCESS;
+- }
+
+ /* Handle the SG case.
+ */
+- sg = (struct scatterlist *) SCpnt->request_buffer;
++ sg = scsi_sglist(SCpnt);
+ sg_done = 0;
+ sgeOffset = sizeof(SCSIIORequest_t) - sizeof(SGE_IO_UNION);
+ chainSge = NULL;
+@@ -465,7 +447,12 @@
+ MPT_FRAME_HDR *mf;
+ SEPRequest_t *SEPMsg;
+
+- if (ioc->bus_type == FC)
++ if (ioc->bus_type != SAS)
++ return;
++
++ /* Not supported for hidden raid components
++ */
++ if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
+ return;
+
+ if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
+@@ -662,7 +649,7 @@
+ scsi_state = pScsiReply->SCSIState;
+ scsi_status = pScsiReply->SCSIStatus;
+ xfer_cnt = le32_to_cpu(pScsiReply->TransferCount);
+- sc->resid = sc->request_bufflen - xfer_cnt;
++ scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
+ log_info = le32_to_cpu(pScsiReply->IOCLogInfo);
+
+ /*
+@@ -767,7 +754,7 @@
+ break;
+
+ case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH: /* 0x0049 */
+- sc->resid = sc->request_bufflen - xfer_cnt;
++ scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
+ if((xfer_cnt==0)||(sc->underflow > xfer_cnt))
+ sc->result=DID_SOFT_ERROR << 16;
+ else /* Sufficient data transfer occurred */
+@@ -816,7 +803,7 @@
+ break;
+
+ case MPI_IOCSTATUS_SCSI_DATA_OVERRUN: /* 0x0044 */
+- sc->resid=0;
++ scsi_set_resid(sc, 0);
+ case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR: /* 0x0040 */
+ case MPI_IOCSTATUS_SUCCESS: /* 0x0000 */
+ sc->result = (DID_OK << 16) | scsi_status;
+@@ -900,22 +887,17 @@
+
+ dreplyprintk(("%s: [%d:%d:%d:%d] resid=%d "
+ "bufflen=%d xfer_cnt=%d\n", __FUNCTION__,
+- sc->device->host->host_no, sc->device->channel, sc->device->id,
+- sc->device->lun, sc->resid, sc->request_bufflen,
+- xfer_cnt));
++ sc->device->host->host_no,
++ sc->device->channel, sc->device->id,
++ sc->device->lun, scsi_get_resid(sc),
++ scsi_bufflen(sc), xfer_cnt));
+ }
+ #endif
+
+ } /* end of address reply case */
+
+ /* Unmap the DMA buffers, if any. */
+- if (sc->use_sg) {
+- pci_unmap_sg(ioc->pcidev, (struct scatterlist *) sc->request_buffer,
+- sc->use_sg, sc->sc_data_direction);
+- } else if (sc->request_bufflen) {
+- pci_unmap_single(ioc->pcidev, sc->SCp.dma_handle,
+- sc->request_bufflen, sc->sc_data_direction);
+- }
++ scsi_dma_unmap(sc);
+
+ sc->scsi_done(sc); /* Issue the command callback */
+
+@@ -970,17 +952,8 @@
+ /* Set status, free OS resources (SG DMA buffers)
+ * Do OS callback
+ */
+- if (SCpnt->use_sg) {
+- pci_unmap_sg(ioc->pcidev,
+- (struct scatterlist *) SCpnt->request_buffer,
+- SCpnt->use_sg,
+- SCpnt->sc_data_direction);
+- } else if (SCpnt->request_bufflen) {
+- pci_unmap_single(ioc->pcidev,
+- SCpnt->SCp.dma_handle,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- }
++ scsi_dma_unmap(SCpnt);
++
+ SCpnt->result = DID_RESET << 16;
+ SCpnt->host_scribble = NULL;
+
+@@ -1023,14 +996,19 @@
+ mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii);
+ if (mf == NULL)
+ continue;
++ /* If the device is a hidden raid component, then it's
++ * expected that mf->Function will be RAID_SCSI_IO
++ */
++ if (vdevice->vtarget->tflags &
++ MPT_TARGET_FLAGS_RAID_COMPONENT && mf->Function !=
++ MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)
++ continue;
++
+ int_to_scsilun(vdevice->lun, &lun);
+ if ((mf->Bus != vdevice->vtarget->channel) ||
+ (mf->TargetID != vdevice->vtarget->id) ||
+ memcmp(lun.scsi_lun, mf->LUN, 8))
+ continue;
+- dsprintk(( "search_running: found (sc=%p, mf = %p) "
+- "channel %d id %d, lun %d \n", hd->ScsiLookup[ii],
+- mf, mf->Bus, mf->TargetID, vdevice->lun));
+
+ /* Cleanup
+ */
+@@ -1039,19 +1017,12 @@
+ mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf);
+ if ((unsigned char *)mf != sc->host_scribble)
+ continue;
+- if (sc->use_sg) {
+- pci_unmap_sg(hd->ioc->pcidev,
+- (struct scatterlist *) sc->request_buffer,
+- sc->use_sg,
+- sc->sc_data_direction);
+- } else if (sc->request_bufflen) {
+- pci_unmap_single(hd->ioc->pcidev,
+- sc->SCp.dma_handle,
+- sc->request_bufflen,
+- sc->sc_data_direction);
+- }
++ scsi_dma_unmap(sc);
+ sc->host_scribble = NULL;
+ sc->result = DID_NO_CONNECT << 16;
++ dsprintk(( "search_running: found (sc=%p, mf = %p) "
++ "channel %d id %d, lun %d \n", sc, mf,
++ vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun));
+ sc->scsi_done(sc);
+ }
+ }
+@@ -1380,10 +1351,10 @@
+ * will be no data transfer! GRRRRR...
+ */
+ if (SCpnt->sc_data_direction == DMA_FROM_DEVICE) {
+- datalen = SCpnt->request_bufflen;
++ datalen = scsi_bufflen(SCpnt);
+ scsidir = MPI_SCSIIO_CONTROL_READ; /* DATA IN (host<--ioc<--dev) */
+ } else if (SCpnt->sc_data_direction == DMA_TO_DEVICE) {
+- datalen = SCpnt->request_bufflen;
++ datalen = scsi_bufflen(SCpnt);
+ scsidir = MPI_SCSIIO_CONTROL_WRITE; /* DATA OUT (host-->ioc-->dev) */
+ } else {
+ datalen = 0;
+@@ -1768,20 +1739,45 @@
+ u32 ctx2abort;
+ int scpnt_idx;
+ int retval;
+- VirtDevice *vdev;
++ VirtDevice *vdevice;
+ ulong sn = SCpnt->serial_number;
++ MPT_ADAPTER *ioc;
+
+ /* If we can't locate our host adapter structure, return FAILED status.
+ */
+ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) {
+ SCpnt->result = DID_RESET << 16;
+ SCpnt->scsi_done(SCpnt);
+- dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: "
+- "Can't locate host! (sc=%p)\n",
+- SCpnt));
++ dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: Can't locate "
++ "host! (sc=%p)\n", SCpnt));
+ return FAILED;
+ }
+
++ ioc = hd->ioc;
++ printk(MYIOC_s_INFO_FMT "attempting task abort! (sc=%p)\n",
++ ioc->name, SCpnt);
++ scsi_print_command(SCpnt);
++
++ vdevice = SCpnt->device->hostdata;
++ if (!vdevice || !vdevice->vtarget) {
++ dtmprintk((MYIOC_s_DEBUG_FMT "task abort: device has been "
++ "deleted (sc=%p)\n", ioc->name, SCpnt));
++ SCpnt->result = DID_NO_CONNECT << 16;
++ SCpnt->scsi_done(SCpnt);
++ retval = 0;
++ goto out;
++ }
++
++ /* Task aborts are not supported for hidden raid components.
++ */
++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
++ dtmprintk((MYIOC_s_DEBUG_FMT "task abort: hidden raid "
++ "component (sc=%p)\n", ioc->name, SCpnt));
++ SCpnt->result = DID_RESET << 16;
++ retval = FAILED;
++ goto out;
++ }
++
+ /* Find this command
+ */
+ if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) {
+@@ -1790,21 +1786,20 @@
+ */
+ SCpnt->result = DID_RESET << 16;
+ dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: "
+- "Command not in the active list! (sc=%p)\n",
+- hd->ioc->name, SCpnt));
+- return SUCCESS;
++ "Command not in the active list! (sc=%p)\n", ioc->name,
++ SCpnt));
++ retval = 0;
++ goto out;
+ }
+
+- if (hd->resetPending)
+- return FAILED;
++ if (hd->resetPending) {
++ retval = FAILED;
++ goto out;
++ }
+
+ if (hd->timeouts < -1)
+ hd->timeouts++;
+
+- printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n",
+- hd->ioc->name, SCpnt);
+- scsi_print_command(SCpnt);
+-
+ /* Most important! Set TaskMsgContext to SCpnt's MsgContext!
+ * (the IO to be ABORT'd)
+ *
+@@ -1817,18 +1812,17 @@
+
+ hd->abortSCpnt = SCpnt;
+
+- vdev = SCpnt->device->hostdata;
+ retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
+- vdev->vtarget->channel, vdev->vtarget->id, vdev->lun,
+- ctx2abort, mptscsih_get_tm_timeout(hd->ioc));
++ vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun,
++ ctx2abort, mptscsih_get_tm_timeout(ioc));
+
+ if (SCPNT_TO_LOOKUP_IDX(SCpnt) == scpnt_idx &&
+ SCpnt->serial_number == sn)
+ retval = FAILED;
+
+- printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n",
+- hd->ioc->name,
+- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++ out:
++ printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n",
++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+ if (retval == 0)
+ return SUCCESS;
+@@ -1850,32 +1844,47 @@
+ {
+ MPT_SCSI_HOST *hd;
+ int retval;
+- VirtDevice *vdev;
++ VirtDevice *vdevice;
++ MPT_ADAPTER *ioc;
+
+ /* If we can't locate our host adapter structure, return FAILED status.
+ */
+ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+- dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: "
+- "Can't locate host! (sc=%p)\n",
+- SCpnt));
++ dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: Can't "
++ "locate host! (sc=%p)\n", SCpnt));
+ return FAILED;
+ }
+
+- if (hd->resetPending)
+- return FAILED;
+-
+- printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n",
+- hd->ioc->name, SCpnt);
++ ioc = hd->ioc;
++ printk(MYIOC_s_INFO_FMT "attempting target reset! (sc=%p)\n",
++ ioc->name, SCpnt);
+ scsi_print_command(SCpnt);
+
+- vdev = SCpnt->device->hostdata;
++ if (hd->resetPending) {
++ retval = FAILED;
++ goto out;
++ }
++
++ vdevice = SCpnt->device->hostdata;
++ if (!vdevice || !vdevice->vtarget) {
++ retval = 0;
++ goto out;
++ }
++
++ /* Target reset to hidden raid component is not supported
++ */
++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
++ retval = FAILED;
++ goto out;
++ }
++
+ retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+- vdev->vtarget->channel, vdev->vtarget->id,
+- 0, 0, mptscsih_get_tm_timeout(hd->ioc));
++ vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0,
++ mptscsih_get_tm_timeout(ioc));
+
+- printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n",
+- hd->ioc->name,
+- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++ out:
++ printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n",
++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+ if (retval == 0)
+ return SUCCESS;
+@@ -1899,18 +1908,19 @@
+ MPT_SCSI_HOST *hd;
+ int retval;
+ VirtDevice *vdev;
++ MPT_ADAPTER *ioc;
+
+ /* If we can't locate our host adapter structure, return FAILED status.
+ */
+ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+- dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: "
+- "Can't locate host! (sc=%p)\n",
+- SCpnt ) );
++ dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: Can't "
++ "locate host! (sc=%p)\n", SCpnt ));
+ return FAILED;
+ }
+
+- printk(KERN_WARNING MYNAM ": %s: attempting bus reset! (sc=%p)\n",
+- hd->ioc->name, SCpnt);
++ ioc = hd->ioc;
++ printk(MYIOC_s_INFO_FMT "attempting bus reset! (sc=%p)\n",
++ ioc->name, SCpnt);
+ scsi_print_command(SCpnt);
+
+ if (hd->timeouts < -1)
+@@ -1918,11 +1928,10 @@
+
+ vdev = SCpnt->device->hostdata;
+ retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+- vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(hd->ioc));
++ vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc));
+
+- printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n",
+- hd->ioc->name,
+- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++ printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n",
++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+ if (retval == 0)
+ return SUCCESS;
+@@ -1943,37 +1952,38 @@
+ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
+ {
+ MPT_SCSI_HOST * hd;
+- int status = SUCCESS;
++ int retval;
++ MPT_ADAPTER *ioc;
+
+ /* If we can't locate the host to reset, then we failed. */
+ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+- dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: "
+- "Can't locate host! (sc=%p)\n",
+- SCpnt ) );
++ dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: Can't "
++ "locate host! (sc=%p)\n", SCpnt));
+ return FAILED;
+ }
+
+- printk(KERN_WARNING MYNAM ": %s: Attempting host reset! (sc=%p)\n",
+- hd->ioc->name, SCpnt);
++ ioc = hd->ioc;
++ printk(MYIOC_s_INFO_FMT "attempting host reset! (sc=%p)\n",
++ ioc->name, SCpnt);
+
+ /* If our attempts to reset the host failed, then return a failed
+ * status. The host will be taken off line by the SCSI mid-layer.
+ */
+- if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0){
+- status = FAILED;
++ if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0) {
++ retval = FAILED;
+ } else {
+ /* Make sure TM pending is cleared and TM state is set to
+ * NONE.
+ */
++ retval = 0;
+ hd->tmPending = 0;
+ hd->tmState = TM_STATE_NONE;
+ }
+
+- dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: "
+- "Status = %s\n",
+- (status == SUCCESS) ? "SUCCESS" : "FAILED" ) );
++ printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n",
++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+- return status;
++ return retval;
+ }
+
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -3150,6 +3160,16 @@
+ {
+ INTERNAL_CMD iocmd;
+
++ /* Ignore hidden raid components; this is handled when the command
++ * is sent to the volume
++ */
++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
++ return;
++
++ if (vdevice->vtarget->type != TYPE_DISK || vdevice->vtarget->deleted ||
++ !vdevice->configured_lun)
++ return;
++
+ /* Following parameters will not change
+ * in this routine.
+ */
+@@ -3164,8 +3184,6 @@
+ iocmd.id = vdevice->vtarget->id;
+ iocmd.lun = vdevice->lun;
+
+- if ((vdevice->vtarget->type == TYPE_DISK) &&
+- (vdevice->configured_lun))
+ mptscsih_do_cmd(hd, &iocmd);
+ }
+
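The bulk of the mptscsih.c churn is one conversion applied repeatedly: the open-coded use_sg/request_buffer branches with pci_map_sg()/pci_map_single() give way to the SCSI mid-layer accessors, which always present the data as a scatterlist. The resulting idiom, reduced to a sketch with error handling elided:

        /* Sketch of the accessor idiom this patch converts to.
         * Not part of the patch. */
        int sges_left;
        struct scatterlist *sg;

        sges_left = scsi_dma_map(SCpnt);      /* 0 = no data, <0 = error */
        if (sges_left < 0)
                return FAILED;
        sg = scsi_sglist(SCpnt);
        /* ... walk sges_left entries of sg, building hardware SGEs ... */
        scsi_dma_unmap(SCpnt);                /* in every completion path */

One consequence visible in the hunks above: the single-buffer non-scatter-gather branch disappears entirely, since the mid-layer now hands down even single-segment transfers as a one-entry scatterlist.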
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.h linux-2.6.22-591/drivers/message/fusion/mptscsih.h
+--- linux-2.6.22-570/drivers/message/fusion/mptscsih.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptscsih.h 2007-12-21 15:36:12.000000000 -0500
+@@ -6,7 +6,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptspi.c linux-2.6.22-591/drivers/message/fusion/mptspi.c
+--- linux-2.6.22-570/drivers/message/fusion/mptspi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/fusion/mptspi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+ * running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ * Copyright (c) 1999-2007 LSI Logic Corporation
+- * (mailto:mpt_linux_developer@lsi.com)
++ * (mailto:DL-MPTFusionLinux@lsi.com)
+ *
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -44,7 +44,6 @@
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+
+-#include "linux_compat.h" /* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+diff -Nurb linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c
+--- linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -377,12 +377,8 @@
+ osm_err("SCSI error %08x\n", error);
+
+ dev = &c->pdev->dev;
+- if (cmd->use_sg)
+- dma_unmap_sg(dev, cmd->request_buffer, cmd->use_sg,
+- cmd->sc_data_direction);
+- else if (cmd->SCp.dma_handle)
+- dma_unmap_single(dev, cmd->SCp.dma_handle, cmd->request_bufflen,
+- cmd->sc_data_direction);
++
++ scsi_dma_unmap(cmd);
+
+ cmd->scsi_done(cmd);
+
+@@ -664,21 +660,15 @@
+
+ if (sgl_offset != SGL_OFFSET_0) {
+ /* write size of data addressed by SGL */
+- *mptr++ = cpu_to_le32(SCpnt->request_bufflen);
++ *mptr++ = cpu_to_le32(scsi_bufflen(SCpnt));
+
+ /* Now fill in the SGList and command */
+- if (SCpnt->use_sg) {
+- if (!i2o_dma_map_sg(c, SCpnt->request_buffer,
+- SCpnt->use_sg,
++
++ if (scsi_sg_count(SCpnt)) {
++ if (!i2o_dma_map_sg(c, scsi_sglist(SCpnt),
++ scsi_sg_count(SCpnt),
+ SCpnt->sc_data_direction, &mptr))
+ goto nomem;
+- } else {
+- SCpnt->SCp.dma_handle =
+- i2o_dma_map_single(c, SCpnt->request_buffer,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction, &mptr);
+- if (dma_mapping_error(SCpnt->SCp.dma_handle))
+- goto nomem;
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c
+--- linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c 2007-12-21 15:36:12.000000000 -0500
+@@ -209,6 +209,7 @@
+ DECLARE_WAITQUEUE(wait, tsk);
+ int valid = 0;
+
++ set_freezable();
+ add_wait_queue(&ts->irq_wait, &wait);
+ while (!kthread_should_stop()) {
+ unsigned int x, y, p;
+diff -Nurb linux-2.6.22-570/drivers/misc/asus-laptop.c linux-2.6.22-591/drivers/misc/asus-laptop.c
+--- linux-2.6.22-570/drivers/misc/asus-laptop.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/misc/asus-laptop.c 2007-12-21 15:36:12.000000000 -0500
+@@ -737,8 +737,7 @@
+ struct device_attribute dev_attr_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+- .mode = 0, \
+- .owner = THIS_MODULE }, \
++ .mode = 0 }, \
+ .show = NULL, \
+ .store = NULL, \
+ }
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/Kconfig linux-2.6.22-591/drivers/mmc/card/Kconfig
+--- linux-2.6.22-570/drivers/mmc/card/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/card/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -14,3 +14,21 @@
+ mount the filesystem. Almost everyone wishing MMC support
+ should say Y or M here.
+
++config MMC_BLOCK_BOUNCE
++ bool "Use bounce buffer for simple hosts"
++ depends on MMC_BLOCK
++ default y
++ help
++ SD/MMC is a high latency protocol where it is crucial to
++ send large requests in order to get high performance. Many
++ controllers, however, are restricted to contiguous memory
++ (i.e. they can't do scatter-gather), something the kernel
++ can rarely provide.
++
++ Say Y here to help these restricted hosts by bouncing
++ requests back and forth from a large buffer. You will get
++ a big performance gain at the cost of up to 64 KiB of
++ physical memory.
++
++ If unsure, say Y here.
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/block.c linux-2.6.22-591/drivers/mmc/card/block.c
+--- linux-2.6.22-570/drivers/mmc/card/block.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/card/block.c 2007-12-21 15:36:12.000000000 -0500
+@@ -262,7 +262,9 @@
+ }
+
+ brq.data.sg = mq->sg;
+- brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg);
++ brq.data.sg_len = mmc_queue_map_sg(mq);
++
++ mmc_queue_bounce_pre(mq);
+
+ if (brq.data.blocks !=
+ (req->nr_sectors >> (md->block_bits - 9))) {
+@@ -279,6 +281,9 @@
+ }
+
+ mmc_wait_for_req(card->host, &brq.mrq);
++
++ mmc_queue_bounce_post(mq);
++
+ if (brq.cmd.error) {
+ printk(KERN_ERR "%s: error %d sending read/write command\n",
+ req->rq_disk->disk_name, brq.cmd.error);
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.c linux-2.6.22-591/drivers/mmc/card/queue.c
+--- linux-2.6.22-570/drivers/mmc/card/queue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/card/queue.c 2007-12-21 15:36:12.000000000 -0500
+@@ -11,12 +11,15 @@
+ */
+ #include <linux/module.h>
+ #include <linux/blkdev.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+
+ #include <linux/mmc/card.h>
+ #include <linux/mmc/host.h>
+ #include "queue.h"
+
++#define MMC_QUEUE_BOUNCESZ 65536
++
+ #define MMC_QUEUE_SUSPENDED (1 << 0)
+
+ /*
+@@ -42,11 +45,7 @@
+ struct mmc_queue *mq = d;
+ struct request_queue *q = mq->queue;
+
+- /*
+- * Set iothread to ensure that we aren't put to sleep by
+- * the process freezing. We handle suspension ourselves.
+- */
+- current->flags |= PF_MEMALLOC|PF_NOFREEZE;
++ current->flags |= PF_MEMALLOC;
+
+ down(&mq->thread_sem);
+ do {
+@@ -118,6 +117,7 @@
+ struct mmc_host *host = card->host;
+ u64 limit = BLK_BOUNCE_HIGH;
+ int ret;
++ unsigned int bouncesz;
+
+ if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
+ limit = *mmc_dev(host)->dma_mask;
+@@ -127,36 +127,83 @@
+ if (!mq->queue)
+ return -ENOMEM;
+
++ mq->queue->queuedata = mq;
++ mq->req = NULL;
++
+ blk_queue_prep_rq(mq->queue, mmc_prep_request);
++
++#ifdef CONFIG_MMC_BLOCK_BOUNCE
++ if (host->max_hw_segs == 1) {
++ bouncesz = MMC_QUEUE_BOUNCESZ;
++
++ if (bouncesz > host->max_req_size)
++ bouncesz = host->max_req_size;
++ if (bouncesz > host->max_seg_size)
++ bouncesz = host->max_seg_size;
++
++ mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
++ if (!mq->bounce_buf) {
++ printk(KERN_WARNING "%s: unable to allocate "
++ "bounce buffer\n", mmc_card_name(card));
++ } else {
++ blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
++ blk_queue_max_sectors(mq->queue, bouncesz / 512);
++ blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
++ blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
++ blk_queue_max_segment_size(mq->queue, bouncesz);
++
++ mq->sg = kmalloc(sizeof(struct scatterlist),
++ GFP_KERNEL);
++ if (!mq->sg) {
++ ret = -ENOMEM;
++ goto free_bounce_buf;
++ }
++
++ mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
++ bouncesz / 512, GFP_KERNEL);
++ if (!mq->bounce_sg) {
++ ret = -ENOMEM;
++ goto free_sg;
++ }
++ }
++ }
++#endif
++
++ if (!mq->bounce_buf) {
+ blk_queue_bounce_limit(mq->queue, limit);
+ blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
+ blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
+ blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
+ blk_queue_max_segment_size(mq->queue, host->max_seg_size);
+
+- mq->queue->queuedata = mq;
+- mq->req = NULL;
+-
+- mq->sg = kmalloc(sizeof(struct scatterlist) * host->max_phys_segs,
+- GFP_KERNEL);
++ mq->sg = kmalloc(sizeof(struct scatterlist) *
++ host->max_phys_segs, GFP_KERNEL);
+ if (!mq->sg) {
+ ret = -ENOMEM;
+ goto cleanup_queue;
+ }
++ }
+
+ init_MUTEX(&mq->thread_sem);
+
+ mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd");
+ if (IS_ERR(mq->thread)) {
+ ret = PTR_ERR(mq->thread);
+- goto free_sg;
++ goto free_bounce_sg;
+ }
+
+ return 0;
+-
++ free_bounce_sg:
++ if (mq->bounce_sg)
++ kfree(mq->bounce_sg);
++ mq->bounce_sg = NULL;
+ free_sg:
+ kfree(mq->sg);
+ mq->sg = NULL;
++ free_bounce_buf:
++ if (mq->bounce_buf)
++ kfree(mq->bounce_buf);
++ mq->bounce_buf = NULL;
+ cleanup_queue:
+ blk_cleanup_queue(mq->queue);
+ return ret;
+@@ -178,9 +225,17 @@
+ /* Then terminate our worker thread */
+ kthread_stop(mq->thread);
+
++ if (mq->bounce_sg)
++ kfree(mq->bounce_sg);
++ mq->bounce_sg = NULL;
++
+ kfree(mq->sg);
+ mq->sg = NULL;
+
++ if (mq->bounce_buf)
++ kfree(mq->bounce_buf);
++ mq->bounce_buf = NULL;
++
+ blk_cleanup_queue(mq->queue);
+
+ mq->card = NULL;
+@@ -231,3 +286,108 @@
+ }
+ }
+
++static void copy_sg(struct scatterlist *dst, unsigned int dst_len,
++ struct scatterlist *src, unsigned int src_len)
++{
++ unsigned int chunk;
++ char *dst_buf, *src_buf;
++ unsigned int dst_size, src_size;
++
++ dst_buf = NULL;
++ src_buf = NULL;
++ dst_size = 0;
++ src_size = 0;
++
++ while (src_len) {
++ BUG_ON(dst_len == 0);
++
++ if (dst_size == 0) {
++ dst_buf = page_address(dst->page) + dst->offset;
++ dst_size = dst->length;
++ }
++
++ if (src_size == 0) {
++ src_buf = page_address(src->page) + src->offset;
++ src_size = src->length;
++ }
++
++ chunk = min(dst_size, src_size);
++
++ memcpy(dst_buf, src_buf, chunk);
++
++ dst_buf += chunk;
++ src_buf += chunk;
++ dst_size -= chunk;
++ src_size -= chunk;
++
++ if (dst_size == 0) {
++ dst++;
++ dst_len--;
++ }
++
++ if (src_size == 0) {
++ src++;
++ src_len--;
++ }
++ }
++}
++
++unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
++{
++ unsigned int sg_len;
++
++ if (!mq->bounce_buf)
++ return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
++
++ BUG_ON(!mq->bounce_sg);
++
++ sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
++
++ mq->bounce_sg_len = sg_len;
++
++ /*
++ * Shortcut in the event we only get a single entry.
++ */
++ if (sg_len == 1) {
++ memcpy(mq->sg, mq->bounce_sg, sizeof(struct scatterlist));
++ return 1;
++ }
++
++ mq->sg[0].page = virt_to_page(mq->bounce_buf);
++ mq->sg[0].offset = offset_in_page(mq->bounce_buf);
++ mq->sg[0].length = 0;
++
++ while (sg_len) {
++ mq->sg[0].length += mq->bounce_sg[sg_len - 1].length;
++ sg_len--;
++ }
++
++ return 1;
++}
++
++void mmc_queue_bounce_pre(struct mmc_queue *mq)
++{
++ if (!mq->bounce_buf)
++ return;
++
++ if (mq->bounce_sg_len == 1)
++ return;
++ if (rq_data_dir(mq->req) != WRITE)
++ return;
++
++ copy_sg(mq->sg, 1, mq->bounce_sg, mq->bounce_sg_len);
++}
++
++void mmc_queue_bounce_post(struct mmc_queue *mq)
++{
++ if (!mq->bounce_buf)
++ return;
++
++ if (mq->bounce_sg_len == 1)
++ return;
++ if (rq_data_dir(mq->req) != READ)
++ return;
++
++ copy_sg(mq->bounce_sg, mq->bounce_sg_len, mq->sg, 1);
++}
++
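The bounce machinery added here has a fixed call ordering, which the block.c hunk earlier in this patch follows: map the request (collapsing to a single scatterlist entry over bounce_buf when bouncing), copy the payload into the buffer before a write, issue the request, then copy it back out after a read. A sketch of that sequence, condensed from the block.c hunk above:

        /* Sketch of the bounce call ordering from block.c. */
        brq.data.sg     = mq->sg;
        brq.data.sg_len = mmc_queue_map_sg(mq); /* 1 when bounced */

        mmc_queue_bounce_pre(mq);               /* writes: copy data in */
        mmc_wait_for_req(card->host, &brq.mrq);
        mmc_queue_bounce_post(mq);              /* reads: copy data out */

Note that mmc_queue_map_sg() never bounces a request that already maps to a single entry; the memcpy shortcut hands the hardware the original page directly.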
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.h linux-2.6.22-591/drivers/mmc/card/queue.h
+--- linux-2.6.22-570/drivers/mmc/card/queue.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/card/queue.h 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,9 @@
+ void *data;
+ struct request_queue *queue;
+ struct scatterlist *sg;
++ char *bounce_buf;
++ struct scatterlist *bounce_sg;
++ unsigned int bounce_sg_len;
+ };
+
+ extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *);
+@@ -21,4 +24,8 @@
+ extern void mmc_queue_suspend(struct mmc_queue *);
+ extern void mmc_queue_resume(struct mmc_queue *);
+
++extern unsigned int mmc_queue_map_sg(struct mmc_queue *);
++extern void mmc_queue_bounce_pre(struct mmc_queue *);
++extern void mmc_queue_bounce_post(struct mmc_queue *);
++
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/Kconfig linux-2.6.22-591/drivers/mmc/core/Kconfig
+--- linux-2.6.22-570/drivers/mmc/core/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -14,3 +14,16 @@
+ This option is usually just for embedded systems which use
+ a MMC/SD card for rootfs. Most people should say N here.
+
++config MMC_PASSWORDS
++ boolean "MMC card lock/unlock passwords (EXPERIMENTAL)"
++ depends on EXPERIMENTAL
++ select KEYS
++ help
++ Say Y here to enable the use of passwords to lock and unlock
++ MMC cards. This uses the kernel's key retention service, with
++ request_key to look up the key associated with each card.
++
++ For example, if you have an MMC card that was locked using
++ Symbian OS on your cell phone, you won't be able to read it
++ on Linux without this support.
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/Makefile linux-2.6.22-591/drivers/mmc/core/Makefile
+--- linux-2.6.22-570/drivers/mmc/core/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -7,5 +7,6 @@
+ endif
+
+ obj-$(CONFIG_MMC) += mmc_core.o
+-mmc_core-y := core.o sysfs.o mmc.o mmc_ops.o sd.o sd_ops.o
++mmc_core-y := core.o sysfs.o bus.o host.o mmc.o mmc_ops.o sd.o sd_ops.o
++mmc_core-$(CONFIG_MMC_PASSWORDS) += lock.o
+
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.c linux-2.6.22-591/drivers/mmc/core/bus.c
+--- linux-2.6.22-570/drivers/mmc/core/bus.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/bus.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,245 @@
++/*
++ * linux/drivers/mmc/core/bus.c
++ *
++ * Copyright (C) 2003 Russell King, All Rights Reserved.
++ * Copyright (C) 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * MMC card bus driver model
++ */
++
++#include <linux/device.h>
++#include <linux/err.h>
++
++#include <linux/mmc/card.h>
++#include <linux/mmc/host.h>
++
++#include "sysfs.h"
++#include "bus.h"
++
++#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev)
++#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv)
++
++static ssize_t mmc_type_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ switch (card->type) {
++ case MMC_TYPE_MMC:
++ return sprintf(buf, "MMC\n");
++ case MMC_TYPE_SD:
++ return sprintf(buf, "SD\n");
++ default:
++ return -EFAULT;
++ }
++}
++
++static struct device_attribute mmc_dev_attrs[] = {
++ MMC_ATTR_RO(type),
++ __ATTR_NULL,
++};
++
++/*
++ * This currently matches any MMC driver to any MMC card - drivers
++ * themselves make the decision whether to drive this card in their
++ * probe method.
++ *
++ * We also fail for all locked cards; drivers expect to still be able to
++ * do block I/O on probe(), which is not possible while the card is locked.
++ * Device probing must be triggered sometime later to make the card available
++ * to the block driver.
++ */
++static int mmc_bus_match(struct device *dev, struct device_driver *drv)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ if (mmc_card_locked(card)) {
++ dev_dbg(&card->dev, "card is locked; binding is deferred\n");
++ return 0;
++ }
++
++ return 1;
++}
++
++static int
++mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf,
++ int buf_size)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++ int retval = 0, i = 0, length = 0;
++
++#define add_env(fmt,val) do { \
++ retval = add_uevent_var(envp, num_envp, &i, \
++ buf, buf_size, &length, \
++ fmt, val); \
++ if (retval) \
++ return retval; \
++} while (0);
++
++ switch (card->type) {
++ case MMC_TYPE_MMC:
++ add_env("MMC_TYPE=%s", "MMC");
++ break;
++ case MMC_TYPE_SD:
++ add_env("MMC_TYPE=%s", "SD");
++ break;
++ }
++
++ add_env("MMC_NAME=%s", mmc_card_name(card));
++
++#undef add_env
++
++ envp[i] = NULL;
++
++ return 0;
++}
++
++static int mmc_bus_probe(struct device *dev)
++{
++ struct mmc_driver *drv = to_mmc_driver(dev->driver);
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ return drv->probe(card);
++}
++
++static int mmc_bus_remove(struct device *dev)
++{
++ struct mmc_driver *drv = to_mmc_driver(dev->driver);
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ drv->remove(card);
++
++ return 0;
++}
++
++static int mmc_bus_suspend(struct device *dev, pm_message_t state)
++{
++ struct mmc_driver *drv = to_mmc_driver(dev->driver);
++ struct mmc_card *card = dev_to_mmc_card(dev);
++ int ret = 0;
++
++ if (dev->driver && drv->suspend)
++ ret = drv->suspend(card, state);
++ return ret;
++}
++
++static int mmc_bus_resume(struct device *dev)
++{
++ struct mmc_driver *drv = to_mmc_driver(dev->driver);
++ struct mmc_card *card = dev_to_mmc_card(dev);
++ int ret = 0;
++
++ if (dev->driver && drv->resume)
++ ret = drv->resume(card);
++ return ret;
++}
++
++static struct bus_type mmc_bus_type = {
++ .name = "mmc",
++ .dev_attrs = mmc_dev_attrs,
++ .match = mmc_bus_match,
++ .uevent = mmc_bus_uevent,
++ .probe = mmc_bus_probe,
++ .remove = mmc_bus_remove,
++ .suspend = mmc_bus_suspend,
++ .resume = mmc_bus_resume,
++};
++
++int mmc_register_bus(void)
++{
++ return bus_register(&mmc_bus_type);
++}
++
++void mmc_unregister_bus(void)
++{
++ bus_unregister(&mmc_bus_type);
++}
++
++/**
++ * mmc_register_driver - register a media driver
++ * @drv: MMC media driver
++ */
++int mmc_register_driver(struct mmc_driver *drv)
++{
++ drv->drv.bus = &mmc_bus_type;
++ return driver_register(&drv->drv);
++}
++
++EXPORT_SYMBOL(mmc_register_driver);
++
++/**
++ * mmc_unregister_driver - unregister a media driver
++ * @drv: MMC media driver
++ */
++void mmc_unregister_driver(struct mmc_driver *drv)
++{
++ drv->drv.bus = &mmc_bus_type;
++ driver_unregister(&drv->drv);
++}
++
++EXPORT_SYMBOL(mmc_unregister_driver);
++
++static void mmc_release_card(struct device *dev)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ kfree(card);
++}
++
++/*
++ * Allocate and initialise a new MMC card structure.
++ */
++struct mmc_card *mmc_alloc_card(struct mmc_host *host)
++{
++ struct mmc_card *card;
++
++ card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL);
++ if (!card)
++ return ERR_PTR(-ENOMEM);
++
++ memset(card, 0, sizeof(struct mmc_card));
++
++ card->host = host;
++
++ device_initialize(&card->dev);
++
++ card->dev.parent = mmc_classdev(host);
++ card->dev.bus = &mmc_bus_type;
++ card->dev.release = mmc_release_card;
++
++ return card;
++}
++
++/*
++ * Register a new MMC card with the driver model.
++ */
++int mmc_add_card(struct mmc_card *card)
++{
++ int ret;
++
++ snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
++ "%s:%04x", mmc_hostname(card->host), card->rca);
++
++ ret = device_add(&card->dev);
++ if (ret == 0)
++ mmc_card_set_present(card);
++
++ return ret;
++}
++
++/*
++ * Unregister an MMC card with the driver model, and
++ * (eventually) free it.
++ */
++void mmc_remove_card(struct mmc_card *card)
++{
++ if (mmc_card_present(card))
++ device_del(&card->dev);
++
++ put_device(&card->dev);
++}
++
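With the bus model split into bus.c, media drivers attach through struct mmc_driver, whose probe/remove callbacks receive the mmc_card directly (mmc_bus_probe/mmc_bus_remove above do the unwrapping). A skeletal registration, with hypothetical callback names:

        /* Sketch of a media driver against the new bus API.
         * my_probe/my_remove are hypothetical. */
        static struct mmc_driver my_driver = {
                .drv    = { .name = "my_mmc_driver" },
                .probe  = my_probe,   /* int  my_probe(struct mmc_card *)  */
                .remove = my_remove,  /* void my_remove(struct mmc_card *) */
        };

        /* module init / exit */
        mmc_register_driver(&my_driver);
        mmc_unregister_driver(&my_driver);

Because mmc_bus_match() refuses locked cards, such a driver's probe() will not run until the card has been unlocked and probing is re-triggered.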
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.h linux-2.6.22-591/drivers/mmc/core/bus.h
+--- linux-2.6.22-570/drivers/mmc/core/bus.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/bus.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,22 @@
++/*
++ * linux/drivers/mmc/core/bus.h
++ *
++ * Copyright (C) 2003 Russell King, All Rights Reserved.
++ * Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_BUS_H
++#define _MMC_CORE_BUS_H
++
++struct mmc_card *mmc_alloc_card(struct mmc_host *host);
++int mmc_add_card(struct mmc_card *card);
++void mmc_remove_card(struct mmc_card *card);
++
++int mmc_register_bus(void);
++void mmc_unregister_bus(void);
++
++#endif
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.c linux-2.6.22-591/drivers/mmc/core/core.c
+--- linux-2.6.22-570/drivers/mmc/core/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -27,7 +27,9 @@
+ #include <linux/mmc/sd.h>
+
+ #include "core.h"
+-#include "sysfs.h"
++#include "bus.h"
++#include "host.h"
++#include "lock.h"
+
+ #include "mmc_ops.h"
+ #include "sd_ops.h"
+@@ -35,6 +37,25 @@
+ extern int mmc_attach_mmc(struct mmc_host *host, u32 ocr);
+ extern int mmc_attach_sd(struct mmc_host *host, u32 ocr);
+
++static struct workqueue_struct *workqueue;
++
++/*
++ * Internal function. Schedule delayed work in the MMC work queue.
++ */
++static int mmc_schedule_delayed_work(struct delayed_work *work,
++ unsigned long delay)
++{
++ return queue_delayed_work(workqueue, work, delay);
++}
++
++/*
++ * Internal function. Flush all scheduled work from the MMC work queue.
++ */
++static void mmc_flush_scheduled_work(void)
++{
++ flush_workqueue(workqueue);
++}
++
+ /**
+ * mmc_request_done - finish processing an MMC request
+ * @host: MMC host which completed request
+@@ -369,22 +390,6 @@
+ }
+
+ /*
+- * Allocate a new MMC card
+- */
+-struct mmc_card *mmc_alloc_card(struct mmc_host *host)
+-{
+- struct mmc_card *card;
+-
+- card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL);
+- if (!card)
+- return ERR_PTR(-ENOMEM);
+-
+- mmc_init_card(card, host);
+-
+- return card;
+-}
+-
+-/*
+ * Apply power to the MMC stack. This is a two-stage process.
+ * First, we enable power to the card without the clock running.
+ * We then wait a bit for the power to stabilise. Finally,
+@@ -512,7 +517,7 @@
+ EXPORT_SYMBOL(mmc_detect_change);
+
+
+-static void mmc_rescan(struct work_struct *work)
++void mmc_rescan(struct work_struct *work)
+ {
+ struct mmc_host *host =
+ container_of(work, struct mmc_host, detect.work);
+@@ -561,69 +566,13 @@
+ }
+ }
+
+-
+-/**
+- * mmc_alloc_host - initialise the per-host structure.
+- * @extra: sizeof private data structure
+- * @dev: pointer to host device model structure
+- *
+- * Initialise the per-host structure.
+- */
+-struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
+-{
+- struct mmc_host *host;
+-
+- host = mmc_alloc_host_sysfs(extra, dev);
+- if (host) {
+- spin_lock_init(&host->lock);
+- init_waitqueue_head(&host->wq);
+- INIT_DELAYED_WORK(&host->detect, mmc_rescan);
+-
+- /*
+- * By default, hosts do not support SGIO or large requests.
+- * They have to set these according to their abilities.
+- */
+- host->max_hw_segs = 1;
+- host->max_phys_segs = 1;
+- host->max_seg_size = PAGE_CACHE_SIZE;
+-
+- host->max_req_size = PAGE_CACHE_SIZE;
+- host->max_blk_size = 512;
+- host->max_blk_count = PAGE_CACHE_SIZE / 512;
+- }
+-
+- return host;
+-}
+-
+-EXPORT_SYMBOL(mmc_alloc_host);
+-
+-/**
+- * mmc_add_host - initialise host hardware
+- * @host: mmc host
+- */
+-int mmc_add_host(struct mmc_host *host)
++void mmc_start_host(struct mmc_host *host)
+ {
+- int ret;
+-
+- ret = mmc_add_host_sysfs(host);
+- if (ret == 0) {
+ mmc_power_off(host);
+ mmc_detect_change(host, 0);
+- }
+-
+- return ret;
+ }
+
+-EXPORT_SYMBOL(mmc_add_host);
+-
+-/**
+- * mmc_remove_host - remove host hardware
+- * @host: mmc host
+- *
+- * Unregister and remove all cards associated with this host,
+- * and power down the MMC bus.
+- */
+-void mmc_remove_host(struct mmc_host *host)
++void mmc_stop_host(struct mmc_host *host)
+ {
+ #ifdef CONFIG_MMC_DEBUG
+ unsigned long flags;
+@@ -648,24 +597,8 @@
+ BUG_ON(host->card);
+
+ mmc_power_off(host);
+- mmc_remove_host_sysfs(host);
+ }
+
+-EXPORT_SYMBOL(mmc_remove_host);
+-
+-/**
+- * mmc_free_host - free the host structure
+- * @host: mmc host
+- *
+- * Free the host once all references to it have been dropped.
+- */
+-void mmc_free_host(struct mmc_host *host)
+-{
+- mmc_free_host_sysfs(host);
+-}
+-
+-EXPORT_SYMBOL(mmc_free_host);
+-
+ #ifdef CONFIG_PM
+
+ /**
+@@ -726,4 +659,47 @@
+
+ #endif
+
++static int __init mmc_init(void)
++{
++ int ret;
++
++ workqueue = create_singlethread_workqueue("kmmcd");
++ if (!workqueue)
++ return -ENOMEM;
++
++ ret = mmc_register_bus();
++ if (ret)
++ goto destroy_workqueue;
++
++ ret = mmc_register_host_class();
++ if (ret)
++ goto unregister_bus;
++
++ ret = mmc_register_key_type();
++ if (ret)
++ goto unregister_host_class;
++
++ return 0;
++
++unregister_host_class:
++ mmc_unregister_host_class();
++unregister_bus:
++ mmc_unregister_bus();
++destroy_workqueue:
++ destroy_workqueue(workqueue);
++
++ return ret;
++}
++
++static void __exit mmc_exit(void)
++{
++ mmc_unregister_key_type();
++ mmc_unregister_host_class();
++ mmc_unregister_bus();
++ destroy_workqueue(workqueue);
++}
++
++module_init(mmc_init);
++module_exit(mmc_exit);
++
+ MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.h linux-2.6.22-591/drivers/mmc/core/core.h
+--- linux-2.6.22-570/drivers/mmc/core/core.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/core.h 2007-12-21 15:36:12.000000000 -0500
+@@ -54,8 +54,6 @@
+ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr);
+ void mmc_set_timing(struct mmc_host *host, unsigned int timing);
+
+-struct mmc_card *mmc_alloc_card(struct mmc_host *host);
+-
+ static inline void mmc_delay(unsigned int ms)
+ {
+ if (ms < 1000 / HZ) {
+@@ -66,5 +64,9 @@
+ }
+ }
+
++void mmc_rescan(struct work_struct *work);
++void mmc_start_host(struct mmc_host *host);
++void mmc_stop_host(struct mmc_host *host);
++
+ #endif
+
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.c linux-2.6.22-591/drivers/mmc/core/host.c
+--- linux-2.6.22-570/drivers/mmc/core/host.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/host.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,156 @@
++/*
++ * linux/drivers/mmc/core/host.c
++ *
++ * Copyright (C) 2003 Russell King, All Rights Reserved.
++ * Copyright (C) 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * MMC host class device management
++ */
++
++#include <linux/device.h>
++#include <linux/err.h>
++#include <linux/idr.h>
++#include <linux/pagemap.h>
++
++#include <linux/mmc/host.h>
++
++#include "core.h"
++#include "host.h"
++
++#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
++
++static void mmc_host_classdev_release(struct device *dev)
++{
++ struct mmc_host *host = cls_dev_to_mmc_host(dev);
++ kfree(host);
++}
++
++static struct class mmc_host_class = {
++ .name = "mmc_host",
++ .dev_release = mmc_host_classdev_release,
++};
++
++int mmc_register_host_class(void)
++{
++ return class_register(&mmc_host_class);
++}
++
++void mmc_unregister_host_class(void)
++{
++ class_unregister(&mmc_host_class);
++}
++
++static DEFINE_IDR(mmc_host_idr);
++static DEFINE_SPINLOCK(mmc_host_lock);
++
++/**
++ * mmc_alloc_host - initialise the per-host structure.
++ * @extra: sizeof private data structure
++ * @dev: pointer to host device model structure
++ *
++ * Initialise the per-host structure.
++ */
++struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
++{
++ struct mmc_host *host;
++
++ host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
++ if (!host)
++ return NULL;
++
++ memset(host, 0, sizeof(struct mmc_host) + extra);
++
++ host->parent = dev;
++ host->class_dev.parent = dev;
++ host->class_dev.class = &mmc_host_class;
++ device_initialize(&host->class_dev);
++
++ spin_lock_init(&host->lock);
++ init_waitqueue_head(&host->wq);
++ INIT_DELAYED_WORK(&host->detect, mmc_rescan);
++
++ /*
++ * By default, hosts do not support SGIO or large requests.
++ * They have to set these according to their abilities.
++ */
++ host->max_hw_segs = 1;
++ host->max_phys_segs = 1;
++ host->max_seg_size = PAGE_CACHE_SIZE;
++
++ host->max_req_size = PAGE_CACHE_SIZE;
++ host->max_blk_size = 512;
++ host->max_blk_count = PAGE_CACHE_SIZE / 512;
++
++ return host;
++}
++
++EXPORT_SYMBOL(mmc_alloc_host);
++
++/**
++ * mmc_add_host - initialise host hardware
++ * @host: mmc host
++ */
++int mmc_add_host(struct mmc_host *host)
++{
++ int err;
++
++ if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))
++ return -ENOMEM;
++
++ spin_lock(&mmc_host_lock);
++ err = idr_get_new(&mmc_host_idr, host, &host->index);
++ spin_unlock(&mmc_host_lock);
++ if (err)
++ return err;
++
++ snprintf(host->class_dev.bus_id, BUS_ID_SIZE,
++ "mmc%d", host->index);
++
++ err = device_add(&host->class_dev);
++ if (err)
++ return err;
++
++ mmc_start_host(host);
++
++ return 0;
++}
++
++EXPORT_SYMBOL(mmc_add_host);
++
++/**
++ * mmc_remove_host - remove host hardware
++ * @host: mmc host
++ *
++ * Unregister and remove all cards associated with this host,
++ * and power down the MMC bus.
++ */
++void mmc_remove_host(struct mmc_host *host)
++{
++ mmc_stop_host(host);
++
++ device_del(&host->class_dev);
++
++ spin_lock(&mmc_host_lock);
++ idr_remove(&mmc_host_idr, host->index);
++ spin_unlock(&mmc_host_lock);
++}
++
++EXPORT_SYMBOL(mmc_remove_host);
++
++/**
++ * mmc_free_host - free the host structure
++ * @host: mmc host
++ *
++ * Free the host once all references to it have been dropped.
++ */
++void mmc_free_host(struct mmc_host *host)
++{
++ put_device(&host->class_dev);
++}
++
++EXPORT_SYMBOL(mmc_free_host);
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.h linux-2.6.22-591/drivers/mmc/core/host.h
+--- linux-2.6.22-570/drivers/mmc/core/host.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/host.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,18 @@
++/*
++ * linux/drivers/mmc/core/host.h
++ *
++ * Copyright (C) 2003 Russell King, All Rights Reserved.
++ * Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_HOST_H
++#define _MMC_CORE_HOST_H
++
++int mmc_register_host_class(void);
++void mmc_unregister_host_class(void);
++
++#endif
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.c linux-2.6.22-591/drivers/mmc/core/lock.c
+--- linux-2.6.22-570/drivers/mmc/core/lock.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/lock.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,199 @@
++/*
++ * linux/drivers/mmc/core/lock.c
++ *
++ * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved.
++ * Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * MMC password key handling.
++ */
++
++#include <linux/device.h>
++#include <linux/key.h>
++#include <linux/err.h>
++
++#include <linux/mmc/card.h>
++#include <linux/mmc/host.h>
++#include <linux/mmc/mmc.h>
++
++#include "sysfs.h"
++#include "mmc_ops.h"
++#include "lock.h"
++
++#define MMC_KEYLEN_MAXBYTES 32
++
++#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev)
++
++static int mmc_key_instantiate(struct key *key, const void *data, size_t datalen)
++{
++ struct mmc_key_payload *mpayload;
++ int ret;
++
++ ret = -EINVAL;
++ if (datalen <= 0 || datalen > MMC_KEYLEN_MAXBYTES || !data) {
++ pr_debug("Invalid data\n");
++ goto error;
++ }
++
++ ret = key_payload_reserve(key, datalen);
++ if (ret < 0) {
++ pr_debug("ret = %d\n", ret);
++ goto error;
++ }
++
++ ret = -ENOMEM;
++ mpayload = kmalloc(sizeof(*mpayload) + datalen, GFP_KERNEL);
++ if (!mpayload) {
++ pr_debug("Unable to allocate mpayload structure\n");
++ goto error;
++ }
++ mpayload->datalen = datalen;
++ memcpy(mpayload->data, data, datalen);
++
++ rcu_assign_pointer(key->payload.data, mpayload);
++
++ /* ret = 0 if there is no error */
++ ret = 0;
++
++error:
++ return ret;
++}
++
++static int mmc_key_match(const struct key *key, const void *description)
++{
++ return strcmp(key->description, description) == 0;
++}
++
++/*
++ * dispose of the data dangling from the corpse of an mmc key
++ */
++static void mmc_key_destroy(struct key *key)
++{
++ struct mmc_key_payload *mpayload = key->payload.data;
++
++ kfree(mpayload);
++}
++
++static struct key_type mmc_key_type = {
++ .name = "mmc",
++ .def_datalen = MMC_KEYLEN_MAXBYTES,
++ .instantiate = mmc_key_instantiate,
++ .match = mmc_key_match,
++ .destroy = mmc_key_destroy,
++};
++
++int mmc_register_key_type(void)
++{
++ return register_key_type(&mmc_key_type);
++}
++
++void mmc_unregister_key_type(void)
++{
++ unregister_key_type(&mmc_key_type);
++}
++
++static ssize_t
++mmc_lockable_show(struct device *dev, struct device_attribute *att, char *buf)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++
++ return sprintf(buf, "%slocked\n", mmc_card_locked(card) ? "" : "un");
++}
++
++/*
++ * implement MMC password functions: force erase, remove password, change
++ * password, unlock card and assign password.
++ */
++static ssize_t
++mmc_lockable_store(struct device *dev, struct device_attribute *att,
++ const char *data, size_t len)
++{
++ struct mmc_card *card = dev_to_mmc_card(dev);
++ int ret;
++ struct key *mmc_key;
++
++ if (!mmc_card_lockable(card))
++ return -EINVAL;
++
++ mmc_claim_host(card->host);
++
++ ret = -EINVAL;
++ if (mmc_card_locked(card) && !strncmp(data, "erase", 5)) {
++ /* forced erase only works while card is locked */
++ mmc_lock_unlock(card, NULL, MMC_LOCK_MODE_ERASE);
++ ret = len;
++ } else if (!mmc_card_locked(card) && !strncmp(data, "remove", 6)) {
++ /* remove password only works while card is unlocked */
++ mmc_key = request_key(&mmc_key_type, "mmc:key", "remove");
++
++ if (!IS_ERR(mmc_key)) {
++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_CLR_PWD);
++ if (!ret)
++ ret = len;
++ } else
++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++ } else if (!mmc_card_locked(card) && ((!strncmp(data, "assign", 6)) ||
++ (!strncmp(data, "change", 6)))) {
++ /* assign or change */
++ if (!strncmp(data, "assign", 6))
++ mmc_key = request_key(&mmc_key_type, "mmc:key", "assign");
++ else
++ mmc_key = request_key(&mmc_key_type, "mmc:key", "change");
++
++ if (!IS_ERR(mmc_key)) {
++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_SET_PWD);
++ if (!ret)
++ ret = len;
++ } else
++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++ } else if (mmc_card_locked(card) && !strncmp(data, "unlock", 6)) {
++ /* unlock */
++ mmc_key = request_key(&mmc_key_type, "mmc:key", "unlock");
++ if (!IS_ERR(mmc_key)) {
++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_UNLOCK);
++ if (ret) {
++ dev_dbg(&card->dev, "Wrong password\n");
++ ret = -EINVAL;
++ } else {
++ mmc_release_host(card->host);
++ device_release_driver(dev);
++ ret = device_attach(dev);
++ if (!ret)
++ return -EINVAL;
++ else
++ return len;
++ }
++ } else
++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++ }
++
++ mmc_release_host(card->host);
++ return ret;
++}
++
++static struct device_attribute mmc_dev_attr_lockable[] = {
++ __ATTR(lockable, S_IWUSR | S_IRUGO,
++ mmc_lockable_show, mmc_lockable_store),
++ __ATTR_NULL,
++};
++
++int mmc_lock_add_sysfs(struct mmc_card *card)
++{
++ if (!mmc_card_lockable(card))
++ return 0;
++
++ return mmc_add_attrs(card, mmc_dev_attr_lockable);
++}
++
++void mmc_lock_remove_sysfs(struct mmc_card *card)
++{
++ if (!mmc_card_lockable(card))
++ return;
++
++ mmc_remove_attrs(card, mmc_dev_attr_lockable);
++}
++
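For context (an illustration, not part of the patch): driving this password interface from userspace takes two steps. First instantiate a key of type "mmc" under the fixed description "mmc:key" (the store handler above requests exactly that description), then write one of the command words ("erase", "remove", "assign", "change", "unlock") to the card's "lockable" attribute. A minimal sketch, assuming libkeyutils is available; the sysfs path is illustrative and depends on the host and card (the bus_id format is "mmcX:RCA", see sysfs.c below):

/* Hypothetical userspace example, not part of the patch */
#include <stdio.h>
#include <string.h>
#include <keyutils.h>

int main(void)
{
	const char password[] = "secret";
	key_serial_t key;
	FILE *f;

	/* Make the password findable by the kernel's request_key() */
	key = add_key("mmc", "mmc:key", password, strlen(password),
		      KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}

	/* Illustrative path; substitute the real card's sysfs node */
	f = fopen("/sys/bus/mmc/devices/mmc0:0001/lockable", "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("unlock", f);
	return fclose(f) ? 1 : 0;
}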
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.h linux-2.6.22-591/drivers/mmc/core/lock.h
+--- linux-2.6.22-570/drivers/mmc/core/lock.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/mmc/core/lock.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,51 @@
++/*
++ * linux/drivers/mmc/core/lock.h
++ *
++ * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved.
++ * Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_LOCK_H
++#define _MMC_CORE_LOCK_H
++
++#ifdef CONFIG_MMC_PASSWORDS
++
++/* core-internal data */
++struct mmc_key_payload {
++ struct rcu_head rcu; /* RCU destructor */
++ unsigned short datalen; /* length of this data */
++ char data[0]; /* actual data */
++};
++
++int mmc_register_key_type(void);
++void mmc_unregister_key_type(void);
++
++int mmc_lock_add_sysfs(struct mmc_card *card);
++void mmc_lock_remove_sysfs(struct mmc_card *card);
++
++#else
++
++static inline int mmc_register_key_type(void)
++{
++ return 0;
++}
++
++static inline void mmc_unregister_key_type(void)
++{
++}
++
++static inline int mmc_lock_add_sysfs(struct mmc_card *card)
++{
++ return 0;
++}
++
++static inline void mmc_lock_remove_sysfs(struct mmc_card *card)
++{
++}
++
++#endif
++
++#endif
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc.c linux-2.6.22-591/drivers/mmc/core/mmc.c
+--- linux-2.6.22-570/drivers/mmc/core/mmc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/mmc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -18,6 +18,8 @@
+
+ #include "core.h"
+ #include "sysfs.h"
++#include "bus.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+
+ static const unsigned int tran_exp[] = {
+@@ -230,19 +232,74 @@
+ return err;
+ }
+
++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
++ card->raw_cid[2], card->raw_cid[3]);
++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
++ card->raw_csd[2], card->raw_csd[3]);
++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year);
++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev);
++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev);
++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid);
++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name);
++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid);
++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial);
++
++static struct device_attribute mmc_dev_attrs[] = {
++ MMC_ATTR_RO(cid),
++ MMC_ATTR_RO(csd),
++ MMC_ATTR_RO(date),
++ MMC_ATTR_RO(fwrev),
++ MMC_ATTR_RO(hwrev),
++ MMC_ATTR_RO(manfid),
++ MMC_ATTR_RO(name),
++ MMC_ATTR_RO(oemid),
++ MMC_ATTR_RO(serial),
++ __ATTR_NULL,
++};
++
++/*
++ * Adds sysfs entries as relevant.
++ */
++static int mmc_sysfs_add(struct mmc_card *card)
++{
++ int ret;
++
++ ret = mmc_add_attrs(card, mmc_dev_attrs);
++ if (ret < 0)
++ return ret;
++
++ ret = mmc_lock_add_sysfs(card);
++ if (ret < 0) {
++ mmc_remove_attrs(card, mmc_dev_attrs);
++ return ret;
++ }
++
++ return 0;
++}
++
++/*
++ * Removes the sysfs entries added by mmc_sysfs_add().
++ */
++static void mmc_sysfs_remove(struct mmc_card *card)
++{
++ mmc_lock_remove_sysfs(card);
++ mmc_remove_attrs(card, mmc_dev_attrs);
++}
++
+ /*
+ * Handle the detection and initialisation of a card.
+ *
+ * In the case of a resume, "curcard" will contain the card
+ * we're trying to reinitialise.
+ */
+-static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
++static int mmc_init_card(struct mmc_host *host, u32 ocr,
+ struct mmc_card *oldcard)
+ {
+ struct mmc_card *card;
+ int err;
+ u32 cid[4];
+ unsigned int max_dtr;
++ u32 status;
+
+ BUG_ON(!host);
+ BUG_ON(!host->claimed);
+@@ -294,6 +351,15 @@
+
+ mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
+
++ /*
++ * Check if card is locked.
++ */
++ err = mmc_send_status(card, &status);
++ if (err != MMC_ERR_NONE)
++ goto free_card;
++ if (status & R1_CARD_IS_LOCKED)
++ mmc_card_set_locked(card);
++
+ if (!oldcard) {
+ /*
+ * Fetch CSD from card.
+@@ -389,6 +455,8 @@
+ BUG_ON(!host);
+ BUG_ON(!host->card);
+
++ mmc_sysfs_remove(host->card);
++
+ mmc_remove_card(host->card);
+ host->card = NULL;
+ }
+@@ -413,8 +481,7 @@
+ mmc_release_host(host);
+
+ if (err != MMC_ERR_NONE) {
+- mmc_remove_card(host->card);
+- host->card = NULL;
++ mmc_remove(host);
+
+ mmc_claim_host(host);
+ mmc_detach_bus(host);
+@@ -434,7 +501,7 @@
+
+ mmc_claim_host(host);
+ mmc_deselect_cards(host);
+- host->card->state &= ~MMC_STATE_HIGHSPEED;
++ host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED);
+ mmc_release_host(host);
+ }
+
+@@ -453,11 +520,9 @@
+
+ mmc_claim_host(host);
+
+- err = mmc_sd_init_card(host, host->ocr, host->card);
++ err = mmc_init_card(host, host->ocr, host->card);
+ if (err != MMC_ERR_NONE) {
+- mmc_remove_card(host->card);
+- host->card = NULL;
+-
++ mmc_remove(host);
+ mmc_detach_bus(host);
+ }
+
+@@ -512,13 +577,17 @@
+ /*
+ * Detect and init the card.
+ */
+- err = mmc_sd_init_card(host, host->ocr, NULL);
++ err = mmc_init_card(host, host->ocr, NULL);
+ if (err != MMC_ERR_NONE)
+ goto err;
+
+ mmc_release_host(host);
+
+- err = mmc_register_card(host->card);
++ err = mmc_add_card(host->card);
++ if (err)
++ goto reclaim_host;
++
++ err = mmc_sysfs_add(host->card);
+ if (err)
+ goto reclaim_host;
+
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.c linux-2.6.22-591/drivers/mmc/core/mmc_ops.c
+--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2,6 +2,8 @@
+ * linux/drivers/mmc/mmc_ops.h
+ *
+ * Copyright 2006-2007 Pierre Ossman
++ * MMC password protection (C) 2006 Instituto Nokia de Tecnologia (INdT),
++ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -12,12 +14,14 @@
+ #include <linux/types.h>
+ #include <asm/scatterlist.h>
+ #include <linux/scatterlist.h>
++#include <linux/key.h>
+
+ #include <linux/mmc/host.h>
+ #include <linux/mmc/card.h>
+ #include <linux/mmc/mmc.h>
+
+ #include "core.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+
+ static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card)
+@@ -274,3 +278,114 @@
+ return MMC_ERR_NONE;
+ }
+
++#ifdef CONFIG_MMC_PASSWORDS
++
++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode)
++{
++ struct mmc_request mrq;
++ struct mmc_command cmd;
++ struct mmc_data data;
++ struct scatterlist sg;
++ struct mmc_key_payload *mpayload;
++ unsigned long erase_timeout;
++ int err, data_size;
++ u8 *data_buf;
++
++ mpayload = NULL;
++ data_size = 1;
++ if (!(mode & MMC_LOCK_MODE_ERASE)) {
++ mpayload = rcu_dereference(key->payload.data);
++ data_size = 2 + mpayload->datalen;
++ }
++
++ data_buf = kmalloc(data_size, GFP_KERNEL);
++ if (!data_buf)
++ return -ENOMEM;
++ memset(data_buf, 0, data_size);
++
++ data_buf[0] |= mode;
++ if (mode & MMC_LOCK_MODE_UNLOCK)
++ data_buf[0] &= ~MMC_LOCK_MODE_UNLOCK;
++
++ if (!(mode & MMC_LOCK_MODE_ERASE)) {
++ data_buf[1] = mpayload->datalen;
++ memcpy(data_buf + 2, mpayload->data, mpayload->datalen);
++ }
++
++ memset(&cmd, 0, sizeof(struct mmc_command));
++
++ cmd.opcode = MMC_SET_BLOCKLEN;
++ cmd.arg = data_size;
++ cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
++ err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES);
++ if (err != MMC_ERR_NONE)
++ goto out;
++
++ memset(&cmd, 0, sizeof(struct mmc_command));
++
++ cmd.opcode = MMC_LOCK_UNLOCK;
++ cmd.arg = 0;
++ cmd.flags = MMC_RSP_R1B | MMC_CMD_ADTC;
++
++ memset(&data, 0, sizeof(struct mmc_data));
++
++ mmc_set_data_timeout(&data, card, 1);
++
++ data.blksz = data_size;
++ data.blocks = 1;
++ data.flags = MMC_DATA_WRITE;
++ data.sg = &sg;
++ data.sg_len = 1;
++
++ memset(&mrq, 0, sizeof(struct mmc_request));
++
++ mrq.cmd = &cmd;
++ mrq.data = &data;
++
++ sg_init_one(&sg, data_buf, data_size);
++ err = mmc_wait_for_req(card->host, &mrq);
++ if (err != MMC_ERR_NONE)
++ goto out;
++
++ memset(&cmd, 0, sizeof(struct mmc_command));
++
++ cmd.opcode = MMC_SEND_STATUS;
++ cmd.arg = card->rca << 16;
++ cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
++
++ /* set timeout for forced erase operation to 3 min. (see MMC spec) */
++ erase_timeout = jiffies + 180 * HZ;
++ do {
++ /* we cannot use "retries" here because the
++ * R1_LOCK_UNLOCK_FAILED bit is cleared by subsequent reads to
++ * the status register, hiding the error condition */
++ err = mmc_wait_for_cmd(card->host, &cmd, 0);
++ if (err != MMC_ERR_NONE)
++ break;
++ /* the other modes don't need timeout checking */
++ if (!(mode & MMC_LOCK_MODE_ERASE))
++ continue;
++ if (time_after(jiffies, erase_timeout)) {
++ dev_dbg(&card->dev, "forced erase timed out\n");
++ err = MMC_ERR_TIMEOUT;
++ break;
++ }
++ } while (!(cmd.resp[0] & R1_READY_FOR_DATA));
++ if (cmd.resp[0] & R1_LOCK_UNLOCK_FAILED) {
++ dev_dbg(&card->dev, "LOCK_UNLOCK operation failed\n");
++ err = MMC_ERR_FAILED;
++ }
++
++ if (cmd.resp[0] & R1_CARD_IS_LOCKED)
++ mmc_card_set_locked(card);
++ else
++ card->state &= ~MMC_STATE_LOCKED;
++
++out:
++ kfree(data_buf);
++
++ return err;
++}
++
++#endif /* CONFIG_MMC_PASSWORDS */
++
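For reference, the data block that mmc_lock_unlock() assembles for CMD42 above has the following layout. This is a reading of the code, not a quote from the MMC specification:

/*
 * CMD42 (MMC_LOCK_UNLOCK) data block as built above:
 *
 *   byte 0       mode bits (set/clear password, lock, erase); an
 *                unlock clears the mode bit again, so the byte is 0
 *                and the password alone selects the operation
 *   byte 1       password length (mpayload->datalen)
 *   bytes 2..n   password data
 *
 * A forced erase transfers a single byte carrying only the ERASE
 * mode bit; no password follows.
 */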
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.h linux-2.6.22-591/drivers/mmc/core/mmc_ops.h
+--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.h 2007-12-21 15:36:12.000000000 -0500
+@@ -12,6 +12,8 @@
+ #ifndef _MMC_MMC_OPS_H
+ #define _MMC_MMC_OPS_H
+
++struct key;
++
+ int mmc_select_card(struct mmc_card *card);
+ int mmc_deselect_cards(struct mmc_host *host);
+ int mmc_go_idle(struct mmc_host *host);
+@@ -22,6 +24,7 @@
+ int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd);
+ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value);
+ int mmc_send_status(struct mmc_card *card, u32 *status);
++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode);
+
+ #endif
+
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sd.c linux-2.6.22-591/drivers/mmc/core/sd.c
+--- linux-2.6.22-570/drivers/mmc/core/sd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/sd.c 2007-12-21 15:36:12.000000000 -0500
+@@ -19,11 +19,11 @@
+
+ #include "core.h"
+ #include "sysfs.h"
++#include "bus.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+ #include "sd_ops.h"
+
+-#include "core.h"
+-
+ static const unsigned int tran_exp[] = {
+ 10000, 100000, 1000000, 10000000,
+ 0, 0, 0, 0
+@@ -280,6 +280,62 @@
+ return err;
+ }
+
++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
++ card->raw_cid[2], card->raw_cid[3]);
++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
++ card->raw_csd[2], card->raw_csd[3]);
++MMC_ATTR_FN(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]);
++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year);
++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev);
++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev);
++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid);
++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name);
++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid);
++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial);
++
++static struct device_attribute mmc_sd_dev_attrs[] = {
++ MMC_ATTR_RO(cid),
++ MMC_ATTR_RO(csd),
++ MMC_ATTR_RO(scr),
++ MMC_ATTR_RO(date),
++ MMC_ATTR_RO(fwrev),
++ MMC_ATTR_RO(hwrev),
++ MMC_ATTR_RO(manfid),
++ MMC_ATTR_RO(name),
++ MMC_ATTR_RO(oemid),
++ MMC_ATTR_RO(serial),
++ __ATTR_NULL,
++};
++
++/*
++ * Adds sysfs entries as relevant.
++ */
++static int mmc_sd_sysfs_add(struct mmc_card *card)
++{
++ int ret;
++
++ ret = mmc_add_attrs(card, mmc_sd_dev_attrs);
++ if (ret < 0)
++ return ret;
++
++ ret = mmc_lock_add_sysfs(card);
++ if (ret < 0) {
++ mmc_remove_attrs(card, mmc_sd_dev_attrs);
++ return ret;
++ }
++
++ return 0;
++}
++
++/*
++ * Removes the sysfs entries added by mmc_sd_sysfs_add().
++ */
++static void mmc_sd_sysfs_remove(struct mmc_card *card)
++{
++ mmc_lock_remove_sysfs(card);
++ mmc_remove_attrs(card, mmc_sd_dev_attrs);
++}
++
+ /*
+ * Handle the detection and initialisation of a card.
+ *
+@@ -293,6 +349,7 @@
+ int err;
+ u32 cid[4];
+ unsigned int max_dtr;
++ u32 status;
+
+ BUG_ON(!host);
+ BUG_ON(!host->claimed);
+@@ -352,6 +409,15 @@
+
+ mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
+
++ /*
++ * Check if card is locked.
++ */
++ err = mmc_send_status(card, &status);
++ if (err != MMC_ERR_NONE)
++ goto free_card;
++ if (status & R1_CARD_IS_LOCKED)
++ mmc_card_set_locked(card);
++
+ if (!oldcard) {
+ /*
+ * Fetch CSD from card.
+@@ -463,6 +529,8 @@
+ BUG_ON(!host);
+ BUG_ON(!host->card);
+
++ mmc_sd_sysfs_remove(host->card);
++
+ mmc_remove_card(host->card);
+ host->card = NULL;
+ }
+@@ -487,8 +555,7 @@
+ mmc_release_host(host);
+
+ if (err != MMC_ERR_NONE) {
+- mmc_remove_card(host->card);
+- host->card = NULL;
++ mmc_sd_remove(host);
+
+ mmc_claim_host(host);
+ mmc_detach_bus(host);
+@@ -508,7 +575,7 @@
+
+ mmc_claim_host(host);
+ mmc_deselect_cards(host);
+- host->card->state &= ~MMC_STATE_HIGHSPEED;
++ host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED);
+ mmc_release_host(host);
+ }
+
+@@ -529,9 +596,7 @@
+
+ err = mmc_sd_init_card(host, host->ocr, host->card);
+ if (err != MMC_ERR_NONE) {
+- mmc_remove_card(host->card);
+- host->card = NULL;
+-
++ mmc_sd_remove(host);
+ mmc_detach_bus(host);
+ }
+
+@@ -599,7 +664,11 @@
+
+ mmc_release_host(host);
+
+- err = mmc_register_card(host->card);
++ err = mmc_add_card(host->card);
++ if (err)
++ goto reclaim_host;
++
++ err = mmc_sd_sysfs_add(host->card);
+ if (err)
+ goto reclaim_host;
+
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.c linux-2.6.22-591/drivers/mmc/core/sysfs.c
+--- linux-2.6.22-570/drivers/mmc/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2,6 +2,7 @@
+ * linux/drivers/mmc/core/sysfs.c
+ *
+ * Copyright (C) 2003 Russell King, All Rights Reserved.
++ * Copyright 2007 Pierre Ossman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+@@ -9,352 +10,34 @@
+ *
+ * MMC sysfs/driver model support.
+ */
+-#include <linux/module.h>
+-#include <linux/init.h>
+ #include <linux/device.h>
+-#include <linux/idr.h>
+-#include <linux/workqueue.h>
+
+ #include <linux/mmc/card.h>
+-#include <linux/mmc/host.h>
+
+ #include "sysfs.h"
+
+-#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev)
+-#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv)
+-#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
+-
+-#define MMC_ATTR(name, fmt, args...) \
+-static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \
+-{ \
+- struct mmc_card *card = dev_to_mmc_card(dev); \
+- return sprintf(buf, fmt, args); \
+-}
+-
+-MMC_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
+- card->raw_cid[2], card->raw_cid[3]);
+-MMC_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
+- card->raw_csd[2], card->raw_csd[3]);
+-MMC_ATTR(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]);
+-MMC_ATTR(date, "%02d/%04d\n", card->cid.month, card->cid.year);
+-MMC_ATTR(fwrev, "0x%x\n", card->cid.fwrev);
+-MMC_ATTR(hwrev, "0x%x\n", card->cid.hwrev);
+-MMC_ATTR(manfid, "0x%06x\n", card->cid.manfid);
+-MMC_ATTR(name, "%s\n", card->cid.prod_name);
+-MMC_ATTR(oemid, "0x%04x\n", card->cid.oemid);
+-MMC_ATTR(serial, "0x%08x\n", card->cid.serial);
+-
+-#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL)
+-
+-static struct device_attribute mmc_dev_attrs[] = {
+- MMC_ATTR_RO(cid),
+- MMC_ATTR_RO(csd),
+- MMC_ATTR_RO(date),
+- MMC_ATTR_RO(fwrev),
+- MMC_ATTR_RO(hwrev),
+- MMC_ATTR_RO(manfid),
+- MMC_ATTR_RO(name),
+- MMC_ATTR_RO(oemid),
+- MMC_ATTR_RO(serial),
+- __ATTR_NULL
+-};
+-
+-static struct device_attribute mmc_dev_attr_scr = MMC_ATTR_RO(scr);
+-
+-
+-static void mmc_release_card(struct device *dev)
+-{
+- struct mmc_card *card = dev_to_mmc_card(dev);
+-
+- kfree(card);
+-}
+-
+-/*
+- * This currently matches any MMC driver to any MMC card - drivers
+- * themselves make the decision whether to drive this card in their
+- * probe method.
+- */
+-static int mmc_bus_match(struct device *dev, struct device_driver *drv)
+-{
+- return 1;
+-}
+-
+-static int
+-mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf,
+- int buf_size)
+-{
+- struct mmc_card *card = dev_to_mmc_card(dev);
+- char ccc[13];
+- int retval = 0, i = 0, length = 0;
+-
+-#define add_env(fmt,val) do { \
+- retval = add_uevent_var(envp, num_envp, &i, \
+- buf, buf_size, &length, \
+- fmt, val); \
+- if (retval) \
+- return retval; \
+-} while (0);
+-
+- for (i = 0; i < 12; i++)
+- ccc[i] = card->csd.cmdclass & (1 << i) ? '1' : '0';
+- ccc[12] = '\0';
+-
+- add_env("MMC_CCC=%s", ccc);
+- add_env("MMC_MANFID=%06x", card->cid.manfid);
+- add_env("MMC_NAME=%s", mmc_card_name(card));
+- add_env("MMC_OEMID=%04x", card->cid.oemid);
+-#undef add_env
+- envp[i] = NULL;
+-
+- return 0;
+-}
+-
+-static int mmc_bus_suspend(struct device *dev, pm_message_t state)
++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs)
+ {
+- struct mmc_driver *drv = to_mmc_driver(dev->driver);
+- struct mmc_card *card = dev_to_mmc_card(dev);
+- int ret = 0;
+-
+- if (dev->driver && drv->suspend)
+- ret = drv->suspend(card, state);
+- return ret;
+-}
++ int error = 0;
++ int i;
+
+-static int mmc_bus_resume(struct device *dev)
+-{
+- struct mmc_driver *drv = to_mmc_driver(dev->driver);
+- struct mmc_card *card = dev_to_mmc_card(dev);
+- int ret = 0;
+-
+- if (dev->driver && drv->resume)
+- ret = drv->resume(card);
+- return ret;
+-}
+-
+-static int mmc_bus_probe(struct device *dev)
+-{
+- struct mmc_driver *drv = to_mmc_driver(dev->driver);
+- struct mmc_card *card = dev_to_mmc_card(dev);
+-
+- return drv->probe(card);
+-}
+-
+-static int mmc_bus_remove(struct device *dev)
+-{
+- struct mmc_driver *drv = to_mmc_driver(dev->driver);
+- struct mmc_card *card = dev_to_mmc_card(dev);
+-
+- drv->remove(card);
+-
+- return 0;
+-}
+-
+-static struct bus_type mmc_bus_type = {
+- .name = "mmc",
+- .dev_attrs = mmc_dev_attrs,
+- .match = mmc_bus_match,
+- .uevent = mmc_bus_uevent,
+- .probe = mmc_bus_probe,
+- .remove = mmc_bus_remove,
+- .suspend = mmc_bus_suspend,
+- .resume = mmc_bus_resume,
+-};
+-
+-/**
+- * mmc_register_driver - register a media driver
+- * @drv: MMC media driver
+- */
+-int mmc_register_driver(struct mmc_driver *drv)
+-{
+- drv->drv.bus = &mmc_bus_type;
+- return driver_register(&drv->drv);
+-}
+-
+-EXPORT_SYMBOL(mmc_register_driver);
+-
+-/**
+- * mmc_unregister_driver - unregister a media driver
+- * @drv: MMC media driver
+- */
+-void mmc_unregister_driver(struct mmc_driver *drv)
+-{
+- drv->drv.bus = &mmc_bus_type;
+- driver_unregister(&drv->drv);
+-}
+-
+-EXPORT_SYMBOL(mmc_unregister_driver);
+-
+-
+-/*
+- * Internal function. Initialise a MMC card structure.
+- */
+-void mmc_init_card(struct mmc_card *card, struct mmc_host *host)
+-{
+- memset(card, 0, sizeof(struct mmc_card));
+- card->host = host;
+- device_initialize(&card->dev);
+- card->dev.parent = mmc_classdev(host);
+- card->dev.bus = &mmc_bus_type;
+- card->dev.release = mmc_release_card;
+-}
+-
+-/*
+- * Internal function. Register a new MMC card with the driver model.
+- */
+-int mmc_register_card(struct mmc_card *card)
+-{
+- int ret;
+-
+- snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
+- "%s:%04x", mmc_hostname(card->host), card->rca);
+-
+- ret = device_add(&card->dev);
+- if (ret == 0) {
+- if (mmc_card_sd(card)) {
+- ret = device_create_file(&card->dev, &mmc_dev_attr_scr);
+- if (ret)
+- device_del(&card->dev);
+- }
++ for (i = 0; attr_name(attrs[i]); i++) {
++ error = device_create_file(&card->dev, &attrs[i]);
++ if (error) {
++ while (--i >= 0)
++ device_remove_file(&card->dev, &attrs[i]);
++ break;
+ }
+- if (ret == 0)
+- mmc_card_set_present(card);
+- return ret;
+-}
+-
+-/*
+- * Internal function. Unregister a new MMC card with the
+- * driver model, and (eventually) free it.
+- */
+-void mmc_remove_card(struct mmc_card *card)
+-{
+- if (mmc_card_present(card)) {
+- if (mmc_card_sd(card))
+- device_remove_file(&card->dev, &mmc_dev_attr_scr);
+-
+- device_del(&card->dev);
+ }
+
+- put_device(&card->dev);
+-}
+-
+-
+-static void mmc_host_classdev_release(struct device *dev)
+-{
+- struct mmc_host *host = cls_dev_to_mmc_host(dev);
+- kfree(host);
+-}
+-
+-static struct class mmc_host_class = {
+- .name = "mmc_host",
+- .dev_release = mmc_host_classdev_release,
+-};
+-
+-static DEFINE_IDR(mmc_host_idr);
+-static DEFINE_SPINLOCK(mmc_host_lock);
+-
+-/*
+- * Internal function. Allocate a new MMC host.
+- */
+-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev)
+-{
+- struct mmc_host *host;
+-
+- host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
+- if (host) {
+- memset(host, 0, sizeof(struct mmc_host) + extra);
+-
+- host->parent = dev;
+- host->class_dev.parent = dev;
+- host->class_dev.class = &mmc_host_class;
+- device_initialize(&host->class_dev);
+- }
+-
+- return host;
+-}
+-
+-/*
+- * Internal function. Register a new MMC host with the MMC class.
+- */
+-int mmc_add_host_sysfs(struct mmc_host *host)
+-{
+- int err;
+-
+- if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))
+- return -ENOMEM;
+-
+- spin_lock(&mmc_host_lock);
+- err = idr_get_new(&mmc_host_idr, host, &host->index);
+- spin_unlock(&mmc_host_lock);
+- if (err)
+- return err;
+-
+- snprintf(host->class_dev.bus_id, BUS_ID_SIZE,
+- "mmc%d", host->index);
+-
+- return device_add(&host->class_dev);
+-}
+-
+-/*
+- * Internal function. Unregister a MMC host with the MMC class.
+- */
+-void mmc_remove_host_sysfs(struct mmc_host *host)
+-{
+- device_del(&host->class_dev);
+-
+- spin_lock(&mmc_host_lock);
+- idr_remove(&mmc_host_idr, host->index);
+- spin_unlock(&mmc_host_lock);
++ return error;
+ }
+
+-/*
+- * Internal function. Free a MMC host.
+- */
+-void mmc_free_host_sysfs(struct mmc_host *host)
++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs)
+ {
+- put_device(&host->class_dev);
+-}
++ int i;
+
+-static struct workqueue_struct *workqueue;
+-
+-/*
+- * Internal function. Schedule delayed work in the MMC work queue.
+- */
+-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay)
+-{
+- return queue_delayed_work(workqueue, work, delay);
+-}
+-
+-/*
+- * Internal function. Flush all scheduled work from the MMC work queue.
+- */
+-void mmc_flush_scheduled_work(void)
+-{
+- flush_workqueue(workqueue);
+-}
+-
+-static int __init mmc_init(void)
+-{
+- int ret;
+-
+- workqueue = create_singlethread_workqueue("kmmcd");
+- if (!workqueue)
+- return -ENOMEM;
+-
+- ret = bus_register(&mmc_bus_type);
+- if (ret == 0) {
+- ret = class_register(&mmc_host_class);
+- if (ret)
+- bus_unregister(&mmc_bus_type);
+- }
+- return ret;
+-}
+-
+-static void __exit mmc_exit(void)
+-{
+- class_unregister(&mmc_host_class);
+- bus_unregister(&mmc_bus_type);
+- destroy_workqueue(workqueue);
++ for (i = 0; attr_name(attrs[i]); i++)
++ device_remove_file(&card->dev, &attrs[i]);
+ }
+
+-module_init(mmc_init);
+-module_exit(mmc_exit);
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.h linux-2.6.22-591/drivers/mmc/core/sysfs.h
+--- linux-2.6.22-570/drivers/mmc/core/sysfs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/core/sysfs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -11,17 +11,16 @@
+ #ifndef _MMC_CORE_SYSFS_H
+ #define _MMC_CORE_SYSFS_H
+
+-void mmc_init_card(struct mmc_card *card, struct mmc_host *host);
+-int mmc_register_card(struct mmc_card *card);
+-void mmc_remove_card(struct mmc_card *card);
++#define MMC_ATTR_FN(name, fmt, args...) \
++static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \
++{ \
++ struct mmc_card *card = container_of(dev, struct mmc_card, dev);\
++ return sprintf(buf, fmt, args); \
++}
+
+-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev);
+-int mmc_add_host_sysfs(struct mmc_host *host);
+-void mmc_remove_host_sysfs(struct mmc_host *host);
+-void mmc_free_host_sysfs(struct mmc_host *host);
++#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL)
+
+-int mmc_schedule_work(struct work_struct *work);
+-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay);
+-void mmc_flush_scheduled_work(void);
++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs);
++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs);
+
+ #endif
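To make the macros above concrete: MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial) together with MMC_ATTR_RO(serial), as used by mmc.c and sd.c above, expands to roughly the following (whitespace aside):

static ssize_t mmc_serial_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct mmc_card *card = container_of(dev, struct mmc_card, dev);
	return sprintf(buf, "0x%08x\n", card->cid.serial);
}

/* ...and the matching read-only attribute table entry: */
__ATTR(serial, S_IRUGO, mmc_serial_show, NULL)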
+diff -Nurb linux-2.6.22-570/drivers/mmc/host/sdhci.c linux-2.6.22-591/drivers/mmc/host/sdhci.c
+--- linux-2.6.22-570/drivers/mmc/host/sdhci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mmc/host/sdhci.c 2007-12-21 15:36:12.000000000 -0500
+@@ -70,6 +70,14 @@
+ .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE,
+ },
+
++ {
++ .vendor = PCI_VENDOR_ID_ENE,
++ .device = PCI_DEVICE_ID_ENE_CB712_SD_2,
++ .subvendor = PCI_ANY_ID,
++ .subdevice = PCI_ANY_ID,
++ .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE,
++ },
++
+ { /* Generic SD host controller */
+ PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
+ },
+diff -Nurb linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c
+--- linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/mtd/mtd.h>
+ #include <linux/blkdev.h>
+ #include <linux/blkpg.h>
++#include <linux/freezer.h>
+ #include <linux/spinlock.h>
+ #include <linux/hdreg.h>
+ #include <linux/init.h>
+@@ -80,7 +81,7 @@
+ struct request_queue *rq = tr->blkcore_priv->rq;
+
+ /* we might get involved when memory gets low, so use PF_MEMALLOC */
+- current->flags |= PF_MEMALLOC | PF_NOFREEZE;
++ current->flags |= PF_MEMALLOC;
+
+ spin_lock_irq(rq->queue_lock);
+ while (!kthread_should_stop()) {
+diff -Nurb linux-2.6.22-570/drivers/mtd/ubi/wl.c linux-2.6.22-591/drivers/mtd/ubi/wl.c
+--- linux-2.6.22-570/drivers/mtd/ubi/wl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/mtd/ubi/wl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1346,6 +1346,7 @@
+ ubi_msg("background thread \"%s\" started, PID %d",
+ ubi->bgt_name, current->pid);
+
++ set_freezable();
+ for (;;) {
+ int err;
+
+diff -Nurb linux-2.6.22-570/drivers/net/3c523.c linux-2.6.22-591/drivers/net/3c523.c
+--- linux-2.6.22-570/drivers/net/3c523.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/3c523.c 2007-12-21 15:36:12.000000000 -0500
+@@ -990,7 +990,7 @@
+ if (skb != NULL) {
+ skb_reserve(skb, 2); /* 16 byte alignment */
+ skb_put(skb,totlen);
+- eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
++ skb_copy_to_linear_data(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
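The eth_copy_and_sum() conversions here and in the network drivers below are mechanical: the helper's trailing checksum argument was never used, so each call becomes a plain copy into the skb's linear data area with identical behaviour. Schematically:

/* before: the final base-checksum argument was ignored */
eth_copy_and_sum(skb, src, len, 0);

/* after: the same copy into skb->data */
skb_copy_to_linear_data(skb, src, len);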
+diff -Nurb linux-2.6.22-570/drivers/net/7990.c linux-2.6.22-591/drivers/net/7990.c
+--- linux-2.6.22-570/drivers/net/7990.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/7990.c 2007-12-21 15:36:12.000000000 -0500
+@@ -333,9 +333,9 @@
+
+ skb_reserve (skb, 2); /* 16 byte align */
+ skb_put (skb, len); /* make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
+- len, 0);
++ len);
+ skb->protocol = eth_type_trans (skb, dev);
+ netif_rx (skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/8139too.c linux-2.6.22-591/drivers/net/8139too.c
+--- linux-2.6.22-570/drivers/net/8139too.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/8139too.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2017,7 +2017,7 @@
+ #if RX_BUF_IDX == 3
+ wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
+ #else
+- eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
++ skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
+ #endif
+ skb_put (skb, pkt_size);
+
+diff -Nurb linux-2.6.22-570/drivers/net/Kconfig linux-2.6.22-591/drivers/net/Kconfig
+--- linux-2.6.22-570/drivers/net/Kconfig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/Kconfig 2007-12-21 15:36:14.000000000 -0500
+@@ -119,6 +119,20 @@
+
+ If you don't know what to use this for, you don't need it.
+
++config ETUN
++ tristate "Ethernet tunnel device driver support"
++ depends on SYSFS
++ ---help---
++ ETUN provides a pair of network devices that can be used for
++ configuring interesting topologies. What one device transmits,
++ the other receives, and vice versa. The link-level framing
++ is Ethernet for wide compatibility with network stacks.
++
++ To compile this driver as a module, choose M here: the module
++ will be called etun.
++
++ If you don't know what to use this for, you don't need it.
++
+ config NET_SB1000
+ tristate "General Instruments Surfboard 1000"
+ depends on PNP
+@@ -2555,6 +2569,18 @@
+
+ source "drivers/s390/net/Kconfig"
+
++config XEN_NETDEV_FRONTEND
++ tristate "Xen network device frontend driver"
++ depends on XEN
++ default y
++ help
++ The network device frontend driver allows the kernel to
++ access network devices exported by a virtual
++ machine containing a physical network device driver. The
++ frontend driver is intended for unprivileged guest domains;
++ if you are compiling a kernel for a Xen guest, you almost
++ certainly want to enable this.
++
+ config ISERIES_VETH
+ tristate "iSeries Virtual Ethernet driver support"
+ depends on PPC_ISERIES
+diff -Nurb linux-2.6.22-570/drivers/net/Makefile linux-2.6.22-591/drivers/net/Makefile
+--- linux-2.6.22-570/drivers/net/Makefile 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/Makefile 2007-12-21 15:36:14.000000000 -0500
+@@ -186,6 +186,7 @@
+ obj-$(CONFIG_MACMACE) += macmace.o
+ obj-$(CONFIG_MAC89x0) += mac89x0.o
+ obj-$(CONFIG_TUN) += tun.o
++obj-$(CONFIG_ETUN) += etun.o
+ obj-$(CONFIG_NET_NETX) += netx-eth.o
+ obj-$(CONFIG_DL2K) += dl2k.o
+ obj-$(CONFIG_R8169) += r8169.o
+@@ -224,7 +225,10 @@
+ obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/
+
+ obj-$(CONFIG_NETCONSOLE) += netconsole.o
++obj-$(CONFIG_KGDBOE) += kgdboe.o
+
+ obj-$(CONFIG_FS_ENET) += fs_enet/
+
+ obj-$(CONFIG_NETXEN_NIC) += netxen/
++obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
++
+diff -Nurb linux-2.6.22-570/drivers/net/a2065.c linux-2.6.22-591/drivers/net/a2065.c
+--- linux-2.6.22-570/drivers/net/a2065.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/a2065.c 2007-12-21 15:36:12.000000000 -0500
+@@ -322,9 +322,9 @@
+
+ skb_reserve (skb, 2); /* 16 byte align */
+ skb_put (skb, len); /* make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
+- len, 0);
++ len);
+ skb->protocol = eth_type_trans (skb, dev);
+ netif_rx (skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/ariadne.c linux-2.6.22-591/drivers/net/ariadne.c
+--- linux-2.6.22-570/drivers/net/ariadne.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ariadne.c 2007-12-21 15:36:12.000000000 -0500
+@@ -746,7 +746,7 @@
+
+ skb_reserve(skb,2); /* 16 byte align */
+ skb_put(skb,pkt_len); /* Make room */
+- eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
++ skb_copy_to_linear_data(skb, (char *)priv->rx_buff[entry], pkt_len);
+ skb->protocol=eth_type_trans(skb,dev);
+ #if 0
+ printk(KERN_DEBUG "RX pkt type 0x%04x from ",
+diff -Nurb linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c
+--- linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c 2007-12-21 15:36:12.000000000 -0500
+@@ -258,7 +258,7 @@
+ skb_reserve(skb, 2);
+ dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
+ length, DMA_FROM_DEVICE);
+- eth_copy_and_sum(skb, ep->rx_buf[entry], length, 0);
++ skb_copy_to_linear_data(skb, ep->rx_buf[entry], length);
+ skb_put(skb, length);
+ skb->protocol = eth_type_trans(skb, dev);
+
+diff -Nurb linux-2.6.22-570/drivers/net/au1000_eth.c linux-2.6.22-591/drivers/net/au1000_eth.c
+--- linux-2.6.22-570/drivers/net/au1000_eth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/au1000_eth.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1205,8 +1205,8 @@
+ continue;
+ }
+ skb_reserve(skb, 2); /* 16 byte IP header align */
+- eth_copy_and_sum(skb,
+- (unsigned char *)pDB->vaddr, frmlen, 0);
++ skb_copy_to_linear_data(skb,
++ (unsigned char *)pDB->vaddr, frmlen);
+ skb_put(skb, frmlen);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb); /* pass the packet to upper layers */
+diff -Nurb linux-2.6.22-570/drivers/net/bnx2.c linux-2.6.22-591/drivers/net/bnx2.c
+--- linux-2.6.22-570/drivers/net/bnx2.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/bnx2.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6490,10 +6490,10 @@
+ memcpy(dev->perm_addr, bp->mac_addr, 6);
+ bp->name = board_info[ent->driver_data].name;
+
+- if (CHIP_NUM(bp) == CHIP_NUM_5709)
+- dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
+- else
+ dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
++ if (CHIP_NUM(bp) == CHIP_NUM_5709)
++ dev->features |= NETIF_F_IPV6_CSUM;
++
+ #ifdef BCM_VLAN
+ dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_3ad.c linux-2.6.22-591/drivers/net/bonding/bond_3ad.c
+--- linux-2.6.22-570/drivers/net/bonding/bond_3ad.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/bonding/bond_3ad.c 2007-12-21 15:36:14.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/ethtool.h>
+ #include <linux/if_bonding.h>
+ #include <linux/pkt_sched.h>
++#include <net/net_namespace.h>
+ #include "bonding.h"
+ #include "bond_3ad.h"
+
+@@ -2448,6 +2449,9 @@
+ struct slave *slave = NULL;
+ int ret = NET_RX_DROP;
+
++ if (dev->nd_net != &init_net)
++ goto out;
++
+ if (!(dev->flags & IFF_MASTER))
+ goto out;
+
+diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_alb.c linux-2.6.22-591/drivers/net/bonding/bond_alb.c
+--- linux-2.6.22-570/drivers/net/bonding/bond_alb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/bonding/bond_alb.c 2007-12-21 15:36:14.000000000 -0500
+@@ -345,6 +345,9 @@
+ struct arp_pkt *arp = (struct arp_pkt *)skb->data;
+ int res = NET_RX_DROP;
+
++ if (bond_dev->nd_net != &init_net)
++ goto out;
++
+ if (!(bond_dev->flags & IFF_MASTER))
+ goto out;
+
+diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_main.c linux-2.6.22-591/drivers/net/bonding/bond_main.c
+--- linux-2.6.22-570/drivers/net/bonding/bond_main.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/bonding/bond_main.c 2007-12-21 15:36:14.000000000 -0500
+@@ -75,6 +75,7 @@
+ #include <linux/if_vlan.h>
+ #include <linux/if_bonding.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+ #include "bonding.h"
+ #include "bond_3ad.h"
+ #include "bond_alb.h"
+@@ -2376,6 +2377,7 @@
+ * can tag the ARP with the proper VLAN tag.
+ */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.fl4_dst = targets[i];
+ fl.fl4_tos = RTO_ONLINK;
+
+@@ -2485,6 +2487,9 @@
+ unsigned char *arp_ptr;
+ u32 sip, tip;
+
++ if (dev->nd_net != &init_net)
++ goto out;
++
+ if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
+ goto out;
+
+@@ -3172,7 +3177,7 @@
+ {
+ int len = strlen(DRV_NAME);
+
+- for (bond_proc_dir = proc_net->subdir; bond_proc_dir;
++ for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir;
+ bond_proc_dir = bond_proc_dir->next) {
+ if ((bond_proc_dir->namelen == len) &&
+ !memcmp(bond_proc_dir->name, DRV_NAME, len)) {
+@@ -3181,7 +3186,7 @@
+ }
+
+ if (!bond_proc_dir) {
+- bond_proc_dir = proc_mkdir(DRV_NAME, proc_net);
++ bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
+ if (bond_proc_dir) {
+ bond_proc_dir->owner = THIS_MODULE;
+ } else {
+@@ -3216,7 +3221,7 @@
+ bond_proc_dir->owner = NULL;
+ }
+ } else {
+- remove_proc_entry(DRV_NAME, proc_net);
++ remove_proc_entry(DRV_NAME, init_net.proc_net);
+ bond_proc_dir = NULL;
+ }
+ }
+@@ -3323,6 +3328,9 @@
+ {
+ struct net_device *event_dev = (struct net_device *)ptr;
+
++ if (event_dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ dprintk("event_dev: %s, event: %lx\n",
+ (event_dev ? event_dev->name : "None"),
+ event);
+@@ -3740,7 +3748,7 @@
+ }
+
+ down_write(&(bonding_rwsem));
+- slave_dev = dev_get_by_name(ifr->ifr_slave);
++ slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave);
+
+ dprintk("slave_dev=%p: \n", slave_dev);
+
+diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c
+--- linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c 2007-12-21 15:36:14.000000000 -0500
+@@ -35,6 +35,7 @@
+ #include <linux/ctype.h>
+ #include <linux/inet.h>
+ #include <linux/rtnetlink.h>
++#include <net/net_namespace.h>
+
+ /* #define BONDING_DEBUG 1 */
+ #include "bonding.h"
+@@ -299,7 +300,7 @@
+ read_unlock_bh(&bond->lock);
+ printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n",
+ bond->dev->name, ifname);
+- dev = dev_get_by_name(ifname);
++ dev = dev_get_by_name(&init_net, ifname);
+ if (!dev) {
+ printk(KERN_INFO DRV_NAME
+ ": %s: Interface %s does not exist!\n",
+diff -Nurb linux-2.6.22-570/drivers/net/dl2k.c linux-2.6.22-591/drivers/net/dl2k.c
+--- linux-2.6.22-570/drivers/net/dl2k.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/dl2k.c 2007-12-21 15:36:12.000000000 -0500
+@@ -866,9 +866,9 @@
+ PCI_DMA_FROMDEVICE);
+ /* 16 byte align the IP header */
+ skb_reserve (skb, 2);
+- eth_copy_and_sum (skb,
++ skb_copy_to_linear_data (skb,
+ np->rx_skbuff[entry]->data,
+- pkt_len, 0);
++ pkt_len);
+ skb_put (skb, pkt_len);
+ pci_dma_sync_single_for_device(np->pdev,
+ desc->fraginfo &
+diff -Nurb linux-2.6.22-570/drivers/net/dummy.c linux-2.6.22-591/drivers/net/dummy.c
+--- linux-2.6.22-570/drivers/net/dummy.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/dummy.c 2007-12-21 15:36:12.000000000 -0500
+@@ -34,11 +34,17 @@
+ #include <linux/etherdevice.h>
+ #include <linux/init.h>
+ #include <linux/moduleparam.h>
++#include <linux/rtnetlink.h>
++#include <net/rtnetlink.h>
++
++struct dummy_priv {
++ struct net_device *dev;
++ struct list_head list;
++};
+
+ static int numdummies = 1;
+
+ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev);
+-static struct net_device_stats *dummy_get_stats(struct net_device *dev);
+
+ static int dummy_set_address(struct net_device *dev, void *p)
+ {
+@@ -56,13 +62,13 @@
+ {
+ }
+
+-static void __init dummy_setup(struct net_device *dev)
++static void dummy_setup(struct net_device *dev)
+ {
+ /* Initialize the device structure. */
+- dev->get_stats = dummy_get_stats;
+ dev->hard_start_xmit = dummy_xmit;
+ dev->set_multicast_list = set_multicast_list;
+ dev->set_mac_address = dummy_set_address;
++ dev->destructor = free_netdev;
+
+ /* Fill in device structure with ethernet-generic values. */
+ ether_setup(dev);
+@@ -76,77 +82,114 @@
+
+ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+- struct net_device_stats *stats = netdev_priv(dev);
+-
+- stats->tx_packets++;
+- stats->tx_bytes+=skb->len;
++ dev->stats.tx_packets++;
++ dev->stats.tx_bytes += skb->len;
+
+ dev_kfree_skb(skb);
+ return 0;
+ }
+
+-static struct net_device_stats *dummy_get_stats(struct net_device *dev)
++static LIST_HEAD(dummies);
++
++static int dummy_newlink(struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[])
+ {
+- return netdev_priv(dev);
++ struct dummy_priv *priv = netdev_priv(dev);
++ int err;
++
++ err = register_netdevice(dev);
++ if (err < 0)
++ return err;
++
++ priv->dev = dev;
++ list_add_tail(&priv->list, &dummies);
++ return 0;
++}
++
++static void dummy_dellink(struct net_device *dev)
++{
++ struct dummy_priv *priv = netdev_priv(dev);
++
++ list_del(&priv->list);
++ unregister_netdevice(dev);
+ }
+
+-static struct net_device **dummies;
++static struct rtnl_link_ops dummy_link_ops __read_mostly = {
++ .kind = "dummy",
++ .priv_size = sizeof(struct dummy_priv),
++ .setup = dummy_setup,
++ .newlink = dummy_newlink,
++ .dellink = dummy_dellink,
++};
+
+ /* Number of dummy devices to be set up by this module. */
+ module_param(numdummies, int, 0);
+ MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
+
+-static int __init dummy_init_one(int index)
++static int __init dummy_init_one(void)
+ {
+ struct net_device *dev_dummy;
++ struct dummy_priv *priv;
+ int err;
+
+- dev_dummy = alloc_netdev(sizeof(struct net_device_stats),
+- "dummy%d", dummy_setup);
++ dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d",
++ dummy_setup);
+
+ if (!dev_dummy)
+ return -ENOMEM;
+
+- if ((err = register_netdev(dev_dummy))) {
+- free_netdev(dev_dummy);
+- dev_dummy = NULL;
+- } else {
+- dummies[index] = dev_dummy;
+- }
++ err = dev_alloc_name(dev_dummy, dev_dummy->name);
++ if (err < 0)
++ goto err;
++
++ dev_dummy->rtnl_link_ops = &dummy_link_ops;
++ err = register_netdevice(dev_dummy);
++ if (err < 0)
++ goto err;
++
++ priv = netdev_priv(dev_dummy);
++ priv->dev = dev_dummy;
++ list_add_tail(&priv->list, &dummies);
++ return 0;
+
++err:
++ free_netdev(dev_dummy);
+ return err;
+ }
+
+-static void dummy_free_one(int index)
+-{
+- unregister_netdev(dummies[index]);
+- free_netdev(dummies[index]);
+-}
+-
+ static int __init dummy_init_module(void)
+ {
++ struct dummy_priv *priv, *next;
+ int i, err = 0;
+- dummies = kmalloc(numdummies * sizeof(void *), GFP_KERNEL);
+- if (!dummies)
+- return -ENOMEM;
++
++ rtnl_lock();
++ err = __rtnl_link_register(&dummy_link_ops);
++
+ for (i = 0; i < numdummies && !err; i++)
+- err = dummy_init_one(i);
+- if (err) {
+- i--;
+- while (--i >= 0)
+- dummy_free_one(i);
++ err = dummy_init_one();
++ if (err < 0) {
++ list_for_each_entry_safe(priv, next, &dummies, list)
++ dummy_dellink(priv->dev);
++ __rtnl_link_unregister(&dummy_link_ops);
+ }
++ rtnl_unlock();
++
+ return err;
+ }
+
+ static void __exit dummy_cleanup_module(void)
+ {
+- int i;
+- for (i = 0; i < numdummies; i++)
+- dummy_free_one(i);
+- kfree(dummies);
++ struct dummy_priv *priv, *next;
++
++ rtnl_lock();
++ list_for_each_entry_safe(priv, next, &dummies, list)
++ dummy_dellink(priv->dev);
++
++ __rtnl_link_unregister(&dummy_link_ops);
++ rtnl_unlock();
+ }
+
+ module_init(dummy_init_module);
+ module_exit(dummy_cleanup_module);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_RTNL_LINK("dummy");
+diff -Nurb linux-2.6.22-570/drivers/net/eepro100.c linux-2.6.22-591/drivers/net/eepro100.c
+--- linux-2.6.22-570/drivers/net/eepro100.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/eepro100.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1801,7 +1801,7 @@
+
+ #if 1 || USE_IP_CSUM
+ /* Packet is in one chunk -- we can copy + cksum. */
+- eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, sp->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ skb_copy_from_linear_data(sp->rx_skbuff[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/epic100.c linux-2.6.22-591/drivers/net/epic100.c
+--- linux-2.6.22-570/drivers/net/epic100.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/epic100.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1201,7 +1201,7 @@
+ ep->rx_ring[entry].bufaddr,
+ ep->rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb, ep->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(ep->pci_dev,
+ ep->rx_ring[entry].bufaddr,
+diff -Nurb linux-2.6.22-570/drivers/net/eql.c linux-2.6.22-591/drivers/net/eql.c
+--- linux-2.6.22-570/drivers/net/eql.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/eql.c 2007-12-21 15:36:14.000000000 -0500
+@@ -116,6 +116,7 @@
+ #include <linux/init.h>
+ #include <linux/timer.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+
+ #include <linux/if.h>
+ #include <linux/if_arp.h>
+@@ -412,7 +413,7 @@
+ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(srq.slave_name);
++ slave_dev = dev_get_by_name(&init_net, srq.slave_name);
+ if (slave_dev) {
+ if ((master_dev->flags & IFF_UP) == IFF_UP) {
+ /* slave is not a master & not already a slave: */
+@@ -460,7 +461,7 @@
+ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(srq.slave_name);
++ slave_dev = dev_get_by_name(&init_net, srq.slave_name);
+ ret = -EINVAL;
+ if (slave_dev) {
+ spin_lock_bh(&eql->queue.lock);
+@@ -493,7 +494,7 @@
+ if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(sc.slave_name);
++ slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ if (!slave_dev)
+ return -ENODEV;
+
+@@ -528,7 +529,7 @@
+ if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(sc.slave_name);
++ slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ if (!slave_dev)
+ return -ENODEV;
+
+diff -Nurb linux-2.6.22-570/drivers/net/etun.c linux-2.6.22-591/drivers/net/etun.c
+--- linux-2.6.22-570/drivers/net/etun.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/net/etun.c 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,489 @@
++/*
++ * ETUN - Universal ETUN device driver.
++ * Copyright (C) 2006 Linux Networx
++ *
++ */
++
++#define DRV_NAME "etun"
++#define DRV_VERSION "1.0"
++#define DRV_DESCRIPTION "Ethernet pseudo tunnel device driver"
++#define DRV_COPYRIGHT "(C) 2007 Linux Networx"
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/skbuff.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++#include <linux/if.h>
++#include <linux/if_ether.h>
++#include <linux/ctype.h>
++#include <linux/nsproxy.h>
++#include <net/net_namespace.h>
++#include <net/dst.h>
++
++
++/* Device checksum strategy.
++ *
++ * etun is designed to be a pair of virtual devices
++ * connecting two network stack instances.
++ *
++ * Typically it will either be used with ethernet bridging or
++ * it will be used to route packets between the two stacks.
++ *
++ * The only checksum offloading I can do is to completely
++ * skip the checksumming step altogether.
++ *
++ * When used for ethernet bridging I don't believe any
++ * checksum offloading is safe.
++ * - If my source is an external interface the checksum may be
++ * invalid so I don't want to report I have already checked it.
++ * - If my destination is an external interface I don't want to put
++ * a packet on the wire without anyone having computed the checksum.
++ *
++ * When used for routing between two stacks checksums should
++ * be as unnecessary as they are on the loopback device.
++ *
++ * So by default I am safe and disable checksumming and
++ * other advanced features like SG and TSO.
++ *
++ * However, because I think these features could be useful,
++ * I provide the ethtool functions to enable/disable
++ * them at runtime.
++ *
++ * If you think you can correctly enable these, go ahead.
++ * For checksums both the transmitter and the receiver must
++ * agree before they are actually disabled.
++ */
++
++#define ETUN_NUM_STATS 1
++static struct {
++ const char string[ETH_GSTRING_LEN];
++} ethtool_stats_keys[ETUN_NUM_STATS] = {
++ { "partner_ifindex" },
++};
++
++struct etun_info {
++ struct net_device *rx_dev;
++ unsigned ip_summed;
++ struct net_device_stats stats;
++ struct list_head list;
++ struct net_device *dev;
++};
++
++/*
++ * I have to hold the rtnl_lock during device delete.
++ * So I use the rtnl_lock to protect my list manipulations
++ * as well. Crude but simple.
++ */
++static LIST_HEAD(etun_list);
++
++/*
++ * The higher levels take care of making this non-reentrant (it's
++ * called with bh's disabled).
++ */
++static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev)
++{
++ struct etun_info *tx_info = tx_dev->priv;
++ struct net_device *rx_dev = tx_info->rx_dev;
++ struct etun_info *rx_info = rx_dev->priv;
++
++ tx_info->stats.tx_packets++;
++ tx_info->stats.tx_bytes += skb->len;
++
++ /* Drop the skb state that was needed to get here */
++ skb_orphan(skb);
++ if (skb->dst)
++ skb->dst = dst_pop(skb->dst); /* Allow for smart routing */
++
++ /* Switch to the receiving device */
++ skb->pkt_type = PACKET_HOST;
++ skb->protocol = eth_type_trans(skb, rx_dev);
++ skb->dev = rx_dev;
++ skb->ip_summed = CHECKSUM_NONE;
++
++ /* If both halves agree no checksum is needed */
++ if (tx_dev->features & NETIF_F_NO_CSUM)
++ skb->ip_summed = rx_info->ip_summed;
++
++ rx_dev->last_rx = jiffies;
++ rx_info->stats.rx_packets++;
++ rx_info->stats.rx_bytes += skb->len;
++ netif_rx(skb);
++
++ return 0;
++}
++
++static struct net_device_stats *etun_get_stats(struct net_device *dev)
++{
++ struct etun_info *info = dev->priv;
++ return &info->stats;
++}
++
++/* ethtool interface */
++static int etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
++{
++ cmd->supported = 0;
++ cmd->advertising = 0;
++ cmd->speed = SPEED_10000; /* Memory is fast! */
++ cmd->duplex = DUPLEX_FULL;
++ cmd->port = PORT_TP;
++ cmd->phy_address = 0;
++ cmd->transceiver = XCVR_INTERNAL;
++ cmd->autoneg = AUTONEG_DISABLE;
++ cmd->maxtxpkt = 0;
++ cmd->maxrxpkt = 0;
++ return 0;
++}
++
++static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
++{
++ strcpy(info->driver, DRV_NAME);
++ strcpy(info->version, DRV_VERSION);
++ strcpy(info->fw_version, "N/A");
++}
++
++static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
++{
++ switch(stringset) {
++ case ETH_SS_STATS:
++ memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
++ break;
++ case ETH_SS_TEST:
++ default:
++ break;
++ }
++}
++
++static int etun_get_stats_count(struct net_device *dev)
++{
++ return ETUN_NUM_STATS;
++}
++
++static void etun_get_ethtool_stats(struct net_device *dev,
++ struct ethtool_stats *stats, u64 *data)
++{
++ struct etun_info *info = dev->priv;
++
++ data[0] = info->rx_dev->ifindex;
++}
++
++static u32 etun_get_rx_csum(struct net_device *dev)
++{
++ struct etun_info *info = dev->priv;
++ return info->ip_summed == CHECKSUM_UNNECESSARY;
++}
++
++static int etun_set_rx_csum(struct net_device *dev, u32 data)
++{
++ struct etun_info *info = dev->priv;
++
++ info->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
++
++ return 0;
++}
++
++static u32 etun_get_tx_csum(struct net_device *dev)
++{
++ return (dev->features & NETIF_F_NO_CSUM) != 0;
++}
++
++static int etun_set_tx_csum(struct net_device *dev, u32 data)
++{
++ dev->features &= ~NETIF_F_NO_CSUM;
++ if (data)
++ dev->features |= NETIF_F_NO_CSUM;
++
++ return 0;
++}
++
++static struct ethtool_ops etun_ethtool_ops = {
++ .get_settings = etun_get_settings,
++ .get_drvinfo = etun_get_drvinfo,
++ .get_link = ethtool_op_get_link,
++ .get_rx_csum = etun_get_rx_csum,
++ .set_rx_csum = etun_set_rx_csum,
++ .get_tx_csum = etun_get_tx_csum,
++ .set_tx_csum = etun_set_tx_csum,
++ .get_sg = ethtool_op_get_sg,
++ .set_sg = ethtool_op_set_sg,
++#if 0 /* Does just setting the bit successfully emulate TSO? */
++ .get_tso = ethtool_op_get_tso,
++ .set_tso = ethtool_op_set_tso,
++#endif
++ .get_strings = etun_get_strings,
++ .get_stats_count = etun_get_stats_count,
++ .get_ethtool_stats = etun_get_ethtool_stats,
++ .get_perm_addr = ethtool_op_get_perm_addr,
++};
++
++static int etun_open(struct net_device *tx_dev)
++{
++ struct etun_info *tx_info = tx_dev->priv;
++ struct net_device *rx_dev = tx_info->rx_dev;
++ /* If we attempt to bring up etun in the small window before
++ * it is connected to its partner, return an error.
++ */
++ if (!rx_dev)
++ return -ENOTCONN;
++ if (rx_dev->flags & IFF_UP) {
++ netif_carrier_on(tx_dev);
++ netif_carrier_on(rx_dev);
++ }
++ netif_start_queue(tx_dev);
++ return 0;
++}
++
++static int etun_stop(struct net_device *tx_dev)
++{
++ struct etun_info *tx_info = tx_dev->priv;
++ struct net_device *rx_dev = tx_info->rx_dev;
++ netif_stop_queue(tx_dev);
++ if (netif_carrier_ok(tx_dev)) {
++ netif_carrier_off(tx_dev);
++ netif_carrier_off(rx_dev);
++ }
++ return 0;
++}
++
++static int etun_change_mtu(struct net_device *dev, int new_mtu)
++{
++ /* Don't allow ridiculously small mtus */
++ if (new_mtu < (ETH_ZLEN - ETH_HLEN))
++ return -EINVAL;
++ dev->mtu = new_mtu;
++ return 0;
++}
++
++static void etun_set_multicast_list(struct net_device *dev)
++{
++ /* Nothing sane I can do here */
++ return;
++}
++
++static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
++{
++ return -EOPNOTSUPP;
++}
++
++/* Only allow letters and numbers in an etun device name */
++static int is_valid_name(const char *name)
++{
++ const char *ptr;
++ for (ptr = name; *ptr; ptr++) {
++ if (!isalnum(*ptr))
++ return 0;
++ }
++ return 1;
++}
++
++static struct net_device *etun_alloc(struct net *net, const char *name)
++{
++ struct net_device *dev;
++ struct etun_info *info;
++ int err;
++
++ if (!name || !is_valid_name(name))
++ return ERR_PTR(-EINVAL);
++
++ dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup);
++ if (!dev)
++ return ERR_PTR(-ENOMEM);
++
++ info = dev->priv;
++ info->dev = dev;
++ dev->nd_net = net;
++
++ random_ether_addr(dev->dev_addr);
++ dev->tx_queue_len = 0; /* A queue is silly for a loopback device */
++ dev->hard_start_xmit = etun_xmit;
++ dev->get_stats = etun_get_stats;
++ dev->open = etun_open;
++ dev->stop = etun_stop;
++ dev->set_multicast_list = etun_set_multicast_list;
++ dev->do_ioctl = etun_ioctl;
++ dev->features = NETIF_F_FRAGLIST
++ | NETIF_F_HIGHDMA
++ | NETIF_F_LLTX;
++ dev->flags = IFF_BROADCAST | IFF_MULTICAST | IFF_PROMISC;
++ dev->ethtool_ops = &etun_ethtool_ops;
++ dev->destructor = free_netdev;
++ dev->change_mtu = etun_change_mtu;
++ err = register_netdev(dev);
++ if (err) {
++ free_netdev(dev);
++ dev = ERR_PTR(err);
++ goto out;
++ }
++ netif_carrier_off(dev);
++out:
++ return dev;
++}
++
++static int etun_alloc_pair(struct net *net, const char *name0, const char *name1)
++{
++ struct net_device *dev0, *dev1;
++ struct etun_info *info0, *info1;
++
++ dev0 = etun_alloc(net, name0);
++ if (IS_ERR(dev0)) {
++ return PTR_ERR(dev0);
++ }
++ info0 = dev0->priv;
++
++ dev1 = etun_alloc(net, name1);
++ if (IS_ERR(dev1)) {
++ unregister_netdev(dev0);
++ return PTR_ERR(dev1);
++ }
++ info1 = dev1->priv;
++
++ dev_hold(dev0);
++ dev_hold(dev1);
++ info0->rx_dev = dev1;
++ info1->rx_dev = dev0;
++
++ /* Only place one member of the pair on the list
++ * so I don't confuse list_for_each_entry_safe
++ * by deleting two list entries at once.
++ */
++ rtnl_lock();
++ list_add(&info0->list, &etun_list);
++ INIT_LIST_HEAD(&info1->list);
++ rtnl_unlock();
++
++ return 0;
++}
++
++static int etun_unregister_pair(struct net_device *dev0)
++{
++ struct etun_info *info0, *info1;
++ struct net_device *dev1;
++
++ ASSERT_RTNL();
++
++ if (!dev0)
++ return -ENODEV;
++
++ /* Ensure my network devices are not passing packets */
++ dev_close(dev0);
++ info0 = dev0->priv;
++ dev1 = info0->rx_dev;
++ info1 = dev1->priv;
++ dev_close(dev1);
++
++ /* Drop the cross device references */
++ dev_put(dev0);
++ dev_put(dev1);
++
++ /* Remove from the etun list */
++ if (!list_empty(&info0->list))
++ list_del_init(&info0->list);
++ if (!list_empty(&info1->list))
++ list_del_init(&info1->list);
++
++ unregister_netdevice(dev0);
++ unregister_netdevice(dev1);
++ return 0;
++}
++
++static int etun_noget(char *buffer, struct kernel_param *kp)
++{
++ return 0;
++}
++
++static int etun_newif(const char *val, struct kernel_param *kp)
++{
++ char name0[IFNAMSIZ], name1[IFNAMSIZ];
++ const char *mid;
++ int len, len0, len1;
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
++ /* Avoid frustration by removing trailing whitespace */
++ len = strlen(val);
++ while (len > 0 && isspace(val[len - 1]))
++ len--;
++
++ /* Split the string into 2 names */
++ mid = memchr(val, ',', len);
++ if (!mid)
++ return -EINVAL;
++
++ /* Get the first device name */
++ len0 = mid - val;
++ if (len0 > sizeof(name0) - 1)
++ len0 = sizeof(name0) - 1;
++ strncpy(name0, val, len0);
++ name0[len0] = '\0';
++
++ /* And the second device name */
++ len1 = len - (len0 + 1);
++ if (len1 > sizeof(name1) - 1)
++ len1 = sizeof(name1) - 1;
++ strncpy(name1, mid + 1, len1);
++ name1[len1] = '\0';
++
++ return etun_alloc_pair(current->nsproxy->net_ns, name0, name1);
++}
++
++static int etun_delif(const char *val, struct kernel_param *kp)
++{
++ char name[IFNAMSIZ];
++ int len;
++ struct net_device *dev;
++ int err;
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
++ /* Avoid frustration by removing trailing whitespace */
++ len = strlen(val);
++ while (len > 0 && isspace(val[len - 1]))
++ len--;
++
++ /* Get the device name */
++ if (len > sizeof(name) - 1)
++ return -EINVAL;
++ strncpy(name, val, len);
++ name[len] = '\0';
++
++ /* Double check I don't have strange characters in my device name */
++ if (!is_valid_name(name))
++ return -EINVAL;
++
++ rtnl_lock();
++ dev = __dev_get_by_name(current->nsproxy->net_ns, name);
++ err = etun_unregister_pair(dev);
++ rtnl_unlock();
++ return err;
++}
++
++static int __init etun_init(void)
++{
++ printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
++ printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT);
++
++ return 0;
++}
++
++static void etun_cleanup(void)
++{
++ struct etun_info *info, *tmp;
++ rtnl_lock();
++ list_for_each_entry_safe(info, tmp, &etun_list, list) {
++ etun_unregister_pair(info->dev);
++ }
++ rtnl_unlock();
++}
++
++module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR);
++module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR);
++module_init(etun_init);
++module_exit(etun_cleanup);
++MODULE_DESCRIPTION(DRV_DESCRIPTION);
++MODULE_AUTHOR("Eric Biederman <ebiederm@xmission.com>");
++MODULE_LICENSE("GPL");
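[Usage note] With module_param_call(newif, ...) and module_param_call(delif, ...) declared S_IWUSR, the pair-creation hooks should surface as root-writable files under /sys/module/etun/parameters/ once the module loads. A hedged userspace sketch of driving them (the sysfs paths are inferred from the declarations above, not taken from the patch itself):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_param(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror("write");
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Create the pair; names must be alphanumeric per is_valid_name(). */
	if (write_param("/sys/module/etun/parameters/newif", "etun0,etun1"))
		return 1;

	/* ... bring the devices up, bridge or route between them ... */

	/* Deleting either end unregisters both halves of the pair. */
	return write_param("/sys/module/etun/parameters/delif", "etun0") ? 1 : 0;
}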
+diff -Nurb linux-2.6.22-570/drivers/net/fealnx.c linux-2.6.22-591/drivers/net/fealnx.c
+--- linux-2.6.22-570/drivers/net/fealnx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/fealnx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1727,8 +1727,8 @@
+ /* Call copy + cksum if available. */
+
+ #if ! defined(__alpha__)
+- eth_copy_and_sum(skb,
+- np->cur_rx->skbuff->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb,
++ np->cur_rx->skbuff->data, pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ memcpy(skb_put(skb, pkt_len),
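[Note on the recurring substitution] This fealnx hunk is the first of many identical replacements below: eth_copy_and_sum() never used its final checksum argument, so each call reduces to a plain copy into the skb's linear buffer. Roughly what the replacement helper does, as a sketch (assuming the 2.6.22-era helper is a thin memcpy wrapper; the real one lives in skbuff.h):

#include <linux/skbuff.h>
#include <linux/string.h>

/* Sketch of the substitution target, not the kernel source itself. */
static inline void example_copy_to_linear_data(struct sk_buff *skb,
					       const void *from,
					       const unsigned int len)
{
	memcpy(skb->data, from, len);	/* copy only; no checksum is folded */
}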
+diff -Nurb linux-2.6.22-570/drivers/net/fec.c linux-2.6.22-591/drivers/net/fec.c
+--- linux-2.6.22-570/drivers/net/fec.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/fec.c 2007-12-21 15:36:12.000000000 -0500
+@@ -648,7 +648,7 @@
+ fep->stats.rx_dropped++;
+ } else {
+ skb_put(skb,pkt_len-4); /* Make room */
+- eth_copy_and_sum(skb, data, pkt_len-4, 0);
++ skb_copy_to_linear_data(skb, data, pkt_len-4);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ }
+diff -Nurb linux-2.6.22-570/drivers/net/hamachi.c linux-2.6.22-591/drivers/net/hamachi.c
+--- linux-2.6.22-570/drivers/net/hamachi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/hamachi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1575,8 +1575,8 @@
+ PCI_DMA_FROMDEVICE);
+ /* Call copy + cksum if available. */
+ #if 1 || USE_IP_COPYSUM
+- eth_copy_and_sum(skb,
+- hmp->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb,
++ hmp->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma
+diff -Nurb linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c
+--- linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -320,7 +320,7 @@
+ sprintf(portarg, "%ld", bc->pdev->port->base);
+ printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);
+
+- return call_usermodehelper(eppconfig_path, argv, envp, 1);
++ return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
+ }
+
+ /* ---------------------------------------------------------------------- */
+diff -Nurb linux-2.6.22-570/drivers/net/hamradio/bpqether.c linux-2.6.22-591/drivers/net/hamradio/bpqether.c
+--- linux-2.6.22-570/drivers/net/hamradio/bpqether.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/hamradio/bpqether.c 2007-12-21 15:36:14.000000000 -0500
+@@ -83,6 +83,7 @@
+
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+ #include <linux/bpqether.h>
+
+@@ -172,6 +173,9 @@
+ struct ethhdr *eth;
+ struct bpqdev *bpq;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ return NET_RX_DROP;
+
+@@ -559,6 +563,9 @@
+ {
+ struct net_device *dev = (struct net_device *)ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (!dev_is_ethdev(dev))
+ return NOTIFY_DONE;
+
+@@ -594,7 +601,7 @@
+ static int __init bpq_init_driver(void)
+ {
+ #ifdef CONFIG_PROC_FS
+- if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) {
++ if (!proc_net_fops_create(&init_net, "bpqether", S_IRUGO, &bpq_info_fops)) {
+ printk(KERN_ERR
+ "bpq: cannot create /proc/net/bpqether entry.\n");
+ return -ENOENT;
+@@ -618,7 +625,7 @@
+
+ unregister_netdevice_notifier(&bpq_dev_notifier);
+
+- proc_net_remove("bpqether");
++ proc_net_remove(&init_net, "bpqether");
+
+ rtnl_lock();
+ while (!list_empty(&bpq_devices)) {
+diff -Nurb linux-2.6.22-570/drivers/net/hamradio/scc.c linux-2.6.22-591/drivers/net/hamradio/scc.c
+--- linux-2.6.22-570/drivers/net/hamradio/scc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/hamradio/scc.c 2007-12-21 15:36:14.000000000 -0500
+@@ -174,6 +174,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/bitops.h>
+
++#include <net/net_namespace.h>
+ #include <net/ax25.h>
+
+ #include <asm/irq.h>
+@@ -2114,7 +2115,7 @@
+ }
+ rtnl_unlock();
+
+- proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops);
++ proc_net_fops_create(&init_net, "z8530drv", 0, &scc_net_seq_fops);
+
+ return 0;
+ }
+@@ -2169,7 +2170,7 @@
+ if (Vector_Latch)
+ release_region(Vector_Latch, 1);
+
+- proc_net_remove("z8530drv");
++ proc_net_remove(&init_net, "z8530drv");
+ }
+
+ MODULE_AUTHOR("Joerg Reuter <jreuter@yaina.de>");
+diff -Nurb linux-2.6.22-570/drivers/net/hamradio/yam.c linux-2.6.22-591/drivers/net/hamradio/yam.c
+--- linux-2.6.22-570/drivers/net/hamradio/yam.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/hamradio/yam.c 2007-12-21 15:36:14.000000000 -0500
+@@ -61,6 +61,7 @@
+ #include <linux/etherdevice.h>
+ #include <linux/skbuff.h>
+ #include <net/ax25.h>
++#include <net/net_namespace.h>
+
+ #include <linux/kernel.h>
+ #include <linux/proc_fs.h>
+@@ -1142,7 +1143,7 @@
+ yam_timer.expires = jiffies + HZ / 100;
+ add_timer(&yam_timer);
+
+- proc_net_fops_create("yam", S_IRUGO, &yam_info_fops);
++ proc_net_fops_create(&init_net, "yam", S_IRUGO, &yam_info_fops);
+ return 0;
+ error:
+ while (--i >= 0) {
+@@ -1174,7 +1175,7 @@
+ kfree(p);
+ }
+
+- proc_net_remove("yam");
++ proc_net_remove(&init_net, "yam");
+ }
+
+ /* --------------------------------------------------------------------- */
+diff -Nurb linux-2.6.22-570/drivers/net/ibmveth.c linux-2.6.22-591/drivers/net/ibmveth.c
+--- linux-2.6.22-570/drivers/net/ibmveth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ibmveth.c 2007-12-21 15:36:14.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <linux/mm.h>
+ #include <linux/ethtool.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+ #include <asm/semaphore.h>
+ #include <asm/hvcall.h>
+ #include <asm/atomic.h>
+@@ -97,7 +98,7 @@
+ static struct kobj_type ktype_veth_pool;
+
+ #ifdef CONFIG_PROC_FS
+-#define IBMVETH_PROC_DIR "net/ibmveth"
++#define IBMVETH_PROC_DIR "ibmveth"
+ static struct proc_dir_entry *ibmveth_proc_dir;
+ #endif
+
+@@ -1093,7 +1094,7 @@
+ #ifdef CONFIG_PROC_FS
+ static void ibmveth_proc_register_driver(void)
+ {
+- ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL);
++ ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, init_net.proc_net);
+ if (ibmveth_proc_dir) {
+ SET_MODULE_OWNER(ibmveth_proc_dir);
+ }
+@@ -1101,7 +1102,7 @@
+
+ static void ibmveth_proc_unregister_driver(void)
+ {
+- remove_proc_entry(IBMVETH_PROC_DIR, NULL);
++ remove_proc_entry(IBMVETH_PROC_DIR, init_net.proc_net);
+ }
+
+ static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos)
+@@ -1337,7 +1338,7 @@
+
+ #define ATTR(_name, _mode) \
+ struct attribute veth_##_name##_attr = { \
+- .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \
++ .name = __stringify(_name), .mode = _mode, \
+ };
+
+ static ATTR(active, 0644);
+diff -Nurb linux-2.6.22-570/drivers/net/ifb.c linux-2.6.22-591/drivers/net/ifb.c
+--- linux-2.6.22-570/drivers/net/ifb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ifb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -33,12 +33,15 @@
+ #include <linux/etherdevice.h>
+ #include <linux/init.h>
+ #include <linux/moduleparam.h>
++#include <linux/list.h>
+ #include <net/pkt_sched.h>
+
+ #define TX_TIMEOUT (2*HZ)
+
+ #define TX_Q_LIMIT 32
+ struct ifb_private {
++ struct list_head list;
++ struct net_device *dev;
+ struct net_device_stats stats;
+ struct tasklet_struct ifb_tasklet;
+ int tasklet_pending;
+@@ -136,13 +139,14 @@
+
+ }
+
+-static void __init ifb_setup(struct net_device *dev)
++static void ifb_setup(struct net_device *dev)
+ {
+ /* Initialize the device structure. */
+ dev->get_stats = ifb_get_stats;
+ dev->hard_start_xmit = ifb_xmit;
+ dev->open = &ifb_open;
+ dev->stop = &ifb_close;
++ dev->destructor = free_netdev;
+
+ /* Fill in device structure with ethernet-generic values. */
+ ether_setup(dev);
+@@ -197,7 +201,7 @@
+ return stats;
+ }
+
+-static struct net_device **ifbs;
++static LIST_HEAD(ifbs);
+
+ /* Number of ifb devices to be set up by this module. */
+ module_param(numifbs, int, 0);
+@@ -226,9 +230,41 @@
+ return 0;
+ }
+
++static int ifb_newlink(struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[])
++{
++ struct ifb_private *priv = netdev_priv(dev);
++ int err;
++
++ err = register_netdevice(dev);
++ if (err < 0)
++ return err;
++
++ priv->dev = dev;
++ list_add_tail(&priv->list, &ifbs);
++ return 0;
++}
++
++static void ifb_dellink(struct net_device *dev)
++{
++ struct ifb_private *priv = netdev_priv(dev);
++
++ list_del(&priv->list);
++ unregister_netdevice(dev);
++}
++
++static struct rtnl_link_ops ifb_link_ops __read_mostly = {
++ .kind = "ifb",
++ .priv_size = sizeof(struct ifb_private),
++ .setup = ifb_setup,
++ .newlink = ifb_newlink,
++ .dellink = ifb_dellink,
++};
++
+ static int __init ifb_init_one(int index)
+ {
+ struct net_device *dev_ifb;
++ struct ifb_private *priv;
+ int err;
+
+ dev_ifb = alloc_netdev(sizeof(struct ifb_private),
+@@ -237,49 +273,59 @@
+ if (!dev_ifb)
+ return -ENOMEM;
+
+- if ((err = register_netdev(dev_ifb))) {
+- free_netdev(dev_ifb);
+- dev_ifb = NULL;
+- } else {
+- ifbs[index] = dev_ifb;
+- }
++ err = dev_alloc_name(dev_ifb, dev_ifb->name);
++ if (err < 0)
++ goto err;
++
++ dev_ifb->rtnl_link_ops = &ifb_link_ops;
++ err = register_netdevice(dev_ifb);
++ if (err < 0)
++ goto err;
++
++ priv = netdev_priv(dev_ifb);
++ priv->dev = dev_ifb;
++ list_add_tail(&priv->list, &ifbs);
++ return 0;
+
++err:
++ free_netdev(dev_ifb);
+ return err;
+ }
+
+-static void ifb_free_one(int index)
+-{
+- unregister_netdev(ifbs[index]);
+- free_netdev(ifbs[index]);
+-}
+-
+ static int __init ifb_init_module(void)
+ {
+- int i, err = 0;
+- ifbs = kmalloc(numifbs * sizeof(void *), GFP_KERNEL);
+- if (!ifbs)
+- return -ENOMEM;
++ struct ifb_private *priv, *next;
++ int i, err;
++
++ rtnl_lock();
++ err = __rtnl_link_register(&ifb_link_ops);
++
+ for (i = 0; i < numifbs && !err; i++)
+ err = ifb_init_one(i);
+ if (err) {
+- i--;
+- while (--i >= 0)
+- ifb_free_one(i);
++ list_for_each_entry_safe(priv, next, &ifbs, list)
++ ifb_dellink(priv->dev);
++ __rtnl_link_unregister(&ifb_link_ops);
+ }
++ rtnl_unlock();
+
+ return err;
+ }
+
+ static void __exit ifb_cleanup_module(void)
+ {
+- int i;
++ struct ifb_private *priv, *next;
++
++ rtnl_lock();
++ list_for_each_entry_safe(priv, next, &ifbs, list)
++ ifb_dellink(priv->dev);
+
+- for (i = 0; i < numifbs; i++)
+- ifb_free_one(i);
+- kfree(ifbs);
++ __rtnl_link_unregister(&ifb_link_ops);
++ rtnl_unlock();
+ }
+
+ module_init(ifb_init_module);
+ module_exit(ifb_cleanup_module);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Jamal Hadi Salim");
++MODULE_ALIAS_RTNL_LINK("ifb");
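[Pattern note] The ifb rework above doubles as a template for exposing a virtual device class through rtnl_link_ops, so instances can be created and destroyed over rtnetlink rather than only at module load. A condensed sketch of the registration half under the same era's API (names other than the ifb_* callbacks are illustrative):

#include <linux/module.h>
#include <net/rtnetlink.h>

/* Sketch: the minimum a virtual driver wires up for rtnetlink creation. */
static struct rtnl_link_ops example_link_ops __read_mostly = {
	.kind		= "example",			/* matched by the link "type" keyword */
	.priv_size	= sizeof(struct ifb_private),
	.setup		= ifb_setup,			/* fill in net_device callbacks */
	.newlink	= ifb_newlink,			/* register + driver bookkeeping */
	.dellink	= ifb_dellink,			/* unregister + bookkeeping */
};

static int __init example_init(void)
{
	return rtnl_link_register(&example_link_ops);
}

static void __exit example_exit(void)
{
	rtnl_link_unregister(&example_link_ops);
}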
+diff -Nurb linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c
+--- linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c 2007-12-21 15:36:12.000000000 -0500
+@@ -111,7 +111,7 @@
+ skb = dev_alloc_skb(desc->pkt_length + 2);
+ if (likely(skb != NULL)) {
+ skb_reserve(skb, 2);
+- eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
++ skb_copy_to_linear_data(skb, buf, desc->pkt_length);
+ skb_put(skb, desc->pkt_length);
+ skb->protocol = eth_type_trans(skb, nds[desc->channel]);
+
+diff -Nurb linux-2.6.22-570/drivers/net/kgdboe.c linux-2.6.22-591/drivers/net/kgdboe.c
+--- linux-2.6.22-570/drivers/net/kgdboe.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/net/kgdboe.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,294 @@
++/*
++ * drivers/net/kgdboe.c
++ *
++ * A network interface for GDB.
++ * Based upon 'gdbserial' by David Grothe <dave@gcom.com>
++ * and Scott Foehner <sfoehner@engr.sgi.com>
++ *
++ * Maintainers: Amit S. Kale <amitkale@linsyssoft.com> and
++ * Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2004 (c) Amit S. Kale <amitkale@linsyssoft.com>
++ * 2004-2005 (c) MontaVista Software, Inc.
++ * 2005 (c) Wind River Systems, Inc.
++ *
++ * Contributors at various stages not listed above:
++ * San Mehat <nettwerk@biodome.org>, Robert Walsh <rjwalsh@durables.org>,
++ * wangdi <wangdi@clusterfs.com>, Matt Mackall <mpm@selenic.com>,
++ * Pavel Machek <pavel@suse.cz>, Jason Wessel <jason.wessel@windriver.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/string.h>
++#include <linux/kgdb.h>
++#include <linux/netpoll.h>
++#include <linux/init.h>
++
++#include <asm/atomic.h>
++
++#define IN_BUF_SIZE 512 /* power of 2, please */
++#define NOT_CONFIGURED_STRING "not_configured"
++#define OUT_BUF_SIZE 30 /* We don't want to send too big of a packet. */
++#define MAX_KGDBOE_CONFIG_STR 256
++
++static char in_buf[IN_BUF_SIZE], out_buf[OUT_BUF_SIZE];
++static int in_head, in_tail, out_count;
++static atomic_t in_count;
++/* 0 = unconfigured, 1 = netpoll options parsed, 2 = fully configured. */
++static int configured;
++static struct kgdb_io local_kgdb_io_ops;
++static int use_dynamic_mac;
++
++MODULE_DESCRIPTION("KGDB driver for network interfaces");
++MODULE_LICENSE("GPL");
++static char config[MAX_KGDBOE_CONFIG_STR] = NOT_CONFIGURED_STRING;
++static struct kparam_string kps = {
++ .string = config,
++ .maxlen = MAX_KGDBOE_CONFIG_STR,
++};
++
++static void rx_hook(struct netpoll *np, int port, char *msg, int len,
++ struct sk_buff *skb)
++{
++ int i;
++
++ np->remote_port = port;
++
++ /* Copy the MAC address if we need to. */
++ if (use_dynamic_mac) {
++ memcpy(np->remote_mac, eth_hdr(skb)->h_source,
++ sizeof(np->remote_mac));
++ use_dynamic_mac = 0;
++ }
++
++ /*
++ * This could be GDB trying to attach. But it could also be GDB
++ * finishing up a session, with kgdb_connected=0 but GDB sending
++ * an ACK for the final packet. To make sure we don't try and
++ * make a breakpoint when GDB is leaving, make sure that if
++ * !kgdb_connected the only len == 1 packet we allow is ^C.
++ */
++ if (!kgdb_connected && (len != 1 || msg[0] == 3) &&
++ !atomic_read(&kgdb_setting_breakpoint)) {
++ tasklet_schedule(&kgdb_tasklet_breakpoint);
++ }
++
++ for (i = 0; i < len; i++) {
++ if (msg[i] == 3)
++ tasklet_schedule(&kgdb_tasklet_breakpoint);
++
++ if (atomic_read(&in_count) >= IN_BUF_SIZE) {
++ /* buffer overflow, clear it */
++ in_head = in_tail = 0;
++ atomic_set(&in_count, 0);
++ break;
++ }
++ in_buf[in_head++] = msg[i];
++ in_head &= (IN_BUF_SIZE - 1);
++ atomic_inc(&in_count);
++ }
++}
++
++static struct netpoll np = {
++ .dev_name = "eth0",
++ .name = "kgdboe",
++ .rx_hook = rx_hook,
++ .local_port = 6443,
++ .remote_port = 6442,
++ .remote_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++};
++
++static void eth_pre_exception_handler(void)
++{
++ /* Increment the module count when the debugger is active */
++ if (!kgdb_connected)
++ try_module_get(THIS_MODULE);
++ netpoll_set_trap(1);
++}
++
++static void eth_post_exception_handler(void)
++{
++ /* decrement the module count when the debugger detaches */
++ if (!kgdb_connected)
++ module_put(THIS_MODULE);
++ netpoll_set_trap(0);
++}
++
++static int eth_get_char(void)
++{
++ int chr;
++
++ while (atomic_read(&in_count) == 0)
++ netpoll_poll(&np);
++
++ chr = in_buf[in_tail++];
++ in_tail &= (IN_BUF_SIZE - 1);
++ atomic_dec(&in_count);
++ return chr;
++}
++
++static void eth_flush_buf(void)
++{
++ if (out_count && np.dev) {
++ netpoll_send_udp(&np, out_buf, out_count);
++ memset(out_buf, 0, sizeof(out_buf));
++ out_count = 0;
++ }
++}
++
++static void eth_put_char(u8 chr)
++{
++ out_buf[out_count++] = chr;
++ if (out_count == OUT_BUF_SIZE)
++ eth_flush_buf();
++}
++
++static int option_setup(char *opt)
++{
++ char opt_scratch[MAX_KGDBOE_CONFIG_STR];
++
++ /* If we're being given a new configuration, copy it in. */
++ if (opt != config)
++ strcpy(config, opt);
++ /* But work on a copy as netpoll_parse_options will eat it. */
++ strcpy(opt_scratch, opt);
++ configured = !netpoll_parse_options(&np, opt_scratch);
++
++ use_dynamic_mac = 1;
++
++ return 0;
++}
++__setup("kgdboe=", option_setup);
++
++/* With our config string set by some means, configure kgdboe. */
++static int configure_kgdboe(void)
++{
++ /* Try out the string. */
++ option_setup(config);
++
++ if (!configured) {
++ printk(KERN_ERR "kgdboe: configuration incorrect - kgdboe not "
++ "loaded.\n");
++ printk(KERN_ERR " Usage: kgdboe=[src-port]@[src-ip]/[dev],"
++ "[tgt-port]@<tgt-ip>/<tgt-macaddr>\n");
++ return -EINVAL;
++ }
++
++ /* Bring it up. */
++ if (netpoll_setup(&np)) {
++ printk(KERN_ERR "kgdboe: netpoll_setup failed kgdboe failed\n");
++ return -EINVAL;
++ }
++
++ if (kgdb_register_io_module(&local_kgdb_io_ops)) {
++ netpoll_cleanup(&np);
++ return -EINVAL;
++ }
++
++ configured = 2;
++
++ return 0;
++}
++
++static int init_kgdboe(void)
++{
++ int ret;
++
++ /* Already done? */
++ if (configured == 2)
++ return 0;
++
++ /* OK, go ahead and do it. */
++ ret = configure_kgdboe();
++
++ if (configured == 2)
++ printk(KERN_INFO "kgdboe: debugging over ethernet enabled\n");
++
++ return ret;
++}
++
++static void cleanup_kgdboe(void)
++{
++ netpoll_cleanup(&np);
++ configured = 0;
++ kgdb_unregister_io_module(&local_kgdb_io_ops);
++}
++
++static int param_set_kgdboe_var(const char *kmessage, struct kernel_param *kp)
++{
++ char kmessage_save[MAX_KGDBOE_CONFIG_STR];
++ int msg_len = strlen(kmessage);
++
++ if (msg_len + 1 > MAX_KGDBOE_CONFIG_STR) {
++ printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
++ kp->name, MAX_KGDBOE_CONFIG_STR - 1);
++ return -ENOSPC;
++ }
++
++ if (kgdb_connected) {
++ printk(KERN_ERR "kgdboe: Cannot reconfigure while KGDB is "
++ "connected.\n");
++ return 0;
++ }
++
++ /* Start the reconfiguration process by saving the old string */
++ strncpy(kmessage_save, config, sizeof(kmessage_save));
++
++ /* Copy in the new param and strip out invalid characters so we
++ * can optionally specify the MAC.
++ */
++ strncpy(config, kmessage, sizeof(config));
++ msg_len--;
++ while (msg_len > 0 &&
++ (config[msg_len] < ',' || config[msg_len] > 'f')) {
++ config[msg_len] = '\0';
++ msg_len--;
++ }
++
++ /* Check to see if we are unconfiguring the io module and that it
++ * was in a fully configured state, as this is the only time that
++ * netpoll_cleanup should get called
++ */
++ if (configured == 2 && strcmp(config, NOT_CONFIGURED_STRING) == 0) {
++ printk(KERN_INFO "kgdboe: reverting to unconfigured state\n");
++ cleanup_kgdboe();
++ return 0;
++ } else
++ /* Go and configure with the new params. */
++ configure_kgdboe();
++
++ if (configured == 2)
++ return 0;
++
++ /* If the new string was invalid, revert to the previous state, which
++ * is at a minimum not_configured. */
++ strncpy(config, kmessage_save, sizeof(config));
++ if (strcmp(kmessage_save, NOT_CONFIGURED_STRING) != 0) {
++ printk(KERN_INFO "kgdboe: reverting to prior configuration\n");
++ configure_kgdboe();
++ }
++ return 0;
++}
++
++static struct kgdb_io local_kgdb_io_ops = {
++ .read_char = eth_get_char,
++ .write_char = eth_put_char,
++ .init = init_kgdboe,
++ .flush = eth_flush_buf,
++ .pre_exception = eth_pre_exception_handler,
++ .post_exception = eth_post_exception_handler
++};
++
++module_init(init_kgdboe);
++module_exit(cleanup_kgdboe);
++module_param_call(kgdboe, param_set_kgdboe_var, param_get_string, &kps, 0644);
++MODULE_PARM_DESC(kgdboe, " kgdboe=[src-port]@[src-ip]/[dev],"
++ "[tgt-port]@<tgt-ip>/<tgt-macaddr>\n");
+diff -Nurb linux-2.6.22-570/drivers/net/lance.c linux-2.6.22-591/drivers/net/lance.c
+--- linux-2.6.22-570/drivers/net/lance.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/lance.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1186,9 +1186,9 @@
+ }
+ skb_reserve(skb,2); /* 16 byte align */
+ skb_put(skb,pkt_len); /* Make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)isa_bus_to_virt((lp->rx_ring[entry].base & 0x00ffffff)),
+- pkt_len,0);
++ pkt_len);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/loopback.c linux-2.6.22-591/drivers/net/loopback.c
+--- linux-2.6.22-570/drivers/net/loopback.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/loopback.c 2007-12-21 15:36:14.000000000 -0500
+@@ -57,6 +57,7 @@
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <linux/percpu.h>
++#include <net/net_namespace.h>
+
+ struct pcpu_lstats {
+ unsigned long packets;
+@@ -199,39 +200,52 @@
+ .get_rx_csum = always_on,
+ };
+
++static int loopback_net_init(struct net *net)
++{
++ struct net_device *lo = &net->loopback_dev;
+ /*
+ * The loopback device is special. There is only one instance and
+ * it is statically allocated. Don't do this for other devices.
+ */
+-struct net_device loopback_dev = {
+- .name = "lo",
+- .get_stats = &get_stats,
+- .mtu = (16 * 1024) + 20 + 20 + 12,
+- .hard_start_xmit = loopback_xmit,
+- .hard_header = eth_header,
+- .hard_header_cache = eth_header_cache,
+- .header_cache_update = eth_header_cache_update,
+- .hard_header_len = ETH_HLEN, /* 14 */
+- .addr_len = ETH_ALEN, /* 6 */
+- .tx_queue_len = 0,
+- .type = ARPHRD_LOOPBACK, /* 0x0001*/
+- .rebuild_header = eth_rebuild_header,
+- .flags = IFF_LOOPBACK,
+- .features = NETIF_F_SG | NETIF_F_FRAGLIST
++ strcpy(lo->name, "lo");
++ lo->get_stats = &get_stats;
++ lo->mtu = (16 * 1024) + 20 + 20 + 12;
++ lo->hard_start_xmit = loopback_xmit;
++ lo->hard_header = eth_header;
++ lo->hard_header_cache = eth_header_cache;
++ lo->header_cache_update = eth_header_cache_update;
++ lo->hard_header_len = ETH_HLEN; /* 14 */
++ lo->addr_len = ETH_ALEN; /* 6 */
++ lo->tx_queue_len = 0;
++ lo->type = ARPHRD_LOOPBACK; /* 0x0001 */
++ lo->rebuild_header = eth_rebuild_header;
++ lo->flags = IFF_LOOPBACK;
++ lo->features = NETIF_F_SG | NETIF_F_FRAGLIST
+ #ifdef LOOPBACK_TSO
+ | NETIF_F_TSO
+ #endif
+ | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA
+- | NETIF_F_LLTX,
+- .ethtool_ops = &loopback_ethtool_ops,
++ | NETIF_F_LLTX
++ | NETIF_F_NETNS_LOCAL;
++ lo->ethtool_ops = &loopback_ethtool_ops;
++ lo->nd_net = net;
++ return register_netdev(lo);
++}
++
++static void loopback_net_exit(struct net *net)
++{
++ unregister_netdev(&net->loopback_dev);
++}
++
++static struct pernet_operations loopback_net_ops = {
++ .init = loopback_net_init,
++ .exit = loopback_net_exit,
+ };
+
+ /* Setup and register the loopback device. */
+ static int __init loopback_init(void)
+ {
+- return register_netdev(&loopback_dev);
++ return register_pernet_device(&loopback_net_ops);
+ };
+
+ module_init(loopback_init);
+-
+-EXPORT_SYMBOL(loopback_dev);
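[Pattern note] loopback is the pattern-setter for per-namespace devices: the single static net_device gives way to one instance per struct net, created and destroyed through pernet_operations callbacks. The lifecycle skeleton, reduced to a sketch:

#include <net/net_namespace.h>

/* Sketch: per-namespace setup/teardown as used by the loopback conversion. */
static int example_net_init(struct net *net)
{
	/* Runs once for every network namespace, including init_net at boot.
	 * Allocate and register this namespace's private devices here. */
	return 0;
}

static void example_net_exit(struct net *net)
{
	/* Runs as the namespace is dismantled; unregister devices here. */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

static int __init example_init(void)
{
	return register_pernet_device(&example_net_ops);
}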
+diff -Nurb linux-2.6.22-570/drivers/net/natsemi.c linux-2.6.22-591/drivers/net/natsemi.c
+--- linux-2.6.22-570/drivers/net/natsemi.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/natsemi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2357,8 +2357,8 @@
+ np->rx_dma[entry],
+ buflen,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb,
+- np->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb,
++ np->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(np->pci_dev,
+ np->rx_dma[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/ni52.c linux-2.6.22-591/drivers/net/ni52.c
+--- linux-2.6.22-570/drivers/net/ni52.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ni52.c 2007-12-21 15:36:12.000000000 -0500
+@@ -936,7 +936,7 @@
+ {
+ skb_reserve(skb,2);
+ skb_put(skb,totlen);
+- eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
++ skb_copy_to_linear_data(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/ni65.c linux-2.6.22-591/drivers/net/ni65.c
+--- linux-2.6.22-570/drivers/net/ni65.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/ni65.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1096,7 +1096,7 @@
+ #ifdef RCV_VIA_SKB
+ if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
+ skb_put(skb,len);
+- eth_copy_and_sum(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len,0);
++ skb_copy_to_linear_data(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len);
+ }
+ else {
+ struct sk_buff *skb1 = p->recv_skb[p->rmdnum];
+@@ -1108,7 +1108,7 @@
+ }
+ #else
+ skb_put(skb,len);
+- eth_copy_and_sum(skb, (unsigned char *) p->recvbounce[p->rmdnum],len,0);
++ skb_copy_to_linear_data(skb, (unsigned char *) p->recvbounce[p->rmdnum],len);
+ #endif
+ p->stats.rx_packets++;
+ p->stats.rx_bytes += len;
+diff -Nurb linux-2.6.22-570/drivers/net/pci-skeleton.c linux-2.6.22-591/drivers/net/pci-skeleton.c
+--- linux-2.6.22-570/drivers/net/pci-skeleton.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/pci-skeleton.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1567,7 +1567,7 @@
+ if (skb) {
+ skb_reserve (skb, 2); /* 16 byte align the IP fields. */
+
+- eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
++ skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
+ skb_put (skb, pkt_size);
+
+ skb->protocol = eth_type_trans (skb, dev);
+diff -Nurb linux-2.6.22-570/drivers/net/pcnet32.c linux-2.6.22-591/drivers/net/pcnet32.c
+--- linux-2.6.22-570/drivers/net/pcnet32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/pcnet32.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1235,9 +1235,9 @@
+ lp->rx_dma_addr[entry],
+ pkt_len,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)(lp->rx_skbuff[entry]->data),
+- pkt_len, 0);
++ pkt_len);
+ pci_dma_sync_single_for_device(lp->pci_dev,
+ lp->rx_dma_addr[entry],
+ pkt_len,
+diff -Nurb linux-2.6.22-570/drivers/net/pppoe.c linux-2.6.22-591/drivers/net/pppoe.c
+--- linux-2.6.22-570/drivers/net/pppoe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/pppoe.c 2007-12-21 15:36:14.000000000 -0500
+@@ -78,6 +78,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+
+ #include <asm/uaccess.h>
+@@ -210,7 +211,7 @@
+ struct net_device *dev;
+ int ifindex;
+
+- dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
++ dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev);
+ if(!dev)
+ return NULL;
+ ifindex = dev->ifindex;
+@@ -295,6 +296,9 @@
+ {
+ struct net_device *dev = (struct net_device *) ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* Only look at sockets that are using this specific device. */
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+@@ -380,6 +384,9 @@
+ struct pppoe_hdr *ph;
+ struct pppox_sock *po;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ goto drop;
+
+@@ -412,6 +419,9 @@
+ struct pppoe_hdr *ph;
+ struct pppox_sock *po;
+
++ if (dev->nd_net != &init_net)
++ goto abort;
++
+ if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ goto abort;
+
+@@ -471,12 +481,12 @@
+ * Initialize a new struct sock.
+ *
+ **********************************************************************/
+-static int pppoe_create(struct socket *sock)
++static int pppoe_create(struct net *net, struct socket *sock)
+ {
+ int error = -ENOMEM;
+ struct sock *sk;
+
+- sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1);
++ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1);
+ if (!sk)
+ goto out;
+
+@@ -588,7 +598,7 @@
+
+ /* Don't re-bind if sid==0 */
+ if (sp->sa_addr.pppoe.sid != 0) {
+- dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
++ dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev);
+
+ error = -ENODEV;
+ if (!dev)
+@@ -1064,7 +1074,7 @@
+ {
+ struct proc_dir_entry *p;
+
+- p = create_proc_entry("net/pppoe", S_IRUGO, NULL);
++ p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net);
+ if (!p)
+ return -ENOMEM;
+
+@@ -1135,7 +1145,7 @@
+ dev_remove_pack(&pppoes_ptype);
+ dev_remove_pack(&pppoed_ptype);
+ unregister_netdevice_notifier(&pppoe_notifier);
+- remove_proc_entry("net/pppoe", NULL);
++ remove_proc_entry("pppoe", init_net.proc_net);
+ proto_unregister(&pppoe_sk_proto);
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/net/pppox.c linux-2.6.22-591/drivers/net/pppox.c
+--- linux-2.6.22-570/drivers/net/pppox.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/pppox.c 2007-12-21 15:36:14.000000000 -0500
+@@ -107,10 +107,13 @@
+
+ EXPORT_SYMBOL(pppox_ioctl);
+
+-static int pppox_create(struct socket *sock, int protocol)
++static int pppox_create(struct net *net, struct socket *sock, int protocol)
+ {
+ int rc = -EPROTOTYPE;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (protocol < 0 || protocol > PX_MAX_PROTO)
+ goto out;
+
+@@ -126,7 +129,7 @@
+ !try_module_get(pppox_protos[protocol]->owner))
+ goto out;
+
+- rc = pppox_protos[protocol]->create(sock);
++ rc = pppox_protos[protocol]->create(net, sock);
+
+ module_put(pppox_protos[protocol]->owner);
+ out:
+diff -Nurb linux-2.6.22-570/drivers/net/r8169.c linux-2.6.22-591/drivers/net/r8169.c
+--- linux-2.6.22-570/drivers/net/r8169.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/r8169.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2492,7 +2492,7 @@
+ skb = dev_alloc_skb(pkt_size + align);
+ if (skb) {
+ skb_reserve(skb, (align - 1) & (unsigned long)skb->data);
+- eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0);
++ skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size);
+ *sk_buff = skb;
+ rtl8169_mark_to_asic(desc, rx_buf_sz);
+ ret = 0;
+diff -Nurb linux-2.6.22-570/drivers/net/saa9730.c linux-2.6.22-591/drivers/net/saa9730.c
+--- linux-2.6.22-570/drivers/net/saa9730.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/saa9730.c 2007-12-21 15:36:12.000000000 -0500
+@@ -690,9 +690,9 @@
+ lp->stats.rx_packets++;
+ skb_reserve(skb, 2); /* 16 byte align */
+ skb_put(skb, len); /* make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *) pData,
+- len, 0);
++ len);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/sgiseeq.c linux-2.6.22-591/drivers/net/sgiseeq.c
+--- linux-2.6.22-570/drivers/net/sgiseeq.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sgiseeq.c 2007-12-21 15:36:12.000000000 -0500
+@@ -320,7 +320,7 @@
+ skb_put(skb, len);
+
+ /* Copy out of kseg1 to avoid silly cache flush. */
+- eth_copy_and_sum(skb, pkt_pointer + 2, len, 0);
++ skb_copy_to_linear_data(skb, pkt_pointer + 2, len);
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* We don't want to receive our own packets */
+diff -Nurb linux-2.6.22-570/drivers/net/shaper.c linux-2.6.22-591/drivers/net/shaper.c
+--- linux-2.6.22-570/drivers/net/shaper.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/shaper.c 2007-12-21 15:36:14.000000000 -0500
+@@ -86,6 +86,7 @@
+
+ #include <net/dst.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+ struct shaper_cb {
+ unsigned long shapeclock; /* Time it should go out */
+@@ -488,7 +489,7 @@
+ {
+ case SHAPER_SET_DEV:
+ {
+- struct net_device *them=__dev_get_by_name(ss->ss_name);
++ struct net_device *them=__dev_get_by_name(&init_net, ss->ss_name);
+ if(them==NULL)
+ return -ENODEV;
+ if(sh->dev)
+diff -Nurb linux-2.6.22-570/drivers/net/sis190.c linux-2.6.22-591/drivers/net/sis190.c
+--- linux-2.6.22-570/drivers/net/sis190.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sis190.c 2007-12-21 15:36:12.000000000 -0500
+@@ -548,7 +548,7 @@
+ skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN);
+ if (skb) {
+ skb_reserve(skb, NET_IP_ALIGN);
+- eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0);
++ skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size);
+ *sk_buff = skb;
+ sis190_give_to_asic(desc, rx_buf_sz);
+ ret = 0;
+diff -Nurb linux-2.6.22-570/drivers/net/starfire.c linux-2.6.22-591/drivers/net/starfire.c
+--- linux-2.6.22-570/drivers/net/starfire.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/starfire.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1456,7 +1456,7 @@
+ pci_dma_sync_single_for_cpu(np->pci_dev,
+ np->rx_info[entry].mapping,
+ pkt_len, PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb, np->rx_info[entry].skb->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len);
+ pci_dma_sync_single_for_device(np->pci_dev,
+ np->rx_info[entry].mapping,
+ pkt_len, PCI_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/sun3_82586.c linux-2.6.22-591/drivers/net/sun3_82586.c
+--- linux-2.6.22-570/drivers/net/sun3_82586.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sun3_82586.c 2007-12-21 15:36:12.000000000 -0500
+@@ -777,7 +777,7 @@
+ {
+ skb_reserve(skb,2);
+ skb_put(skb,totlen);
+- eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
++ skb_copy_to_linear_data(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen);
+ skb->protocol=eth_type_trans(skb,dev);
+ netif_rx(skb);
+ p->stats.rx_packets++;
+diff -Nurb linux-2.6.22-570/drivers/net/sun3lance.c linux-2.6.22-591/drivers/net/sun3lance.c
+--- linux-2.6.22-570/drivers/net/sun3lance.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sun3lance.c 2007-12-21 15:36:12.000000000 -0500
+@@ -853,10 +853,9 @@
+
+ skb_reserve( skb, 2 ); /* 16 byte align */
+ skb_put( skb, pkt_len ); /* Make room */
+-// skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ PKTBUF_ADDR(head),
+- pkt_len, 0);
++ pkt_len);
+
+ skb->protocol = eth_type_trans( skb, dev );
+ netif_rx( skb );
+diff -Nurb linux-2.6.22-570/drivers/net/sunbmac.c linux-2.6.22-591/drivers/net/sunbmac.c
+--- linux-2.6.22-570/drivers/net/sunbmac.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sunbmac.c 2007-12-21 15:36:12.000000000 -0500
+@@ -860,7 +860,7 @@
+ sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
+ this->rx_addr, len,
+ SBUS_DMA_FROMDEVICE);
+- eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0);
++ skb_copy_to_linear_data(copy_skb, (unsigned char *)skb->data, len);
+ sbus_dma_sync_single_for_device(bp->bigmac_sdev,
+ this->rx_addr, len,
+ SBUS_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/sundance.c linux-2.6.22-591/drivers/net/sundance.c
+--- linux-2.6.22-570/drivers/net/sundance.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sundance.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1313,7 +1313,7 @@
+ np->rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+
+- eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
+ pci_dma_sync_single_for_device(np->pci_dev,
+ desc->frag[0].addr,
+ np->rx_buf_sz,
+diff -Nurb linux-2.6.22-570/drivers/net/sunlance.c linux-2.6.22-591/drivers/net/sunlance.c
+--- linux-2.6.22-570/drivers/net/sunlance.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sunlance.c 2007-12-21 15:36:12.000000000 -0500
+@@ -549,9 +549,9 @@
+
+ skb_reserve(skb, 2); /* 16 byte align */
+ skb_put(skb, len); /* make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)&(ib->rx_buf [entry][0]),
+- len, 0);
++ len);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/sunqe.c linux-2.6.22-591/drivers/net/sunqe.c
+--- linux-2.6.22-570/drivers/net/sunqe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/sunqe.c 2007-12-21 15:36:12.000000000 -0500
+@@ -439,8 +439,8 @@
+ } else {
+ skb_reserve(skb, 2);
+ skb_put(skb, len);
+- eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
+- len, 0);
++ skb_copy_to_linear_data(skb, (unsigned char *) this_qbuf,
++ len);
+ skb->protocol = eth_type_trans(skb, qep->dev);
+ netif_rx(skb);
+ qep->dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/tg3.c linux-2.6.22-591/drivers/net/tg3.c
+--- linux-2.6.22-570/drivers/net/tg3.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tg3.c 2007-12-21 15:36:12.000000000 -0500
+@@ -11944,12 +11944,11 @@
+ * checksumming.
+ */
+ if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) {
++ dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
+ GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
+- dev->features |= NETIF_F_HW_CSUM;
+- else
+- dev->features |= NETIF_F_IP_CSUM;
+- dev->features |= NETIF_F_SG;
++ dev->features |= NETIF_F_IPV6_CSUM;
++
+ tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ } else
+ tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+diff -Nurb linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c
+--- linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c 2007-12-21 15:36:14.000000000 -0500
+@@ -250,7 +250,7 @@
+ #if STREAMER_NETWORK_MONITOR
+ #ifdef CONFIG_PROC_FS
+ if (!dev_streamer)
+- create_proc_read_entry("net/streamer_tr", 0, 0,
++ create_proc_read_entry("streamer_tr", 0, init_net.proc_net,
+ streamer_proc_info, NULL);
+ streamer_priv->next = dev_streamer;
+ dev_streamer = streamer_priv;
+@@ -423,7 +423,7 @@
+ }
+ }
+ if (!dev_streamer)
+- remove_proc_entry("net/streamer_tr", NULL);
++ remove_proc_entry("streamer_tr", init_net.proc_net);
+ }
+ #endif
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/net/tokenring/olympic.c linux-2.6.22-591/drivers/net/tokenring/olympic.c
+--- linux-2.6.22-570/drivers/net/tokenring/olympic.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tokenring/olympic.c 2007-12-21 15:36:14.000000000 -0500
+@@ -101,6 +101,7 @@
+ #include <linux/bitops.h>
+ #include <linux/jiffies.h>
+
++#include <net/net_namespace.h>
+ #include <net/checksum.h>
+
+ #include <asm/io.h>
+@@ -268,9 +269,9 @@
+ printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name);
+ if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */
+ char proc_name[20] ;
+- strcpy(proc_name,"net/olympic_") ;
++ strcpy(proc_name,"olympic_") ;
+ strcat(proc_name,dev->name) ;
+- create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ;
++ create_proc_read_entry(proc_name,0,init_net.proc_net,olympic_proc_info,(void *)dev) ;
+ printk("Olympic: Network Monitor information: /proc/%s\n",proc_name);
+ }
+ return 0 ;
+@@ -1752,9 +1753,9 @@
+
+ if (olympic_priv->olympic_network_monitor) {
+ char proc_name[20] ;
+- strcpy(proc_name,"net/olympic_") ;
++ strcpy(proc_name,"olympic_") ;
+ strcat(proc_name,dev->name) ;
+- remove_proc_entry(proc_name,NULL);
++ remove_proc_entry(proc_name,init_net.proc_net);
+ }
+ unregister_netdev(dev) ;
+ iounmap(olympic_priv->olympic_mmio) ;
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/interrupt.c linux-2.6.22-591/drivers/net/tulip/interrupt.c
+--- linux-2.6.22-570/drivers/net/tulip/interrupt.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tulip/interrupt.c 2007-12-21 15:36:12.000000000 -0500
+@@ -197,8 +197,8 @@
+ tp->rx_buffers[entry].mapping,
+ pkt_len, PCI_DMA_FROMDEVICE);
+ #if ! defined(__alpha__)
+- eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
+- pkt_len, 0);
++ skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
++ pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ memcpy(skb_put(skb, pkt_len),
+@@ -420,8 +420,8 @@
+ tp->rx_buffers[entry].mapping,
+ pkt_len, PCI_DMA_FROMDEVICE);
+ #if ! defined(__alpha__)
+- eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
+- pkt_len, 0);
++ skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
++ pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ memcpy(skb_put(skb, pkt_len),
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/winbond-840.c linux-2.6.22-591/drivers/net/tulip/winbond-840.c
+--- linux-2.6.22-570/drivers/net/tulip/winbond-840.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tulip/winbond-840.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1232,7 +1232,7 @@
+ pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
+ np->rx_skbuff[entry]->len,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry],
+ np->rx_skbuff[entry]->len,
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_cb.c
+--- linux-2.6.22-570/drivers/net/tulip/xircom_cb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tulip/xircom_cb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1208,7 +1208,7 @@
+ goto out;
+ }
+ skb_reserve(skb, 2);
+- eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
++ skb_copy_to_linear_data(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len);
+ skb_put(skb, pkt_len);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c
+--- linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1242,8 +1242,8 @@
+ && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+ skb_reserve(skb, 2); /* 16 byte align the IP header */
+ #if ! defined(__alpha__)
+- eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
+- pkt_len, 0);
++ skb_copy_to_linear_data(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
++ pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ memcpy(skb_put(skb, pkt_len),
+diff -Nurb linux-2.6.22-570/drivers/net/tun.c linux-2.6.22-591/drivers/net/tun.c
+--- linux-2.6.22-570/drivers/net/tun.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/tun.c 2007-12-21 15:36:14.000000000 -0500
+@@ -62,6 +62,7 @@
+ #include <linux/if_ether.h>
+ #include <linux/if_tun.h>
+ #include <linux/crc32.h>
++#include <net/net_namespace.h>
+
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
+@@ -432,6 +433,7 @@
+ init_waitqueue_head(&tun->read_wait);
+
+ tun->owner = -1;
++ tun->group = -1;
+
+ SET_MODULE_OWNER(dev);
+ dev->open = tun_net_open;
+@@ -467,11 +469,14 @@
+ return -EBUSY;
+
+ /* Check permissions */
+- if (tun->owner != -1 &&
+- current->euid != tun->owner && !capable(CAP_NET_ADMIN))
++ if (((tun->owner != -1 &&
++ current->euid != tun->owner) ||
++ (tun->group != -1 &&
++ current->egid != tun->group)) &&
++ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+ }
+- else if (__dev_get_by_name(ifr->ifr_name))
++ else if (__dev_get_by_name(&init_net, ifr->ifr_name))
+ return -EINVAL;
+ else {
+ char *name;
+@@ -610,6 +615,13 @@
+ DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
+ break;
+
++ case TUNSETGROUP:
++ /* Set group of the device */
++ tun->group = (gid_t) arg;
++
++ DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
++ break;
++
+ case TUNSETLINK:
+ /* Only allow setting the type when the interface is down */
+ if (tun->dev->flags & IFF_UP) {
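[Usage note] TUNSETGROUP mirrors TUNSETOWNER: the kernel stores the raw ioctl argument as a gid, and tun_set_iff() then admits a caller matching the owner uid or the group gid (or holding CAP_NET_ADMIN). A hedged userspace sketch; per the handler above, the gid rides directly in the ioctl argument, and the call is normally issued against a descriptor already attached with TUNSETIFF:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Make a persistent tun0 attachable by members of group 1000. */
int setup_group_tun(void)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TUN;
	strncpy(ifr.ifr_name, "tun0", IFNAMSIZ);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0 ||
	    ioctl(fd, TUNSETGROUP, 1000) < 0 ||	/* gid passed as the argument */
	    ioctl(fd, TUNSETPERSIST, 1) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}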
+diff -Nurb linux-2.6.22-570/drivers/net/typhoon.c linux-2.6.22-591/drivers/net/typhoon.c
+--- linux-2.6.22-570/drivers/net/typhoon.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/typhoon.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1703,7 +1703,7 @@
+ pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
+ PKT_BUF_SZ,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(new_skb, skb->data, pkt_len, 0);
++ skb_copy_to_linear_data(new_skb, skb->data, pkt_len);
+ pci_dma_sync_single_for_device(tp->pdev, dma_addr,
+ PKT_BUF_SZ,
+ PCI_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/usb/catc.c linux-2.6.22-591/drivers/net/usb/catc.c
+--- linux-2.6.22-570/drivers/net/usb/catc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/usb/catc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -255,7 +255,7 @@
+ if (!(skb = dev_alloc_skb(pkt_len)))
+ return;
+
+- eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
++ skb_copy_to_linear_data(skb, pkt_start + pkt_offset, pkt_len);
+ skb_put(skb, pkt_len);
+
+ skb->protocol = eth_type_trans(skb, catc->netdev);
+diff -Nurb linux-2.6.22-570/drivers/net/usb/kaweth.c linux-2.6.22-591/drivers/net/usb/kaweth.c
+--- linux-2.6.22-570/drivers/net/usb/kaweth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/usb/kaweth.c 2007-12-21 15:36:12.000000000 -0500
+@@ -635,7 +635,7 @@
+
+ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
+
+- eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
++ skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len);
+
+ skb_put(skb, pkt_len);
+
+diff -Nurb linux-2.6.22-570/drivers/net/via-rhine.c linux-2.6.22-591/drivers/net/via-rhine.c
+--- linux-2.6.22-570/drivers/net/via-rhine.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/via-rhine.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1492,9 +1492,9 @@
+ rp->rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ rp->rx_skbuff[entry]->data,
+- pkt_len, 0);
++ pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(rp->pdev,
+ rp->rx_skbuff_dma[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/wan/dlci.c linux-2.6.22-591/drivers/net/wan/dlci.c
+--- linux-2.6.22-570/drivers/net/wan/dlci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wan/dlci.c 2007-12-21 15:36:14.000000000 -0500
+@@ -361,7 +361,7 @@
+
+
+ /* validate slave device */
+- slave = dev_get_by_name(dlci->devname);
++ slave = dev_get_by_name(&init_net, dlci->devname);
+ if (!slave)
+ return -ENODEV;
+
+@@ -427,7 +427,7 @@
+ int err;
+
+ /* validate slave device */
+- master = __dev_get_by_name(dlci->devname);
++ master = __dev_get_by_name(&init_net, dlci->devname);
+ if (!master)
+ return(-ENODEV);
+
+@@ -513,6 +513,9 @@
+ {
+ struct net_device *dev = (struct net_device *) ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_UNREGISTER) {
+ struct dlci_local *dlp;
+
+diff -Nurb linux-2.6.22-570/drivers/net/wan/hdlc.c linux-2.6.22-591/drivers/net/wan/hdlc.c
+--- linux-2.6.22-570/drivers/net/wan/hdlc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wan/hdlc.c 2007-12-21 15:36:14.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/notifier.h>
+ #include <linux/hdlc.h>
++#include <net/net_namespace.h>
+
+
+ static const char* version = "HDLC support module revision 1.21";
+@@ -66,6 +67,12 @@
+ struct packet_type *p, struct net_device *orig_dev)
+ {
+ struct hdlc_device_desc *desc = dev_to_desc(dev);
++
++ if (dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ if (desc->netif_rx)
+ return desc->netif_rx(skb);
+
+@@ -102,6 +109,9 @@
+ unsigned long flags;
+ int on;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (dev->get_stats != hdlc_get_stats)
+ return NOTIFY_DONE; /* not an HDLC device */
+
+diff -Nurb linux-2.6.22-570/drivers/net/wan/lapbether.c linux-2.6.22-591/drivers/net/wan/lapbether.c
+--- linux-2.6.22-570/drivers/net/wan/lapbether.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wan/lapbether.c 2007-12-21 15:36:14.000000000 -0500
+@@ -91,6 +91,9 @@
+ int len, err;
+ struct lapbethdev *lapbeth;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ return NET_RX_DROP;
+
+@@ -391,6 +394,9 @@
+ struct lapbethdev *lapbeth;
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (!dev_is_ethdev(dev))
+ return NOTIFY_DONE;
+
+diff -Nurb linux-2.6.22-570/drivers/net/wan/sbni.c linux-2.6.22-591/drivers/net/wan/sbni.c
+--- linux-2.6.22-570/drivers/net/wan/sbni.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wan/sbni.c 2007-12-21 15:36:14.000000000 -0500
+@@ -54,6 +54,7 @@
+ #include <linux/init.h>
+ #include <linux/delay.h>
+
++#include <net/net_namespace.h>
+ #include <net/arp.h>
+
+ #include <asm/io.h>
+@@ -1362,7 +1363,7 @@
+
+ if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name ))
+ return -EFAULT;
+- slave_dev = dev_get_by_name( slave_name );
++ slave_dev = dev_get_by_name(&init_net, slave_name );
+ if( !slave_dev || !(slave_dev->flags & IFF_UP) ) {
+ printk( KERN_ERR "%s: trying to enslave non-active "
+ "device %s\n", dev->name, slave_name );
+diff -Nurb linux-2.6.22-570/drivers/net/wan/syncppp.c linux-2.6.22-591/drivers/net/wan/syncppp.c
+--- linux-2.6.22-570/drivers/net/wan/syncppp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wan/syncppp.c 2007-12-21 15:36:14.000000000 -0500
+@@ -51,6 +51,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
+
++#include <net/net_namespace.h>
+ #include <net/syncppp.h>
+
+ #include <asm/byteorder.h>
+@@ -1445,6 +1446,11 @@
+
+ static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev)
+ {
++ if (dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ return NET_RX_DROP;
+ sppp_input(dev,skb);
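
For the protocol receive handlers touched here (hdlc_rcv and lapbeth_rcv
above, sppp_rcv in this hunk), the namespace guard carries an extra
obligation: a packet_type handler owns the skb it is given, so traffic from a
foreign-namespace device must be freed rather than silently ignored. The
pattern, sketched with hypothetical names:

    static int sample_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *orig_dev)
    {
            /* We own the skb: dropping it means freeing it. */
            if (dev->nd_net != &init_net) {
                    kfree_skb(skb);
                    return 0;
            }

            /* ... normal protocol input path ... */
            return 0;
    }
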
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/airo.c linux-2.6.22-591/drivers/net/wireless/airo.c
+--- linux-2.6.22-570/drivers/net/wireless/airo.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wireless/airo.c 2007-12-21 15:36:12.000000000 -0500
+@@ -3079,6 +3079,7 @@
+ struct airo_info *ai = dev->priv;
+ int locked;
+
++ set_freezable();
+ while(1) {
+ /* make swsusp happy with our thread */
+ try_to_freeze();
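
The set_freezable() call added to the airo thread above (and to libertas
below) is needed because kernel threads in this series start out
non-freezable: without the opt-in, the try_to_freeze() in the loop is a no-op
and suspend can race with the thread. A sketch of the resulting loop shape,
with hypothetical names:

    #include <linux/freezer.h>
    #include <linux/kthread.h>

    static int sample_thread(void *arg)
    {
            set_freezable();        /* threads are born non-freezable */

            while (!kthread_should_stop()) {
                    try_to_freeze();        /* park here during suspend */
                    /* ... the driver's periodic work ... */
            }
            return 0;
    }
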
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c
+--- linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c 2007-12-21 15:36:14.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/wireless.h>
+ #include <linux/etherdevice.h>
++#include <net/net_namespace.h>
+ #include <net/iw_handler.h>
+ #include <net/ieee80211.h>
+ #include <net/ieee80211_crypt.h>
+@@ -1094,8 +1095,8 @@
+
+ static int __init hostap_init(void)
+ {
+- if (proc_net != NULL) {
+- hostap_proc = proc_mkdir("hostap", proc_net);
++ if (init_net.proc_net != NULL) {
++ hostap_proc = proc_mkdir("hostap", init_net.proc_net);
+ if (!hostap_proc)
+ printk(KERN_WARNING "Failed to mkdir "
+ "/proc/net/hostap\n");
+@@ -1110,7 +1111,7 @@
+ {
+ if (hostap_proc != NULL) {
+ hostap_proc = NULL;
+- remove_proc_entry("hostap", proc_net);
++ remove_proc_entry("hostap", init_net.proc_net);
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/libertas/main.c linux-2.6.22-591/drivers/net/wireless/libertas/main.c
+--- linux-2.6.22-570/drivers/net/wireless/libertas/main.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wireless/libertas/main.c 2007-12-21 15:36:12.000000000 -0500
+@@ -613,6 +613,7 @@
+
+ init_waitqueue_entry(&wait, current);
+
++ set_freezable();
+ for (;;) {
+ lbs_deb_thread( "main-thread 111: intcounter=%d "
+ "currenttxskb=%p dnld_sent=%d\n",
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/strip.c linux-2.6.22-591/drivers/net/wireless/strip.c
+--- linux-2.6.22-570/drivers/net/wireless/strip.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wireless/strip.c 2007-12-21 15:36:14.000000000 -0500
+@@ -107,6 +107,7 @@
+ #include <linux/serialP.h>
+ #include <linux/rcupdate.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+@@ -1971,7 +1972,7 @@
+ sizeof(zero_address))) {
+ struct net_device *dev;
+ read_lock_bh(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (dev->type == strip_info->dev->type &&
+ !memcmp(dev->dev_addr,
+ &strip_info->true_dev_addr,
+@@ -2787,7 +2788,7 @@
+ /*
+ * Register the status file with /proc
+ */
+- proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops);
++ proc_net_fops_create(&init_net, "strip", S_IFREG | S_IRUGO, &strip_seq_fops);
+
+ return status;
+ }
+@@ -2809,7 +2810,7 @@
+ }
+
+ /* Unregister with the /proc/net file here. */
+- proc_net_remove("strip");
++ proc_net_remove(&init_net, "strip");
+
+ if ((i = tty_unregister_ldisc(N_STRIP)))
+ printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i);
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c
+--- linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1011,7 +1011,7 @@
+ } else {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */
+- eth_copy_and_sum(skb, (unsigned char *)&sig.daddr, 12, 0);
++ skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12);
+ wl3501_receive(this, skb->data, pkt_len);
+ skb_put(skb, pkt_len);
+ skb->protocol = eth_type_trans(skb, dev);
+diff -Nurb linux-2.6.22-570/drivers/net/xen-netfront.c linux-2.6.22-591/drivers/net/xen-netfront.c
+--- linux-2.6.22-570/drivers/net/xen-netfront.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/net/xen-netfront.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,1995 @@
++/*
++ * Virtual network driver for conversing with remote driver backends.
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ * Copyright (c) 2005, XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/skbuff.h>
++#include <linux/ethtool.h>
++#include <linux/if_ether.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
++#include <linux/moduleparam.h>
++#include <linux/mm.h>
++#include <net/ip.h>
++
++#include <xen/xenbus.h>
++#include <xen/events.h>
++#include <xen/page.h>
++#include <xen/grant_table.h>
++
++#include <xen/interface/io/netif.h>
++#include <xen/interface/memory.h>
++#include <xen/interface/grant_table.h>
++
++static struct ethtool_ops xennet_ethtool_ops;
++
++struct netfront_cb {
++ struct page *page;
++ unsigned offset;
++};
++
++#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
++
++/*
++ * Mutually-exclusive module options to select receive data path:
++ * copy : Packets are copied by network backend into local memory
++ * flip : Page containing packet data is transferred to our ownership
++ * For fully-virtualised guests there is no option - copying must be used.
++ * For paravirtualised guests, flipping is the default.
++ */
++typedef enum rx_mode {
++ RX_COPY = 0,
++ RX_FLIP = 1,
++} rx_mode_t;
++
++static enum rx_mode rx_mode = RX_FLIP;
++
++#define param_check_rx_mode_t(name, p) __param_check(name, p, rx_mode_t)
++
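++/*
++ * module_param(rx_mode, rx_mode_t, ...) below expands to uses of
++ * param_set_rx_mode_t, param_get_rx_mode_t and the param_check_rx_mode_t
++ * macro above, pasting the type name into the helper names. Defining the
++ * three of them turns rx_mode into a validated "copy"/"flip" keyword
++ * parameter instead of a raw integer.
++ */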
++static int param_set_rx_mode_t(const char *val, struct kernel_param *kp)
++{
++ enum rx_mode *rxmp = kp->arg;
++ int ret = 0;
++
++ if (strcmp(val, "copy") == 0)
++ *rxmp = RX_COPY;
++ else if (strcmp(val, "flip") == 0)
++ *rxmp = RX_FLIP;
++ else
++ ret = -EINVAL;
++
++ return ret;
++}
++
++static int param_get_rx_mode_t(char *buffer, struct kernel_param *kp)
++{
++ enum rx_mode *rxmp = kp->arg;
++
++ return sprintf(buffer, "%s", *rxmp == RX_COPY ? "copy" : "flip");
++}
++
++MODULE_PARM_DESC(rx_mode, "How to get packets from card: \"copy\" or \"flip\"");
++module_param(rx_mode, rx_mode_t, 0400);
++
++#define RX_COPY_THRESHOLD 256
++
++#define GRANT_INVALID_REF 0
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++
++struct netfront_info {
++ struct list_head list;
++ struct net_device *netdev;
++
++ struct net_device_stats stats;
++
++ struct xen_netif_tx_front_ring tx;
++ struct xen_netif_rx_front_ring rx;
++
++ spinlock_t tx_lock;
++ spinlock_t rx_lock;
++
++ unsigned int evtchn;
++ unsigned int copying_receiver;
++
++ /* Receive-ring batched refills. */
++#define RX_MIN_TARGET 8
++#define RX_DFL_MIN_TARGET 64
++#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++ unsigned rx_min_target, rx_max_target, rx_target;
++ struct sk_buff_head rx_batch;
++
++ struct timer_list rx_refill_timer;
++
++ /*
++ * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
++ * are linked from tx_skb_freelist through skb_entry.link.
++ *
++ * NB. Freelist index entries are always going to be less than
++ * PAGE_OFFSET, whereas pointers to skbs will always be equal or
++ * greater than PAGE_OFFSET: we use this property to distinguish
++ * them.
++ */
++ union skb_entry {
++ struct sk_buff *skb;
++ unsigned link;
+ } tx_skbs[NET_TX_RING_SIZE];
++ grant_ref_t gref_tx_head;
++ grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
++ unsigned tx_skb_freelist;
++
++ struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
++ grant_ref_t gref_rx_head;
++ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
++
++ struct xenbus_device *xbdev;
++ int tx_ring_ref;
++ int rx_ring_ref;
++
++ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
++ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
++ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++};
++
++struct netfront_rx_info {
++ struct xen_netif_rx_response rx;
++ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++};
++
++/*
++ * Access macros for acquiring and freeing slots in tx_skbs[].
++ */
++
++static void add_id_to_freelist(unsigned *head, union skb_entry *list, unsigned short id)
++{
++ list[id].link = *head;
++ *head = id;
++}
++
++static unsigned short get_id_from_freelist(unsigned *head, union skb_entry *list)
++{
++ unsigned int id = *head;
++ *head = list[id].link;
++ return id;
++}
++
++static int xennet_rxidx(RING_IDX idx)
++{
++ return idx & (NET_RX_RING_SIZE - 1);
++}
++
++static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
++ RING_IDX ri)
++{
++ int i = xennet_rxidx(ri);
++ struct sk_buff *skb = np->rx_skbs[i];
++ np->rx_skbs[i] = NULL;
++ return skb;
++}
++
++static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
++ RING_IDX ri)
++{
++ int i = xennet_rxidx(ri);
++ grant_ref_t ref = np->grant_rx_ref[i];
++ np->grant_rx_ref[i] = GRANT_INVALID_REF;
++ return ref;
++}
++
++#ifdef CONFIG_SYSFS
++static int xennet_sysfs_addif(struct net_device *netdev);
++static void xennet_sysfs_delif(struct net_device *netdev);
++#else /* !CONFIG_SYSFS */
++#define xennet_sysfs_addif(dev) (0)
++#define xennet_sysfs_delif(dev) do { } while(0)
++#endif
++
++static int xennet_can_sg(struct net_device *dev)
++{
++ return dev->features & NETIF_F_SG;
++}
++
++static void rx_refill_timeout(unsigned long data)
++{
++ struct net_device *dev = (struct net_device *)data;
++ netif_rx_schedule(dev);
++}
++
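++/*
++ * Worst-case slot demand for a single skb: one request per fragment,
++ * extra requests if the linear header straddles page boundaries, and
++ * one more for GSO extra info. Report the ring full once the number of
++ * in-flight requests comes within MAX_SKB_FRAGS + 2 of TX_MAX_TARGET.
++ */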
++static int netfront_tx_slot_available(struct netfront_info *np)
++{
++ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
++ (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
++}
++
++static void xennet_maybe_wake_tx(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++
++ if (unlikely(netif_queue_stopped(dev)) &&
++ netfront_tx_slot_available(np) &&
++ likely(netif_running(dev)))
++ netif_wake_queue(dev);
++}
++
++static void xennet_alloc_rx_buffers(struct net_device *dev)
++{
++ unsigned short id;
++ struct netfront_info *np = netdev_priv(dev);
++ struct sk_buff *skb;
++ struct page *page;
++ int i, batch_target, notify;
++ RING_IDX req_prod = np->rx.req_prod_pvt;
++ struct xen_memory_reservation reservation;
++ grant_ref_t ref;
++ unsigned long pfn;
++ void *vaddr;
++ int nr_flips;
++ struct xen_netif_rx_request *req;
++
++ if (unlikely(!netif_carrier_ok(dev)))
++ return;
++
++ /*
++ * Allocate skbuffs greedily, even though we batch updates to the
++ * receive ring. This creates a less bursty demand on the memory
++ * allocator, so should reduce the chance of failed allocation requests
++ * both for ourselves and for other kernel subsystems.
++ */
++ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
++ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
++ skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
++ GFP_ATOMIC | __GFP_NOWARN);
++ if (unlikely(!skb))
++ goto no_skb;
++
++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
++ if (!page) {
++ kfree_skb(skb);
++no_skb:
++ /* Any skbuffs queued for refill? Force them out. */
++ if (i != 0)
++ goto refill;
++ /* Could not allocate any skbuffs. Try again later. */
++ mod_timer(&np->rx_refill_timer,
++ jiffies + (HZ/10));
++ break;
++ }
++
++ skb_shinfo(skb)->frags[0].page = page;
++ skb_shinfo(skb)->nr_frags = 1;
++ __skb_queue_tail(&np->rx_batch, skb);
++ }
++
++ /* Is the batch large enough to be worthwhile? */
++ if (i < (np->rx_target/2)) {
++ if (req_prod > np->rx.sring->req_prod)
++ goto push;
++ return;
++ }
++
++ /* Adjust our fill target if we risked running out of buffers. */
++ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
++ ((np->rx_target *= 2) > np->rx_max_target))
++ np->rx_target = np->rx_max_target;
++
++ refill:
++ for (nr_flips = i = 0; ; i++) {
++ if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
++ break;
++
++ skb->dev = dev;
++
++ id = xennet_rxidx(req_prod + i);
++
++ BUG_ON(np->rx_skbs[id]);
++ np->rx_skbs[id] = skb;
++
++ ref = gnttab_claim_grant_reference(&np->gref_rx_head);
++ BUG_ON((signed short)ref < 0);
++ np->grant_rx_ref[id] = ref;
++
++ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
++ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
++
++ req = RING_GET_REQUEST(&np->rx, req_prod + i);
++ if (!np->copying_receiver) {
++ gnttab_grant_foreign_transfer_ref(ref,
++ np->xbdev->otherend_id,
++ pfn);
++ np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ /* Remove this page before passing
++ * back to Xen. */
++ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++ MULTI_update_va_mapping(np->rx_mcl+i,
++ (unsigned long)vaddr,
++ __pte(0), 0);
++ }
++ nr_flips++;
++ } else {
++ gnttab_grant_foreign_access_ref(ref,
++ np->xbdev->otherend_id,
++ pfn_to_mfn(pfn),
++ 0);
++ }
++
++ req->id = id;
++ req->gref = ref;
++ }
++
++ if (nr_flips != 0) {
++ reservation.extent_start = np->rx_pfn_array;
++ reservation.nr_extents = nr_flips;
++ reservation.extent_order = 0;
++ reservation.address_bits = 0;
++ reservation.domid = DOMID_SELF;
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ /* After all PTEs have been zapped, flush the TLB. */
++ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
++ UVMF_TLB_FLUSH|UVMF_ALL;
++
++ /* Give away a batch of pages. */
++ np->rx_mcl[i].op = __HYPERVISOR_memory_op;
++ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
++ np->rx_mcl[i].args[1] = (unsigned long)&reservation;
++
++ /* Zap PTEs and give away pages in one big
++ * multicall. */
++ (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
++
++ /* Check return status of HYPERVISOR_memory_op(). */
++ if (unlikely(np->rx_mcl[i].result != i))
++ panic("Unable to reduce memory reservation\n");
++ } else {
++ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
++ &reservation) != i)
++ panic("Unable to reduce memory reservation\n");
++ }
++ } else {
++ wmb();
++ }
++
++ /* Above is a suitable barrier to ensure backend will see requests. */
++ np->rx.req_prod_pvt = req_prod + i;
++ push:
++ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
++ if (notify)
++ notify_remote_via_irq(np->netdev->irq);
++}
++
++static int xennet_open(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++
++ memset(&np->stats, 0, sizeof(np->stats));
++
++ spin_lock_bh(&np->rx_lock);
++ if (netif_carrier_ok(dev)) {
++ xennet_alloc_rx_buffers(dev);
++ np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
++ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
++ netif_rx_schedule(dev);
++ }
++ spin_unlock_bh(&np->rx_lock);
++
++ xennet_maybe_wake_tx(dev);
++
++ return 0;
++}
++
++static void xennet_tx_buf_gc(struct net_device *dev)
++{
++ RING_IDX cons, prod;
++ unsigned short id;
++ struct netfront_info *np = netdev_priv(dev);
++ struct sk_buff *skb;
++
++ BUG_ON(!netif_carrier_ok(dev));
++
++ do {
++ prod = np->tx.sring->rsp_prod;
++ rmb(); /* Ensure we see responses up to 'rp'. */
++
++ for (cons = np->tx.rsp_cons; cons != prod; cons++) {
++ struct xen_netif_tx_response *txrsp;
++
++ txrsp = RING_GET_RESPONSE(&np->tx, cons);
++ if (txrsp->status == NETIF_RSP_NULL)
++ continue;
++
++ id = txrsp->id;
++ skb = np->tx_skbs[id].skb;
++ if (unlikely(gnttab_query_foreign_access(
++ np->grant_tx_ref[id]) != 0)) {
++ printk(KERN_ALERT "xennet_tx_buf_gc: warning "
++ "-- grant still in use by backend "
++ "domain.\n");
++ BUG();
++ }
++ gnttab_end_foreign_access_ref(
++ np->grant_tx_ref[id], GNTMAP_readonly);
++ gnttab_release_grant_reference(
++ &np->gref_tx_head, np->grant_tx_ref[id]);
++ np->grant_tx_ref[id] = GRANT_INVALID_REF;
++ add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
++ dev_kfree_skb_irq(skb);
++ }
++
++ np->tx.rsp_cons = prod;
++
++ /*
++ * Set a new event, then check for race with update of tx_cons.
++ * Note that it is essential to schedule a callback, no matter
++ * how few buffers are pending. Even if there is space in the
++ * transmit ring, higher layers may be blocked because too much
++ * data is outstanding: in such cases notification from Xen is
++ * likely to be the only kick that we'll get.
++ */
++ np->tx.sring->rsp_event =
++ prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
++ mb();
++ } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
++
++ xennet_maybe_wake_tx(dev);
++}
++
++static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
++ struct xen_netif_tx_request *tx)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ char *data = skb->data;
++ unsigned long mfn;
++ RING_IDX prod = np->tx.req_prod_pvt;
++ int frags = skb_shinfo(skb)->nr_frags;
++ unsigned int offset = offset_in_page(data);
++ unsigned int len = skb_headlen(skb);
++ unsigned int id;
++ grant_ref_t ref;
++ int i;
++
++ /* While the header overlaps a page boundary (including being
++ larger than a page), split it into page-sized chunks. */
++ while (len > PAGE_SIZE - offset) {
++ tx->size = PAGE_SIZE - offset;
++ tx->flags |= NETTXF_more_data;
++ len -= tx->size;
++ data += tx->size;
++ offset = 0;
++
++ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++ np->tx_skbs[id].skb = skb_get(skb);
++ tx = RING_GET_REQUEST(&np->tx, prod++);
++ tx->id = id;
++ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++ BUG_ON((signed short)ref < 0);
++
++ mfn = virt_to_mfn(data);
++ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++ mfn, GNTMAP_readonly);
++
++ tx->gref = np->grant_tx_ref[id] = ref;
++ tx->offset = offset;
++ tx->size = len;
++ tx->flags = 0;
++ }
++
++ /* Grant backend access to each skb fragment page. */
++ for (i = 0; i < frags; i++) {
++ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
++
++ tx->flags |= NETTXF_more_data;
++
++ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++ np->tx_skbs[id].skb = skb_get(skb);
++ tx = RING_GET_REQUEST(&np->tx, prod++);
++ tx->id = id;
++ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++ BUG_ON((signed short)ref < 0);
++
++ mfn = pfn_to_mfn(page_to_pfn(frag->page));
++ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++ mfn, GNTMAP_readonly);
++
++ tx->gref = np->grant_tx_ref[id] = ref;
++ tx->offset = frag->page_offset;
++ tx->size = frag->size;
++ tx->flags = 0;
++ }
++
++ np->tx.req_prod_pvt = prod;
++}
++
++static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++ unsigned short id;
++ struct netfront_info *np = netdev_priv(dev);
++ struct xen_netif_tx_request *tx;
++ struct xen_netif_extra_info *extra;
++ char *data = skb->data;
++ RING_IDX i;
++ grant_ref_t ref;
++ unsigned long mfn;
++ int notify;
++ int frags = skb_shinfo(skb)->nr_frags;
++ unsigned int offset = offset_in_page(data);
++ unsigned int len = skb_headlen(skb);
++
++ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
++ if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
++ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
++ frags);
++ dump_stack();
++ goto drop;
++ }
++
++ spin_lock_irq(&np->tx_lock);
++
++ if (unlikely(!netif_carrier_ok(dev) ||
++ (frags > 1 && !xennet_can_sg(dev)) ||
++ netif_needs_gso(dev, skb))) {
++ spin_unlock_irq(&np->tx_lock);
++ goto drop;
++ }
++
++ i = np->tx.req_prod_pvt;
++
++ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++ np->tx_skbs[id].skb = skb;
++
++ tx = RING_GET_REQUEST(&np->tx, i);
++
++ tx->id = id;
++ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++ BUG_ON((signed short)ref < 0);
++ mfn = virt_to_mfn(data);
++ gnttab_grant_foreign_access_ref(
++ ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
++ tx->gref = np->grant_tx_ref[id] = ref;
++ tx->offset = offset;
++ tx->size = len;
++ extra = NULL;
++
++ tx->flags = 0;
++ if (skb->ip_summed == CHECKSUM_PARTIAL)
++ /* local packet? */
++ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
++ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++ /* remote but checksummed. */
++ tx->flags |= NETTXF_data_validated;
++
++ if (skb_shinfo(skb)->gso_size) {
++ struct xen_netif_extra_info *gso;
++
++ gso = (struct xen_netif_extra_info *)
++ RING_GET_REQUEST(&np->tx, ++i);
++
++ if (extra)
++ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
++ else
++ tx->flags |= NETTXF_extra_info;
++
++ gso->u.gso.size = skb_shinfo(skb)->gso_size;
++ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++ gso->u.gso.pad = 0;
++ gso->u.gso.features = 0;
++
++ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++ gso->flags = 0;
++ extra = gso;
++ }
++
++ np->tx.req_prod_pvt = i + 1;
++
++ xennet_make_frags(skb, dev, tx);
++ tx->size = skb->len;
++
++ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
++ if (notify)
++ notify_remote_via_irq(np->netdev->irq);
++
++ xennet_tx_buf_gc(dev);
++
++ if (!netfront_tx_slot_available(np))
++ netif_stop_queue(dev);
++
++ spin_unlock_irq(&np->tx_lock);
++
++ np->stats.tx_bytes += skb->len;
++ np->stats.tx_packets++;
++
++ return 0;
++
++ drop:
++ np->stats.tx_dropped++;
++ dev_kfree_skb(skb);
++ return 0;
++}
++
++static int xennet_close(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ netif_stop_queue(np->netdev);
++ return 0;
++}
++
++static struct net_device_stats *xennet_get_stats(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ return &np->stats;
++}
++
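++/*
++ * Give an unconsumed receive buffer back to the ring: re-queue the skb
++ * and its grant reference under a fresh request id so the backend can
++ * fill the slot again.
++ */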
++static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
++ grant_ref_t ref)
++{
++ int new = xennet_rxidx(np->rx.req_prod_pvt);
++
++ BUG_ON(np->rx_skbs[new]);
++ np->rx_skbs[new] = skb;
++ np->grant_rx_ref[new] = ref;
++ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
++ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
++ np->rx.req_prod_pvt++;
++}
++
++static int xennet_get_extras(struct netfront_info *np,
++ struct xen_netif_extra_info *extras,
++ RING_IDX rp)
++{
++ struct xen_netif_extra_info *extra;
++ struct device *dev = &np->netdev->dev;
++ RING_IDX cons = np->rx.rsp_cons;
++ int err = 0;
++
++ do {
++ struct sk_buff *skb;
++ grant_ref_t ref;
++
++ if (unlikely(cons + 1 == rp)) {
++ if (net_ratelimit())
++ dev_warn(dev, "Missing extra info\n");
++ err = -EBADR;
++ break;
++ }
++
++ extra = (struct xen_netif_extra_info *)
++ RING_GET_RESPONSE(&np->rx, ++cons);
++
++ if (unlikely(!extra->type ||
++ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++ if (net_ratelimit())
++ dev_warn(dev, "Invalid extra type: %d\n",
++ extra->type);
++ err = -EINVAL;
++ } else {
++ memcpy(&extras[extra->type - 1], extra,
++ sizeof(*extra));
++ }
++
++ skb = xennet_get_rx_skb(np, cons);
++ ref = xennet_get_rx_ref(np, cons);
++ xennet_move_rx_slot(np, skb, ref);
++ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++ np->rx.rsp_cons = cons;
++ return err;
++}
++
++static int xennet_get_responses(struct netfront_info *np,
++ struct netfront_rx_info *rinfo, RING_IDX rp,
++ struct sk_buff_head *list,
++ int *pages_flipped_p)
++{
++ int pages_flipped = *pages_flipped_p;
++ struct mmu_update *mmu;
++ struct multicall_entry *mcl;
++ struct xen_netif_rx_response *rx = &rinfo->rx;
++ struct xen_netif_extra_info *extras = rinfo->extras;
++ struct device *dev = &np->netdev->dev;
++ RING_IDX cons = np->rx.rsp_cons;
++ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
++ grant_ref_t ref = xennet_get_rx_ref(np, cons);
++ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
++ int frags = 1;
++ int err = 0;
++ unsigned long ret;
++
++ if (rx->flags & NETRXF_extra_info) {
++ err = xennet_get_extras(np, extras, rp);
++ cons = np->rx.rsp_cons;
++ }
++
++ for (;;) {
++ unsigned long mfn;
++
++ if (unlikely(rx->status < 0 ||
++ rx->offset + rx->status > PAGE_SIZE)) {
++ if (net_ratelimit())
++ dev_warn(dev, "rx->offset: %x, size: %u\n",
++ rx->offset, rx->status);
++ xennet_move_rx_slot(np, skb, ref);
++ err = -EINVAL;
++ goto next;
++ }
++
++ /*
++ * This definitely indicates a bug, either in this driver or in
++ * the backend driver. In future this should flag the bad
++ * situation to the system controller to reboot the backend.
++ */
++ if (ref == GRANT_INVALID_REF) {
++ if (net_ratelimit())
++ dev_warn(dev, "Bad rx response id %d.\n",
++ rx->id);
++ err = -EINVAL;
++ goto next;
++ }
++
++ if (!np->copying_receiver) {
++ /* Memory pressure, insufficient buffer
++ * headroom, ... */
++ mfn = gnttab_end_foreign_transfer_ref(ref);
++ if (!mfn) {
++ if (net_ratelimit())
++ dev_warn(dev, "Unfulfilled rx req "
++ "(id=%d, st=%d).\n",
++ rx->id, rx->status);
++ xennet_move_rx_slot(np, skb, ref);
++ err = -ENOMEM;
++ goto next;
++ }
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ /* Remap the page. */
++ struct page *page =
++ skb_shinfo(skb)->frags[0].page;
++ unsigned long pfn = page_to_pfn(page);
++ void *vaddr = page_address(page);
++
++ mcl = np->rx_mcl + pages_flipped;
++ mmu = np->rx_mmu + pages_flipped;
++
++ MULTI_update_va_mapping(mcl,
++ (unsigned long)vaddr,
++ mfn_pte(mfn, PAGE_KERNEL),
++ 0);
++ mmu->ptr = ((u64)mfn << PAGE_SHIFT)
++ | MMU_MACHPHYS_UPDATE;
++ mmu->val = pfn;
++
++ set_phys_to_machine(pfn, mfn);
++ }
++ pages_flipped++;
++ } else {
++ ret = gnttab_end_foreign_access_ref(ref, 0);
++ BUG_ON(!ret);
++ }
++
++ gnttab_release_grant_reference(&np->gref_rx_head, ref);
++
++ __skb_queue_tail(list, skb);
++
++next:
++ if (!(rx->flags & NETRXF_more_data))
++ break;
++
++ if (cons + frags == rp) {
++ if (net_ratelimit())
++ dev_warn(dev, "Need more frags\n");
++ err = -ENOENT;
++ break;
++ }
++
++ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
++ skb = xennet_get_rx_skb(np, cons + frags);
++ ref = xennet_get_rx_ref(np, cons + frags);
++ frags++;
++ }
++
++ if (unlikely(frags > max)) {
++ if (net_ratelimit())
++ dev_warn(dev, "Too many frags\n");
++ err = -E2BIG;
++ }
++
++ if (unlikely(err))
++ np->rx.rsp_cons = cons + frags;
++
++ *pages_flipped_p = pages_flipped;
++
++ return err;
++}
++
++static int xennet_set_skb_gso(struct sk_buff *skb,
++ struct xen_netif_extra_info *gso)
++{
++ if (!gso->u.gso.size) {
++ if (net_ratelimit())
++ printk(KERN_WARNING "GSO size must not be zero.\n");
++ return -EINVAL;
++ }
++
++ /* Currently only TCPv4 segmentation offload is supported. */
++ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++ if (net_ratelimit())
++ printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
++ return -EINVAL;
++ }
++
++ skb_shinfo(skb)->gso_size = gso->u.gso.size;
++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++ /* Header must be checked, and gso_segs computed. */
++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++ skb_shinfo(skb)->gso_segs = 0;
++
++ return 0;
++}
++
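++/*
++ * Attach the follow-on response buffers collected by
++ * xennet_get_responses() to the head skb as page fragments; each donor
++ * skb is freed after its single frag page has been taken over.
++ */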
++static RING_IDX xennet_fill_frags(struct netfront_info *np,
++ struct sk_buff *skb,
++ struct sk_buff_head *list)
++{
++ struct skb_shared_info *shinfo = skb_shinfo(skb);
++ int nr_frags = shinfo->nr_frags;
++ RING_IDX cons = np->rx.rsp_cons;
++ skb_frag_t *frag = shinfo->frags + nr_frags;
++ struct sk_buff *nskb;
++
++ while ((nskb = __skb_dequeue(list))) {
++ struct xen_netif_rx_response *rx =
++ RING_GET_RESPONSE(&np->rx, ++cons);
++
++ frag->page = skb_shinfo(nskb)->frags[0].page;
++ frag->page_offset = rx->offset;
++ frag->size = rx->status;
++
++ skb->data_len += rx->status;
++
++ skb_shinfo(nskb)->nr_frags = 0;
++ kfree_skb(nskb);
++
++ frag++;
++ nr_frags++;
++ }
++
++ shinfo->nr_frags = nr_frags;
++ return cons;
++}
++
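++/*
++ * A NETRXF_csum_blank packet arrives with no csum_start/csum_offset,
++ * because the backend does not parse transport headers on our behalf.
++ * Recover those offsets from the IP header so the checksum can be
++ * completed later; anything other than TCP/UDP over IPv4 is rejected.
++ */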
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++ struct iphdr *iph;
++ unsigned char *th;
++ int err = -EPROTO;
++
++ if (skb->protocol != htons(ETH_P_IP))
++ goto out;
++
++ iph = (void *)skb->data;
++ th = skb->data + 4 * iph->ihl;
++ if (th >= skb_tail_pointer(skb))
++ goto out;
++
++ skb->csum_start = th - skb->head;
++ switch (iph->protocol) {
++ case IPPROTO_TCP:
++ skb->csum_offset = offsetof(struct tcphdr, check);
++ break;
++ case IPPROTO_UDP:
++ skb->csum_offset = offsetof(struct udphdr, check);
++ break;
++ default:
++ if (net_ratelimit())
++ printk(KERN_ERR "Attempting to checksum a non-"
++ "TCP/UDP packet, dropping a protocol"
++ " %d packet", iph->protocol);
++ goto out;
++ }
++
++ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++ goto out;
++
++ err = 0;
++
++out:
++ return err;
++}
++
++static int handle_incoming_queue(struct net_device *dev,
++ struct sk_buff_head *rxq)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ int packets_dropped = 0;
++ struct sk_buff *skb;
++
++ while ((skb = __skb_dequeue(rxq)) != NULL) {
++ struct page *page = NETFRONT_SKB_CB(skb)->page;
++ void *vaddr = page_address(page);
++ unsigned offset = NETFRONT_SKB_CB(skb)->offset;
++
++ memcpy(skb->data, vaddr + offset,
++ skb_headlen(skb));
++
++ if (page != skb_shinfo(skb)->frags[0].page)
++ __free_page(page);
++
++ /* Ethernet work: Delayed to here as it peeks the header. */
++ skb->protocol = eth_type_trans(skb, dev);
++
++ if (skb->ip_summed == CHECKSUM_PARTIAL) {
++ if (skb_checksum_setup(skb)) {
++ kfree_skb(skb);
++ packets_dropped++;
++ np->stats.rx_errors++;
++ continue;
++ }
++ }
++
++ np->stats.rx_packets++;
++ np->stats.rx_bytes += skb->len;
++
++ /* Pass it up. */
++ netif_receive_skb(skb);
++ dev->last_rx = jiffies;
++ }
++
++ return packets_dropped;
++}
++
++static int xennet_poll(struct net_device *dev, int *pbudget)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ struct sk_buff *skb;
++ struct netfront_rx_info rinfo;
++ struct xen_netif_rx_response *rx = &rinfo.rx;
++ struct xen_netif_extra_info *extras = rinfo.extras;
++ RING_IDX i, rp;
++ struct multicall_entry *mcl;
++ int work_done, budget, more_to_do = 1;
++ struct sk_buff_head rxq;
++ struct sk_buff_head errq;
++ struct sk_buff_head tmpq;
++ unsigned long flags;
++ unsigned int len;
++ int pages_flipped = 0;
++ int err;
++
++ spin_lock(&np->rx_lock);
++
++ if (unlikely(!netif_carrier_ok(dev))) {
++ spin_unlock(&np->rx_lock);
++ return 0;
++ }
++
++ skb_queue_head_init(&rxq);
++ skb_queue_head_init(&errq);
++ skb_queue_head_init(&tmpq);
++
++ if ((budget = *pbudget) > dev->quota)
++ budget = dev->quota;
++ rp = np->rx.sring->rsp_prod;
++ rmb(); /* Ensure we see queued responses up to 'rp'. */
++
++ i = np->rx.rsp_cons;
++ work_done = 0;
++ while ((i != rp) && (work_done < budget)) {
++ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
++ memset(extras, 0, sizeof(rinfo.extras));
++
++ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
++ &pages_flipped);
++
++ if (unlikely(err)) {
++err:
++ while ((skb = __skb_dequeue(&tmpq)))
++ __skb_queue_tail(&errq, skb);
++ np->stats.rx_errors++;
++ i = np->rx.rsp_cons;
++ continue;
++ }
++
++ skb = __skb_dequeue(&tmpq);
++
++ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++ struct xen_netif_extra_info *gso;
++ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++ if (unlikely(xennet_set_skb_gso(skb, gso))) {
++ __skb_queue_head(&tmpq, skb);
++ np->rx.rsp_cons += skb_queue_len(&tmpq);
++ goto err;
++ }
++ }
++
++ NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
++ NETFRONT_SKB_CB(skb)->offset = rx->offset;
++
++ len = rx->status;
++ if (len > RX_COPY_THRESHOLD)
++ len = RX_COPY_THRESHOLD;
++ skb_put(skb, len);
++
++ if (rx->status > len) {
++ skb_shinfo(skb)->frags[0].page_offset =
++ rx->offset + len;
++ skb_shinfo(skb)->frags[0].size = rx->status - len;
++ skb->data_len = rx->status - len;
++ } else {
++ skb_shinfo(skb)->frags[0].page = NULL;
++ skb_shinfo(skb)->nr_frags = 0;
++ }
++
++ i = xennet_fill_frags(np, skb, &tmpq);
++
++ /*
++ * Truesize approximates the size of true data plus
++ * any supervisor overheads. Adding hypervisor
++ * overheads has been shown to significantly reduce
++ * achievable bandwidth with the default receive
++ * buffer size. It is therefore not wise to account
++ * for it here.
++ *
++ * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
++ * to RX_COPY_THRESHOLD + the supervisor
++ * overheads. Here, we add the size of the data pulled
++ * in xennet_fill_frags().
++ *
++ * We also adjust for any unused space in the main
++ * data area by subtracting (RX_COPY_THRESHOLD -
++ * len). This is especially important with drivers
++ * which split incoming packets into header and data,
++ * using only 66 bytes of the main data area (see the
++ * e1000 driver, for example). On such systems,
++ * without this last adjustment, our achievable
++ * receive throughput using the standard receive
++ * buffer size was cut by 25%(!!!).
++ */
++ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
++ skb->len += skb->data_len;
++
++ if (rx->flags & NETRXF_csum_blank)
++ skb->ip_summed = CHECKSUM_PARTIAL;
++ else if (rx->flags & NETRXF_data_validated)
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++ __skb_queue_tail(&rxq, skb);
++
++ np->rx.rsp_cons = ++i;
++ work_done++;
++ }
++
++ if (pages_flipped) {
++ /* Do all the remapping work, and M2P updates. */
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ mcl = np->rx_mcl + pages_flipped;
++ MULTI_mmu_update(mcl, np->rx_mmu,
++ pages_flipped, 0, DOMID_SELF);
++ (void)HYPERVISOR_multicall(np->rx_mcl,
++ pages_flipped + 1);
++ }
++ }
++
++ while ((skb = __skb_dequeue(&errq)))
++ kfree_skb(skb);
++
++ work_done -= handle_incoming_queue(dev, &rxq);
++
++ /* If we get a callback with very few responses, reduce fill target. */
++ /* NB. Note exponential increase, linear decrease. */
++ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
++ ((3*np->rx_target) / 4)) &&
++ (--np->rx_target < np->rx_min_target))
++ np->rx_target = np->rx_min_target;
++
++ xennet_alloc_rx_buffers(dev);
++
++ *pbudget -= work_done;
++ dev->quota -= work_done;
++
++ if (work_done < budget) {
++ local_irq_save(flags);
++
++ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
++ if (!more_to_do)
++ __netif_rx_complete(dev);
++
++ local_irq_restore(flags);
++ }
++
++ spin_unlock(&np->rx_lock);
++
++ return more_to_do;
++}
++
++static int xennet_change_mtu(struct net_device *dev, int mtu)
++{
++ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++ if (mtu > max)
++ return -EINVAL;
++ dev->mtu = mtu;
++ return 0;
++}
++
++static void xennet_release_tx_bufs(struct netfront_info *np)
++{
++ struct sk_buff *skb;
++ int i;
++
++ for (i = 0; i < NET_TX_RING_SIZE; i++) {
++ /* Skip over entries which are actually freelist references */
++ if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
++ continue;
++
++ skb = np->tx_skbs[i].skb;
++ gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
++ GNTMAP_readonly);
++ gnttab_release_grant_reference(&np->gref_tx_head,
++ np->grant_tx_ref[i]);
++ np->grant_tx_ref[i] = GRANT_INVALID_REF;
++ add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
++ dev_kfree_skb_irq(skb);
++ }
++}
++
++static void xennet_release_rx_bufs(struct netfront_info *np)
++{
++ struct mmu_update *mmu = np->rx_mmu;
++ struct multicall_entry *mcl = np->rx_mcl;
++ struct sk_buff_head free_list;
++ struct sk_buff *skb;
++ unsigned long mfn;
++ int xfer = 0, noxfer = 0, unused = 0;
++ int id, ref;
++
++ if (np->copying_receiver) {
++ dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
++ __func__);
++ return;
++ }
++
++ skb_queue_head_init(&free_list);
++
++ spin_lock_bh(&np->rx_lock);
++
++ for (id = 0; id < NET_RX_RING_SIZE; id++) {
++ if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
++ unused++;
++ continue;
++ }
++
++ skb = np->rx_skbs[id];
++ mfn = gnttab_end_foreign_transfer_ref(ref);
++ gnttab_release_grant_reference(&np->gref_rx_head, ref);
++ np->grant_rx_ref[id] = GRANT_INVALID_REF;
++
++ if (0 == mfn) {
++ skb_shinfo(skb)->nr_frags = 0;
++ dev_kfree_skb(skb);
++ noxfer++;
++ continue;
++ }
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ /* Remap the page. */
++ struct page *page = skb_shinfo(skb)->frags[0].page;
++ unsigned long pfn = page_to_pfn(page);
++ void *vaddr = page_address(page);
++
++ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
++ mfn_pte(mfn, PAGE_KERNEL),
++ 0);
++ mcl++;
++ mmu->ptr = ((u64)mfn << PAGE_SHIFT)
++ | MMU_MACHPHYS_UPDATE;
++ mmu->val = pfn;
++ mmu++;
++
++ set_phys_to_machine(pfn, mfn);
++ }
++ __skb_queue_tail(&free_list, skb);
++ xfer++;
++ }
++
++ dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
++ __func__, xfer, noxfer, unused);
++
++ if (xfer) {
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ /* Do all the remapping work and M2P updates. */
++ MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
++ 0, DOMID_SELF);
++ mcl++;
++ HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
++ }
++ }
++
++ while ((skb = __skb_dequeue(&free_list)) != NULL)
++ dev_kfree_skb(skb);
++
++ spin_unlock_bh(&np->rx_lock);
++}
++
++static void xennet_uninit(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ xennet_release_tx_bufs(np);
++ xennet_release_rx_bufs(np);
++ gnttab_free_grant_references(np->gref_tx_head);
++ gnttab_free_grant_references(np->gref_rx_head);
++}
++
++static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
++{
++ int i, err;
++ struct net_device *netdev;
++ struct netfront_info *np;
++
++ netdev = alloc_etherdev(sizeof(struct netfront_info));
++ if (!netdev) {
++ printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
++ __func__);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ np = netdev_priv(netdev);
++ np->xbdev = dev;
++
++ spin_lock_init(&np->tx_lock);
++ spin_lock_init(&np->rx_lock);
++
++ skb_queue_head_init(&np->rx_batch);
++ np->rx_target = RX_DFL_MIN_TARGET;
++ np->rx_min_target = RX_DFL_MIN_TARGET;
++ np->rx_max_target = RX_MAX_TARGET;
++
++ init_timer(&np->rx_refill_timer);
++ np->rx_refill_timer.data = (unsigned long)netdev;
++ np->rx_refill_timer.function = rx_refill_timeout;
++
++ /* Initialise tx_skbs as a free chain containing every entry. */
++ np->tx_skb_freelist = 0;
++ for (i = 0; i < NET_TX_RING_SIZE; i++) {
++ np->tx_skbs[i].link = i+1;
++ np->grant_tx_ref[i] = GRANT_INVALID_REF;
++ }
++
++ /* Clear out rx_skbs */
++ for (i = 0; i < NET_RX_RING_SIZE; i++) {
++ np->rx_skbs[i] = NULL;
++ np->grant_rx_ref[i] = GRANT_INVALID_REF;
++ }
++
++ /* A grant for every tx ring slot */
++ if (gnttab_alloc_grant_references(TX_MAX_TARGET,
++ &np->gref_tx_head) < 0) {
++ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
++ err = -ENOMEM;
++ goto exit;
++ }
++ /* A grant for every rx ring slot */
++ if (gnttab_alloc_grant_references(RX_MAX_TARGET,
++ &np->gref_rx_head) < 0) {
++ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
++ err = -ENOMEM;
++ goto exit_free_tx;
++ }
++
++ netdev->open = xennet_open;
++ netdev->hard_start_xmit = xennet_start_xmit;
++ netdev->stop = xennet_close;
++ netdev->get_stats = xennet_get_stats;
++ netdev->poll = xennet_poll;
++ netdev->uninit = xennet_uninit;
++ netdev->change_mtu = xennet_change_mtu;
++ netdev->weight = 64;
++ netdev->features = NETIF_F_IP_CSUM;
++
++ SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
++ SET_MODULE_OWNER(netdev);
++ SET_NETDEV_DEV(netdev, &dev->dev);
++
++ np->netdev = netdev;
++
++ netif_carrier_off(netdev);
++
++ return netdev;
++
++ exit_free_tx:
++ gnttab_free_grant_references(np->gref_tx_head);
++ exit:
++ free_netdev(netdev);
++ return ERR_PTR(err);
++}
++
++/**
++ * Entry point to this code when a new device is created. Allocate the basic
++ * structures and the ring buffers for communication with the backend, and
++ * inform the backend of the appropriate details for those.
++ */
++static int __devinit netfront_probe(struct xenbus_device *dev,
++ const struct xenbus_device_id *id)
++{
++ int err;
++ struct net_device *netdev;
++ struct netfront_info *info;
++
++ netdev = xennet_create_dev(dev);
++ if (IS_ERR(netdev)) {
++ err = PTR_ERR(netdev);
++ xenbus_dev_fatal(dev, err, "creating netdev");
++ return err;
++ }
++
++ info = netdev_priv(netdev);
++ dev->dev.driver_data = info;
++
++ err = register_netdev(info->netdev);
++ if (err) {
++ printk(KERN_WARNING "%s: register_netdev err=%d\n",
++ __func__, err);
++ goto fail;
++ }
++
++ err = xennet_sysfs_addif(info->netdev);
++ if (err) {
++ unregister_netdev(info->netdev);
++ printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
++ __func__, err);
++ goto fail;
++ }
++
++ return 0;
++
++ fail:
++ free_netdev(netdev);
++ dev->dev.driver_data = NULL;
++ return err;
++}
++
++static void xennet_end_access(int ref, void *page)
++{
++ /* This frees the page as a side-effect */
++ if (ref != GRANT_INVALID_REF)
++ gnttab_end_foreign_access(ref, 0, (unsigned long)page);
++}
++
++static void xennet_disconnect_backend(struct netfront_info *info)
++{
++ /* Stop old i/f to prevent errors whilst we rebuild the state. */
++ spin_lock_bh(&info->rx_lock);
++ spin_lock_irq(&info->tx_lock);
++ netif_carrier_off(info->netdev);
++ spin_unlock_irq(&info->tx_lock);
++ spin_unlock_bh(&info->rx_lock);
++
++ if (info->netdev->irq)
++ unbind_from_irqhandler(info->netdev->irq, info->netdev);
++ info->evtchn = info->netdev->irq = 0;
++
++ /* End access and free the pages */
++ xennet_end_access(info->tx_ring_ref, info->tx.sring);
++ xennet_end_access(info->rx_ring_ref, info->rx.sring);
++
++ info->tx_ring_ref = GRANT_INVALID_REF;
++ info->rx_ring_ref = GRANT_INVALID_REF;
++ info->tx.sring = NULL;
++ info->rx.sring = NULL;
++}
++
++/**
++ * We are reconnecting to the backend, due to a suspend/resume, or a backend
++ * driver restart. We tear down our netif structure and recreate it, but
++ * leave the device-layer structures intact so that this is transparent to the
++ * rest of the kernel.
++ */
++static int netfront_resume(struct xenbus_device *dev)
++{
++ struct netfront_info *info = dev->dev.driver_data;
++
++ dev_dbg(&dev->dev, "%s\n", dev->nodename);
++
++ xennet_disconnect_backend(info);
++ return 0;
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++ char *s, *e, *macstr;
++ int i;
++
++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++ if (IS_ERR(macstr))
++ return PTR_ERR(macstr);
++
++ for (i = 0; i < ETH_ALEN; i++) {
++ mac[i] = simple_strtoul(s, &e, 16);
++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++ kfree(macstr);
++ return -ENOENT;
++ }
++ s = e+1;
++ }
++
++ kfree(macstr);
++ return 0;
++}
++
++static irqreturn_t xennet_interrupt(int irq, void *dev_id)
++{
++ struct net_device *dev = dev_id;
++ struct netfront_info *np = netdev_priv(dev);
++ unsigned long flags;
++
++ spin_lock_irqsave(&np->tx_lock, flags);
++
++ if (likely(netif_carrier_ok(dev))) {
++ xennet_tx_buf_gc(dev);
++ /* Under tx_lock: protects access to rx shared-ring indexes. */
++ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
++ netif_rx_schedule(dev);
++ }
++
++ spin_unlock_irqrestore(&np->tx_lock, flags);
++
++ return IRQ_HANDLED;
++}
++
++static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
++{
++ struct xen_netif_tx_sring *txs;
++ struct xen_netif_rx_sring *rxs;
++ int err;
++ struct net_device *netdev = info->netdev;
++
++ info->tx_ring_ref = GRANT_INVALID_REF;
++ info->rx_ring_ref = GRANT_INVALID_REF;
++ info->rx.sring = NULL;
++ info->tx.sring = NULL;
++ netdev->irq = 0;
++
++ err = xen_net_read_mac(dev, netdev->dev_addr);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++ goto fail;
++ }
++
++ txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
++ if (!txs) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(dev, err, "allocating tx ring page");
++ goto fail;
++ }
++ SHARED_RING_INIT(txs);
++ FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
++
++ err = xenbus_grant_ring(dev, virt_to_mfn(txs));
++ if (err < 0) {
++ free_page((unsigned long)txs);
++ goto fail;
++ }
++
++ info->tx_ring_ref = err;
++ rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
++ if (!rxs) {
++ err = -ENOMEM;
++ xenbus_dev_fatal(dev, err, "allocating rx ring page");
++ goto fail;
++ }
++ SHARED_RING_INIT(rxs);
++ FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
++
++ err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
++ if (err < 0) {
++ free_page((unsigned long)rxs);
++ goto fail;
++ }
++ info->rx_ring_ref = err;
++
++ err = xenbus_alloc_evtchn(dev, &info->evtchn);
++ if (err)
++ goto fail;
++
++ err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
++ IRQF_SAMPLE_RANDOM, netdev->name,
++ netdev);
++ if (err < 0)
++ goto fail;
++ netdev->irq = err;
++ return 0;
++
++ fail:
++ return err;
++}
++
++/* Common code used when first setting up, and when resuming. */
++static int talk_to_backend(struct xenbus_device *dev,
++ struct netfront_info *info)
++{
++ const char *message;
++ struct xenbus_transaction xbt;
++ int err;
++
++ /* Create shared ring, alloc event channel. */
++ err = setup_netfront(dev, info);
++ if (err)
++ goto out;
++
++again:
++ err = xenbus_transaction_start(&xbt);
++ if (err) {
++ xenbus_dev_fatal(dev, err, "starting transaction");
++ goto destroy_ring;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
++ info->tx_ring_ref);
++ if (err) {
++ message = "writing tx ring-ref";
++ goto abort_transaction;
++ }
++ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
++ info->rx_ring_ref);
++ if (err) {
++ message = "writing rx ring-ref";
++ goto abort_transaction;
++ }
++ err = xenbus_printf(xbt, dev->nodename,
++ "event-channel", "%u", info->evtchn);
++ if (err) {
++ message = "writing event-channel";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
++ info->copying_receiver);
++ if (err) {
++ message = "writing request-rx-copy";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
++ if (err) {
++ message = "writing feature-rx-notify";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
++ if (err) {
++ message = "writing feature-sg";
++ goto abort_transaction;
++ }
++
++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
++ if (err) {
++ message = "writing feature-gso-tcpv4";
++ goto abort_transaction;
++ }
++
++ err = xenbus_transaction_end(xbt, 0);
++ if (err) {
++ if (err == -EAGAIN)
++ goto again;
++ xenbus_dev_fatal(dev, err, "completing transaction");
++ goto destroy_ring;
++ }
++
++ return 0;
++
++ abort_transaction:
++ xenbus_transaction_end(xbt, 1);
++ xenbus_dev_fatal(dev, err, "%s", message);
++ destroy_ring:
++ xennet_disconnect_backend(info);
++ out:
++ return err;
++}
++
++static int xennet_set_sg(struct net_device *dev, u32 data)
++{
++ if (data) {
++ struct netfront_info *np = netdev_priv(dev);
++ int val;
++
++ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
++ "%d", &val) < 0)
++ val = 0;
++ if (!val)
++ return -ENOSYS;
++ } else if (dev->mtu > ETH_DATA_LEN)
++ dev->mtu = ETH_DATA_LEN;
++
++ return ethtool_op_set_sg(dev, data);
++}
++
++static int xennet_set_tso(struct net_device *dev, u32 data)
++{
++ if (data) {
++ struct netfront_info *np = netdev_priv(dev);
++ int val;
++
++ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++ "feature-gso-tcpv4", "%d", &val) < 0)
++ val = 0;
++ if (!val)
++ return -ENOSYS;
++ }
++
++ return ethtool_op_set_tso(dev, data);
++}
++
++static void xennet_set_features(struct net_device *dev)
++{
++ /* Turn off all GSO bits except ROBUST. */
++ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
++ dev->features |= NETIF_F_GSO_ROBUST;
++ xennet_set_sg(dev, 0);
++
++ /* We need checksum offload to enable scatter/gather and TSO. */
++ if (!(dev->features & NETIF_F_IP_CSUM))
++ return;
++
++ if (!xennet_set_sg(dev, 1))
++ xennet_set_tso(dev, 1);
++}
++
++static int xennet_connect(struct net_device *dev)
++{
++ struct netfront_info *np = netdev_priv(dev);
++ int i, requeue_idx, err;
++ struct sk_buff *skb;
++ grant_ref_t ref;
++ struct xen_netif_rx_request *req;
++ unsigned int feature_rx_copy, feature_rx_flip;
++
++ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++ "feature-rx-copy", "%u", &feature_rx_copy);
++ if (err != 1)
++ feature_rx_copy = 0;
++
++ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++ "feature-rx-flip", "%u", &feature_rx_flip);
++ /* Flip is the default, since it was once the only mode of
++ operation. */
++ if (err != 1)
++ feature_rx_flip = 1;
++
++ /*
++ * Copy packets on receive path if:
++ * (a) This was requested by user, and the backend supports it; or
++ * (b) Flipping was requested, but this is unsupported by the backend.
++ */
++ np->copying_receiver = (((rx_mode == RX_COPY) && feature_rx_copy) ||
++ ((rx_mode == RX_FLIP) && !feature_rx_flip));
++
++ err = talk_to_backend(np->xbdev, np);
++ if (err)
++ return err;
++
++ xennet_set_features(dev);
++
++ dev_info(&dev->dev, "has %s receive path.\n",
++ np->copying_receiver ? "copying" : "flipping");
++
++ spin_lock_bh(&np->rx_lock);
++ spin_lock_irq(&np->tx_lock);
++
++ /* Step 1: Discard all pending TX packet fragments. */
++ xennet_release_tx_bufs(np);
++
++ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
++ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
++ if (!np->rx_skbs[i])
++ continue;
++
++ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
++ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
++ req = RING_GET_REQUEST(&np->rx, requeue_idx);
++
++ if (!np->copying_receiver) {
++ gnttab_grant_foreign_transfer_ref(
++ ref, np->xbdev->otherend_id,
++ page_to_pfn(skb_shinfo(skb)->frags->page));
++ } else {
++ gnttab_grant_foreign_access_ref(
++ ref, np->xbdev->otherend_id,
++ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
++ frags->page)),
++ 0);
++ }
++ req->gref = ref;
++ req->id = requeue_idx;
++
++ requeue_idx++;
++ }
++
++ np->rx.req_prod_pvt = requeue_idx;
++
++ /*
++ * Step 3: All public and private state should now be sane. Get
++ * ready to start sending and receiving packets and give the driver
++ * domain a kick because we've probably just requeued some
++ * packets.
++ */
++ netif_carrier_on(np->netdev);
++ notify_remote_via_irq(np->netdev->irq);
++ xennet_tx_buf_gc(dev);
++ xennet_alloc_rx_buffers(dev);
++
++ spin_unlock_irq(&np->tx_lock);
++ spin_unlock_bh(&np->rx_lock);
++
++ return 0;
++}
++
++/**
++ * Callback received when the backend's state changes.
++ */
++static void backend_changed(struct xenbus_device *dev,
++ enum xenbus_state backend_state)
++{
++ struct netfront_info *np = dev->dev.driver_data;
++ struct net_device *netdev = np->netdev;
++
++ dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
++
++ switch (backend_state) {
++ case XenbusStateInitialising:
++ case XenbusStateInitialised:
++ case XenbusStateConnected:
++ case XenbusStateUnknown:
++ case XenbusStateClosed:
++ break;
++
++ case XenbusStateInitWait:
++ if (dev->state != XenbusStateInitialising)
++ break;
++ if (xennet_connect(netdev) != 0)
++ break;
++ xenbus_switch_state(dev, XenbusStateConnected);
++ break;
++
++ case XenbusStateClosing:
++ xenbus_frontend_closed(dev);
++ break;
++ }
++}
++
++static struct ethtool_ops xennet_ethtool_ops =
++{
++ .get_tx_csum = ethtool_op_get_tx_csum,
++ .set_tx_csum = ethtool_op_set_tx_csum,
++ .get_sg = ethtool_op_get_sg,
++ .set_sg = xennet_set_sg,
++ .get_tso = ethtool_op_get_tso,
++ .set_tso = xennet_set_tso,
++ .get_link = ethtool_op_get_link,
++};
++
++#ifdef CONFIG_SYSFS
++static ssize_t show_rxbuf_min(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct net_device *netdev = to_net_dev(dev);
++ struct netfront_info *info = netdev_priv(netdev);
++
++ return sprintf(buf, "%u\n", info->rx_min_target);
++}
++
++static ssize_t store_rxbuf_min(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t len)
++{
++ struct net_device *netdev = to_net_dev(dev);
++ struct netfront_info *np = netdev_priv(netdev);
++ char *endp;
++ unsigned long target;
++
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
++ target = simple_strtoul(buf, &endp, 0);
++ if (endp == buf)
++ return -EBADMSG;
++
++ if (target < RX_MIN_TARGET)
++ target = RX_MIN_TARGET;
++ if (target > RX_MAX_TARGET)
++ target = RX_MAX_TARGET;
++
++ spin_lock_bh(&np->rx_lock);
++ if (target > np->rx_max_target)
++ np->rx_max_target = target;
++ np->rx_min_target = target;
++ if (target > np->rx_target)
++ np->rx_target = target;
++
++ xennet_alloc_rx_buffers(netdev);
++
++ spin_unlock_bh(&np->rx_lock);
++ return len;
++}
++
++static ssize_t show_rxbuf_max(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct net_device *netdev = to_net_dev(dev);
++ struct netfront_info *info = netdev_priv(netdev);
++
++ return sprintf(buf, "%u\n", info->rx_max_target);
++}
++
++static ssize_t store_rxbuf_max(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t len)
++{
++ struct net_device *netdev = to_net_dev(dev);
++ struct netfront_info *np = netdev_priv(netdev);
++ char *endp;
++ unsigned long target;
++
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++
++ target = simple_strtoul(buf, &endp, 0);
++ if (endp == buf)
++ return -EBADMSG;
++
++ if (target < RX_MIN_TARGET)
++ target = RX_MIN_TARGET;
++ if (target > RX_MAX_TARGET)
++ target = RX_MAX_TARGET;
++
++ spin_lock_bh(&np->rx_lock);
++ if (target < np->rx_min_target)
++ np->rx_min_target = target;
++ np->rx_max_target = target;
++ if (target < np->rx_target)
++ np->rx_target = target;
++
++ xennet_alloc_rx_buffers(netdev);
++
++ spin_unlock_bh(&np->rx_lock);
++ return len;
++}
++
++static ssize_t show_rxbuf_cur(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct net_device *netdev = to_net_dev(dev);
++ struct netfront_info *info = netdev_priv(netdev);
++
++ return sprintf(buf, "%u\n", info->rx_target);
++}
++
++static struct device_attribute xennet_attrs[] = {
++ __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
++ __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
++ __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
++};
++
++static int xennet_sysfs_addif(struct net_device *netdev)
++{
++ int i;
++ int err;
++
++ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
++ err = device_create_file(&netdev->dev,
++ &xennet_attrs[i]);
++ if (err)
++ goto fail;
++ }
++ return 0;
++
++ fail:
++ while (--i >= 0)
++ device_remove_file(&netdev->dev, &xennet_attrs[i]);
++ return err;
++}
++
++static void xennet_sysfs_delif(struct net_device *netdev)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
++ device_remove_file(&netdev->dev, &xennet_attrs[i]);
++}
++
++#endif /* CONFIG_SYSFS */
++
++static struct xenbus_device_id netfront_ids[] = {
++ { "vif" },
++ { "" }
++};
++
++
++static int __devexit xennet_remove(struct xenbus_device *dev)
++{
++ struct netfront_info *info = dev->dev.driver_data;
++
++ dev_dbg(&dev->dev, "%s\n", dev->nodename);
++
++ unregister_netdev(info->netdev);
++
++ xennet_disconnect_backend(info);
++
++ del_timer_sync(&info->rx_refill_timer);
++
++ xennet_sysfs_delif(info->netdev);
++
++ free_netdev(info->netdev);
++
++ return 0;
++}
++
++static struct xenbus_driver netfront = {
++ .name = "vif",
++ .owner = THIS_MODULE,
++ .ids = netfront_ids,
++ .probe = netfront_probe,
++ .remove = __devexit_p(xennet_remove),
++ .resume = netfront_resume,
++ .otherend_changed = backend_changed,
++};
++
++static int __init netif_init(void)
++{
++ if (!is_running_on_xen())
++ return -ENODEV;
++
++ if (is_initial_xendomain())
++ return 0;
++
++ printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
++
++ return xenbus_register_frontend(&netfront);
++}
++module_init(netif_init);
++
++
++static void __exit netif_exit(void)
++{
++ if (is_initial_xendomain())
++ return;
++
++ return xenbus_unregister_driver(&netfront);
++}
++module_exit(netif_exit);
++
++MODULE_DESCRIPTION("Xen virtual network device frontend");
++MODULE_LICENSE("GPL");
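The rxbuf_min/rxbuf_max/rxbuf_cur attributes registered above appear under the net device's sysfs directory; writes require CAP_NET_ADMIN and are clamped into [RX_MIN_TARGET, RX_MAX_TARGET], with the store handlers keeping rx_min_target <= rx_target <= rx_max_target. A minimal userspace sketch of poking these knobs, assuming the usual /sys mount point and a hypothetical interface name "eth0":

    /* Hedged userspace sketch; "eth0" and the sysfs path are assumptions. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int cur = 0;
            FILE *f = fopen("/sys/class/net/eth0/rxbuf_cur", "r");

            if (!f || fscanf(f, "%u", &cur) != 1)
                    return 1;
            fclose(f);
            printf("current rx target: %u\n", cur);

            /* Raising the floor needs CAP_NET_ADMIN; the store handler
             * clamps the value and drags rx_max_target/rx_target up
             * as needed, so an out-of-range write is still safe. */
            f = fopen("/sys/class/net/eth0/rxbuf_min", "w");
            if (f) {
                    fprintf(f, "%u\n", cur + 32);
                    fclose(f);
            }
            return 0;
    }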
+diff -Nurb linux-2.6.22-570/drivers/net/yellowfin.c linux-2.6.22-591/drivers/net/yellowfin.c
+--- linux-2.6.22-570/drivers/net/yellowfin.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/yellowfin.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1137,7 +1137,7 @@
+ if (skb == NULL)
+ break;
+ skb_reserve(skb, 2); /* 16 byte align the IP header */
+- eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, rx_skb->data, pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(yp->pci_dev, desc->addr,
+ yp->rx_buf_sz,
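eth_copy_and_sum() was a misnomer, its trailing checksum argument was ignored, so the switch to skb_copy_to_linear_data() only makes the plain copy explicit. As a reminder, the 2.6.23-era helper is essentially this (reproduced from memory, not verbatim):

    /* Hedged sketch of the skbuff.h helper the new call resolves to. */
    static inline void skb_copy_to_linear_data(struct sk_buff *skb,
                                               const void *from,
                                               const unsigned int len)
    {
            memcpy(skb->data, from, len);   /* copy into the linear area */
    }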
+diff -Nurb linux-2.6.22-570/drivers/parisc/led.c linux-2.6.22-591/drivers/parisc/led.c
+--- linux-2.6.22-570/drivers/parisc/led.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/parisc/led.c 2007-12-21 15:36:14.000000000 -0500
+@@ -359,7 +359,7 @@
+ * for reading should be OK */
+ read_lock(&dev_base_lock);
+ rcu_read_lock();
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ struct net_device_stats *stats;
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev || !in_dev->ifa_list)
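This hunk is part of the network-namespace sweep running through this patch: for_each_netdev(), and later inet_addr_type(), gain a struct net * argument, and passing &init_net preserves the old single-namespace behaviour. A minimal sketch of the post-conversion idiom, with a hypothetical counter function:

    #include <linux/netdevice.h>
    #include <net/net_namespace.h>

    /* Hedged sketch: walk interfaces the old global way by naming
     * the initial namespace explicitly. */
    static int example_count_netdevs(void)
    {
            struct net_device *dev;
            int n = 0;

            read_lock(&dev_base_lock);      /* as the led.c caller does */
            for_each_netdev(&init_net, dev)
                    n++;
            read_unlock(&dev_base_lock);
            return n;
    }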
+diff -Nurb linux-2.6.22-570/drivers/parisc/pdc_stable.c linux-2.6.22-591/drivers/parisc/pdc_stable.c
+--- linux-2.6.22-570/drivers/parisc/pdc_stable.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/parisc/pdc_stable.c 2007-12-21 15:36:12.000000000 -0500
+@@ -121,14 +121,14 @@
+
+ #define PDCS_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute pdcs_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++ .attr = {.name = __stringify(_name), .mode = _mode}, \
+ .show = _show, \
+ .store = _store, \
+ };
+
+ #define PATHS_ATTR(_name, _mode, _show, _store) \
+ struct pdcspath_attribute paths_attr_##_name = { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++ .attr = {.name = __stringify(_name), .mode = _mode}, \
+ .show = _show, \
+ .store = _store, \
+ };
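These two-line hunks, and the many .owner deletions below, track the removal of the owner field from struct attribute: the sysfs/kobject core now pins the owning module itself, so attribute initializers shrink to name and mode. A sketch of the post-change shape, with hypothetical names:

    #include <linux/device.h>

    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf);

    /* Post-change initializer: name and mode only; keeping the old
     * .owner = THIS_MODULE member would now be a build error. */
    static struct device_attribute dev_attr_example = {
            .attr = { .name = "example", .mode = S_IRUGO },
            .show = example_show,
    };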
+diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c
+--- linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -106,6 +106,7 @@
+ static void ibm_handle_events(acpi_handle handle, u32 event, void *context);
+ static int ibm_get_table_from_acpi(char **bufp);
+ static ssize_t ibm_read_apci_table(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t size);
+ static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
+ u32 lvl, void *context, void **rv);
+@@ -117,7 +118,6 @@
+ static struct bin_attribute ibm_apci_table_attr = {
+ .attr = {
+ .name = "apci_table",
+- .owner = THIS_MODULE,
+ .mode = S_IRUGO,
+ },
+ .read = ibm_read_apci_table,
+@@ -358,6 +358,7 @@
+ * our solution is to only allow reading the table in all at once
+ **/
+ static ssize_t ibm_read_apci_table(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t size)
+ {
+ int bytes_read = -EINVAL;
+diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c
+--- linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -159,8 +159,8 @@
+ /* Claim new bus resources */
+ pcibios_claim_one_bus(dev->bus);
+
+- /* ioremap() for child bus, which may or may not succeed */
+- remap_bus_range(dev->subordinate);
++ /* Map IO space for child bus, which may or may not succeed */
++ pcibios_map_io_space(dev->subordinate);
+
+ /* Add new devices to global lists. Register in proc, sysfs. */
+ pci_bus_add_devices(phb->bus);
+@@ -390,7 +390,7 @@
+ } else
+ pcibios_remove_pci_devices(bus);
+
+- if (unmap_bus_range(bus)) {
++ if (pcibios_unmap_io_space(bus)) {
+ printk(KERN_ERR "%s: failed to unmap bus range\n",
+ __FUNCTION__);
+ return -ERANGE;
+diff -Nurb linux-2.6.22-570/drivers/pci/pci-sysfs.c linux-2.6.22-591/drivers/pci/pci-sysfs.c
+--- linux-2.6.22-570/drivers/pci/pci-sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pci/pci-sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -213,7 +213,8 @@
+ };
+
+ static ssize_t
+-pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+ unsigned int size = 64;
+@@ -285,7 +286,8 @@
+ }
+
+ static ssize_t
+-pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+ unsigned int size = count;
+@@ -352,7 +354,8 @@
+ * callback routine (pci_legacy_read).
+ */
+ ssize_t
+-pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_bus *bus = to_pci_bus(container_of(kobj,
+ struct class_device,
+@@ -376,7 +379,8 @@
+ * callback routine (pci_legacy_write).
+ */
+ ssize_t
+-pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_bus *bus = to_pci_bus(container_of(kobj,
+ struct class_device,
+@@ -499,7 +503,6 @@
+ sprintf(res_attr_name, "resource%d", i);
+ res_attr->attr.name = res_attr_name;
+ res_attr->attr.mode = S_IRUSR | S_IWUSR;
+- res_attr->attr.owner = THIS_MODULE;
+ res_attr->size = pci_resource_len(pdev, i);
+ res_attr->mmap = pci_mmap_resource;
+ res_attr->private = &pdev->resource[i];
+@@ -529,7 +532,8 @@
+ * writing anything except 0 enables it
+ */
+ static ssize_t
+-pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
+
+@@ -552,7 +556,8 @@
+ * device corresponding to @kobj.
+ */
+ static ssize_t
+-pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
+ void __iomem *rom;
+@@ -582,7 +587,6 @@
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 256,
+ .read = pci_read_config,
+@@ -593,7 +597,6 @@
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 4096,
+ .read = pci_read_config,
+@@ -628,7 +631,6 @@
+ rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+ rom_attr->attr.name = "rom";
+ rom_attr->attr.mode = S_IRUSR;
+- rom_attr->attr.owner = THIS_MODULE;
+ rom_attr->read = pci_read_rom;
+ rom_attr->write = pci_write_rom;
+ retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr);
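All of the signature hunks in this file are one mechanical API change: sysfs binary read/write callbacks now receive the struct bin_attribute * that fired, letting one handler serve several attributes (and reach per-attribute state via bin_attr->private) without recovering it from the kobject. A hedged sketch of the new callback shape, with illustrative names:

    #include <linux/sysfs.h>
    #include <linux/string.h>

    /* Hedged sketch; a real handler copies device data into buf. */
    static ssize_t example_bin_read(struct kobject *kobj,
                                    struct bin_attribute *bin_attr,
                                    char *buf, loff_t off, size_t count)
    {
            /* bounds-check against the attribute's own declared size */
            if (off >= bin_attr->size)
                    return 0;
            if (off + count > bin_attr->size)
                    count = bin_attr->size - off;

            memset(buf, 0, count);
            return count;
    }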
+diff -Nurb linux-2.6.22-570/drivers/pci/probe.c linux-2.6.22-591/drivers/pci/probe.c
+--- linux-2.6.22-570/drivers/pci/probe.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/pci/probe.c 2007-12-21 15:36:12.000000000 -0500
+@@ -39,7 +39,6 @@
+ b->legacy_io->attr.name = "legacy_io";
+ b->legacy_io->size = 0xffff;
+ b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+- b->legacy_io->attr.owner = THIS_MODULE;
+ b->legacy_io->read = pci_read_legacy_io;
+ b->legacy_io->write = pci_write_legacy_io;
+ class_device_create_bin_file(&b->class_dev, b->legacy_io);
+@@ -49,7 +48,6 @@
+ b->legacy_mem->attr.name = "legacy_mem";
+ b->legacy_mem->size = 1024*1024;
+ b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+- b->legacy_mem->attr.owner = THIS_MODULE;
+ b->legacy_mem->mmap = pci_mmap_legacy_mem;
+ class_device_create_bin_file(&b->class_dev, b->legacy_mem);
+ }
+diff -Nurb linux-2.6.22-570/drivers/pcmcia/cs.c linux-2.6.22-591/drivers/pcmcia/cs.c
+--- linux-2.6.22-570/drivers/pcmcia/cs.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/pcmcia/cs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -654,6 +654,7 @@
+ add_wait_queue(&skt->thread_wait, &wait);
+ complete(&skt->thread_done);
+
++ set_freezable();
+ for (;;) {
+ unsigned long flags;
+ unsigned int events;
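The lone set_freezable() insertion reflects the freezer rework in this series: kernel threads are now nonfreezable by default, so any kthread that must park across suspend/hibernate opts in explicitly. A minimal sketch of the resulting thread shape (the event-handling body is elided):

    #include <linux/kthread.h>
    #include <linux/freezer.h>

    /* Hedged sketch of a freezable event thread. */
    static int example_thread(void *data)
    {
            set_freezable();                /* opt in to the freezer */
            while (!kthread_should_stop()) {
                    try_to_freeze();        /* park here across suspend */
                    /* ... wait for and handle socket events ... */
            }
            return 0;
    }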
+diff -Nurb linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c
+--- linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -283,7 +283,9 @@
+ return (ret);
+ }
+
+-static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t pccard_show_cis(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ unsigned int size = 0x200;
+
+@@ -311,7 +313,9 @@
+ return (count);
+ }
+
+-static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t pccard_store_cis(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj));
+ cisdump_t *cis;
+@@ -366,7 +370,7 @@
+ };
+
+ static struct bin_attribute pccard_cis_attr = {
+- .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE},
++ .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR },
+ .size = 0x200,
+ .read = pccard_show_cis,
+ .write = pccard_store_cis,
+diff -Nurb linux-2.6.22-570/drivers/pnp/driver.c linux-2.6.22-591/drivers/pnp/driver.c
+--- linux-2.6.22-570/drivers/pnp/driver.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pnp/driver.c 2007-12-21 15:36:12.000000000 -0500
+@@ -167,6 +167,8 @@
+ return error;
+ }
+
++ if (pnp_dev->protocol && pnp_dev->protocol->suspend)
++ pnp_dev->protocol->suspend(pnp_dev, state);
+ return 0;
+ }
+
+@@ -179,6 +181,9 @@
+ if (!pnp_drv)
+ return 0;
+
++ if (pnp_dev->protocol && pnp_dev->protocol->resume)
++ pnp_dev->protocol->resume(pnp_dev);
++
+ if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) {
+ error = pnp_start_dev(pnp_dev);
+ if (error)
+diff -Nurb linux-2.6.22-570/drivers/pnp/pnpacpi/core.c linux-2.6.22-591/drivers/pnp/pnpacpi/core.c
+--- linux-2.6.22-570/drivers/pnp/pnpacpi/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pnp/pnpacpi/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -119,11 +119,23 @@
+ return ACPI_FAILURE(status) ? -ENODEV : 0;
+ }
+
++static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
++{
++ return acpi_bus_set_power((acpi_handle)dev->data, 3);
++}
++
++static int pnpacpi_resume(struct pnp_dev *dev)
++{
++ return acpi_bus_set_power((acpi_handle)dev->data, 0);
++}
++
+ static struct pnp_protocol pnpacpi_protocol = {
+ .name = "Plug and Play ACPI",
+ .get = pnpacpi_get_resources,
+ .set = pnpacpi_set_resources,
+ .disable = pnpacpi_disable_resources,
++ .suspend = pnpacpi_suspend,
++ .resume = pnpacpi_resume,
+ };
+
+ static int __init pnpacpi_add_device(struct acpi_device *device)
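The new suspend/resume protocol hooks are what the drivers/pnp/driver.c hunk above calls into; for ACPI-enumerated devices they map directly onto ACPI power states, where the bare 3 and 0 are D3 and D0. A behaviour-equivalent sketch using the symbolic constants, purely as a readability note:

    #include <linux/pnp.h>
    #include <acpi/acpi_bus.h>

    /* Same calls as the hunk above with the states spelled out;
     * the _named suffixes are illustrative. */
    static int pnpacpi_suspend_named(struct pnp_dev *dev, pm_message_t state)
    {
            return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D3);
    }

    static int pnpacpi_resume_named(struct pnp_dev *dev)
    {
            return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D0);
    }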
+diff -Nurb linux-2.6.22-570/drivers/pnp/pnpbios/core.c linux-2.6.22-591/drivers/pnp/pnpbios/core.c
+--- linux-2.6.22-570/drivers/pnp/pnpbios/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/pnp/pnpbios/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -147,7 +147,7 @@
+ info->location_id, info->serial, info->capabilities);
+ envp[i] = NULL;
+
+- value = call_usermodehelper (argv [0], argv, envp, 0);
++ value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
+ kfree (buf);
+ kfree (envp);
+ return 0;
+@@ -160,6 +160,7 @@
+ {
+ static struct pnp_docking_station_info now;
+ int docked = -1, d = 0;
++ set_freezable();
+ while (!unloading)
+ {
+ int status;
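Replacing the magic 0 with UMH_WAIT_EXEC is behaviour-preserving; the wait argument of call_usermodehelper() became an enum in this kernel generation. A reconstruction from memory of the <linux/kmod.h> values, to be treated as approximate:

    /* Hedged reconstruction of the 2.6.23-era enum. */
    enum umh_wait {
            UMH_NO_WAIT   = -1,     /* don't wait at all */
            UMH_WAIT_EXEC =  0,     /* wait for the exec, not the process */
            UMH_WAIT_PROC =  1,     /* wait for the process to complete */
    };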
+diff -Nurb linux-2.6.22-570/drivers/rapidio/rio-sysfs.c linux-2.6.22-591/drivers/rapidio/rio-sysfs.c
+--- linux-2.6.22-570/drivers/rapidio/rio-sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/rapidio/rio-sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -67,7 +67,8 @@
+ };
+
+ static ssize_t
+-rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct rio_dev *dev =
+ to_rio_dev(container_of(kobj, struct device, kobj));
+@@ -137,7 +138,8 @@
+ }
+
+ static ssize_t
+-rio_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct rio_dev *dev =
+ to_rio_dev(container_of(kobj, struct device, kobj));
+@@ -197,7 +199,6 @@
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 0x200000,
+ .read = rio_read_config,
+diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1553.c linux-2.6.22-591/drivers/rtc/rtc-ds1553.c
+--- linux-2.6.22-570/drivers/rtc/rtc-ds1553.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/rtc/rtc-ds1553.c 2007-12-21 15:36:12.000000000 -0500
+@@ -258,8 +258,9 @@
+ .ioctl = ds1553_rtc_ioctl,
+ };
+
+-static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf,
+- loff_t pos, size_t size)
++static ssize_t ds1553_nvram_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t size)
+ {
+ struct platform_device *pdev =
+ to_platform_device(container_of(kobj, struct device, kobj));
+@@ -272,8 +273,9 @@
+ return count;
+ }
+
+-static ssize_t ds1553_nvram_write(struct kobject *kobj, char *buf,
+- loff_t pos, size_t size)
++static ssize_t ds1553_nvram_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t size)
+ {
+ struct platform_device *pdev =
+ to_platform_device(container_of(kobj, struct device, kobj));
+@@ -290,7 +292,6 @@
+ .attr = {
+ .name = "nvram",
+ .mode = S_IRUGO | S_IWUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = RTC_OFFSET,
+ .read = ds1553_nvram_read,
+diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1742.c linux-2.6.22-591/drivers/rtc/rtc-ds1742.c
+--- linux-2.6.22-570/drivers/rtc/rtc-ds1742.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/rtc/rtc-ds1742.c 2007-12-21 15:36:12.000000000 -0500
+@@ -127,8 +127,9 @@
+ .set_time = ds1742_rtc_set_time,
+ };
+
+-static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf,
+- loff_t pos, size_t size)
++static ssize_t ds1742_nvram_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t size)
+ {
+ struct platform_device *pdev =
+ to_platform_device(container_of(kobj, struct device, kobj));
+@@ -141,8 +142,9 @@
+ return count;
+ }
+
+-static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf,
+- loff_t pos, size_t size)
++static ssize_t ds1742_nvram_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t pos, size_t size)
+ {
+ struct platform_device *pdev =
+ to_platform_device(container_of(kobj, struct device, kobj));
+@@ -159,7 +161,6 @@
+ .attr = {
+ .name = "nvram",
+ .mode = S_IRUGO | S_IWUGO,
+- .owner = THIS_MODULE,
+ },
+ .read = ds1742_nvram_read,
+ .write = ds1742_nvram_write,
+diff -Nurb linux-2.6.22-570/drivers/s390/cio/chp.c linux-2.6.22-591/drivers/s390/cio/chp.c
+--- linux-2.6.22-570/drivers/s390/cio/chp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/s390/cio/chp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -141,8 +141,9 @@
+ /*
+ * Channel measurement related functions
+ */
+-static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t chp_measurement_chars_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct channel_path *chp;
+ unsigned int size;
+@@ -165,7 +166,6 @@
+ .attr = {
+ .name = "measurement_chars",
+ .mode = S_IRUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = sizeof(struct cmg_chars),
+ .read = chp_measurement_chars_read,
+@@ -193,8 +193,9 @@
+ } while (reference_buf.values[0] != buf->values[0]);
+ }
+
+-static ssize_t chp_measurement_read(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t chp_measurement_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct channel_path *chp;
+ struct channel_subsystem *css;
+@@ -217,7 +218,6 @@
+ .attr = {
+ .name = "measurement",
+ .mode = S_IRUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = sizeof(struct cmg_entry),
+ .read = chp_measurement_read,
+diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_main.c linux-2.6.22-591/drivers/s390/net/qeth_main.c
+--- linux-2.6.22-570/drivers/s390/net/qeth_main.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/s390/net/qeth_main.c 2007-12-21 15:36:14.000000000 -0500
+@@ -8127,7 +8127,7 @@
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
+
+- neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key);
++ neigh->type = inet_addr_type(&init_net, *(__be32 *) neigh->primary_key);
+ neigh->nud_state = NUD_NOARP;
+ neigh->ops = arp_direct_ops;
+ neigh->output = neigh->ops->queue_xmit;
+diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_sys.c linux-2.6.22-591/drivers/s390/net/qeth_sys.c
+--- linux-2.6.22-570/drivers/s390/net/qeth_sys.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/s390/net/qeth_sys.c 2007-12-21 15:36:12.000000000 -0500
+@@ -991,7 +991,7 @@
+
+ #define QETH_DEVICE_ATTR(_id,_name,_mode,_show,_store) \
+ struct device_attribute dev_attr_##_id = { \
+- .attr = {.name=__stringify(_name), .mode=_mode, .owner=THIS_MODULE },\
++ .attr = {.name=__stringify(_name), .mode=_mode, },\
+ .show = _show, \
+ .store = _store, \
+ };
+diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c
+--- linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c 2007-12-21 15:36:12.000000000 -0500
+@@ -815,9 +815,7 @@
+ struct zfcp_unit *
+ zfcp_unit_enqueue(struct zfcp_port *port, fcp_lun_t fcp_lun)
+ {
+- struct zfcp_unit *unit, *tmp_unit;
+- unsigned int scsi_lun;
+- int found;
++ struct zfcp_unit *unit;
+
+ /*
+ * check that there is no unit with this FCP_LUN already in list
+@@ -863,21 +861,9 @@
+ }
+
+ zfcp_unit_get(unit);
++ unit->scsi_lun = scsilun_to_int((struct scsi_lun *)&unit->fcp_lun);
+
+- scsi_lun = 0;
+- found = 0;
+ write_lock_irq(&zfcp_data.config_lock);
+- list_for_each_entry(tmp_unit, &port->unit_list_head, list) {
+- if (tmp_unit->scsi_lun != scsi_lun) {
+- found = 1;
+- break;
+- }
+- scsi_lun++;
+- }
+- unit->scsi_lun = scsi_lun;
+- if (found)
+- list_add_tail(&unit->list, &tmp_unit->list);
+- else
+ list_add_tail(&unit->list, &port->unit_list_head);
+ atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status);
+ atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status);
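Instead of numbering units by their position in the port list, the unit's SCSI LUN is now derived directly from its 8-byte FCP LUN via scsilun_to_int(), which folds the SAM LUN encoding into the kernel's flat integer LUN. A small usage sketch mirroring the cast in the hunk above:

    #include <scsi/scsi.h>
    #include <scsi/scsi_device.h>

    /* Hedged sketch: fcp_lun is the 64-bit FCP LUN; reinterpreting it
     * as struct scsi_lun matches the conversion used above. */
    static unsigned int example_lun(u64 fcp_lun)
    {
            return scsilun_to_int((struct scsi_lun *)&fcp_lun);
    }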
+diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c
+--- linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1986,6 +1986,10 @@
+ failed_openfcp:
+ zfcp_close_fsf(erp_action->adapter);
+ failed_qdio:
++ atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
++ ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
++ ZFCP_STATUS_ADAPTER_XPORT_OK,
++ &erp_action->adapter->status);
+ out:
+ return retval;
+ }
+@@ -2167,6 +2171,9 @@
+ sleep *= 2;
+ }
+
++ atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
++ &adapter->status);
++
+ if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+ &adapter->status)) {
+ ZFCP_LOG_INFO("error: exchange of configuration data for "
+diff -Nurb linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c
+--- linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -7,6 +7,7 @@
+ #include <linux/kthread.h>
+ #include <linux/delay.h>
+ #include <linux/kmod.h>
++#include <linux/reboot.h>
+ #include <asm/oplib.h>
+ #include <asm/ebus.h>
+
+@@ -170,8 +171,6 @@
+ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
+ {
+ static int shutting_down = 0;
+- static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+- char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
+ char *type = "???";
+ s8 val = -1;
+
+@@ -195,7 +194,7 @@
+ printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
+
+ shutting_down = 1;
+- if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0)
++ if (orderly_poweroff(true) < 0)
+ printk(KERN_CRIT "envctrl: shutdown execution failed\n");
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/sbus/char/envctrl.c linux-2.6.22-591/drivers/sbus/char/envctrl.c
+--- linux-2.6.22-570/drivers/sbus/char/envctrl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/sbus/char/envctrl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <linux/ioport.h>
+ #include <linux/miscdevice.h>
+ #include <linux/kmod.h>
++#include <linux/reboot.h>
+
+ #include <asm/ebus.h>
+ #include <asm/uaccess.h>
+@@ -966,10 +967,6 @@
+ static void envctrl_do_shutdown(void)
+ {
+ static int inprog = 0;
+- static char *envp[] = {
+- "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+- char *argv[] = {
+- "/sbin/shutdown", "-h", "now", NULL };
+ int ret;
+
+ if (inprog != 0)
+@@ -977,7 +974,7 @@
+
+ inprog = 1;
+ printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
+- ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0);
++ ret = orderly_poweroff(true);
+ if (ret < 0) {
+ printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
+ inprog = 0; /* unlikely to succeed, but we could try again */
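Both sbus hunks trade a hand-rolled call_usermodehelper("/sbin/shutdown", ...) for orderly_poweroff(true): the helper runs the system's userspace power-off path, and passing true asks the kernel to force the power-off itself if that path fails, a sensible bias for a thermal emergency. A hedged sketch of an over-temperature handler built on it:

    #include <linux/kernel.h>
    #include <linux/reboot.h>

    /* Hedged sketch; the "example" driver name is illustrative. */
    static void example_overtemp_shutdown(void)
    {
            printk(KERN_CRIT "example: critical temperature, powering off\n");
            if (orderly_poweroff(true) < 0)
                    printk(KERN_CRIT "example: shutdown execution failed\n");
    }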
+diff -Nurb linux-2.6.22-570/drivers/scsi/3w-9xxx.c linux-2.6.22-591/drivers/scsi/3w-9xxx.c
+--- linux-2.6.22-570/drivers/scsi/3w-9xxx.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/3w-9xxx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1307,22 +1307,26 @@
+ wake_up(&tw_dev->ioctl_wqueue);
+ }
+ } else {
++ struct scsi_cmnd *cmd;
++
++ cmd = tw_dev->srb[request_id];
++
+ twa_scsiop_execute_scsi_complete(tw_dev, request_id);
+ /* If no error command was a success */
+ if (error == 0) {
+- tw_dev->srb[request_id]->result = (DID_OK << 16);
++ cmd->result = (DID_OK << 16);
+ }
+
+ /* If error, command failed */
+ if (error == 1) {
+ /* Ask for a host reset */
+- tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
++ cmd->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
+ }
+
+ /* Report residual bytes for single sgl */
+- if ((tw_dev->srb[request_id]->use_sg <= 1) && (full_command_packet->command.newcommand.status == 0)) {
+- if (full_command_packet->command.newcommand.sg_list[0].length < tw_dev->srb[request_id]->request_bufflen)
+- tw_dev->srb[request_id]->resid = tw_dev->srb[request_id]->request_bufflen - full_command_packet->command.newcommand.sg_list[0].length;
++ if ((scsi_sg_count(cmd) <= 1) && (full_command_packet->command.newcommand.status == 0)) {
++ if (full_command_packet->command.newcommand.sg_list[0].length < scsi_bufflen(tw_dev->srb[request_id]))
++ scsi_set_resid(cmd, scsi_bufflen(cmd) - full_command_packet->command.newcommand.sg_list[0].length);
+ }
+
+ /* Now complete the io */
+@@ -1385,52 +1389,20 @@
+ {
+ int use_sg;
+ struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+- struct pci_dev *pdev = tw_dev->tw_pci_dev;
+- int retval = 0;
+-
+- if (cmd->use_sg == 0)
+- goto out;
+-
+- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+
+- if (use_sg == 0) {
++ use_sg = scsi_dma_map(cmd);
++ if (!use_sg)
++ return 0;
++ else if (use_sg < 0) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to map scatter gather list");
+- goto out;
++ return 0;
+ }
+
+ cmd->SCp.phase = TW_PHASE_SGLIST;
+ cmd->SCp.have_data_in = use_sg;
+- retval = use_sg;
+-out:
+- return retval;
+-} /* End twa_map_scsi_sg_data() */
+-
+-/* This function will perform a pci-dma map for a single buffer */
+-static dma_addr_t twa_map_scsi_single_data(TW_Device_Extension *tw_dev, int request_id)
+-{
+- dma_addr_t mapping;
+- struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+- struct pci_dev *pdev = tw_dev->tw_pci_dev;
+- dma_addr_t retval = 0;
+-
+- if (cmd->request_bufflen == 0) {
+- retval = 0;
+- goto out;
+- }
+-
+- mapping = pci_map_single(pdev, cmd->request_buffer, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-
+- if (mapping == 0) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Failed to map page");
+- goto out;
+- }
+
+- cmd->SCp.phase = TW_PHASE_SINGLE;
+- cmd->SCp.have_data_in = mapping;
+- retval = mapping;
+-out:
+- return retval;
+-} /* End twa_map_scsi_single_data() */
++ return use_sg;
++} /* End twa_map_scsi_sg_data() */
+
+ /* This function will poll for a response interrupt of a request */
+ static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds)
+@@ -1816,15 +1788,13 @@
+ u32 num_sectors = 0x0;
+ int i, sg_count;
+ struct scsi_cmnd *srb = NULL;
+- struct scatterlist *sglist = NULL;
+- dma_addr_t buffaddr = 0x0;
++ struct scatterlist *sglist = NULL, *sg;
+ int retval = 1;
+
+ if (tw_dev->srb[request_id]) {
+- if (tw_dev->srb[request_id]->request_buffer) {
+- sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
+- }
+ srb = tw_dev->srb[request_id];
++ if (scsi_sglist(srb))
++ sglist = scsi_sglist(srb);
+ }
+
+ /* Initialize command packet */
+@@ -1857,32 +1827,12 @@
+
+ if (!sglistarg) {
+ /* Map sglist from scsi layer to cmd packet */
+- if (tw_dev->srb[request_id]->use_sg == 0) {
+- if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH) {
+- command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]);
+- command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH);
+- if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)
+- memcpy(tw_dev->generic_buffer_virt[request_id], tw_dev->srb[request_id]->request_buffer, tw_dev->srb[request_id]->request_bufflen);
+- } else {
+- buffaddr = twa_map_scsi_single_data(tw_dev, request_id);
+- if (buffaddr == 0)
+- goto out;
+-
+- command_packet->sg_list[0].address = TW_CPU_TO_SGL(buffaddr);
+- command_packet->sg_list[0].length = cpu_to_le32(tw_dev->srb[request_id]->request_bufflen);
+- }
+- command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), 1));
+
+- if (command_packet->sg_list[0].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2d, "Found unaligned address during execute scsi");
+- goto out;
+- }
+- }
+-
+- if (tw_dev->srb[request_id]->use_sg > 0) {
+- if ((tw_dev->srb[request_id]->use_sg == 1) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) {
+- if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) {
+- struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++ if (scsi_sg_count(srb)) {
++ if ((scsi_sg_count(srb) == 1) &&
++ (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) {
++ if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) {
++ struct scatterlist *sg = scsi_sglist(srb);
+ char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
+ kunmap_atomic(buf - sg->offset, KM_IRQ0);
+@@ -1894,16 +1844,16 @@
+ if (sg_count == 0)
+ goto out;
+
+- for (i = 0; i < sg_count; i++) {
+- command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(&sglist[i]));
+- command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(&sglist[i]));
++ scsi_for_each_sg(srb, sg, sg_count, i) {
++ command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(sg));
++ command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(sg));
+ if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+ TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi");
+ goto out;
+ }
+ }
+ }
+- command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), tw_dev->srb[request_id]->use_sg));
++ command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), scsi_sg_count(tw_dev->srb[request_id])));
+ }
+ } else {
+ /* Internal cdb post */
+@@ -1933,7 +1883,7 @@
+
+ /* Update SG statistics */
+ if (srb) {
+- tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg;
++ tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]);
+ if (tw_dev->sgl_entries > tw_dev->max_sgl_entries)
+ tw_dev->max_sgl_entries = tw_dev->sgl_entries;
+ }
+@@ -1952,16 +1902,13 @@
+ /* This function completes an execute scsi operation */
+ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id)
+ {
+- if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH &&
+- (tw_dev->srb[request_id]->sc_data_direction == DMA_FROM_DEVICE ||
+- tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)) {
+- if (tw_dev->srb[request_id]->use_sg == 0) {
+- memcpy(tw_dev->srb[request_id]->request_buffer,
+- tw_dev->generic_buffer_virt[request_id],
+- tw_dev->srb[request_id]->request_bufflen);
+- }
+- if (tw_dev->srb[request_id]->use_sg == 1) {
+- struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++ struct scsi_cmnd *cmd = tw_dev->srb[request_id];
++
++ if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH &&
++ (cmd->sc_data_direction == DMA_FROM_DEVICE ||
++ cmd->sc_data_direction == DMA_BIDIRECTIONAL)) {
++ if (scsi_sg_count(cmd) == 1) {
++ struct scatterlist *sg = scsi_sglist(tw_dev->srb[request_id]);
+ char *buf;
+ unsigned long flags = 0;
+ local_irq_save(flags);
+@@ -2018,16 +1965,8 @@
+ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id)
+ {
+ struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+- struct pci_dev *pdev = tw_dev->tw_pci_dev;
+
+- switch(cmd->SCp.phase) {
+- case TW_PHASE_SINGLE:
+- pci_unmap_single(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+- break;
+- case TW_PHASE_SGLIST:
+- pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+- break;
+- }
++ scsi_dma_unmap(cmd);
+ } /* End twa_unmap_scsi_data() */
+
+ /* scsi_host_template initializer */
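This file shows the full shape of the scsi_cmnd accessor conversion that repeats in the drivers below: use_sg/request_buffer/request_bufflen give way to scsi_sg_count()/scsi_sglist()/scsi_bufflen(), mapping collapses into scsi_dma_map() (0 means no data, negative means failure, so the separate single-buffer path disappears), iteration uses scsi_for_each_sg(), residual reporting uses scsi_set_resid(), and teardown is a single scsi_dma_unmap(). A condensed sketch under those assumptions, with a hypothetical descriptor writer standing in for driver-specific SG code:

    #include <linux/scatterlist.h>
    #include <scsi/scsi_cmnd.h>

    /* Hypothetical stand-in for the driver's hardware SG-entry writer. */
    static void example_write_desc(int i, dma_addr_t addr, unsigned int len)
    {
            /* program hardware SG entry i with (addr, len) */
    }

    static int example_build_sgl(struct scsi_cmnd *cmd)
    {
            struct scatterlist *sg;
            int i, nseg;

            nseg = scsi_dma_map(cmd);       /* 0: no data; <0: map failed */
            if (nseg <= 0)
                    return nseg;

            scsi_for_each_sg(cmd, sg, nseg, i)
                    example_write_desc(i, sg_dma_address(sg), sg_dma_len(sg));

            return nseg;
    }

    static void example_complete(struct scsi_cmnd *cmd, u32 hw_xfer_len)
    {
            if (hw_xfer_len < scsi_bufflen(cmd))
                    scsi_set_resid(cmd, scsi_bufflen(cmd) - hw_xfer_len);
            scsi_dma_unmap(cmd);            /* replaces pci_unmap_sg/_single */
    }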
+diff -Nurb linux-2.6.22-570/drivers/scsi/3w-xxxx.c linux-2.6.22-591/drivers/scsi/3w-xxxx.c
+--- linux-2.6.22-570/drivers/scsi/3w-xxxx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/3w-xxxx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1274,12 +1274,8 @@
+
+ dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data()\n");
+
+- if (cmd->use_sg == 0)
+- return 0;
+-
+- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+-
+- if (use_sg == 0) {
++ use_sg = scsi_dma_map(cmd);
++ if (use_sg < 0) {
+ printk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data(): pci_map_sg() failed.\n");
+ return 0;
+ }
+@@ -1290,40 +1286,11 @@
+ return use_sg;
+ } /* End tw_map_scsi_sg_data() */
+
+-static u32 tw_map_scsi_single_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+-{
+- dma_addr_t mapping;
+-
+- dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data()\n");
+-
+- if (cmd->request_bufflen == 0)
+- return 0;
+-
+- mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), offset_in_page(cmd->request_buffer), cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-
+- if (mapping == 0) {
+- printk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data(): pci_map_page() failed.\n");
+- return 0;
+- }
+-
+- cmd->SCp.phase = TW_PHASE_SINGLE;
+- cmd->SCp.have_data_in = mapping;
+-
+- return mapping;
+-} /* End tw_map_scsi_single_data() */
+-
+ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+ {
+ dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n");
+
+- switch(cmd->SCp.phase) {
+- case TW_PHASE_SINGLE:
+- pci_unmap_page(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+- break;
+- case TW_PHASE_SGLIST:
+- pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+- break;
+- }
++ scsi_dma_unmap(cmd);
+ } /* End tw_unmap_scsi_data() */
+
+ /* This function will reset a device extension */
+@@ -1499,27 +1466,16 @@
+ void *buf;
+ unsigned int transfer_len;
+ unsigned long flags = 0;
++ struct scatterlist *sg = scsi_sglist(cmd);
+
+- if (cmd->use_sg) {
+- struct scatterlist *sg =
+- (struct scatterlist *)cmd->request_buffer;
+ local_irq_save(flags);
+ buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ transfer_len = min(sg->length, len);
+- } else {
+- buf = cmd->request_buffer;
+- transfer_len = min(cmd->request_bufflen, len);
+- }
+
+ memcpy(buf, data, transfer_len);
+
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
+-
+- sg = (struct scatterlist *)cmd->request_buffer;
+ kunmap_atomic(buf - sg->offset, KM_IRQ0);
+ local_irq_restore(flags);
+- }
+ }
+
+ /* This function is called by the isr to complete an inquiry command */
+@@ -1764,19 +1720,20 @@
+ {
+ TW_Command *command_packet;
+ unsigned long command_que_value;
+- u32 lba = 0x0, num_sectors = 0x0, buffaddr = 0x0;
++ u32 lba = 0x0, num_sectors = 0x0;
+ int i, use_sg;
+ struct scsi_cmnd *srb;
+- struct scatterlist *sglist;
++ struct scatterlist *sglist, *sg;
+
+ dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write()\n");
+
+- if (tw_dev->srb[request_id]->request_buffer == NULL) {
++ srb = tw_dev->srb[request_id];
++
++ sglist = scsi_sglist(srb);
++ if (!sglist) {
+ printk(KERN_WARNING "3w-xxxx: tw_scsiop_read_write(): Request buffer NULL.\n");
+ return 1;
+ }
+- sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
+- srb = tw_dev->srb[request_id];
+
+ /* Initialize command packet */
+ command_packet = (TW_Command *)tw_dev->command_packet_virtual_address[request_id];
+@@ -1819,33 +1776,18 @@
+ command_packet->byte8.io.lba = lba;
+ command_packet->byte6.block_count = num_sectors;
+
+- /* Do this if there are no sg list entries */
+- if (tw_dev->srb[request_id]->use_sg == 0) {
+- dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write(): SG = 0\n");
+- buffaddr = tw_map_scsi_single_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]);
+- if (buffaddr == 0)
+- return 1;
+-
+- command_packet->byte8.io.sgl[0].address = buffaddr;
+- command_packet->byte8.io.sgl[0].length = tw_dev->srb[request_id]->request_bufflen;
+- command_packet->size+=2;
+- }
+-
+- /* Do this if we have multiple sg list entries */
+- if (tw_dev->srb[request_id]->use_sg > 0) {
+ use_sg = tw_map_scsi_sg_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]);
+- if (use_sg == 0)
++ if (!use_sg)
+ return 1;
+
+- for (i=0;i<use_sg; i++) {
+- command_packet->byte8.io.sgl[i].address = sg_dma_address(&sglist[i]);
+- command_packet->byte8.io.sgl[i].length = sg_dma_len(&sglist[i]);
++ scsi_for_each_sg(tw_dev->srb[request_id], sg, use_sg, i) {
++ command_packet->byte8.io.sgl[i].address = sg_dma_address(sg);
++ command_packet->byte8.io.sgl[i].length = sg_dma_len(sg);
+ command_packet->size+=2;
+ }
+- }
+
+ /* Update SG statistics */
+- tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg;
++ tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]);
+ if (tw_dev->sgl_entries > tw_dev->max_sgl_entries)
+ tw_dev->max_sgl_entries = tw_dev->sgl_entries;
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.c linux-2.6.22-591/drivers/scsi/53c700.c
+--- linux-2.6.22-570/drivers/scsi/53c700.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c700.c 2007-12-21 15:36:12.000000000 -0500
+@@ -585,16 +585,8 @@
+ struct NCR_700_command_slot *slot)
+ {
+ if(SCp->sc_data_direction != DMA_NONE &&
+- SCp->sc_data_direction != DMA_BIDIRECTIONAL) {
+- if(SCp->use_sg) {
+- dma_unmap_sg(hostdata->dev, SCp->request_buffer,
+- SCp->use_sg, SCp->sc_data_direction);
+- } else {
+- dma_unmap_single(hostdata->dev, slot->dma_handle,
+- SCp->request_bufflen,
+- SCp->sc_data_direction);
+- }
+- }
++ SCp->sc_data_direction != DMA_BIDIRECTIONAL)
++ scsi_dma_unmap(SCp);
+ }
+
+ STATIC inline void
+@@ -661,7 +653,6 @@
+ {
+ struct NCR_700_Host_Parameters *hostdata =
+ (struct NCR_700_Host_Parameters *)host->hostdata[0];
+- __u32 dcntl_extra = 0;
+ __u8 min_period;
+ __u8 min_xferp = (hostdata->chip710 ? NCR_710_MIN_XFERP : NCR_700_MIN_XFERP);
+
+@@ -686,13 +677,14 @@
+ burst_disable = BURST_DISABLE;
+ break;
+ }
+- dcntl_extra = COMPAT_700_MODE;
++ hostdata->dcntl_extra |= COMPAT_700_MODE;
+
+- NCR_700_writeb(dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(hostdata->dcntl_extra, host, DCNTL_REG);
+ NCR_700_writeb(burst_length | hostdata->dmode_extra,
+ host, DMODE_710_REG);
+- NCR_700_writeb(burst_disable | (hostdata->differential ?
+- DIFF : 0), host, CTEST7_REG);
++ NCR_700_writeb(burst_disable | hostdata->ctest7_extra |
++ (hostdata->differential ? DIFF : 0),
++ host, CTEST7_REG);
+ NCR_700_writeb(BTB_TIMER_DISABLE, host, CTEST0_REG);
+ NCR_700_writeb(FULL_ARBITRATION | ENABLE_PARITY | PARITY
+ | AUTO_ATN, host, SCNTL0_REG);
+@@ -727,13 +719,13 @@
+ * of spec: sync divider 2, async divider 3 */
+ DEBUG(("53c700: sync 2 async 3\n"));
+ NCR_700_writeb(SYNC_DIV_2_0, host, SBCL_REG);
+- NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ hostdata->sync_clock = hostdata->clock/2;
+ } else if(hostdata->clock > 50 && hostdata->clock <= 75) {
+ /* sync divider 1.5, async divider 3 */
+ DEBUG(("53c700: sync 1.5 async 3\n"));
+ NCR_700_writeb(SYNC_DIV_1_5, host, SBCL_REG);
+- NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ hostdata->sync_clock = hostdata->clock*2;
+ hostdata->sync_clock /= 3;
+
+@@ -741,18 +733,18 @@
+ /* sync divider 1, async divider 2 */
+ DEBUG(("53c700: sync 1 async 2\n"));
+ NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+- NCR_700_writeb(ASYNC_DIV_2_0 | dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(ASYNC_DIV_2_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ hostdata->sync_clock = hostdata->clock;
+ } else if(hostdata->clock > 25 && hostdata->clock <=37) {
+ /* sync divider 1, async divider 1.5 */
+ DEBUG(("53c700: sync 1 async 1.5\n"));
+ NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+- NCR_700_writeb(ASYNC_DIV_1_5 | dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(ASYNC_DIV_1_5 | hostdata->dcntl_extra, host, DCNTL_REG);
+ hostdata->sync_clock = hostdata->clock;
+ } else {
+ DEBUG(("53c700: sync 1 async 1\n"));
+ NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+- NCR_700_writeb(ASYNC_DIV_1_0 | dcntl_extra, host, DCNTL_REG);
++ NCR_700_writeb(ASYNC_DIV_1_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ /* sync divider 1, async divider 1 */
+ hostdata->sync_clock = hostdata->clock;
+ }
+@@ -1263,12 +1255,11 @@
+ host->host_no, pun, lun, NCR_700_condition[i],
+ NCR_700_phase[j], dsp - hostdata->pScript);
+ if(SCp != NULL) {
+- scsi_print_command(SCp);
++ struct scatterlist *sg;
+
+- if(SCp->use_sg) {
+- for(i = 0; i < SCp->use_sg + 1; i++) {
+- printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, ((struct scatterlist *)SCp->request_buffer)[i].length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr);
+- }
++ scsi_print_command(SCp);
++ scsi_for_each_sg(SCp, sg, scsi_sg_count(SCp) + 1, i) {
++ printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, sg->length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr);
+ }
+ }
+ NCR_700_internal_bus_reset(host);
+@@ -1844,8 +1835,8 @@
+ }
+ /* sanity check: some of the commands generated by the mid-layer
+ * have an eccentric idea of their sc_data_direction */
+- if(!SCp->use_sg && !SCp->request_bufflen
+- && SCp->sc_data_direction != DMA_NONE) {
++ if(!scsi_sg_count(SCp) && !scsi_bufflen(SCp) &&
++ SCp->sc_data_direction != DMA_NONE) {
+ #ifdef NCR_700_DEBUG
+ printk("53c700: Command");
+ scsi_print_command(SCp);
+@@ -1887,31 +1878,15 @@
+ int i;
+ int sg_count;
+ dma_addr_t vPtr = 0;
++ struct scatterlist *sg;
+ __u32 count = 0;
+
+- if(SCp->use_sg) {
+- sg_count = dma_map_sg(hostdata->dev,
+- SCp->request_buffer, SCp->use_sg,
+- direction);
+- } else {
+- vPtr = dma_map_single(hostdata->dev,
+- SCp->request_buffer,
+- SCp->request_bufflen,
+- direction);
+- count = SCp->request_bufflen;
+- slot->dma_handle = vPtr;
+- sg_count = 1;
+- }
+-
+-
+- for(i = 0; i < sg_count; i++) {
++ sg_count = scsi_dma_map(SCp);
++ BUG_ON(sg_count < 0);
+
+- if(SCp->use_sg) {
+- struct scatterlist *sg = SCp->request_buffer;
+-
+- vPtr = sg_dma_address(&sg[i]);
+- count = sg_dma_len(&sg[i]);
+- }
++ scsi_for_each_sg(SCp, sg, sg_count, i) {
++ vPtr = sg_dma_address(sg);
++ count = sg_dma_len(sg);
+
+ slot->SG[i].ins = bS_to_host(move_ins | count);
+ DEBUG((" scatter block %d: move %d[%08x] from 0x%lx\n",
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.h linux-2.6.22-591/drivers/scsi/53c700.h
+--- linux-2.6.22-570/drivers/scsi/53c700.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c700.h 2007-12-21 15:36:12.000000000 -0500
+@@ -177,6 +177,7 @@
+ __u8 state;
+ #define NCR_700_FLAG_AUTOSENSE 0x01
+ __u8 flags;
++ __u8 pad1[2]; /* Needed for m68k where min alignment is 2 bytes */
+ int tag;
+ __u32 resume_offset;
+ struct scsi_cmnd *cmnd;
+@@ -196,6 +197,8 @@
+ void __iomem *base; /* the base for the port (copied to host) */
+ struct device *dev;
+ __u32 dmode_extra; /* adjustable bus settings */
++ __u32 dcntl_extra; /* adjustable bus settings */
++ __u32 ctest7_extra; /* adjustable bus settings */
+ __u32 differential:1; /* if we are differential */
+ #ifdef CONFIG_53C700_LE_ON_BE
+ /* This option is for HP only. Set it if your chip is wired for
+@@ -352,6 +355,7 @@
+ #define SEL_TIMEOUT_DISABLE 0x10 /* 710 only */
+ #define DFP 0x08
+ #define EVP 0x04
++#define CTEST7_TT1 0x02
+ #define DIFF 0x01
+ #define CTEST6_REG 0x1A
+ #define TEMP_REG 0x1C
+@@ -385,6 +389,7 @@
+ #define SOFTWARE_RESET 0x01
+ #define COMPAT_700_MODE 0x01
+ #define SCRPTS_16BITS 0x20
++#define EA_710 0x20
+ #define ASYNC_DIV_2_0 0x00
+ #define ASYNC_DIV_1_5 0x40
+ #define ASYNC_DIV_1_0 0x80
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.c linux-2.6.22-591/drivers/scsi/53c7xx.c
+--- linux-2.6.22-570/drivers/scsi/53c7xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c7xx.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,6102 +0,0 @@
+-/*
+- * 53c710 driver. Modified from Drew Eckhardts driver
+- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+- * Check out PERM_OPTIONS and EXPECTED_CLOCK, which may be defined in the
+- * relevant machine specific file (eg. mvme16x.[ch], amiga7xx.[ch]).
+- * There are also currently some defines at the top of 53c7xx.scr.
+- * The chip type is #defined in script_asm.pl, as well as the Makefile.
+- * Host scsi ID expected to be 7 - see NCR53c7x0_init().
+- *
+- * I have removed the PCI code and some of the 53c8xx specific code -
+- * simply to make this file smaller and easier to manage.
+- *
+- * MVME16x issues:
+- * Problems trying to read any chip registers in NCR53c7x0_init(), as they
+- * may never have been set by 16xBug (eg. If kernel has come in over tftp).
+- */
+-
+-/*
+- * Adapted for Linux/m68k Amiga platforms for the A4000T/A4091 and
+- * WarpEngine SCSI controllers.
+- * By Alan Hourihane <alanh@fairlite.demon.co.uk>
+- * Thanks to Richard Hirst for making it possible with the MVME additions
+- */
+-
+-/*
+- * 53c710 rev 0 doesn't support add with carry. Rev 1 and 2 does. To
+- * overcome this problem you can define FORCE_DSA_ALIGNMENT, which ensures
+- * that the DSA address is always xxxxxx00. If disconnection is not allowed,
+- * then the script only ever tries to add small (< 256) positive offsets to
+- * DSA, so lack of carry isn't a problem. FORCE_DSA_ALIGNMENT can, of course,
+- * be defined for all chip revisions at a small cost in memory usage.
+- */
+-
+-#define FORCE_DSA_ALIGNMENT
+-
+-/*
+- * Selection timer does not always work on the 53c710, depending on the
+- * timing at the last disconnect, if this is a problem for you, try
+- * using validids as detailed below.
+- *
+- * Options for the NCR7xx driver
+- *
+- * noasync:0 - disables sync and asynchronous negotiation
+- * nosync:0 - disables synchronous negotiation (does async)
+- * nodisconnect:0 - disables disconnection
+- * validids:0x?? - Bitmask field that disallows certain ID's.
+- * - e.g. 0x03 allows ID 0,1
+- * - 0x1F allows ID 0,1,2,3,4
+- * opthi:n - replace top word of options with 'n'
+- * optlo:n - replace bottom word of options with 'n'
+- * - ALWAYS SPECIFY opthi THEN optlo <<<<<<<<<<
+- */
+-
+-/*
+- * PERM_OPTIONS are driver options which will be enabled for all NCR boards
+- * in the system at driver initialization time.
+- *
+- * Don't THINK about touching these in PERM_OPTIONS :
+- * OPTION_MEMORY_MAPPED
+- * 680x0 doesn't have an IO map!
+- *
+- * OPTION_DEBUG_TEST1
+- * Test 1 does bus mastering and interrupt tests, which will help weed
+- * out brain damaged main boards.
+- *
+- * Other PERM_OPTIONS settings are listed below. Note the actual options
+- * required are set in the relevant file (mvme16x.c, amiga7xx.c, etc):
+- *
+- * OPTION_NO_ASYNC
+- * Don't negotiate for asynchronous transfers on the first command
+- * when OPTION_ALWAYS_SYNCHRONOUS is set. Useful for dain bramaged
+- * devices which do something bad rather than sending a MESSAGE
+- * REJECT back to us like they should if they can't cope.
+- *
+- * OPTION_SYNCHRONOUS
+- * Enable support for synchronous transfers. Target negotiated
+- * synchronous transfers will be responded to. To initiate
+- * a synchronous transfer request, call
+- *
+- * request_synchronous (hostno, target)
+- *
+- * from within KGDB.
+- *
+- * OPTION_ALWAYS_SYNCHRONOUS
+- * Negotiate for synchronous transfers with every target after
+- * driver initialization or a SCSI bus reset. This is a bit dangerous,
+- * since there are some dain bramaged SCSI devices which will accept
+- * SDTR messages but keep talking asynchronously.
+- *
+- * OPTION_DISCONNECT
+- * Enable support for disconnect/reconnect. To change the
+- * default setting on a given host adapter, call
+- *
+- * request_disconnect (hostno, allow)
+- *
+- * where allow is non-zero to allow, 0 to disallow.
+- *
+- * If you really want to run 10MHz FAST SCSI-II transfers, you should
+- * know that the NCR driver currently ignores parity information. Most
+- * systems do 5MHz SCSI fine. I've seen a lot that have problems faster
+- * than 8MHz. To play it safe, we only request 5MHz transfers.
+- *
+- * If you'd rather get 10MHz transfers, edit sdtr_message and change
+- * the fourth byte from 50 to 25.
+- */
+-
+-/*
+- * Sponsored by
+- * iX Multiuser Multitasking Magazine
+- * Hannover, Germany
+- * hm@ix.de
+- *
+- * Copyright 1993, 1994, 1995 Drew Eckhardt
+- * Visionary Computing
+- * (Unix and Linux consulting and custom programming)
+- * drew@PoohSticks.ORG
+- * +1 (303) 786-7975
+- *
+- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+- *
+- * For more information, please consult
+- *
+- * NCR53C810
+- * SCSI I/O Processor
+- * Programmer's Guide
+- *
+- * NCR 53C810
+- * PCI-SCSI I/O Processor
+- * Data Manual
+- *
+- * NCR 53C810/53C820
+- * PCI-SCSI I/O Processor Design In Guide
+- *
+- * For literature on Symbios Logic Inc. formerly NCR, SCSI,
+- * and Communication products please call (800) 334-5454 or
+- * (719) 536-3300.
+- *
+- * PCI BIOS Specification Revision
+- * PCI Local Bus Specification
+- * PCI System Design Guide
+- *
+- * PCI Special Interest Group
+- * M/S HF3-15A
+- * 5200 N.E. Elam Young Parkway
+- * Hillsboro, Oregon 97124-6497
+- * +1 (503) 696-2000
+- * +1 (800) 433-5177
+- */
+-
+-/*
+- * Design issues :
+- * The cumulative latency needed to propagate a read/write request
+- * through the file system, buffer cache, driver stacks, SCSI host, and
+- * SCSI device is ultimately the limiting factor in throughput once we
+- * have a sufficiently fast host adapter.
+- *
+- * So, to maximize performance we want to keep the ratio of latency to data
+- * transfer time to a minimum by
+- * 1. Minimizing the total number of commands sent (typical command latency
+- * including drive and bus mastering host overhead is as high as 4.5ms)
+- * to transfer a given amount of data.
+- *
+- * This is accomplished by placing no arbitrary limit on the number
+- * of scatter/gather buffers supported, since we can transfer 1K
+- * per scatter/gather buffer without Eric's cluster patches,
+- * 4K with.
+- *
+- * 2. Minimizing the number of fatal interrupts serviced, since
+- * fatal interrupts halt the SCSI I/O processor. Basically,
+- * this means offloading the practical maximum amount of processing
+- * to the SCSI chip.
+- *
+- * On the NCR53c810/820/720, this is accomplished by using
+- * interrupt-on-the-fly signals when commands complete,
+- * and only handling fatal errors and SDTR / WDTR messages
+- * in the host code.
+- *
+- * On the NCR53c710, interrupts are generated as on the NCR53c8x0,
+- * only the lack of a interrupt-on-the-fly facility complicates
+- * things. Also, SCSI ID registers and commands are
+- * bit fielded rather than binary encoded.
+- *
+- * On the NCR53c700 and NCR53c700-66, operations that are done via
+- * indirect, table mode on the more advanced chips must be
+- * replaced by calls through a jump table which
+- * acts as a surrogate for the DSA. Unfortunately, this
+- * will mean that we must service an interrupt for each
+- * disconnect/reconnect.
+- *
+- * 3. Eliminating latency by pipelining operations at the different levels.
+- *
+- * This driver allows a configurable number of commands to be enqueued
+- * for each target/lun combination (experimentally, I have discovered
+- * that two seems to work best) and will ultimately allow for
+- * SCSI-II tagged queuing.
+- *
+- *
+- * Architecture :
+- * This driver is built around a Linux queue of commands waiting to
+- * be executed, and a shared Linux/NCR array of commands to start. Commands
+- * are transferred to the array by the run_process_issue_queue() function
+- * which is called whenever a command completes.
+- *
+- * As commands are completed, the interrupt routine is triggered,
+- * looks for commands in the linked list of completed commands with
+- * valid status, removes these commands from a list of running commands,
+- * calls the done routine, and flags their target/luns as not busy.
+- *
+- * Due to limitations in the intelligence of the NCR chips, certain
+- * concessions are made. In many cases, it is easier to dynamically
+- * generate/fix-up code rather than calculate on the NCR at run time.
+- * So, code is generated or fixed up for
+- *
+- * - Handling data transfers, using a variable number of MOVE instructions
+- * interspersed with CALL MSG_IN, WHEN MSGIN instructions.
+- *
+- * The DATAIN and DATAOUT routines are separate, so that an incorrect
+- * direction can be trapped, and space isn't wasted.
+- *
+- * It may turn out that we're better off using some sort
+- * of table indirect instruction in a loop with a variable
+- * sized table on the NCR53c710 and newer chips.
+- *
+- * - Checking for reselection (NCR53c710 and better)
+- *
+- * - Handling the details of SCSI context switches (NCR53c710 and better),
+- * such as reprogramming appropriate synchronous parameters,
+- * removing the dsa structure from the NCR's queue of outstanding
+- * commands, etc.
+- *
+- */
+-
+-#include <linux/module.h>
+-
+-
+-#include <linux/types.h>
+-#include <asm/setup.h>
+-#include <asm/dma.h>
+-#include <asm/io.h>
+-#include <asm/system.h>
+-#include <linux/delay.h>
+-#include <linux/signal.h>
+-#include <linux/sched.h>
+-#include <linux/errno.h>
+-#include <linux/string.h>
+-#include <linux/slab.h>
+-#include <linux/vmalloc.h>
+-#include <linux/mm.h>
+-#include <linux/ioport.h>
+-#include <linux/time.h>
+-#include <linux/blkdev.h>
+-#include <linux/spinlock.h>
+-#include <linux/interrupt.h>
+-#include <asm/pgtable.h>
+-
+-#ifdef CONFIG_AMIGA
+-#include <asm/amigahw.h>
+-#include <asm/amigaints.h>
+-#include <asm/irq.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#endif
+-
+-#ifdef CONFIG_MVME16x
+-#include <asm/mvme16xhw.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#define VALID_IDS
+-#endif
+-
+-#ifdef CONFIG_BVME6000
+-#include <asm/bvme6000hw.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#define VALID_IDS
+-#endif
+-
+-#include "scsi.h"
+-#include <scsi/scsi_dbg.h>
+-#include <scsi/scsi_host.h>
+-#include <scsi/scsi_transport_spi.h>
+-#include "53c7xx.h"
+-#include <linux/stat.h>
+-#include <linux/stddef.h>
+-
+-#ifdef NO_IO_SPACE
+-/*
+- * The following make the definitions in 53c7xx.h (write8, etc) smaller,
+- * we don't have separate i/o space anyway.
+- */
+-#undef inb
+-#undef outb
+-#undef inw
+-#undef outw
+-#undef inl
+-#undef outl
+-#define inb(x) 1
+-#define inw(x) 1
+-#define inl(x) 1
+-#define outb(x,y) 1
+-#define outw(x,y) 1
+-#define outl(x,y) 1
+-#endif
+-
+-static int check_address (unsigned long addr, int size);
+-static void dump_events (struct Scsi_Host *host, int count);
+-static Scsi_Cmnd * return_outstanding_commands (struct Scsi_Host *host,
+- int free, int issue);
+-static void hard_reset (struct Scsi_Host *host);
+-static void ncr_scsi_reset (struct Scsi_Host *host);
+-static void print_lots (struct Scsi_Host *host);
+-static void set_synchronous (struct Scsi_Host *host, int target, int sxfer,
+- int scntl3, int now_connected);
+-static int datapath_residual (struct Scsi_Host *host);
+-static const char * sbcl_to_phase (int sbcl);
+-static void print_progress (Scsi_Cmnd *cmd);
+-static void print_queues (struct Scsi_Host *host);
+-static void process_issue_queue (unsigned long flags);
+-static int shutdown (struct Scsi_Host *host);
+-static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int result);
+-static int disable (struct Scsi_Host *host);
+-static int NCR53c7xx_run_tests (struct Scsi_Host *host);
+-static irqreturn_t NCR53c7x0_intr(int irq, void *dev_id);
+-static void NCR53c7x0_intfly (struct Scsi_Host *host);
+-static int ncr_halt (struct Scsi_Host *host);
+-static void intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd
+- *cmd);
+-static void intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd);
+-static void print_dsa (struct Scsi_Host *host, u32 *dsa,
+- const char *prefix);
+-static int print_insn (struct Scsi_Host *host, const u32 *insn,
+- const char *prefix, int kernel);
+-
+-static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd);
+-static void NCR53c7x0_init_fixup (struct Scsi_Host *host);
+-static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct
+- NCR53c7x0_cmd *cmd);
+-static void NCR53c7x0_soft_reset (struct Scsi_Host *host);
+-
+-/* Size of event list (per host adapter) */
+-static int track_events = 0;
+-static struct Scsi_Host *first_host = NULL; /* Head of list of NCR boards */
+-static struct scsi_host_template *the_template = NULL;
+-
+-/* NCR53c710 script handling code */
+-
+-#include "53c7xx_d.h"
+-#ifdef A_int_debug_sync
+-#define DEBUG_SYNC_INTR A_int_debug_sync
+-#endif
+-int NCR53c7xx_script_len = sizeof (SCRIPT);
+-int NCR53c7xx_dsa_len = A_dsa_end + Ent_dsa_zero - Ent_dsa_code_template;
+-#ifdef FORCE_DSA_ALIGNMENT
+-int CmdPageStart = (0 - Ent_dsa_zero - sizeof(struct NCR53c7x0_cmd)) & 0xff;
+-#endif
+-
+-static char *setup_strings[] =
+- {"","","","","","","",""};
+-
+-#define MAX_SETUP_STRINGS ARRAY_SIZE(setup_strings)
+-#define SETUP_BUFFER_SIZE 200
+-static char setup_buffer[SETUP_BUFFER_SIZE];
+-static char setup_used[MAX_SETUP_STRINGS];
+-
+-void ncr53c7xx_setup (char *str, int *ints)
+-{
+- int i;
+- char *p1, *p2;
+-
+- p1 = setup_buffer;
+- *p1 = '\0';
+- if (str)
+- strncpy(p1, str, SETUP_BUFFER_SIZE - strlen(setup_buffer));
+- setup_buffer[SETUP_BUFFER_SIZE - 1] = '\0';
+- p1 = setup_buffer;
+- i = 0;
+- while (*p1 && (i < MAX_SETUP_STRINGS)) {
+- p2 = strchr(p1, ',');
+- if (p2) {
+- *p2 = '\0';
+- if (p1 != p2)
+- setup_strings[i] = p1;
+- p1 = p2 + 1;
+- i++;
+- }
+- else {
+- setup_strings[i] = p1;
+- break;
+- }
+- }
+- for (i=0; i<MAX_SETUP_STRINGS; i++)
+- setup_used[i] = 0;
+-}
+-
+-
+-/* check_setup_strings() returns the 1-based index if the key is found, 0 if not
+- */
+-
+-static int check_setup_strings(char *key, int *flags, int *val, char *buf)
+-{
+-int x;
+-char *cp;
+-
+- for (x=0; x<MAX_SETUP_STRINGS; x++) {
+- if (setup_used[x])
+- continue;
+- if (!strncmp(setup_strings[x], key, strlen(key)))
+- break;
+- if (!strncmp(setup_strings[x], "next", strlen("next")))
+- return 0;
+- }
+- if (x == MAX_SETUP_STRINGS)
+- return 0;
+- setup_used[x] = 1;
+- cp = setup_strings[x] + strlen(key);
+- *val = -1;
+- if (*cp != ':')
+- return ++x;
+- cp++;
+- if ((*cp >= '0') && (*cp <= '9')) {
+- *val = simple_strtoul(cp,NULL,0);
+- }
+- return ++x;
+-}
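+-
+-/*
+- * Illustrative example (not from the original driver) of how the two
+- * routines above cooperate.  Given an option string such as
+- *
+- *     "nosync,nodisconnect,validids:0x3f"
+- *
+- * passed to ncr53c7xx_setup(), the commas are replaced with NULs and
+- * setup_strings[] ends up as { "nosync", "nodisconnect",
+- * "validids:0x3f", ... }.  A later call like
+- *
+- *     int flags, val;
+- *     char buf[32];
+- *     int i = check_setup_strings("validids", &flags, &val, buf);
+- *
+- * returns the 1-based index of the matching entry (here 3), marks it
+- * used so it is only consumed once, and parses the text after the ':'
+- * with simple_strtoul(), leaving val == 0x3f.
+- */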
+-
+-
+-
+-/*
+- * KNOWN BUGS :
+- * - There is some sort of conflict when the PPP driver is compiled with
+- * support for 16 channels?
+- *
+- * - On systems which predate the 1.3.x initialization order change,
+- * the NCR driver will cause "Cannot get free page" messages to appear.
+- * These are harmless, but I don't know of an easy way to avoid them.
+- *
+- * - With OPTION_DISCONNECT, on two systems under unknown circumstances,
+- * we get a PHASE MISMATCH with DSA set to zero (suggesting that this
+- * is occurring somewhere in the reselection code) where
+- * DSP=some value DCMD|DBC=same value.
+- *
+- * Closer inspection suggests that we may be trying to execute
+- * some portion of the DSA?
+- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO)
+- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO)
+- * scsi0 : no current command : unexpected phase MSGIN.
+- * DSP=0x1c46cc, DCMD|DBC=0x1c46ac, DSA=0x0
+- * DSPS=0x0, TEMP=0x1c3e70, DMODE=0x80
+- * scsi0 : DSP->
+- * 001c46cc : 0x001c46cc 0x00000000
+- * 001c46d4 : 0x001c5ea0 0x000011f8
+- *
+- * Changed the print code in the phase_mismatch handler so
+- * that we call print_lots to try to diagnose this.
+- *
+- */
+-
+-/*
+- * Possible future direction of architecture for max performance :
+- *
+- * We're using a single start array for the NCR chip. This is
+- * sub-optimal, because we cannot add a command which would conflict with
+- * an executing command to this start queue, and therefore must insert the
+- * next command for a given I/T/L combination after the first has completed;
+- * incurring our interrupt latency between SCSI commands.
+- *
+- * To allow further pipelining of the NCR and host CPU operation, we want
+- * to set things up so that immediately on termination of a command destined
+- * for a given LUN, we get that LUN busy again.
+- *
+- * To do this, we need to add a 32 bit pointer which is jumped to
+- * on completion of a command. If no new command is available, this
+- * would point to the usual DSA issue queue select routine.
+- *
+- * If one were, it would point to a per-NCR53c7x0_cmd select routine
+- * which starts execution immediately, inserting the command at the head
+- * of the start queue if the NCR chip is selected or reselected.
+- *
+- * We would change things so that we keep a list of outstanding commands
+- * for each unit, rather than a single running_list. We'd insert
+- * a new command into the right running list; if the NCR didn't
+- * have something running for that yet, we'd put it in the
+- * start queue as well. Some magic needs to happen to handle the
+- * race condition between the first command terminating before the
+- * new one is written.
+- *
+- * Potential for profiling :
+- * Call do_gettimeofday(struct timeval *tv) to get 800ns resolution.
+- */
+-
+-
+-/*
+- * TODO :
+- * 1. To support WIDE transfers, not much needs to happen. We
+- * should do CHMOVE instructions instead of MOVEs when
+- * we have scatter/gather segments of uneven length. When
+- * we do this, we need to handle the case where we disconnect
+- * between segments.
+- *
+- * 2. Currently, when icky things happen we do a FATAL(). Instead,
+- * we want to do an integrity check on the parts of the NCR hostdata
+- * structure which were initialized at boot time; FATAL() if that
+- * fails, and otherwise try to recover. Keep track of how many
+- * times this has happened within a single SCSI command; if it
+- * gets excessive, then FATAL().
+- *
+- * 3. Parity checking is currently disabled, and a few things should
+- * happen here now that we support synchronous SCSI transfers :
+- * 1. On soft-reset, we should set the EPC (Enable Parity Checking)
+- * and AAP (Assert SATN/ on parity error) bits in SCNTL0.
+- *
+- * 2. We should enable the parity interrupt in the SIEN0 register.
+- *
+- * 3. intr_phase_mismatch() needs to believe that message out is
+- * always an "acceptable" phase to have a mismatch in. If
+- * the old phase was MSG_IN, we should send a MESSAGE PARITY
+- * error. If the old phase was something else, we should send
+- * an INITIATOR_DETECTED_ERROR message. Note that this could
+- * cause a RESTORE POINTERS message; so we should handle that
+- * correctly first. Instead, we should probably do an
+- * initiator_abort.
+- *
+- * 4. MPEE bit of CTEST4 should be set so we get interrupted if
+- * we detect an error.
+- *
+- *
+- * 5. The initial code has been tested on the NCR53c810. I don't
+- * have access to NCR53c700, 700-66 (Forex boards), NCR53c710
+- * (NCR Pentium systems), NCR53c720, NCR53c820, or NCR53c825 boards to
+- * finish development on those platforms.
+- *
+- * NCR53c820/825/720 - need to add wide transfer support, including WDTR
+- * negotiation, programming of wide transfer capabilities
+- * on reselection and table indirect selection.
+- *
+- * NCR53c710 - need to add fatal interrupt or GEN code for
+- * command completion signaling. Need to modify all
+- * SDID, SCID, etc. registers, and table indirect select code
+- * since these use bit-fielded (i.e. 1<<target) instead of
+- * binary encoded target ids. Need to accommodate
+- * different register mappings, probably scan through
+- * the SCRIPT code and change the non SFBR register operand
+- * of all MOVE instructions.
+- *
+- * It is rather worse than this, actually: the 710 corrupts
+- * both TEMP and DSA when you do a MOVE MEMORY. This
+- * screws you up all over the place. MOVE MEMORY 4 with a
+- * destination of DSA seems to work OK, which helps some.
+- * Richard Hirst richard@sleepie.demon.co.uk
+- *
+- * NCR53c700/700-66 - need to add code to refix addresses on
+- * every nexus change, eliminate all table indirect code,
+- * very messy.
+- *
+- * 6. The NCR53c7x0 series is very popular on other platforms that
+- * could be running Linux - i.e., some high performance AMIGA SCSI
+- * boards use it.
+- *
+- * So, I should include #ifdef'd code so that it is
+- * compatible with these systems.
+- *
+- * Specifically, the little-endian assumptions I made in my
+- * bit fields need to change, and if the NCR doesn't see memory
+- * the right way, we need to provide options to reverse words
+- * when the scripts are relocated.
+- *
+- * 7. Use vremap() to access memory mapped boards.
+- */
+-
+-/*
+- * Allow for simultaneous existence of multiple SCSI scripts so we
+- * can have a single driver binary for all of the family.
+- *
+- * - one for NCR53c700 and NCR53c700-66 chips (not yet supported)
+- * - one for rest (only the NCR53c810, 815, 820, and 825 are currently
+- * supported)
+- *
+- * So that we only need two SCSI scripts, we need to modify things so
+- * that we fixup register accesses in READ/WRITE instructions, and
+- * we'll also have to accommodate the bit vs. binary encoding of IDs
+- * with the 7xx chips.
+- */
+-
+-#define ROUNDUP(adr,type) \
+- ((void *) (((long) (adr) + sizeof(type) - 1) & ~(sizeof(type) - 1)))
+-
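+-/*
+- * Worked example (illustrative): ROUNDUP rounds an address up to the
+- * alignment of the given type, assuming sizeof(type) is a power of
+- * two.  With sizeof(void *) == 4,
+- *
+- *     ROUNDUP(0x1005, void *) == (void *) 0x1008
+- *     ROUNDUP(0x1008, void *) == (void *) 0x1008   (already aligned)
+- */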
+-
+-/*
+- * Function: issue_to_cmd
+- *
+- * Purpose: convert jump instruction in issue array to NCR53c7x0_cmd
+- * structure pointer.
+- *
+- * Inputs : issue - pointer to start of NOP or JUMP instruction
+- * in issue array.
+- *
+- * Returns : pointer to command on success; NULL if the opcode is a NOP.
+- */
+-
+-static inline struct NCR53c7x0_cmd *
+-issue_to_cmd (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+- u32 *issue)
+-{
+- return (issue[0] != hostdata->NOP_insn) ?
+- /*
+- * If the IF TRUE bit is set, it's a JUMP instruction. The
+- * operand is a bus pointer to the dsa_begin routine for this DSA. The
+- * dsa field of the NCR53c7x0_cmd structure starts with the
+- * DSA code template. By converting to a virtual address,
+- * subtracting the code template size, and offset of the
+- * dsa field, we end up with a pointer to the start of the
+- * structure (alternatively, we could use the
+- * dsa_cmnd field, an anachronism from when we weren't
+- * sure what the relationship between the NCR structures
+- * and host structures was going to be).
+- */
+- (struct NCR53c7x0_cmd *) ((char *) bus_to_virt (issue[1]) -
+- (hostdata->E_dsa_code_begin - hostdata->E_dsa_code_template) -
+- offsetof(struct NCR53c7x0_cmd, dsa))
+- /* If the IF TRUE bit is not set, it's a NOP */
+- : NULL;
+-}
+-
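+-/*
+- * Illustrative inverse of issue_to_cmd() (a sketch, not part of the
+- * original driver; cmd_to_issue_operand is a hypothetical helper
+- * name): this computes the operand a JUMP entry in the issue array
+- * carries for a given command - the bus address of the dsa_begin
+- * entry point within the command's copied DSA code.
+- */
+-static inline u32
+-cmd_to_issue_operand (struct NCR53c7x0_hostdata *hostdata,
+-    struct NCR53c7x0_cmd *cmd)
+-{
+-    /* Start of structure -> dsa field -> dsa_begin entry point */
+-    return (u32) virt_to_bus ((char *) cmd +
+-	offsetof(struct NCR53c7x0_cmd, dsa) +
+-	(hostdata->E_dsa_code_begin - hostdata->E_dsa_code_template));
+-}
+-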
+-
+-/*
+- * FIXME: we should junk these, in favor of synchronous_want and
+- * wide_want in the NCR53c7x0_hostdata structure.
+- */
+-
+-/* Template for "preferred" synchronous transfer parameters. */
+-
+-static const unsigned char sdtr_message[] = {
+-#ifdef CONFIG_SCSI_NCR53C7xx_FAST
+- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 25 /* *4ns */, 8 /* off */
+-#else
+- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 50 /* *4ns */, 8 /* off */
+-#endif
+-};
+-
+-/* Template to request asynchronous transfers */
+-
+-static const unsigned char async_message[] = {
+- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 0, 0 /* asynchronous */
+-};
+-
+-/* Template for "preferred" WIDE transfer parameters */
+-
+-static const unsigned char wdtr_message[] = {
+- EXTENDED_MESSAGE, 2 /* length */, EXTENDED_WDTR, 1 /* 2^1 bytes */
+-};
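+-
+-/*
+- * Illustrative helper (not part of the original driver;
+- * sdtr_period_ns is a hypothetical name): byte 3 of an SDTR message
+- * is the transfer period factor in units of 4ns, byte 4 the REQ/ACK
+- * offset.  The templates above therefore request a 100ns period
+- * (10.0MHz, FAST SCSI-II) or a 200ns period (5.0MHz), both with an
+- * offset of 8.
+- */
+-static inline int
+-sdtr_period_ns (const unsigned char *msg)
+-{
+-    /* An offset of zero means asynchronous transfers. */
+-    return msg[4] ? msg[3] * 4 : 0;
+-}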
+-
+-#if 0
+-/*
+- * Function : struct Scsi_Host *find_host (int host)
+- *
+- * Purpose : KGDB support function which translates a host number
+- * to a host structure.
+- *
+- * Inputs : host - number of SCSI host
+- *
+- * Returns : NULL on failure, pointer to host structure on success.
+- */
+-
+-static struct Scsi_Host *
+-find_host (int host) {
+- struct Scsi_Host *h;
+- for (h = first_host; h && h->host_no != host; h = h->next);
+- if (!h) {
+- printk (KERN_ALERT "scsi%d not found\n", host);
+- return NULL;
+- } else if (h->hostt != the_template) {
+- printk (KERN_ALERT "scsi%d is not a NCR board\n", host);
+- return NULL;
+- }
+- return h;
+-}
+-
+-#if 0
+-/*
+- * Function : request_synchronous (int host, int target)
+- *
+- * Purpose : KGDB interface which will allow us to negotiate for
+- * synchronous transfers. This will be replaced with a more
+- * integrated function; perhaps a new entry in the scsi_host
+- * structure, accessible via an ioctl() or perhaps /proc/scsi.
+- *
+- * Inputs : host - number of SCSI host; target - number of target.
+- *
+- * Returns : 0 when negotiation has been setup for next SCSI command,
+- * -1 on failure.
+- */
+-
+-static int
+-request_synchronous (int host, int target) {
+- struct Scsi_Host *h;
+- struct NCR53c7x0_hostdata *hostdata;
+- unsigned long flags;
+- if (target < 0) {
+- printk (KERN_ALERT "target %d is bogus\n", target);
+- return -1;
+- }
+- if (!(h = find_host (host)))
+- return -1;
+- else if (h->this_id == target) {
+- printk (KERN_ALERT "target %d is host ID\n", target);
+- return -1;
+- }
+- else if (target >= h->max_id) {
+- printk (KERN_ALERT "target %d exceeds maximum of %d\n", target,
+- h->max_id);
+- return -1;
+- }
+- hostdata = (struct NCR53c7x0_hostdata *)h->hostdata[0];
+-
+- local_irq_save(flags);
+- if (hostdata->initiate_sdtr & (1 << target)) {
+- local_irq_restore(flags);
+- printk (KERN_ALERT "target %d already doing SDTR\n", target);
+- return -1;
+- }
+- hostdata->initiate_sdtr |= (1 << target);
+- local_irq_restore(flags);
+- return 0;
+-}
+-#endif
+-
+-/*
+- * Function : request_disconnect (int host, int on_or_off)
+- *
+- * Purpose : KGDB support function, tells us to allow or disallow
+- * disconnections.
+- *
+- * Inputs : host - number of SCSI host; on_or_off - non-zero to allow,
+- * zero to disallow.
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-static int
+-request_disconnect (int host, int on_or_off) {
+- struct Scsi_Host *h;
+- struct NCR53c7x0_hostdata *hostdata;
+- if (!(h = find_host (host)))
+- return -1;
+- hostdata = (struct NCR53c7x0_hostdata *) h->hostdata[0];
+- if (on_or_off)
+- hostdata->options |= OPTION_DISCONNECT;
+- else
+- hostdata->options &= ~OPTION_DISCONNECT;
+- return 0;
+-}
+-#endif
+-
+-/*
+- * Function : static void NCR53c7x0_driver_init (struct Scsi_Host *host)
+- *
+- * Purpose : Initialize internal structures, as required on startup, or
+- * after a SCSI bus reset.
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- */
+-
+-static void
+-NCR53c7x0_driver_init (struct Scsi_Host *host) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int i, j;
+- u32 *ncrcurrent;
+-
+- for (i = 0; i < 16; ++i) {
+- hostdata->request_sense[i] = 0;
+- for (j = 0; j < 8; ++j)
+- hostdata->busy[i][j] = 0;
+- set_synchronous (host, i, /* sxfer */ 0, hostdata->saved_scntl3, 0);
+- }
+- hostdata->issue_queue = NULL;
+- hostdata->running_list = hostdata->finished_queue =
+- hostdata->ncrcurrent = NULL;
+- for (i = 0, ncrcurrent = (u32 *) hostdata->schedule;
+- i < host->can_queue; ++i, ncrcurrent += 2) {
+- ncrcurrent[0] = hostdata->NOP_insn;
+- ncrcurrent[1] = 0xdeadbeef;
+- }
+- ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | DBC_TCI_TRUE;
+- ncrcurrent[1] = (u32) virt_to_bus (hostdata->script) +
+- hostdata->E_wait_reselect;
+- hostdata->reconnect_dsa_head = 0;
+- hostdata->addr_reconnect_dsa_head = (u32)
+- virt_to_bus((void *) &(hostdata->reconnect_dsa_head));
+- hostdata->expecting_iid = 0;
+- hostdata->expecting_sto = 0;
+- if (hostdata->options & OPTION_ALWAYS_SYNCHRONOUS)
+- hostdata->initiate_sdtr = 0xffff;
+- else
+- hostdata->initiate_sdtr = 0;
+- hostdata->talked_to = 0;
+- hostdata->idle = 1;
+-}
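+-
+-/*
+- * Illustrative annotation (not from the original driver): after the
+- * loop above, hostdata->schedule holds can_queue two-word slots, each
+- * either a NOP (slot free) or a JUMP to a command's dsa_begin code
+- * (command ready to start), terminated by the unconditional JUMP back
+- * to the script's wait_reselect entry written just after the loop.
+- */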
+-
+-/*
+- * Function : static int clock_to_ccf_710 (int clock)
+- *
+- * Purpose : Return the clock conversion factor for a given SCSI clock.
+- *
+- * Inputs : clock - SCSI clock expressed in Hz.
+- *
+- * Returns : ccf on success, -1 on failure.
+- */
+-
+-static int
+-clock_to_ccf_710 (int clock) {
+- if (clock <= 16666666)
+- return -1;
+- if (clock <= 25000000)
+- return 2; /* Divide by 1.0 */
+- else if (clock <= 37500000)
+- return 1; /* Divide by 1.5 */
+- else if (clock <= 50000000)
+- return 0; /* Divide by 2.0 */
+- else if (clock <= 66000000)
+- return 3; /* Divide by 3.0 */
+- else
+- return -1;
+-}
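+-
+-/*
+- * Worked examples (illustrative):
+- *
+- *     clock_to_ccf_710(25000000) == 2    (divide by 1.0)
+- *     clock_to_ccf_710(40000000) == 0    (divide by 2.0)
+- *     clock_to_ccf_710(66000000) == 3    (divide by 3.0)
+- *
+- * Anything at or below 16.67MHz, or above 66MHz, is rejected with -1.
+- */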
+-
+-/*
+- * Function : static int NCR53c7x0_init (struct Scsi_Host *host)
+- *
+- * Purpose : initialize the internal structures for a given SCSI host
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- * Preconditions : when this function is called, the chip_type
+- * field of the hostdata structure MUST have been set.
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-int
+-NCR53c7x0_init (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- int i, ccf;
+- unsigned char revision;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- /*
+- * There are some things which we need to know about in order to provide
+- * a semblance of support. Print 'em if they aren't what we expect,
+- * otherwise don't add to the noise.
+- *
+- * -1 means we don't know what to expect.
+- */
+- int val, flags;
+- char buf[32];
+- int expected_id = -1;
+- int expected_clock = -1;
+- int uninitialized = 0;
+-#ifdef NO_IO_SPACE
+- int expected_mapping = OPTION_MEMORY_MAPPED;
+-#else
+- int expected_mapping = OPTION_IO_MAPPED;
+-#endif
+- for (i=0;i<7;i++)
+- hostdata->valid_ids[i] = 1; /* Default all IDs to scan */
+-
+- /* Parse commandline flags */
+- if (check_setup_strings("noasync",&flags,&val,buf))
+- {
+- hostdata->options |= OPTION_NO_ASYNC;
+- hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+- }
+-
+- if (check_setup_strings("nosync",&flags,&val,buf))
+- {
+- hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+- }
+-
+- if (check_setup_strings("nodisconnect",&flags,&val,buf))
+- hostdata->options &= ~OPTION_DISCONNECT;
+-
+- if (check_setup_strings("validids",&flags,&val,buf))
+- {
+- for (i=0;i<7;i++)
+- hostdata->valid_ids[i] = val & (1<<i);
+- }
+-
+- if ((i = check_setup_strings("next",&flags,&val,buf)))
+- {
+- while (i)
+- setup_used[--i] = 1;
+- }
+-
+- if (check_setup_strings("opthi",&flags,&val,buf))
+- hostdata->options = (long long)val << 32;
+- if (check_setup_strings("optlo",&flags,&val,buf))
+- hostdata->options |= val;
+-
+- NCR53c7x0_local_setup(host);
+- switch (hostdata->chip) {
+- case 710:
+- case 770:
+- hostdata->dstat_sir_intr = NCR53c7x0_dstat_sir_intr;
+- hostdata->init_save_regs = NULL;
+- hostdata->dsa_fixup = NCR53c7xx_dsa_fixup;
+- hostdata->init_fixup = NCR53c7x0_init_fixup;
+- hostdata->soft_reset = NCR53c7x0_soft_reset;
+- hostdata->run_tests = NCR53c7xx_run_tests;
+- expected_clock = hostdata->scsi_clock;
+- expected_id = 7;
+- break;
+- default:
+- printk ("scsi%d : chip type of %d is not supported yet, detaching.\n",
+- host->host_no, hostdata->chip);
+- scsi_unregister (host);
+- return -1;
+- }
+-
+- /* Assign constants accessed by NCR */
+- hostdata->NCR53c7xx_zero = 0;
+- hostdata->NCR53c7xx_msg_reject = MESSAGE_REJECT;
+- hostdata->NCR53c7xx_msg_abort = ABORT;
+- hostdata->NCR53c7xx_msg_nop = NOP;
+- hostdata->NOP_insn = (DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24;
+- if (expected_mapping == -1 ||
+- (hostdata->options & (OPTION_MEMORY_MAPPED)) !=
+- (expected_mapping & OPTION_MEMORY_MAPPED))
+- printk ("scsi%d : using %s mapped access\n", host->host_no,
+- (hostdata->options & OPTION_MEMORY_MAPPED) ? "memory" :
+- "io");
+-
+- hostdata->dmode = (hostdata->chip == 700 || hostdata->chip == 70066) ?
+- DMODE_REG_00 : DMODE_REG_10;
+- hostdata->istat = ((hostdata->chip / 100) == 8) ?
+- ISTAT_REG_800 : ISTAT_REG_700;
+-
+-/* We have to assume that this may be the first access to the chip, so
+- * we must set EA in DCNTL. */
+-
+- NCR53c7x0_write8 (DCNTL_REG, DCNTL_10_EA|DCNTL_10_COM);
+-
+-
+-/* Only the ISTAT register is readable when the NCR is running, so make
+- sure it's halted. */
+- ncr_halt(host);
+-
+-/*
+- * XXX - the NCR53c700 uses bitfielded registers for SCID, SDID, etc,
+- * as does the 710 with one bit per SCSI ID. Conversely, the 800
+- * series chips use a normal, 3 bit binary representation of these values.
+- *
+- * Get the rest of the NCR documentation, and FIND OUT where the change
+- * was.
+- */
+-
+-#if 0
+- /* May not be able to do this - chip may not have been set up yet */
+- tmp = hostdata->this_id_mask = NCR53c7x0_read8(SCID_REG);
+- for (host->this_id = 0; tmp != 1; tmp >>=1, ++host->this_id);
+-#else
+- host->this_id = 7;
+-#endif
+-
+-/*
+- * Note : we should never encounter a board setup for ID0. So,
+- * if we see ID0, assume that it was uninitialized and set it
+- * to the industry standard 7.
+- */
+- if (!host->this_id) {
+- printk("scsi%d : initiator ID was %d, changing to 7\n",
+- host->host_no, host->this_id);
+- host->this_id = 7;
+- hostdata->this_id_mask = 1 << 7;
+- uninitialized = 1;
+- }
+-
+- if (expected_id == -1 || host->this_id != expected_id)
+- printk("scsi%d : using initiator ID %d\n", host->host_no,
+- host->this_id);
+-
+- /*
+- * Save important registers to allow a soft reset.
+- */
+-
+- /*
+- * CTEST7 controls cache snooping, burst mode, and support for
+- * external differential drivers. This isn't currently used - the
+- * default value may not be optimal anyway.
+- * Even worse, it may never have been set up since reset.
+- */
+- hostdata->saved_ctest7 = NCR53c7x0_read8(CTEST7_REG) & CTEST7_SAVE;
+- revision = (NCR53c7x0_read8(CTEST8_REG) & 0xF0) >> 4;
+- switch (revision) {
+- case 1: revision = 0; break;
+- case 2: revision = 1; break;
+- case 4: revision = 2; break;
+- case 8: revision = 3; break;
+- default: revision = 255; break;
+- }
+- printk("scsi%d: Revision 0x%x\n",host->host_no,revision);
+-
+- if ((revision == 0 || revision == 255) && (hostdata->options & (OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS)))
+- {
+- printk ("scsi%d: Disabling sync working and disconnect/reselect\n",
+- host->host_no);
+- hostdata->options &= ~(OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS);
+- }
+-
+- /*
+- * On NCR53c700 series chips, DCNTL controls the SCSI clock divisor,
+- * on 800 series chips, it allows for a totem-pole IRQ driver.
+- * NOTE saved_dcntl currently overwritten in init function.
+- * The value read here may be garbage anyway, MVME16x board at least
+- * does not initialise chip if kernel arrived via tftp.
+- */
+-
+- hostdata->saved_dcntl = NCR53c7x0_read8(DCNTL_REG);
+-
+- /*
+- * DMODE controls DMA burst length, and on 700 series chips,
+- * 286 mode and bus width
+- * NOTE: On MVME16x, chip may have been reset, so this could be a
+- * power-on/reset default value.
+- */
+- hostdata->saved_dmode = NCR53c7x0_read8(hostdata->dmode);
+-
+- /*
+- * Now that burst length and enabled/disabled status is known,
+- * clue the user in on it.
+- */
+-
+- ccf = clock_to_ccf_710 (expected_clock);
+-
+- for (i = 0; i < 16; ++i)
+- hostdata->cmd_allocated[i] = 0;
+-
+- if (hostdata->init_save_regs)
+- hostdata->init_save_regs (host);
+- if (hostdata->init_fixup)
+- hostdata->init_fixup (host);
+-
+- if (!the_template) {
+- the_template = host->hostt;
+- first_host = host;
+- }
+-
+- /*
+- * Linux SCSI drivers have always been plagued with initialization
+- * problems - some didn't work with the BIOS disabled since they expected
+- * initialization from it, some didn't work when the networking code
+- * was enabled and registers got scrambled, etc.
+- *
+- * To avoid problems like this, in the future, we will do a soft
+- * reset on the SCSI chip, taking it back to a sane state.
+- */
+-
+- hostdata->soft_reset (host);
+-
+-#if 1
+- hostdata->debug_count_limit = -1;
+-#else
+- hostdata->debug_count_limit = 1;
+-#endif
+- hostdata->intrs = -1;
+- hostdata->resets = -1;
+- memcpy ((void *) hostdata->synchronous_want, (void *) sdtr_message,
+- sizeof (hostdata->synchronous_want));
+-
+- NCR53c7x0_driver_init (host);
+-
+- if (request_irq(host->irq, NCR53c7x0_intr, IRQF_SHARED, "53c7xx", host))
+- {
+- printk("scsi%d : IRQ%d not free, detaching\n",
+- host->host_no, host->irq);
+- goto err_unregister;
+- }
+-
+- if ((hostdata->run_tests && hostdata->run_tests(host) == -1) ||
+- (hostdata->options & OPTION_DEBUG_TESTS_ONLY)) {
+- /* XXX Should disable interrupts, etc. here */
+- goto err_free_irq;
+- } else {
+- if (host->io_port) {
+- host->n_io_port = 128;
+- if (!request_region (host->io_port, host->n_io_port, "ncr53c7xx"))
+- goto err_free_irq;
+- }
+- }
+-
+- if (NCR53c7x0_read8 (SBCL_REG) & SBCL_BSY) {
+- printk ("scsi%d : bus wedge, doing SCSI reset\n", host->host_no);
+- hard_reset (host);
+- }
+- return 0;
+-
+- err_free_irq:
+- free_irq(host->irq, NCR53c7x0_intr);
+- err_unregister:
+- scsi_unregister(host);
+- return -1;
+-}
+-
+-/*
+- * Function : int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip,
+- * unsigned long base, int io_port, int irq, int dma, long long options,
+- * int clock);
+- *
+- * Purpose : initializes an NCR53c7,8x0 based on base addresses,
+- * IRQ, and DMA channel.
+- *
+- * Inputs : tpnt - Template for this SCSI adapter, board - board level
+- * product, chip - 710
+- *
+- * Returns : 0 on success, -1 on failure.
+- *
+- */
+-
+-int
+-ncr53c7xx_init (struct scsi_host_template *tpnt, int board, int chip,
+- unsigned long base, int io_port, int irq, int dma,
+- long long options, int clock)
+-{
+- struct Scsi_Host *instance;
+- struct NCR53c7x0_hostdata *hostdata;
+- char chip_str[80];
+- int script_len = 0, dsa_len = 0, size = 0, max_cmd_size = 0,
+- schedule_size = 0, ok = 0;
+- void *tmp;
+- unsigned long page;
+-
+- switch (chip) {
+- case 710:
+- case 770:
+- schedule_size = (tpnt->can_queue + 1) * 8 /* JUMP instruction size */;
+- script_len = NCR53c7xx_script_len;
+- dsa_len = NCR53c7xx_dsa_len;
+- options |= OPTION_INTFLY;
+- sprintf (chip_str, "NCR53c%d", chip);
+- break;
+- default:
+- printk("scsi-ncr53c7xx : unsupported SCSI chip %d\n", chip);
+- return -1;
+- }
+-
+- printk("scsi-ncr53c7xx : %s at memory 0x%lx, io 0x%x, irq %d",
+- chip_str, base, io_port, irq);
+- if (dma == DMA_NONE)
+- printk("\n");
+- else
+- printk(", dma %d\n", dma);
+-
+- if (options & OPTION_DEBUG_PROBE_ONLY) {
+- printk ("scsi-ncr53c7xx : probe only enabled, aborting initialization\n");
+- return -1;
+- }
+-
+- max_cmd_size = sizeof(struct NCR53c7x0_cmd) + dsa_len +
+- /* Size of dynamic part of command structure : */
+- 2 * /* Worst case : we don't know if we need DATA IN or DATA OUT */
+- ( 2 * /* Current instructions per scatter/gather segment */
+- tpnt->sg_tablesize +
+- 3 /* Current startup / termination required per phase */
+- ) *
+- 8 /* Each instruction is eight bytes */;
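+-
+- /*
+- * Worked example (illustrative): with a typical sg_tablesize of 16,
+- * the dynamic part above comes to 2 * (2 * 16 + 3) * 8 = 560 bytes
+- * on top of the fixed structure and DSA.
+- */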
+-
+- /* Allocate fixed part of hostdata, dynamic part to hold appropriate
+- SCSI SCRIPT(tm) plus a single, maximum-sized NCR53c7x0_cmd structure.
+-
+- We need a NCR53c7x0_cmd structure for scan_scsis() when we are
+- not loaded as a module, and when we're loaded as a module, we
+- can't use a non-dynamically allocated structure because modules
+- are vmalloc()'d, which can allow structures to cross page
+- boundaries and breaks our physical/virtual address assumptions
+- for DMA.
+-
+- So, we stick it past the end of our hostdata structure.
+-
+- ASSUMPTION :
+- Regardless of how many simultaneous SCSI commands we allow,
+- the probe code only executes a _single_ instruction at a time,
+- so we only need one here, and don't need to allocate NCR53c7x0_cmd
+- structures for each target until we are no longer in scan_scsis
+- and kmalloc() has become functional (memory_init() happens
+- after all device driver initialization).
+- */
+-
+- size = sizeof(struct NCR53c7x0_hostdata) + script_len +
+- /* Note that alignment will be guaranteed, since we put the command
+- allocated at probe time after the fixed-up SCSI script, which
+- consists of 32 bit words, aligned on a 32 bit boundary. But
+- on a 64bit machine we need 8 byte alignment for hostdata->free, so
+- we add in another 4 bytes to take care of potential misalignment
+- */
+- (sizeof(void *) - sizeof(u32)) + max_cmd_size + schedule_size;
+-
+- page = __get_free_pages(GFP_ATOMIC,1);
+- if(page==0)
+- {
+- printk(KERN_ERR "53c7xx: out of memory.\n");
+- return -ENOMEM;
+- }
+-#ifdef FORCE_DSA_ALIGNMENT
+- /*
+- * 53c710 rev.0 doesn't have an add-with-carry instruction.
+- * Ensure we allocate enough memory to force DSA alignment.
+- */
+- size += 256;
+-#endif
+- /* Size should be < 8K, so we can fit it in two pages. */
+- if (size > 8192) {
+- printk(KERN_ERR "53c7xx: hostdata > 8K\n");
+- return -1;
+- }
+-
+- instance = scsi_register (tpnt, 4);
+- if (!instance)
+- {
+- free_pages(page, 1); /* order-1 allocation, not a single page */
+- return -1;
+- }
+- instance->hostdata[0] = page;
+- memset((void *)instance->hostdata[0], 0, 8192);
+- cache_push(virt_to_phys((void *)(instance->hostdata[0])), 8192);
+- cache_clear(virt_to_phys((void *)(instance->hostdata[0])), 8192);
+- kernel_set_cachemode((void *)instance->hostdata[0], 8192, IOMAP_NOCACHE_SER);
+-
+- /* FIXME : if we ever support an ISA NCR53c7xx based board, we
+- need to check if the chip is running in a 16 bit mode, and if so
+- unregister it if it is past the 16M (0x1000000) mark */
+-
+- hostdata = (struct NCR53c7x0_hostdata *)instance->hostdata[0];
+- hostdata->size = size;
+- hostdata->script_count = script_len / sizeof(u32);
+- hostdata->board = board;
+- hostdata->chip = chip;
+-
+- /*
+- * Being memory mapped is more desirable, since
+- *
+- * - Memory accesses may be faster.
+- *
+- * - The destination and source address spaces are the same for
+- * all instructions, meaning we don't have to twiddle dmode or
+- * any other registers.
+- *
+- * So, we try for memory mapped, and if we don't get it,
+- * we go for port mapped, and that failing we tell the user
+- * it can't work.
+- */
+-
+- if (base) {
+- instance->base = base;
+- /* Check for forced I/O mapping */
+- if (!(options & OPTION_IO_MAPPED)) {
+- options |= OPTION_MEMORY_MAPPED;
+- ok = 1;
+- }
+- } else {
+- options &= ~OPTION_MEMORY_MAPPED;
+- }
+-
+- if (io_port) {
+- instance->io_port = io_port;
+- options |= OPTION_IO_MAPPED;
+- ok = 1;
+- } else {
+- options &= ~OPTION_IO_MAPPED;
+- }
+-
+- if (!ok) {
+- printk ("scsi%d : not initializing, no I/O or memory mapping known \n",
+- instance->host_no);
+- scsi_unregister (instance);
+- return -1;
+- }
+- instance->irq = irq;
+- instance->dma_channel = dma;
+-
+- hostdata->options = options;
+- hostdata->dsa_len = dsa_len;
+- hostdata->max_cmd_size = max_cmd_size;
+- hostdata->num_cmds = 1;
+- hostdata->scsi_clock = clock;
+- /* Initialize single command */
+- tmp = (hostdata->script + hostdata->script_count);
+-#ifdef FORCE_DSA_ALIGNMENT
+- {
+- void *t = ROUNDUP(tmp, void *);
+- if (((u32)t & 0xff) > CmdPageStart)
+- t = (void *)((u32)t + 255);
+- t = (void *)(((u32)t & ~0xff) + CmdPageStart);
+- hostdata->free = t;
+-#if 0
+- printk ("scsi: Registered size increased by 256 to %d\n", size);
+- printk ("scsi: CmdPageStart = 0x%02x\n", CmdPageStart);
+- printk ("scsi: tmp = 0x%08x, hostdata->free set to 0x%08x\n",
+- (u32)tmp, (u32)t);
+-#endif
+- }
+-#else
+- hostdata->free = ROUNDUP(tmp, void *);
+-#endif
+- hostdata->free->real = tmp;
+- hostdata->free->size = max_cmd_size;
+- hostdata->free->free = NULL;
+- hostdata->free->next = NULL;
+- hostdata->extra_allocate = 0;
+-
+- /* Allocate command start code space */
+- hostdata->schedule = (chip == 700 || chip == 70066) ?
+- NULL : (u32 *) ((char *)hostdata->free + max_cmd_size);
+-
+-/*
+- * For diagnostic purposes, we don't really care how fast things blaze.
+- * For profiling, we want to access the 800ns resolution system clock,
+- * using a 'C' call on the host processor.
+- *
+- * Therefore, there's no need for the NCR chip to directly manipulate
+- * this data, and we should put it wherever is most convenient for
+- * Linux.
+- */
+- if (track_events)
+- hostdata->events = (struct NCR53c7x0_event *)
+- vmalloc (sizeof (struct NCR53c7x0_event) * track_events);
+- else
+- hostdata->events = NULL;
+-
+- if (hostdata->events) {
+- memset ((void *) hostdata->events, 0, sizeof(struct NCR53c7x0_event) *
+- track_events);
+- hostdata->event_size = track_events;
+- hostdata->event_index = 0;
+- } else
+- hostdata->event_size = 0;
+-
+- return NCR53c7x0_init(instance);
+-}
+-
+-
+-/*
+- * Function : static void NCR53c7x0_init_fixup (struct Scsi_Host *host)
+- *
+- * Purpose : copy and fixup the SCSI SCRIPTS(tm) code for this device.
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- */
+-
+-static void
+-NCR53c7x0_init_fixup (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned char tmp;
+- int i, ncr_to_memory, memory_to_ncr;
+- u32 base;
+- NCR53c7x0_local_setup(host);
+-
+-
+- /* XXX - NOTE : this code MUST be made endian aware */
+- /* Copy code into buffer that was allocated at detection time. */
+- memcpy ((void *) hostdata->script, (void *) SCRIPT,
+- sizeof(SCRIPT));
+- /* Fixup labels */
+- for (i = 0; i < PATCHES; ++i)
+- hostdata->script[LABELPATCHES[i]] +=
+- virt_to_bus(hostdata->script);
+- /* Fixup addresses of constants that used to be EXTERNAL */
+-
+- patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_abort,
+- virt_to_bus(&(hostdata->NCR53c7xx_msg_abort)));
+- patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_reject,
+- virt_to_bus(&(hostdata->NCR53c7xx_msg_reject)));
+- patch_abs_32 (hostdata->script, 0, NCR53c7xx_zero,
+- virt_to_bus(&(hostdata->NCR53c7xx_zero)));
+- patch_abs_32 (hostdata->script, 0, NCR53c7xx_sink,
+- virt_to_bus(&(hostdata->NCR53c7xx_sink)));
+- patch_abs_32 (hostdata->script, 0, NOP_insn,
+- virt_to_bus(&(hostdata->NOP_insn)));
+- patch_abs_32 (hostdata->script, 0, schedule,
+- virt_to_bus((void *) hostdata->schedule));
+-
+- /* Fixup references to external variables: */
+- for (i = 0; i < EXTERNAL_PATCHES_LEN; ++i)
+- hostdata->script[EXTERNAL_PATCHES[i].offset] +=
+- virt_to_bus(EXTERNAL_PATCHES[i].address);
+-
+- /*
+- * Fixup absolutes set at boot-time.
+- *
+- * All non-code absolute variables prefixed with "dsa_" and "int_"
+- * are constants, and need no fixup provided the assembler has done
+- * it for us (I don't know what the "real" NCR assembler does in
+- * this case, my assembler does the right magic).
+- */
+-
+- patch_abs_rwri_data (hostdata->script, 0, dsa_save_data_pointer,
+- Ent_dsa_code_save_data_pointer - Ent_dsa_zero);
+- patch_abs_rwri_data (hostdata->script, 0, dsa_restore_pointers,
+- Ent_dsa_code_restore_pointers - Ent_dsa_zero);
+- patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect,
+- Ent_dsa_code_check_reselect - Ent_dsa_zero);
+-
+- /*
+- * Just for the hell of it, preserve the settings of
+- * Burst Length and Enable Read Line bits from the DMODE
+- * register. Make sure SCRIPTS start automagically.
+- */
+-
+-#if defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)
+- /* We know better what we want than 16xBug does! */
+- tmp = DMODE_10_BL_8 | DMODE_10_FC2;
+-#else
+- tmp = NCR53c7x0_read8(DMODE_REG_10);
+- tmp &= (DMODE_BL_MASK | DMODE_10_FC2 | DMODE_10_FC1 | DMODE_710_PD |
+- DMODE_710_UO);
+-#endif
+-
+- if (!(hostdata->options & OPTION_MEMORY_MAPPED)) {
+- base = (u32) host->io_port;
+- memory_to_ncr = tmp|DMODE_800_DIOM;
+- ncr_to_memory = tmp|DMODE_800_SIOM;
+- } else {
+- base = virt_to_bus((void *)host->base);
+- memory_to_ncr = ncr_to_memory = tmp;
+- }
+-
+- /* SCRATCHB_REG_10 == SCRATCHA_REG_800, as it happens */
+- patch_abs_32 (hostdata->script, 0, addr_scratch, base + SCRATCHA_REG_800);
+- patch_abs_32 (hostdata->script, 0, addr_temp, base + TEMP_REG);
+- patch_abs_32 (hostdata->script, 0, addr_dsa, base + DSA_REG);
+-
+- /*
+- * I needed some variables in the script to be accessible to
+- * both the NCR chip and the host processor. For these variables,
+- * I made the arbitrary decision to store them directly in the
+- * hostdata structure rather than in the RELATIVE area of the
+- * SCRIPTS.
+- */
+-
+-
+- patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_memory, tmp);
+- patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_ncr, memory_to_ncr);
+- patch_abs_rwri_data (hostdata->script, 0, dmode_ncr_to_memory, ncr_to_memory);
+-
+- patch_abs_32 (hostdata->script, 0, msg_buf,
+- virt_to_bus((void *)&(hostdata->msg_buf)));
+- patch_abs_32 (hostdata->script, 0, reconnect_dsa_head,
+- virt_to_bus((void *)&(hostdata->reconnect_dsa_head)));
+- patch_abs_32 (hostdata->script, 0, addr_reconnect_dsa_head,
+- virt_to_bus((void *)&(hostdata->addr_reconnect_dsa_head)));
+- patch_abs_32 (hostdata->script, 0, reselected_identify,
+- virt_to_bus((void *)&(hostdata->reselected_identify)));
+-/* reselected_tag is currently unused */
+-#if 0
+- patch_abs_32 (hostdata->script, 0, reselected_tag,
+- virt_to_bus((void *)&(hostdata->reselected_tag)));
+-#endif
+-
+- patch_abs_32 (hostdata->script, 0, test_dest,
+- virt_to_bus((void*)&hostdata->test_dest));
+- patch_abs_32 (hostdata->script, 0, test_src,
+- virt_to_bus(&hostdata->test_source));
+- patch_abs_32 (hostdata->script, 0, saved_dsa,
+- virt_to_bus((void *)&hostdata->saved2_dsa));
+- patch_abs_32 (hostdata->script, 0, emulfly,
+- virt_to_bus((void *)&hostdata->emulated_intfly));
+-
+- patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect,
+- (unsigned char)(Ent_dsa_code_check_reselect - Ent_dsa_zero));
+-
+-/* These are for event logging; the ncr_event enum contains the
+- actual interrupt numbers. */
+-#ifdef A_int_EVENT_SELECT
+- patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT, (u32) EVENT_SELECT);
+-#endif
+-#ifdef A_int_EVENT_DISCONNECT
+- patch_abs_32 (hostdata->script, 0, int_EVENT_DISCONNECT, (u32) EVENT_DISCONNECT);
+-#endif
+-#ifdef A_int_EVENT_RESELECT
+- patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT, (u32) EVENT_RESELECT);
+-#endif
+-#ifdef A_int_EVENT_COMPLETE
+- patch_abs_32 (hostdata->script, 0, int_EVENT_COMPLETE, (u32) EVENT_COMPLETE);
+-#endif
+-#ifdef A_int_EVENT_IDLE
+- patch_abs_32 (hostdata->script, 0, int_EVENT_IDLE, (u32) EVENT_IDLE);
+-#endif
+-#ifdef A_int_EVENT_SELECT_FAILED
+- patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT_FAILED,
+- (u32) EVENT_SELECT_FAILED);
+-#endif
+-#ifdef A_int_EVENT_BEFORE_SELECT
+- patch_abs_32 (hostdata->script, 0, int_EVENT_BEFORE_SELECT,
+- (u32) EVENT_BEFORE_SELECT);
+-#endif
+-#ifdef A_int_EVENT_RESELECT_FAILED
+- patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT_FAILED,
+- (u32) EVENT_RESELECT_FAILED);
+-#endif
+-
+- /*
+- * Make sure the NCR and Linux code agree on the location of
+- * certain fields.
+- */
+-
+- hostdata->E_accept_message = Ent_accept_message;
+- hostdata->E_command_complete = Ent_command_complete;
+- hostdata->E_cmdout_cmdout = Ent_cmdout_cmdout;
+- hostdata->E_data_transfer = Ent_data_transfer;
+- hostdata->E_debug_break = Ent_debug_break;
+- hostdata->E_dsa_code_template = Ent_dsa_code_template;
+- hostdata->E_dsa_code_template_end = Ent_dsa_code_template_end;
+- hostdata->E_end_data_transfer = Ent_end_data_transfer;
+- hostdata->E_initiator_abort = Ent_initiator_abort;
+- hostdata->E_msg_in = Ent_msg_in;
+- hostdata->E_other_transfer = Ent_other_transfer;
+- hostdata->E_other_in = Ent_other_in;
+- hostdata->E_other_out = Ent_other_out;
+- hostdata->E_reject_message = Ent_reject_message;
+- hostdata->E_respond_message = Ent_respond_message;
+- hostdata->E_select = Ent_select;
+- hostdata->E_select_msgout = Ent_select_msgout;
+- hostdata->E_target_abort = Ent_target_abort;
+-#ifdef Ent_test_0
+- hostdata->E_test_0 = Ent_test_0;
+-#endif
+- hostdata->E_test_1 = Ent_test_1;
+- hostdata->E_test_2 = Ent_test_2;
+-#ifdef Ent_test_3
+- hostdata->E_test_3 = Ent_test_3;
+-#endif
+- hostdata->E_wait_reselect = Ent_wait_reselect;
+- hostdata->E_dsa_code_begin = Ent_dsa_code_begin;
+-
+- hostdata->dsa_cmdout = A_dsa_cmdout;
+- hostdata->dsa_cmnd = A_dsa_cmnd;
+- hostdata->dsa_datain = A_dsa_datain;
+- hostdata->dsa_dataout = A_dsa_dataout;
+- hostdata->dsa_end = A_dsa_end;
+- hostdata->dsa_msgin = A_dsa_msgin;
+- hostdata->dsa_msgout = A_dsa_msgout;
+- hostdata->dsa_msgout_other = A_dsa_msgout_other;
+- hostdata->dsa_next = A_dsa_next;
+- hostdata->dsa_select = A_dsa_select;
+- hostdata->dsa_start = Ent_dsa_code_template - Ent_dsa_zero;
+- hostdata->dsa_status = A_dsa_status;
+- hostdata->dsa_jump_dest = Ent_dsa_code_fix_jump - Ent_dsa_zero +
+- 8 /* destination operand */;
+-
+- /* sanity check */
+- if (A_dsa_fields_start != Ent_dsa_code_template_end -
+- Ent_dsa_zero)
+- printk("scsi%d : NCR dsa_fields start is %d not %d\n",
+- host->host_no, A_dsa_fields_start, Ent_dsa_code_template_end -
+- Ent_dsa_zero);
+-
+- printk("scsi%d : NCR code relocated to 0x%lx (virt 0x%p)\n", host->host_no,
+- virt_to_bus(hostdata->script), hostdata->script);
+-}
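+-
+-/*
+- * A minimal sketch (illustrative, not part of the original driver;
+- * relocate_script_labels is a hypothetical name) of the relocation
+- * idiom used by the label fixup loop above: each entry of the
+- * assembler-generated patch table names a script word holding a
+- * script-relative address, which becomes absolute once the script's
+- * bus address is added in.
+- */
+-static inline void
+-relocate_script_labels (u32 *script, const u32 *patches, int npatches)
+-{
+-    int i;
+-    u32 base = (u32) virt_to_bus (script);
+-
+-    for (i = 0; i < npatches; ++i)
+-	script[patches[i]] += base;	/* relative -> bus-absolute */
+-}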
+-
+-/*
+- * Function : static int NCR53c7xx_run_tests (struct Scsi_Host *host)
+- *
+- * Purpose : run various verification tests on the NCR chip,
+- * including interrupt generation, and proper bus mastering
+- * operation.
+- *
+- * Inputs : host - a properly initialized Scsi_Host structure
+- *
+- * Preconditions : the NCR chip must be in a halted state.
+- *
+- * Returns : 0 if all tests were successful, -1 on error.
+- *
+- */
+-
+-static int
+-NCR53c7xx_run_tests (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned long timeout;
+- u32 start;
+- int failed, i;
+- unsigned long flags;
+- NCR53c7x0_local_setup(host);
+-
+- /* The NCR chip _must_ be idle to run the test scripts */
+-
+- local_irq_save(flags);
+- if (!hostdata->idle) {
+- printk ("scsi%d : chip not idle, aborting tests\n", host->host_no);
+- local_irq_restore(flags);
+- return -1;
+- }
+-
+- /*
+- * Check for functional interrupts, this could work as an
+- * autoprobe routine.
+- */
+-
+- if ((hostdata->options & OPTION_DEBUG_TEST1) &&
+- hostdata->state != STATE_DISABLED) {
+- hostdata->idle = 0;
+- hostdata->test_running = 1;
+- hostdata->test_completed = -1;
+- hostdata->test_dest = 0;
+- hostdata->test_source = 0xdeadbeef;
+- start = virt_to_bus (hostdata->script) + hostdata->E_test_1;
+- hostdata->state = STATE_RUNNING;
+- printk ("scsi%d : test 1", host->host_no);
+- NCR53c7x0_write32 (DSP_REG, start);
+- if (hostdata->options & OPTION_DEBUG_TRACE)
+- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM |
+- DCNTL_STD);
+- printk (" started\n");
+- local_irq_restore(flags);
+-
+- /*
+- * This is currently a .5 second timeout, since (in theory) no slow
+- * board will take that long. In practice, we've seen one
+- * Pentium which occasionally fails with this, but works with
+- * 10 times as much.
+- */
+-
+- timeout = jiffies + 5 * HZ / 10;
+- while ((hostdata->test_completed == -1) && time_before(jiffies, timeout))
+- barrier();
+-
+- failed = 1;
+- if (hostdata->test_completed == -1)
+- printk ("scsi%d : driver test 1 timed out%s\n",host->host_no ,
+- (hostdata->test_dest == 0xdeadbeef) ?
+- " due to lost interrupt.\n"
+- " Please verify that the correct IRQ is being used for your board,\n"
+- : "");
+- else if (hostdata->test_completed != 1)
+- printk ("scsi%d : test 1 bad interrupt value (%d)\n",
+- host->host_no, hostdata->test_completed);
+- else
+- failed = (hostdata->test_dest != 0xdeadbeef);
+-
+- if (hostdata->test_dest != 0xdeadbeef) {
+- printk ("scsi%d : driver test 1 read 0x%x instead of 0xdeadbeef indicating a\n"
+- " probable cache invalidation problem. Please configure caching\n"
+- " as write-through or disabled\n",
+- host->host_no, hostdata->test_dest);
+- }
+-
+- if (failed) {
+- printk ("scsi%d : DSP = 0x%p (script at 0x%p, start at 0x%x)\n",
+- host->host_no, bus_to_virt(NCR53c7x0_read32(DSP_REG)),
+- hostdata->script, start);
+- printk ("scsi%d : DSPS = 0x%x\n", host->host_no,
+- NCR53c7x0_read32(DSPS_REG));
+- local_irq_restore(flags);
+- return -1;
+- }
+- hostdata->test_running = 0;
+- }
+-
+- if ((hostdata->options & OPTION_DEBUG_TEST2) &&
+- hostdata->state != STATE_DISABLED) {
+- u32 dsa[48];
+- unsigned char identify = IDENTIFY(0, 0);
+- unsigned char cmd[6];
+- unsigned char data[36];
+- unsigned char status = 0xff;
+- unsigned char msg = 0xff;
+-
+- cmd[0] = INQUIRY;
+- cmd[1] = cmd[2] = cmd[3] = cmd[5] = 0;
+- cmd[4] = sizeof(data);
+-
+- dsa[2] = 1;
+- dsa[3] = virt_to_bus(&identify);
+- dsa[4] = 6;
+- dsa[5] = virt_to_bus(&cmd);
+- dsa[6] = sizeof(data);
+- dsa[7] = virt_to_bus(&data);
+- dsa[8] = 1;
+- dsa[9] = virt_to_bus(&status);
+- dsa[10] = 1;
+- dsa[11] = virt_to_bus(&msg);
+-
+- for (i = 0; i < 6; ++i) {
+-#ifdef VALID_IDS
+- if (!hostdata->valid_ids[i])
+- continue;
+-#endif
+- local_irq_disable();
+- if (!hostdata->idle) {
+- printk ("scsi%d : chip not idle, aborting tests\n", host->host_no);
+- local_irq_restore(flags);
+- return -1;
+- }
+-
+- /* 710: bit mapped scsi ID, async */
+- dsa[0] = (1 << i) << 16;
+- hostdata->idle = 0;
+- hostdata->test_running = 2;
+- hostdata->test_completed = -1;
+- start = virt_to_bus(hostdata->script) + hostdata->E_test_2;
+- hostdata->state = STATE_RUNNING;
+- NCR53c7x0_write32 (DSA_REG, virt_to_bus(dsa));
+- NCR53c7x0_write32 (DSP_REG, start);
+- if (hostdata->options & OPTION_DEBUG_TRACE)
+- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl |
+- DCNTL_SSM | DCNTL_STD);
+- local_irq_restore(flags);
+-
+- timeout = jiffies + 5 * HZ; /* arbitrary */
+- while ((hostdata->test_completed == -1) && time_before(jiffies, timeout))
+- barrier();
+-
+- NCR53c7x0_write32 (DSA_REG, 0);
+-
+- if (hostdata->test_completed == 2) {
+- data[35] = 0;
+- printk ("scsi%d : test 2 INQUIRY to target %d, lun 0 : %s\n",
+- host->host_no, i, data + 8);
+- printk ("scsi%d : status ", host->host_no);
+- scsi_print_status (status);
+- printk ("\nscsi%d : message ", host->host_no);
+- spi_print_msg(&msg);
+- printk ("\n");
+- } else if (hostdata->test_completed == 3) {
+- printk("scsi%d : test 2 no connection with target %d\n",
+- host->host_no, i);
+- if (!hostdata->idle) {
+- printk("scsi%d : not idle\n", host->host_no);
+- local_irq_restore(flags);
+- return -1;
+- }
+- } else if (hostdata->test_completed == -1) {
+- printk ("scsi%d : test 2 timed out\n", host->host_no);
+- local_irq_restore(flags);
+- return -1;
+- }
+- hostdata->test_running = 0;
+- }
+- }
+-
+- local_irq_restore(flags);
+- return 0;
+-}
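+-
+-/*
+- * Illustrative annotation (matching the test 2 setup above): the
+- * hand-built dsa[] is a table of count/address pairs in the layout
+- * the test script expects -
+- *
+- *     dsa[0]            select parameters (bit-mapped ID << 16, async)
+- *     dsa[2],  dsa[3]   1 byte,   IDENTIFY message out
+- *     dsa[4],  dsa[5]   6 bytes,  INQUIRY command
+- *     dsa[6],  dsa[7]   36 bytes, INQUIRY data in
+- *     dsa[8],  dsa[9]   1 byte,   status byte in
+- *     dsa[10], dsa[11]  1 byte,   message byte in
+- */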
+-
+-/*
+- * Function : static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : copy the NCR53c7x0 dsa structure into cmd's dsa buffer,
+- * performing all necessary relocation.
+- *
+- * Inputs : cmd, an NCR53c7x0_cmd structure with a dsa area large
+- * enough to hold the NCR53c7x0 dsa.
+- */
+-
+-static void
+-NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) {
+- Scsi_Cmnd *c = cmd->cmd;
+- struct Scsi_Host *host = c->device->host;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int i;
+-
+- memcpy (cmd->dsa, hostdata->script + (hostdata->E_dsa_code_template / 4),
+- hostdata->E_dsa_code_template_end - hostdata->E_dsa_code_template);
+-
+- /*
+- * Note : within the NCR 'C' code, dsa points to the _start_
+- * of the DSA structure, and _not_ the offset of dsa_zero within
+- * that structure used to facilitate shorter signed offsets
+- * for the 8 bit ALU.
+- *
+- * The implications of this are that
+- *
+- * - 32 bit A_dsa_* absolute values require an additional
+- * dsa_zero added to their value to be correct, since they are
+- * relative to dsa_zero which is in essentially a separate
+- * space from the code symbols.
+- *
+- * - All other symbols require no special treatment.
+- */
+-
+- patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_lun, c->device->lun);
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_addr_next, virt_to_bus(&cmd->dsa_next_addr));
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_next, virt_to_bus(cmd->dsa) + Ent_dsa_zero -
+- Ent_dsa_code_template + A_dsa_next);
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_sync, virt_to_bus((void *)hostdata->sync[c->device->id].script));
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_sscf_710, virt_to_bus((void *)&hostdata->sync[c->device->id].sscf_710));
+- patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_target, 1 << c->device->id);
+- /* XXX - new pointer stuff */
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_addr_saved_pointer, virt_to_bus(&cmd->saved_data_pointer));
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_addr_saved_residual, virt_to_bus(&cmd->saved_residual));
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_addr_residual, virt_to_bus(&cmd->residual));
+-
+- /* XXX - new start stuff */
+-
+- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+- dsa_temp_addr_dsa_value, virt_to_bus(&cmd->dsa_addr));
+-}
+-
+-/*
+- * Function : run_process_issue_queue (void)
+- *
+- * Purpose : ensure that the coroutine is running and will process our
+- * request. process_issue_queue_running is checked/set here (in an
+- * inline function) rather than in process_issue_queue itself to reduce
+- * the chances of stack overflow.
+- *
+- */
+-
+-static volatile int process_issue_queue_running = 0;
+-
+-static __inline__ void
+-run_process_issue_queue(void) {
+- unsigned long flags;
+- local_irq_save(flags);
+- if (!process_issue_queue_running) {
+- process_issue_queue_running = 1;
+- process_issue_queue(flags);
+- /*
+- * process_issue_queue_running is cleared in process_issue_queue
+- * once it can't do more work, and process_issue_queue exits with
+- * interrupts disabled.
+- */
+- }
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Function : static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int
+- * result)
+- *
+- * Purpose : mark SCSI command as finished, OR'ing the host portion
+- * of the result word into the result field of the corresponding
+- * Scsi_Cmnd structure, and removing it from the internal queues.
+- *
+- * Inputs : cmd - command, result - entire result field
+- *
+- * Preconditions : the NCR chip should be in a halted state when
+- * abnormal_finished is run, since it modifies structures which
+- * the NCR expects to have exclusive access to.
+- */
+-
+-static void
+-abnormal_finished (struct NCR53c7x0_cmd *cmd, int result) {
+- Scsi_Cmnd *c = cmd->cmd;
+- struct Scsi_Host *host = c->device->host;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned long flags;
+- int left, found;
+- volatile struct NCR53c7x0_cmd * linux_search;
+- volatile struct NCR53c7x0_cmd * volatile *linux_prev;
+- volatile u32 *ncr_prev, *ncrcurrent, ncr_search;
+-
+-#if 0
+- printk ("scsi%d: abnormal finished\n", host->host_no);
+-#endif
+-
+- local_irq_save(flags);
+- found = 0;
+- /*
+- * Traverse the NCR issue array until we find a match or run out
+- * of instructions. Instructions in the NCR issue array are
+- * either JUMP or NOP instructions, which are 2 words in length.
+- */
+-
+-
+- for (found = 0, left = host->can_queue, ncrcurrent = hostdata->schedule;
+- left > 0; --left, ncrcurrent += 2)
+- {
+- if (issue_to_cmd (host, hostdata, (u32 *) ncrcurrent) == cmd)
+- {
+- ncrcurrent[0] = hostdata->NOP_insn;
+- ncrcurrent[1] = 0xdeadbeef;
+- ++found;
+- break;
+- }
+- }
+-
+- /*
+- * Traverse the NCR reconnect list of DSA structures until we find
+- * a pointer to this dsa or have found too many command structures.
+- * We let prev point at the next field of the previous element or
+- * head of the list, so we don't do anything different for removing
+- * the head element.
+- */
+-
+- for (left = host->can_queue,
+- ncr_search = hostdata->reconnect_dsa_head,
+- ncr_prev = &hostdata->reconnect_dsa_head;
+- left >= 0 && ncr_search &&
+- ((char*)bus_to_virt(ncr_search) + hostdata->dsa_start)
+- != (char *) cmd->dsa;
+- ncr_prev = (u32*) ((char*)bus_to_virt(ncr_search) +
+- hostdata->dsa_next), ncr_search = *ncr_prev, --left);
+-
+- if (left < 0)
+- printk("scsi%d: loop detected in ncr reconncect list\n",
+- host->host_no);
+- else if (ncr_search) {
+- if (found)
+- printk("scsi%d: scsi %ld in ncr issue array and reconnect lists\n",
+- host->host_no, c->pid);
+- else {
+- volatile u32 * next = (u32 *)
+- ((char *)bus_to_virt(ncr_search) + hostdata->dsa_next);
+- *ncr_prev = *next;
+-/* If we're at the tail end of the issue queue, update that pointer too. */
+- found = 1;
+- }
+- }
+-
+- /*
+- * Traverse the host running list until we find this command or discover
+- * we have too many elements, pointing linux_prev at the next field of the
+- * previous element or head of the list, with the search starting at this element.
+- */
+-
+- for (left = host->can_queue, linux_search = hostdata->running_list,
+- linux_prev = &hostdata->running_list;
+- left >= 0 && linux_search && linux_search != cmd;
+- linux_prev = &(linux_search->next),
+- linux_search = linux_search->next, --left);
+-
+- if (left < 0)
+- printk ("scsi%d: loop detected in host running list for scsi pid %ld\n",
+- host->host_no, c->pid);
+- else if (linux_search) {
+- *linux_prev = linux_search->next;
+- --hostdata->busy[c->device->id][c->device->lun];
+- }
+-
+- /* Return the NCR command structure to the free list */
+- cmd->next = hostdata->free;
+- hostdata->free = cmd;
+- c->host_scribble = NULL;
+-
+- /* And return */
+- c->result = result;
+- c->scsi_done(c);
+-
+- local_irq_restore(flags);
+- run_process_issue_queue();
+-}
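+-
+-/*
+- * The three traversals above all use the indirect-previous idiom: the
+- * "prev" cursor points at a next field (or at the list head itself)
+- * rather than at the previous element, so unlinking the head needs no
+- * special case.  A minimal generic sketch (illustrative, volatile
+- * qualifiers omitted; unlink_cmd is a hypothetical name, not driver
+- * code):
+- */
+-static inline void
+-unlink_cmd (struct NCR53c7x0_cmd **head, struct NCR53c7x0_cmd *cmd)
+-{
+-    struct NCR53c7x0_cmd **prev;
+-
+-    for (prev = head; *prev && *prev != cmd; prev = &(*prev)->next)
+-	;				/* step to the next "next" field */
+-    if (*prev)
+-	*prev = cmd->next;		/* splices out the head, too */
+-}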
+-
+-/*
+- * Function : static void intr_break (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : Handler for breakpoint interrupts from a SCSI script
+- *
+- * Inputs : host - pointer to this host adapter's structure,
+- * cmd - pointer to the command (if any) dsa was pointing
+- * to.
+- *
+- */
+-
+-static void
+-intr_break (struct Scsi_Host *host, struct
+- NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_break *bp;
+-#if 0
+- Scsi_Cmnd *c = cmd ? cmd->cmd : NULL;
+-#endif
+- u32 *dsp;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned long flags;
+- NCR53c7x0_local_setup(host);
+-
+- /*
+- * Find the break point corresponding to this address, and
+- * dump the appropriate debugging information to standard
+- * output.
+- */
+- local_irq_save(flags);
+- dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG));
+- for (bp = hostdata->breakpoints; bp && bp->address != dsp;
+- bp = bp->next);
+- if (!bp)
+- panic("scsi%d : break point interrupt from %p with no breakpoint!",
+- host->host_no, dsp);
+-
+- /*
+- * Configure the NCR chip for manual start mode, so that we can
+- * point the DSP register at the instruction that follows the
+- * INT int_debug_break instruction.
+- */
+-
+- NCR53c7x0_write8 (hostdata->dmode,
+- NCR53c7x0_read8(hostdata->dmode)|DMODE_MAN);
+-
+- /*
+- * And update the DSP register, using the size of the old
+- * instruction in bytes.
+- */
+-
+- local_irq_restore(flags);
+-}
+-/*
+- * Function : static void print_synchronous (const char *prefix,
+- * const unsigned char *msg)
+- *
+- * Purpose : print a pretty, user and machine parsable representation
+- * of an SDTR message, including the "real" parameters and data
+- * clock, so we can tell the transfer rate at a glance.
+- *
+- * Inputs : prefix - text to prepend, msg - SDTR message (5 bytes)
+- */
+-
+-static void
+-print_synchronous (const char *prefix, const unsigned char *msg) {
+- if (msg[4]) {
+- int Hz = 1000000000 / (msg[3] * 4);
+- int integer = Hz / 1000000;
+- int fraction = (Hz - (integer * 1000000)) / 10000;
+- printk ("%speriod %dns offset %d %d.%02dMHz %s SCSI%s\n",
+- prefix, (int) msg[3] * 4, (int) msg[4], integer, fraction,
+- (((msg[3] * 4) < 200) ? "FAST" : "synchronous"),
+- (((msg[3] * 4) < 200) ? "-II" : ""));
+- } else
+- printk ("%sasynchronous SCSI\n", prefix);
+-}
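+-
+-/*
+- * Worked example (editor's illustration, not output captured from
+- * hardware): for msg[3] = 25 (period 25 * 4 = 100ns) and msg[4] = 8,
+- * Hz = 1000000000 / 100 = 10000000, so integer = 10 and fraction = 0,
+- * and the function prints, after the caller-supplied prefix,
+- *
+- * period 100ns offset 8 10.00MHz FAST SCSI-II
+- */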
+-
+-/*
+- * Function : static void set_synchronous (struct Scsi_Host *host,
+- * int target, int sxfer, int scntl3, int now_connected)
+- *
+- * Purpose : reprogram transfers between the selected SCSI initiator and
+- * target with the given register values: in the indirect
+- * select operand, the reselection script, and the chip registers.
+- *
+- * Inputs : host - NCR53c7,8xx SCSI host, target - numeric SCSI target id,
+- * sxfer and scntl3 - NCR registers. now_connected - if non-zero,
+- * we should reprogram the registers now too.
+- *
+- * NOTE: For 53c710, scntl3 is actually used for SCF bits from
+- * SBCL, as we don't have a SCNTL3.
+- */
+-
+-static void
+-set_synchronous (struct Scsi_Host *host, int target, int sxfer, int scntl3,
+- int now_connected) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- u32 *script;
+- NCR53c7x0_local_setup(host);
+-
+- /* These are eight bit registers */
+- sxfer &= 0xff;
+- scntl3 &= 0xff;
+-
+- hostdata->sync[target].sxfer_sanity = sxfer;
+- hostdata->sync[target].scntl3_sanity = scntl3;
+-
+-/*
+- * HARD CODED : synchronous script is EIGHT words long. This
+- * must agree with 53c7.8xx.h
+- */
+-
+- if ((hostdata->chip != 700) && (hostdata->chip != 70066)) {
+- hostdata->sync[target].select_indirect = (1 << target) << 16 |
+- (sxfer << 8);
+- hostdata->sync[target].sscf_710 = scntl3;
+-
+- script = (u32 *) hostdata->sync[target].script;
+-
+- /* XXX - add NCR53c7x0 code to reprogram SCF bits if we want to */
+- script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY |
+- DCMD_RWRI_OP_MOVE) << 24) |
+- (SBCL_REG << 16) | (scntl3 << 8);
+- script[1] = 0;
+- script += 2;
+-
+- script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY |
+- DCMD_RWRI_OP_MOVE) << 24) |
+- (SXFER_REG << 16) | (sxfer << 8);
+- script[1] = 0;
+- script += 2;
+-
+-#ifdef DEBUG_SYNC_INTR
+- if (hostdata->options & OPTION_DEBUG_DISCONNECT) {
+- script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_INT) << 24) | DBC_TCI_TRUE;
+- script[1] = DEBUG_SYNC_INTR;
+- script += 2;
+- }
+-#endif
+-
+- script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_RETURN) << 24) | DBC_TCI_TRUE;
+- script[1] = 0;
+- script += 2;
+- }
+-
+- if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS)
+- printk ("scsi%d : target %d sync parameters are sxfer=0x%x, scntl3=0x%x\n",
+- host->host_no, target, sxfer, scntl3);
+-
+- if (now_connected) {
+- NCR53c7x0_write8(SBCL_REG, scntl3);
+- NCR53c7x0_write8(SXFER_REG, sxfer);
+- }
+-}
+-
+-
+-/*
+- * Function : static void asynchronous (struct Scsi_Host *host, int target)
+- *
+- * Purpose : reprogram transfers between the selected SCSI host adapter and target
+- * (assumed to be currently connected) for asynchronous transfers.
+- *
+- * Inputs : host - SCSI host structure, target - numeric target ID.
+- *
+- * Preconditions : the NCR chip should be in one of the halted states
+- */
+-
+-static void
+-asynchronous (struct Scsi_Host *host, int target) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- NCR53c7x0_local_setup(host);
+- set_synchronous (host, target, /* no offset */ 0, hostdata->saved_scntl3,
+- 1);
+- printk ("scsi%d : setting target %d to asynchronous SCSI\n",
+- host->host_no, target);
+-}
+-
+-/*
+- * XXX - do we want to go out of our way (ie, add extra code to selection
+- * in the NCR53c710/NCR53c720 script) to reprogram the synchronous
+- * conversion bits, or can we be content in just setting the
+- * sxfer bits? I chose to do so [richard@sleepie.demon.co.uk]
+- */
+-
+-/* Table for NCR53c8xx synchronous values */
+-
+-/* This table is also correct for the 710, allowing that scf=4 is equivalent
+- * to SSCF=0 (ie use DCNTL, divide by 3) for a 50.01-66.00MHz clock.
+- * For any other clock values, we cannot use entries with SCF values of
+- * 4. I guess that for a 66MHz clock, the slowest rate it will set is 2MHz,
+- * and for a 50MHz clock, the slowest will be 2.27MHz. Should check
+- * that a device doesn't try to negotiate sync below these limits!
+- */
+-
+-static const struct {
+- int div; /* Total clock divisor * 10 */
+- unsigned char scf; /* SCF value (sync clock conversion factor) */
+- unsigned char tp; /* 4 + tp = xferp divisor */
+-} syncs[] = {
+-/* div scf tp div scf tp div scf tp */
+- { 40, 1, 0}, { 50, 1, 1}, { 60, 1, 2},
+- { 70, 1, 3}, { 75, 2, 1}, { 80, 1, 4},
+- { 90, 1, 5}, { 100, 1, 6}, { 105, 2, 3},
+- { 110, 1, 7}, { 120, 2, 4}, { 135, 2, 5},
+- { 140, 3, 3}, { 150, 2, 6}, { 160, 3, 4},
+- { 165, 2, 7}, { 180, 3, 5}, { 200, 3, 6},
+- { 210, 4, 3}, { 220, 3, 7}, { 240, 4, 4},
+- { 270, 4, 5}, { 300, 4, 6}, { 330, 4, 7}
+-};
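+-
+-/*
+- * Worked example (editor's illustration): with a 50MHz SCSI clock and a
+- * requested period of 200ns, synchronous() below computes desire =
+- * 1000000000 / 200 = 5000000 and divisor = (50000000 * 10) / 5000000 = 100,
+- * which selects the { 100, 1, 6 } entry: scf = 1, tp = 6, giving a
+- * transfer period divisor of 4 + 6 = 10.
+- */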
+-
+-/*
+- * Function : static void synchronous (struct Scsi_Host *host, int target,
+- * char *msg)
+- *
+- * Purpose : reprogram transfers between the selected SCSI initiator and
+- * target for synchronous SCSI transfers such that the synchronous
+- * offset is less than that requested and period at least as long
+- * as that requested. Also modify *msg such that it contains
+- * an appropriate response.
+- *
+- * Inputs : host - NCR53c7,8xx SCSI host, target - number SCSI target id,
+- * msg - synchronous transfer request.
+- */
+-
+-
+-static void
+-synchronous (struct Scsi_Host *host, int target, char *msg) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int desire, divisor, i, limit;
+- unsigned char scntl3, sxfer;
+-/* The diagnostic message fits on one line, even with max. width integers */
+- char buf[80];
+-
+-/* Desired transfer clock in Hz */
+- desire = 1000000000L / (msg[3] * 4);
+-/* Scale the available SCSI clock by 10 so we get tenths */
+- divisor = (hostdata->scsi_clock * 10) / desire;
+-
+-/* NCR chips can handle at most an offset of 8 */
+- if (msg[4] > 8)
+- msg[4] = 8;
+-
+- if (hostdata->options & OPTION_DEBUG_SDTR)
+- printk("scsi%d : optimal synchronous divisor of %d.%01d\n",
+- host->host_no, divisor / 10, divisor % 10);
+-
+- limit = ARRAY_SIZE(syncs) - 1;
+- for (i = 0; (i < limit) && (divisor > syncs[i].div); ++i);
+-
+- if (hostdata->options & OPTION_DEBUG_SDTR)
+- printk("scsi%d : selected synchronous divisor of %d.%01d\n",
+- host->host_no, syncs[i].div / 10, syncs[i].div % 10);
+-
+- msg[3] = ((1000000000L / hostdata->scsi_clock) * syncs[i].div / 10 / 4);
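+- /* Editor's note: e.g. with a 50MHz clock and the { 100, 1, 6 } entry,
+- msg[3] = 20 * 100 / 10 / 4 = 50, i.e. a 200ns period is reported. */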
+-
+- if (hostdata->options & OPTION_DEBUG_SDTR)
+- printk("scsi%d : selected synchronous period of %dns\n", host->host_no,
+- msg[3] * 4);
+-
+- scntl3 = syncs[i].scf;
+- sxfer = (msg[4] << SXFER_MO_SHIFT) | (syncs[i].tp << 4);
+- if (hostdata->options & OPTION_DEBUG_SDTR)
+- printk ("scsi%d : sxfer=0x%x scntl3=0x%x\n",
+- host->host_no, (int) sxfer, (int) scntl3);
+- set_synchronous (host, target, sxfer, scntl3, 1);
+- sprintf (buf, "scsi%d : setting target %d to ", host->host_no, target);
+- print_synchronous (buf, msg);
+-}
+-
+-/*
+- * Function : static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : Handler for INT generated instructions for the
+- * NCR53c810/820 SCSI SCRIPT
+- *
+- * Inputs : host - pointer to this host adapter's structure,
+- * cmd - pointer to the command (if any) dsa was pointing
+- * to.
+- *
+- */
+-
+-static int
+-NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct
+- NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- int print;
+- Scsi_Cmnd *c = cmd ? cmd->cmd : NULL;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- u32 dsps,*dsp; /* Argument of the INT instruction */
+-
+- NCR53c7x0_local_setup(host);
+- dsps = NCR53c7x0_read32(DSPS_REG);
+- dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-
+- /* RGH 150597: Frig. Commands which fail with Check Condition are
+- * Flagged as successful - hack dsps to indicate check condition */
+-#if 0
+- /* RGH 200597: Need to disable for BVME6000, as it gets Check Conditions
+- * and then dies. Seems to handle Check Condition at startup, but
+- * not mid kernel build. */
+- if (dsps == A_int_norm_emulateintfly && cmd && cmd->result == 2)
+- dsps = A_int_err_check_condition;
+-#endif
+-
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : DSPS = 0x%x\n", host->host_no, dsps);
+-
+- switch (dsps) {
+- case A_int_msg_1:
+- print = 1;
+- switch (hostdata->msg_buf[0]) {
+- /*
+- * Unless we've initiated synchronous negotiation, I don't
+- * think that this should happen.
+- */
+- case MESSAGE_REJECT:
+- hostdata->dsp = hostdata->script + hostdata->E_accept_message /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- if (cmd && (cmd->flags & CMD_FLAG_SDTR)) {
+- printk ("scsi%d : target %d rejected SDTR\n", host->host_no,
+- c->device->id);
+- cmd->flags &= ~CMD_FLAG_SDTR;
+- asynchronous (host, c->device->id);
+- print = 0;
+- }
+- break;
+- case INITIATE_RECOVERY:
+- printk ("scsi%d : extended contingent allegiance not supported yet, rejecting\n",
+- host->host_no);
+- /* Reject the message, as in the default case below */
+- hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- break;
+- default:
+- printk ("scsi%d : unsupported message, rejecting\n",
+- host->host_no);
+- hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- }
+- if (print) {
+- printk ("scsi%d : received message", host->host_no);
+- if (c)
+- printk (" from target %d lun %d ", c->device->id, c->device->lun);
+- spi_print_msg((unsigned char *) hostdata->msg_buf);
+- printk("\n");
+- }
+-
+- return SPECIFIC_INT_NOTHING;
+-
+-
+- case A_int_msg_sdtr:
+-/*
+- * At this point, hostdata->msg_buf contains
+- * 0 EXTENDED MESSAGE
+- * 1 length
+- * 2 SDTR
+- * 3 period * 4ns
+- * 4 offset
+- */
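+-/*
+- * Editor's illustration (values assumed, per the SCSI-2 SDTR format):
+- * a target requesting a 100ns period at offset 8 would leave
+- * 0x01 0x03 0x01 0x19 0x08 in hostdata->msg_buf, i.e. EXTENDED MESSAGE,
+- * length 3, SDTR, period 0x19 = 25 (25 * 4 = 100ns), offset 8.
+- */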
+-
+- if (cmd) {
+- char buf[80];
+- sprintf (buf, "scsi%d : target %d %s ", host->host_no, c->device->id,
+- (cmd->flags & CMD_FLAG_SDTR) ? "accepting" : "requesting");
+- print_synchronous (buf, (unsigned char *) hostdata->msg_buf);
+-
+- /*
+- * Initiator initiated, won't happen unless synchronous
+- * transfers are enabled. If we get a SDTR message in
+- * response to our SDTR, we should program our parameters
+- * such that
+- * offset <= requested offset
+- * period >= requested period
+- */
+- if (cmd->flags & CMD_FLAG_SDTR) {
+- cmd->flags &= ~CMD_FLAG_SDTR;
+- if (hostdata->msg_buf[4])
+- synchronous (host, c->device->id, (unsigned char *)
+- hostdata->msg_buf);
+- else
+- asynchronous (host, c->device->id);
+- hostdata->dsp = hostdata->script + hostdata->E_accept_message /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- return SPECIFIC_INT_NOTHING;
+- } else {
+- if (hostdata->options & OPTION_SYNCHRONOUS) {
+- cmd->flags |= CMD_FLAG_DID_SDTR;
+- synchronous (host, c->device->id, (unsigned char *)
+- hostdata->msg_buf);
+- } else {
+- hostdata->msg_buf[4] = 0; /* 0 offset = async */
+- asynchronous (host, c->device->id);
+- }
+- patch_dsa_32 (cmd->dsa, dsa_msgout_other, 0, 5);
+- patch_dsa_32 (cmd->dsa, dsa_msgout_other, 1, (u32)
+- virt_to_bus ((void *)&hostdata->msg_buf));
+- hostdata->dsp = hostdata->script +
+- hostdata->E_respond_message / sizeof(u32);
+- hostdata->dsp_changed = 1;
+- }
+- return SPECIFIC_INT_NOTHING;
+- }
+- /* Fall through to abort if we couldn't find a cmd, and
+- therefore a dsa structure to twiddle */
+- case A_int_msg_wdtr:
+- hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- return SPECIFIC_INT_NOTHING;
+- case A_int_err_unexpected_phase:
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : unexpected phase\n", host->host_no);
+- return SPECIFIC_INT_ABORT;
+- case A_int_err_selected:
+- if ((hostdata->chip / 100) == 8)
+- printk ("scsi%d : selected by target %d\n", host->host_no,
+- (int) NCR53c7x0_read8(SDID_REG_800) &7);
+- else
+- printk ("scsi%d : selected by target LCRC=0x%02x\n", host->host_no,
+- (int) NCR53c7x0_read8(LCRC_REG_10));
+- hostdata->dsp = hostdata->script + hostdata->E_target_abort /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- return SPECIFIC_INT_NOTHING;
+- case A_int_err_unexpected_reselect:
+- if ((hostdata->chip / 100) == 8)
+- printk ("scsi%d : unexpected reselect by target %d lun %d\n",
+- host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & 7,
+- hostdata->reselected_identify & 7);
+- else
+- printk ("scsi%d : unexpected reselect LCRC=0x%02x\n", host->host_no,
+- (int) NCR53c7x0_read8(LCRC_REG_10));
+- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- return SPECIFIC_INT_NOTHING;
+-/*
+- * Since contingent allegiance conditions are cleared by the next
+- * command issued to a target, we must issue a REQUEST SENSE
+- * command after receiving a CHECK CONDITION status, before
+- * another command is issued.
+- *
+- * Since this NCR53c7x0_cmd will be freed after use, we don't
+- * care if we step on the various fields, so modify a few things.
+- */
+- case A_int_err_check_condition:
+-#if 0
+- if (hostdata->options & OPTION_DEBUG_INTR)
+-#endif
+- printk ("scsi%d : CHECK CONDITION\n", host->host_no);
+- if (!c) {
+- printk("scsi%d : CHECK CONDITION with no SCSI command\n",
+- host->host_no);
+- return SPECIFIC_INT_PANIC;
+- }
+-
+- /*
+- * FIXME : this uses the normal one-byte selection message.
+- * We may want to renegotiate for synchronous & WIDE transfers
+- * since these could be the crux of our problem.
+- *
+- * FIXME : once SCSI-II tagged queuing is implemented, we'll
+- * have to set this up so that the rest of the DSA
+- * agrees with this being an untagged queued command.
+- */
+-
+- patch_dsa_32 (cmd->dsa, dsa_msgout, 0, 1);
+-
+- /*
+- * Modify the table indirect for COMMAND OUT phase, since
+- * Request Sense is a six byte command.
+- */
+-
+- patch_dsa_32 (cmd->dsa, dsa_cmdout, 0, 6);
+-
+- /*
+- * The CDB is now mirrored in our local non-cached
+- * structure, but keep the old structure up to date as well,
+- * just in case anyone looks at it.
+- */
+-
+- /*
+- * XXX Need to worry about data buffer alignment/cache state
+- * XXX here, but currently never get A_int_err_check_condition,
+- * XXX so ignore problem for now.
+- */
+- cmd->cmnd[0] = c->cmnd[0] = REQUEST_SENSE;
+- cmd->cmnd[1] = c->cmnd[1] &= 0xe0; /* Zero all but LUN */
+- cmd->cmnd[2] = c->cmnd[2] = 0;
+- cmd->cmnd[3] = c->cmnd[3] = 0;
+- cmd->cmnd[4] = c->cmnd[4] = sizeof(c->sense_buffer);
+- cmd->cmnd[5] = c->cmnd[5] = 0;
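+-/*
+- * Editor's note: the rebuilt CDB above is the standard six-byte
+- * REQUEST SENSE, e.g. 0x03, (LUN << 5), 0x00, 0x00,
+- * sizeof(c->sense_buffer), 0x00.
+- */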
+-
+- /*
+- * Disable dataout phase, and program datain to transfer to the
+- * sense buffer, and add a jump to other_transfer after the
+- * command so overflow/underrun conditions are detected.
+- */
+-
+- patch_dsa_32 (cmd->dsa, dsa_dataout, 0,
+- virt_to_bus(hostdata->script) + hostdata->E_other_transfer);
+- patch_dsa_32 (cmd->dsa, dsa_datain, 0,
+- virt_to_bus(cmd->data_transfer_start));
+- cmd->data_transfer_start[0] = (((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I |
+- DCMD_BMI_IO)) << 24) | sizeof(c->sense_buffer);
+- cmd->data_transfer_start[1] = (u32) virt_to_bus(c->sense_buffer);
+-
+- cmd->data_transfer_start[2] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP)
+- << 24) | DBC_TCI_TRUE;
+- cmd->data_transfer_start[3] = (u32) virt_to_bus(hostdata->script) +
+- hostdata->E_other_transfer;
+-
+- /*
+- * Currently, this command is flagged as completed, ie
+- * it has valid status and message data. Reflag it as
+- * incomplete. Q - need to do something so that original
+- * status, etc are used.
+- */
+-
+- cmd->result = cmd->cmd->result = 0xffff;
+-
+- /*
+- * Restart command as a REQUEST SENSE.
+- */
+- hostdata->dsp = (u32 *) hostdata->script + hostdata->E_select /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- return SPECIFIC_INT_NOTHING;
+- case A_int_debug_break:
+- return SPECIFIC_INT_BREAK;
+- case A_int_norm_aborted:
+- hostdata->dsp = (u32 *) hostdata->schedule;
+- hostdata->dsp_changed = 1;
+- if (cmd)
+- abnormal_finished (cmd, DID_ERROR << 16);
+- return SPECIFIC_INT_NOTHING;
+- case A_int_norm_emulateintfly:
+- NCR53c7x0_intfly(host);
+- return SPECIFIC_INT_NOTHING;
+- case A_int_test_1:
+- case A_int_test_2:
+- hostdata->idle = 1;
+- hostdata->test_completed = (dsps - A_int_test_1) / 0x00010000 + 1;
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk("scsi%d : test%d complete\n", host->host_no,
+- hostdata->test_completed);
+- return SPECIFIC_INT_NOTHING;
+-#ifdef A_int_debug_reselected_ok
+- case A_int_debug_reselected_ok:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT)) {
+- /*
+- * Note - this dsa is not based on location relative to
+- * the command structure, but on location relative to the
+- * DSA register
+- */
+- u32 *dsa;
+- dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-
+- printk("scsi%d : reselected_ok (DSA = 0x%x (virt 0x%p)\n",
+- host->host_no, NCR53c7x0_read32(DSA_REG), dsa);
+- printk("scsi%d : resume address is 0x%x (virt 0x%p)\n",
+- host->host_no, cmd->saved_data_pointer,
+- bus_to_virt(cmd->saved_data_pointer));
+- print_insn (host, hostdata->script + Ent_reselected_ok /
+- sizeof(u32), "", 1);
+- if ((hostdata->chip / 100) == 8)
+- printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n",
+- host->host_no, NCR53c7x0_read8(SXFER_REG),
+- NCR53c7x0_read8(SCNTL3_REG_800));
+- else
+- printk ("scsi%d : sxfer=0x%x, cannot read SBCL\n",
+- host->host_no, NCR53c7x0_read8(SXFER_REG));
+- if (c) {
+- print_insn (host, (u32 *)
+- hostdata->sync[c->device->id].script, "", 1);
+- print_insn (host, (u32 *)
+- hostdata->sync[c->device->id].script + 2, "", 1);
+- }
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_reselect_check
+- case A_int_debug_reselect_check:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- u32 *dsa;
+-#if 0
+- u32 *code;
+-#endif
+- /*
+- * Note - this dsa is not based on location relative to
+- * the command structure, but on location relative to the
+- * DSA register
+- */
+- dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG));
+- printk("scsi%d : reselected_check_next (DSA = 0x%lx (virt 0x%p))\n",
+- host->host_no, virt_to_bus(dsa), dsa);
+- if (dsa) {
+- printk("scsi%d : resume address is 0x%x (virt 0x%p)\n",
+- host->host_no, cmd->saved_data_pointer,
+- bus_to_virt (cmd->saved_data_pointer));
+-#if 0
+- printk("scsi%d : template code :\n", host->host_no);
+- for (code = dsa + (Ent_dsa_code_check_reselect - Ent_dsa_zero)
+- / sizeof(u32); code < (dsa + Ent_dsa_zero / sizeof(u32));
+- code += print_insn (host, code, "", 1));
+-#endif
+- }
+- print_insn (host, hostdata->script + Ent_reselected_ok /
+- sizeof(u32), "", 1);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_dsa_schedule
+- case A_int_debug_dsa_schedule:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- u32 *dsa;
+- /*
+- * Note - this dsa is not based on location relative to
+- * the command structure, but on location relative to the
+- * DSA register
+- */
+- dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG));
+- printk("scsi%d : dsa_schedule (old DSA = 0x%lx (virt 0x%p))\n",
+- host->host_no, virt_to_bus(dsa), dsa);
+- if (dsa)
+- printk("scsi%d : resume address is 0x%x (virt 0x%p)\n"
+- " (temp was 0x%x (virt 0x%p))\n",
+- host->host_no, cmd->saved_data_pointer,
+- bus_to_virt (cmd->saved_data_pointer),
+- NCR53c7x0_read32 (TEMP_REG),
+- bus_to_virt (NCR53c7x0_read32(TEMP_REG)));
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_scheduled
+- case A_int_debug_scheduled:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- printk("scsi%d : new I/O 0x%x (virt 0x%p) scheduled\n",
+- host->host_no, NCR53c7x0_read32(DSA_REG),
+- bus_to_virt(NCR53c7x0_read32(DSA_REG)));
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_idle
+- case A_int_debug_idle:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- printk("scsi%d : idle\n", host->host_no);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_cmd
+- case A_int_debug_cmd:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- printk("scsi%d : command sent\n");
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_dsa_loaded
+- case A_int_debug_dsa_loaded:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- printk("scsi%d : DSA loaded with 0x%x (virt 0x%p)\n", host->host_no,
+- NCR53c7x0_read32(DSA_REG),
+- bus_to_virt(NCR53c7x0_read32(DSA_REG)));
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_reselected
+- case A_int_debug_reselected:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT)) {
+- if ((hostdata->chip / 100) == 8)
+- printk("scsi%d : reselected by target %d lun %d\n",
+- host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & ~0x80,
+- (int) hostdata->reselected_identify & 7);
+- else
+- printk("scsi%d : reselected by LCRC=0x%02x lun %d\n",
+- host->host_no, (int) NCR53c7x0_read8(LCRC_REG_10),
+- (int) hostdata->reselected_identify & 7);
+- print_queues(host);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_disconnect_msg
+- case A_int_debug_disconnect_msg:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+- if (c)
+- printk("scsi%d : target %d lun %d disconnecting\n",
+- host->host_no, c->device->id, c->device->lun);
+- else
+- printk("scsi%d : unknown target disconnecting\n",
+- host->host_no);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_disconnected
+- case A_int_debug_disconnected:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT)) {
+- printk ("scsi%d : disconnected, new queues are\n",
+- host->host_no);
+- print_queues(host);
+-#if 0
+- /* Not valid on ncr53c710! */
+- printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n",
+- host->host_no, NCR53c7x0_read8(SXFER_REG),
+- NCR53c7x0_read8(SCNTL3_REG_800));
+-#endif
+- if (c) {
+- print_insn (host, (u32 *)
+- hostdata->sync[c->device->id].script, "", 1);
+- print_insn (host, (u32 *)
+- hostdata->sync[c->device->id].script + 2, "", 1);
+- }
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_panic
+- case A_int_debug_panic:
+- printk("scsi%d : int_debug_panic received\n", host->host_no);
+- print_lots (host);
+- return SPECIFIC_INT_PANIC;
+-#endif
+-#ifdef A_int_debug_saved
+- case A_int_debug_saved:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT)) {
+- printk ("scsi%d : saved data pointer 0x%x (virt 0x%p)\n",
+- host->host_no, cmd->saved_data_pointer,
+- bus_to_virt (cmd->saved_data_pointer));
+- print_progress (c);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_restored
+- case A_int_debug_restored:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT)) {
+- if (cmd) {
+- int size;
+- printk ("scsi%d : restored data pointer 0x%x (virt 0x%p)\n",
+- host->host_no, cmd->saved_data_pointer, bus_to_virt (
+- cmd->saved_data_pointer));
+- size = print_insn (host, (u32 *)
+- bus_to_virt(cmd->saved_data_pointer), "", 1);
+- size = print_insn (host, (u32 *)
+- bus_to_virt(cmd->saved_data_pointer) + size, "", 1);
+- print_progress (c);
+- }
+-#if 0
+- printk ("scsi%d : datapath residual %d\n",
+- host->host_no, datapath_residual (host)) ;
+-#endif
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_sync
+- case A_int_debug_sync:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) {
+- unsigned char sxfer = NCR53c7x0_read8 (SXFER_REG), scntl3;
+- if ((hostdata->chip / 100) == 8) {
+- scntl3 = NCR53c7x0_read8 (SCNTL3_REG_800);
+- if (c) {
+- if (sxfer != hostdata->sync[c->device->id].sxfer_sanity ||
+- scntl3 != hostdata->sync[c->device->id].scntl3_sanity) {
+- printk ("scsi%d : sync sanity check failed sxfer=0x%x, scntl3=0x%x",
+- host->host_no, sxfer, scntl3);
+- NCR53c7x0_write8 (SXFER_REG, sxfer);
+- NCR53c7x0_write8 (SCNTL3_REG_800, scntl3);
+- }
+- } else
+- printk ("scsi%d : unknown command sxfer=0x%x, scntl3=0x%x\n",
+- host->host_no, (int) sxfer, (int) scntl3);
+- } else {
+- if (c) {
+- if (sxfer != hostdata->sync[c->device->id].sxfer_sanity) {
+- printk ("scsi%d : sync sanity check failed sxfer=0x%x",
+- host->host_no, sxfer);
+- NCR53c7x0_write8 (SXFER_REG, sxfer);
+- NCR53c7x0_write8 (SBCL_REG,
+- hostdata->sync[c->device->id].sscf_710);
+- }
+- } else
+- printk ("scsi%d : unknown command sxfer=0x%x\n",
+- host->host_no, (int) sxfer);
+- }
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_datain
+- case A_int_debug_datain:
+- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+- OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) {
+- int size;
+- if ((hostdata->chip / 100) == 8)
+- printk ("scsi%d : In do_datain (%s) sxfer=0x%x, scntl3=0x%x\n"
+- " datapath residual=%d\n",
+- host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)),
+- (int) NCR53c7x0_read8(SXFER_REG),
+- (int) NCR53c7x0_read8(SCNTL3_REG_800),
+- datapath_residual (host)) ;
+- else
+- printk ("scsi%d : In do_datain (%s) sxfer=0x%x\n"
+- " datapath residual=%d\n",
+- host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)),
+- (int) NCR53c7x0_read8(SXFER_REG),
+- datapath_residual (host)) ;
+- print_insn (host, dsp, "", 1);
+- size = print_insn (host, (u32 *) bus_to_virt(dsp[1]), "", 1);
+- print_insn (host, (u32 *) bus_to_virt(dsp[1]) + size, "", 1);
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_check_dsa
+- case A_int_debug_check_dsa:
+- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) {
+- int sdid;
+- int tmp;
+- char *where;
+- if (hostdata->chip / 100 == 8)
+- sdid = NCR53c7x0_read8 (SDID_REG_800) & 15;
+- else {
+- tmp = NCR53c7x0_read8 (SDID_REG_700);
+- if (!tmp)
+- panic ("SDID_REG_700 = 0");
+- tmp >>= 1;
+- sdid = 0;
+- while (tmp) {
+- tmp >>= 1;
+- sdid++;
+- }
+- }
+- where = (dsp - NCR53c7x0_insn_size(NCR53c7x0_read8
+- (DCMD_REG)) == hostdata->script +
+- Ent_select_check_dsa / sizeof(u32)) ?
+- "selection" : "reselection";
+- if (c && sdid != c->device->id) {
+- printk ("scsi%d : SDID target %d != DSA target %d at %s\n",
+- host->host_no, sdid, c->device->id, where);
+- print_lots(host);
+- dump_events (host, 20);
+- return SPECIFIC_INT_PANIC;
+- }
+- }
+- return SPECIFIC_INT_RESTART;
+-#endif
+- default:
+- if ((dsps & 0xff000000) == 0x03000000) {
+- printk ("scsi%d : misc debug interrupt 0x%x\n",
+- host->host_no, dsps);
+- return SPECIFIC_INT_RESTART;
+- } else if ((dsps & 0xff000000) == 0x05000000) {
+- if (hostdata->events) {
+- struct NCR53c7x0_event *event;
+- ++hostdata->event_index;
+- if (hostdata->event_index >= hostdata->event_size)
+- hostdata->event_index = 0;
+- event = (struct NCR53c7x0_event *) hostdata->events +
+- hostdata->event_index;
+- event->event = (enum ncr_event) dsps;
+- event->dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) {
+- if (hostdata->chip / 100 == 8)
+- event->target = NCR53c7x0_read8(SSID_REG_800);
+- else {
+- unsigned char tmp, sdid;
+- tmp = NCR53c7x0_read8 (SDID_REG_700);
+- if (!tmp)
+- panic ("SDID_REG_700 = 0");
+- tmp >>= 1;
+- sdid = 0;
+- while (tmp) {
+- tmp >>= 1;
+- sdid++;
+- }
+- event->target = sdid;
+- }
+- }
+- else
+- event->target = 255;
+-
+- if (event->event == EVENT_RESELECT)
+- event->lun = hostdata->reselected_identify & 0xf;
+- else if (c)
+- event->lun = c->device->lun;
+- else
+- event->lun = 255;
+- do_gettimeofday(&(event->time));
+- if (c) {
+- event->pid = c->pid;
+- memcpy ((void *) event->cmnd, (void *) c->cmnd,
+- sizeof (event->cmnd));
+- } else {
+- event->pid = -1;
+- }
+- }
+- return SPECIFIC_INT_RESTART;
+- }
+-
+- printk ("scsi%d : unknown user interrupt 0x%x\n",
+- host->host_no, (unsigned) dsps);
+- return SPECIFIC_INT_PANIC;
+- }
+-}
+-
+-/*
+- * XXX - the stock NCR assembler won't output the scriptu.h file,
+- * which #undef's all the #define'd CPP symbols from the script.h
+- * file, which will create problems if you use multiple scripts
+- * with the same symbol names.
+- *
+- * If you insist on using NCR's assembler, you could generate
+- * scriptu.h from script.h using something like
+- *
+- * grep '#define' script.h | \
+- * sed 's/#define[ ][ ]*\([_a-zA-Z][_a-zA-Z0-9]*\).*$/#undef \1/' \
+- * > scriptu.h
+- */
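+-
+-/*
+- * Editor's note: each generated scriptu.h line would then read, for
+- * example (symbol name illustrative):
+- *
+- * #undef Ent_reselected_ok
+- */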
+-
+-#include "53c7xx_u.h"
+-
+-/* XXX - add alternate script handling code here */
+-
+-
+-/*
+- * Function : static void NCR53c7x0_soft_reset (struct Scsi_Host *host)
+- *
+- * Purpose : perform a soft reset of the NCR53c7xx chip
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- * Preconditions : NCR53c7x0_init must have been called for this
+- * host.
+- *
+- */
+-
+-static void
+-NCR53c7x0_soft_reset (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- unsigned long flags;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- NCR53c7x0_local_setup(host);
+-
+- local_irq_save(flags);
+-
+- /* Disable scsi chip and s/w level 7 ints */
+-
+-#ifdef CONFIG_MVME16x
+- if (MACH_IS_MVME16x)
+- {
+- volatile unsigned long v;
+-
+- v = *(volatile unsigned long *)0xfff4006c;
+- v &= ~0x8000;
+- *(volatile unsigned long *)0xfff4006c = v;
+- v = *(volatile unsigned long *)0xfff4202c;
+- v &= ~0x10;
+- *(volatile unsigned long *)0xfff4202c = v;
+- }
+-#endif
+- /* Anything specific for your hardware? */
+-
+- /*
+- * Do a soft reset of the chip so that everything is
+- * reinitialized to the power-on state.
+- *
+- * Basically follow the procedure outlined in the NCR53c700
+- * data manual under Chapter Six, How to Use, Steps Necessary to
+- * Start SCRIPTS, with the exception of actually starting the
+- * script and setting up the synchronous transfer gunk.
+- */
+-
+- /* Should we reset the scsi bus here? */
+-
+- NCR53c7x0_write8(ISTAT_REG_700, ISTAT_10_SRST);
+- NCR53c7x0_write8(ISTAT_REG_700, 0);
+-
+- /*
+- * saved_dcntl is set up in NCR53c7x0_init() before it is overwritten
+- * here. We should have some better way of working out the CF bit
+- * setting..
+- */
+-
+- hostdata->saved_dcntl = DCNTL_10_EA|DCNTL_10_COM;
+- if (hostdata->scsi_clock > 50000000)
+- hostdata->saved_dcntl |= DCNTL_700_CF_3;
+- else
+- if (hostdata->scsi_clock > 37500000)
+- hostdata->saved_dcntl |= DCNTL_700_CF_2;
+-#if 0
+- else
+- /* Any clocks less than 37.5MHz? */
+-#endif
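+-/*
+- * Editor's note: e.g. a 66MHz SCSI clock (> 50MHz) selects DCNTL_700_CF_3,
+- * a 40MHz clock (> 37.5MHz) selects DCNTL_700_CF_2, and slower clocks
+- * leave the CF bits at zero.
+- */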
+-
+- if (hostdata->options & OPTION_DEBUG_TRACE)
+- NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM);
+- else
+- NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl);
+- /* Following disables snooping - snooping is not required, as non-
+- * cached pages are used for shared data, and appropriate use is
+- * made of cache_push/cache_clear. Indeed, for 68060
+- * enabling snooping causes disk corruption of ext2fs free block
+- * bitmaps and the like. If you have a 68060 with snooping hardwired
+- * on, then you need to enable CONFIG_060_WRITETHROUGH.
+- */
+- NCR53c7x0_write8(CTEST7_REG, CTEST7_10_TT1|CTEST7_STD);
+- /* Actually burst of eight, according to my 53c710 databook */
+- NCR53c7x0_write8(hostdata->dmode, DMODE_10_BL_8 | DMODE_10_FC2);
+- NCR53c7x0_write8(SCID_REG, 1 << host->this_id);
+- NCR53c7x0_write8(SBCL_REG, 0);
+- NCR53c7x0_write8(SCNTL1_REG, SCNTL1_ESR_700);
+- NCR53c7x0_write8(SCNTL0_REG, ((hostdata->options & OPTION_PARITY) ?
+- SCNTL0_EPC : 0) | SCNTL0_EPG_700 | SCNTL0_ARB1 | SCNTL0_ARB2);
+-
+- /*
+- * Enable all interrupts, except parity which we only want when
+- * the user requests it.
+- */
+-
+- NCR53c7x0_write8(DIEN_REG, DIEN_700_BF |
+- DIEN_ABRT | DIEN_SSI | DIEN_SIR | DIEN_700_OPC);
+-
+- NCR53c7x0_write8(SIEN_REG_700, ((hostdata->options & OPTION_PARITY) ?
+- SIEN_PAR : 0) | SIEN_700_STO | SIEN_RST | SIEN_UDC |
+- SIEN_SGE | SIEN_MA);
+-
+-#ifdef CONFIG_MVME16x
+- if (MACH_IS_MVME16x)
+- {
+- volatile unsigned long v;
+-
+- /* Enable scsi chip and s/w level 7 ints */
+- v = *(volatile unsigned long *)0xfff40080;
+- v = (v & ~(0xf << 28)) | (4 << 28);
+- *(volatile unsigned long *)0xfff40080 = v;
+- v = *(volatile unsigned long *)0xfff4006c;
+- v |= 0x8000;
+- *(volatile unsigned long *)0xfff4006c = v;
+- v = *(volatile unsigned long *)0xfff4202c;
+- v = (v & ~0xff) | 0x10 | 4;
+- *(volatile unsigned long *)0xfff4202c = v;
+- }
+-#endif
+- /* Anything needed for your hardware? */
+- local_irq_restore(flags);
+-}
+-
+-
+-/*
+- * Function : static struct NCR53c7x0_cmd *allocate_cmd (Scsi_Cmnd *cmd)
+- *
+- * Purpose : Return the first free NCR53c7x0_cmd structure (which are
+- * reused in a LIFO manner to minimize cache thrashing).
+- *
+- * Side effects : If we haven't yet scheduled allocation of NCR53c7x0_cmd
+- * structures for this device, do so. Attempt to complete all scheduled
+- * allocations using get_zeroed_page(), putting NCR53c7x0_cmd structures on
+- * the free list. Teach programmers not to drink and hack.
+- *
+- * Inputs : cmd - SCSI command
+- *
+- * Returns : NCR53c7x0_cmd structure allocated on behalf of cmd;
+- * NULL on failure.
+- */
+-
+-static void
+-my_free_page (void *addr, int dummy)
+-{
+- /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which
+- * XXX may be invalid (CONFIG_060_WRITETHROUGH)
+- */
+- kernel_set_cachemode((void *)addr, 4096, IOMAP_FULL_CACHING);
+- free_page ((u32)addr);
+-}
+-
+-static struct NCR53c7x0_cmd *
+-allocate_cmd (Scsi_Cmnd *cmd) {
+- struct Scsi_Host *host = cmd->device->host;
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- u32 real; /* Real address */
+- int size; /* Size of *tmp */
+- struct NCR53c7x0_cmd *tmp;
+- unsigned long flags;
+-
+- if (hostdata->options & OPTION_DEBUG_ALLOCATION)
+- printk ("scsi%d : num_cmds = %d, can_queue = %d\n"
+- " target = %d, lun = %d, %s\n",
+- host->host_no, hostdata->num_cmds, host->can_queue,
+- cmd->device->id, cmd->device->lun, (hostdata->cmd_allocated[cmd->device->id] &
+- (1 << cmd->device->lun)) ? "already allocated" : "not allocated");
+-
+-/*
+- * If we have not yet reserved commands for this I_T_L nexus, and
+- * the device exists (as indicated by permanent Scsi_Cmnd structures
+- * being allocated under 1.3.x, or being outside of scan_scsis in
+- * 1.2.x), do so now.
+- */
+- if (!(hostdata->cmd_allocated[cmd->device->id] & (1 << cmd->device->lun)) &&
+- cmd->device && cmd->device->has_cmdblocks) {
+- if ((hostdata->extra_allocate + hostdata->num_cmds) < host->can_queue)
+- hostdata->extra_allocate += host->cmd_per_lun;
+- hostdata->cmd_allocated[cmd->device->id] |= (1 << cmd->device->lun);
+- }
+-
+- for (; hostdata->extra_allocate > 0 ; --hostdata->extra_allocate,
+- ++hostdata->num_cmds) {
+- /* historically, kmalloc has returned unaligned addresses; pad so we
+- have enough room to ROUNDUP */
+- size = hostdata->max_cmd_size + sizeof (void *);
+-#ifdef FORCE_DSA_ALIGNMENT
+- /*
+- * 53c710 rev.0 doesn't have an add-with-carry instruction.
+- * Ensure we allocate enough memory to force alignment.
+- */
+- size += 256;
+-#endif
+-/* FIXME: for ISA bus '7xx chips, we need to OR GFP_DMA in here */
+-
+- if (size > 4096) {
+- printk (KERN_ERR "53c7xx: allocate_cmd size > 4K\n");
+- return NULL;
+- }
+- real = get_zeroed_page(GFP_ATOMIC);
+- if (real == 0)
+- return NULL;
+- cache_push(virt_to_phys((void *)real), 4096);
+- cache_clear(virt_to_phys((void *)real), 4096);
+- kernel_set_cachemode((void *)real, 4096, IOMAP_NOCACHE_SER);
+- tmp = ROUNDUP(real, void *);
+-#ifdef FORCE_DSA_ALIGNMENT
+- {
+- if (((u32)tmp & 0xff) > CmdPageStart)
+- tmp = (struct NCR53c7x0_cmd *)((u32)tmp + 255);
+- tmp = (struct NCR53c7x0_cmd *)(((u32)tmp & ~0xff) + CmdPageStart);
+-#if 0
+- printk ("scsi: size = %d, real = 0x%08x, tmp set to 0x%08x\n",
+- size, real, (u32)tmp);
+-#endif
+- }
+-#endif
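+-/*
+- * Editor's illustration (addresses and CmdPageStart value assumed): if
+- * the zeroed page is at 0x00820000 and CmdPageStart is 0x30, the page is
+- * already aligned, so tmp becomes (0x00820000 & ~0xff) + 0x30 =
+- * 0x00820030, a fixed offset within a 256-byte region that avoids the
+- * rev.0 add-with-carry problem.
+- */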
+- tmp->real = (void *)real;
+- tmp->size = size;
+- tmp->free = ((void (*)(void *, int)) my_free_page);
+- local_irq_save(flags);
+- tmp->next = hostdata->free;
+- hostdata->free = tmp;
+- local_irq_restore(flags);
+- }
+- local_irq_save(flags);
+- tmp = (struct NCR53c7x0_cmd *) hostdata->free;
+- if (tmp) {
+- hostdata->free = tmp->next;
+- }
+- local_irq_restore(flags);
+- if (!tmp)
+- printk ("scsi%d : can't allocate command for target %d lun %d\n",
+- host->host_no, cmd->device->id, cmd->device->lun);
+- return tmp;
+-}
+-
+-/*
+- * Function : static struct NCR53c7x0_cmd *create_cmd (Scsi_Cmnd *cmd)
+- *
+- *
+- * Purpose : allocate a NCR53c7x0_cmd structure, initialize it based on the
+- * Scsi_Cmnd structure passed in cmd, including dsa and Linux field
+- * initialization, and dsa code relocation.
+- *
+- * Inputs : cmd - SCSI command
+- *
+- * Returns : NCR53c7x0_cmd structure corresponding to cmd,
+- * NULL on failure.
+- */
+-static struct NCR53c7x0_cmd *
+-create_cmd (Scsi_Cmnd *cmd) {
+- NCR53c7x0_local_declare();
+- struct Scsi_Host *host = cmd->device->host;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- struct NCR53c7x0_cmd *tmp; /* NCR53c7x0_cmd structure for this command */
+- int datain, /* Number of instructions per phase */
+- dataout;
+- int data_transfer_instructions, /* Count of dynamic instructions */
+- i; /* Counter */
+- u32 *cmd_datain, /* Address of datain/dataout code */
+- *cmd_dataout; /* Incremented as we assemble */
+-#ifdef notyet
+- unsigned char *msgptr; /* Current byte in select message */
+- int msglen; /* Length of whole select message */
+-#endif
+- unsigned long flags;
+- u32 exp_select_indirect; /* Used in sanity check */
+- NCR53c7x0_local_setup(cmd->device->host);
+-
+- if (!(tmp = allocate_cmd (cmd)))
+- return NULL;
+-
+- /*
+- * Copy CDB and initialised result fields from Scsi_Cmnd to NCR53c7x0_cmd.
+- * We do this because NCR53c7x0_cmd may have a special cache mode
+- * selected to cope with lack of bus snooping, etc.
+- */
+-
+- memcpy(tmp->cmnd, cmd->cmnd, 12);
+- tmp->result = cmd->result;
+-
+- /*
+- * Decide whether we need to generate commands for DATA IN,
+- * DATA OUT, neither, or both based on the SCSI command
+- */
+-
+- switch (cmd->cmnd[0]) {
+- /* These commands do DATA IN */
+- case INQUIRY:
+- case MODE_SENSE:
+- case READ_6:
+- case READ_10:
+- case READ_CAPACITY:
+- case REQUEST_SENSE:
+- case READ_BLOCK_LIMITS:
+- case READ_TOC:
+- datain = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+- dataout = 0;
+- break;
+- /* These commands do DATA OUT */
+- case MODE_SELECT:
+- case WRITE_6:
+- case WRITE_10:
+-#if 0
+- printk("scsi%d : command is ", host->host_no);
+- __scsi_print_command(cmd->cmnd);
+-#endif
+-#if 0
+- printk ("scsi%d : %d scatter/gather segments\n", host->host_no,
+- cmd->use_sg);
+-#endif
+- datain = 0;
+- dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+-#if 0
+- hostdata->options |= OPTION_DEBUG_INTR;
+-#endif
+- break;
+- /*
+- * These commands do no data transfer, we should force an
+- * interrupt if a data phase is attempted on them.
+- */
+- case TEST_UNIT_READY:
+- case ALLOW_MEDIUM_REMOVAL:
+- case START_STOP:
+- datain = dataout = 0;
+- break;
+- /*
+- * We don't know about these commands, so generate code to handle
+- * both DATA IN and DATA OUT phases. More efficient to identify them
+- * and add them to the above cases.
+- */
+- default:
+- printk("scsi%d : datain+dataout for command ", host->host_no);
+- __scsi_print_command(cmd->cmnd);
+- datain = dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+- }
+-
+- /*
+- * New code : so that active pointers work correctly regardless
+- * of where the saved data pointer is, we want to immediately
+- * enter the dynamic code after selection, and on a non-data
+- * phase perform a CALL to the non-data phase handler, with
+- * returns back to this address.
+- *
+- * If a phase mismatch is encountered in the middle of a
+- * Block MOVE instruction, we want to _leave_ that instruction
+- * unchanged as the current case is, modify a temporary buffer,
+- * and point the active pointer (TEMP) at that.
+- *
+- * Furthermore, we want to implement a saved data pointer,
+- * set by the SAVE_DATA_POINTERs message.
+- *
+- * So, the data transfer segments will change to
+- * CALL data_transfer, WHEN NOT data phase
+- * MOVE x, x, WHEN data phase
+- * ( repeat )
+- * JUMP other_transfer
+- */
+-
+- data_transfer_instructions = datain + dataout;
+-
+- /*
+- * When we perform a request sense, we overwrite various things,
+- * including the data transfer code. Make sure we have enough
+- * space to do that.
+- */
+-
+- if (data_transfer_instructions < 2)
+- data_transfer_instructions = 2;
+-
+-
+- /*
+- * The saved data pointer is set up so that a RESTORE POINTERS message
+- * will start the data transfer over at the beginning.
+- */
+-
+- tmp->saved_data_pointer = virt_to_bus (hostdata->script) +
+- hostdata->E_data_transfer;
+-
+- /*
+- * Initialize Linux specific fields.
+- */
+-
+- tmp->cmd = cmd;
+- tmp->next = NULL;
+- tmp->flags = 0;
+- tmp->dsa_next_addr = virt_to_bus(tmp->dsa) + hostdata->dsa_next -
+- hostdata->dsa_start;
+- tmp->dsa_addr = virt_to_bus(tmp->dsa) - hostdata->dsa_start;
+-
+- /*
+- * Calculate addresses of dynamic code to fill in DSA
+- */
+-
+- tmp->data_transfer_start = tmp->dsa + (hostdata->dsa_end -
+- hostdata->dsa_start) / sizeof(u32);
+- tmp->data_transfer_end = tmp->data_transfer_start +
+- 2 * data_transfer_instructions;
+-
+- cmd_datain = datain ? tmp->data_transfer_start : NULL;
+- cmd_dataout = dataout ? (datain ? cmd_datain + 2 * datain : tmp->
+- data_transfer_start) : NULL;
+-
+- /*
+- * Fill in the NCR53c7x0_cmd structure as follows
+- * dsa, with fixed up DSA code
+- * datain code
+- * dataout code
+- */
+-
+- /* Copy template code into dsa and perform all necessary fixups */
+- if (hostdata->dsa_fixup)
+- hostdata->dsa_fixup(tmp);
+-
+- patch_dsa_32(tmp->dsa, dsa_next, 0, 0);
+- /*
+- * XXX is this giving 53c710 access to the Scsi_Cmnd in some way?
+- * Do we need to change it for caching reasons?
+- */
+- patch_dsa_32(tmp->dsa, dsa_cmnd, 0, virt_to_bus(cmd));
+-
+- if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) {
+-
+- exp_select_indirect = ((1 << cmd->device->id) << 16) |
+- (hostdata->sync[cmd->device->id].sxfer_sanity << 8);
+-
+- if (hostdata->sync[cmd->device->id].select_indirect !=
+- exp_select_indirect) {
+- printk ("scsi%d : sanity check failed select_indirect=0x%x\n",
+- host->host_no, hostdata->sync[cmd->device->id].select_indirect);
+- FATAL(host);
+-
+- }
+- }
+-
+- patch_dsa_32(tmp->dsa, dsa_select, 0,
+- hostdata->sync[cmd->device->id].select_indirect);
+-
+- /*
+- * Right now, we'll do the WIDE and SYNCHRONOUS negotiations on
+- * different commands, although it should be trivial to do them
+- * both at the same time.
+- */
+- if (hostdata->initiate_wdtr & (1 << cmd->device->id)) {
+- memcpy ((void *) (tmp->select + 1), (void *) wdtr_message,
+- sizeof(wdtr_message));
+- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(wdtr_message));
+- local_irq_save(flags);
+- hostdata->initiate_wdtr &= ~(1 << cmd->device->id);
+- local_irq_restore(flags);
+- } else if (hostdata->initiate_sdtr & (1 << cmd->device->id)) {
+- memcpy ((void *) (tmp->select + 1), (void *) sdtr_message,
+- sizeof(sdtr_message));
+- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(sdtr_message));
+- tmp->flags |= CMD_FLAG_SDTR;
+- local_irq_save(flags);
+- hostdata->initiate_sdtr &= ~(1 << cmd->device->id);
+- local_irq_restore(flags);
+-
+- }
+-#if 1
+- else if (!(hostdata->talked_to & (1 << cmd->device->id)) &&
+- !(hostdata->options & OPTION_NO_ASYNC)) {
+-
+- memcpy ((void *) (tmp->select + 1), (void *) async_message,
+- sizeof(async_message));
+- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(async_message));
+- tmp->flags |= CMD_FLAG_SDTR;
+- }
+-#endif
+- else
+- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1);
+-
+- hostdata->talked_to |= (1 << cmd->device->id);
+- tmp->select[0] = (hostdata->options & OPTION_DISCONNECT) ?
+- IDENTIFY (1, cmd->device->lun) : IDENTIFY (0, cmd->device->lun);
+- patch_dsa_32(tmp->dsa, dsa_msgout, 1, virt_to_bus(tmp->select));
+- patch_dsa_32(tmp->dsa, dsa_cmdout, 0, cmd->cmd_len);
+- patch_dsa_32(tmp->dsa, dsa_cmdout, 1, virt_to_bus(tmp->cmnd));
+- patch_dsa_32(tmp->dsa, dsa_dataout, 0, cmd_dataout ?
+- virt_to_bus (cmd_dataout)
+- : virt_to_bus (hostdata->script) + hostdata->E_other_transfer);
+- patch_dsa_32(tmp->dsa, dsa_datain, 0, cmd_datain ?
+- virt_to_bus (cmd_datain)
+- : virt_to_bus (hostdata->script) + hostdata->E_other_transfer);
+- /*
+- * XXX - need to make endian aware, should use separate variables
+- * for both status and message bytes.
+- */
+- patch_dsa_32(tmp->dsa, dsa_msgin, 0, 1);
+-/*
+- * FIXME : this only works for little-endian. We probably want to
+- * provide message and status fields in the NCR53c7x0_cmd
+- * structure, and assign them to cmd->result when we're done.
+- */
+-#ifdef BIG_ENDIAN
+- patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 2);
+- patch_dsa_32(tmp->dsa, dsa_status, 0, 1);
+- patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result) + 3);
+-#else
+- patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 1);
+- patch_dsa_32(tmp->dsa, dsa_status, 0, 1);
+- patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result));
+-#endif
+- patch_dsa_32(tmp->dsa, dsa_msgout_other, 0, 1);
+- patch_dsa_32(tmp->dsa, dsa_msgout_other, 1,
+- virt_to_bus(&(hostdata->NCR53c7xx_msg_nop)));
+-
+- /*
+- * Generate code for zero or more of the DATA IN, DATA OUT phases
+- * in the format
+- *
+- * CALL data_transfer, WHEN NOT phase
+- * MOVE first buffer length, first buffer address, WHEN phase
+- * ...
+- * MOVE last buffer length, last buffer address, WHEN phase
+- * JUMP other_transfer
+- */
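+-
+-/*
+- * Editor's illustration: for a two-segment scatter/gather DATA IN, the
+- * loop below emits (addresses symbolic):
+- *
+- * CALL other_in, WHEN NOT DATA_IN
+- * MOVE len0, buf0, WHEN DATA_IN
+- * CALL other_in, WHEN NOT DATA_IN
+- * MOVE len1, buf1, WHEN DATA_IN
+- * JUMP other_transfer
+- */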
+-
+-/*
+- * See if we're getting to data transfer by generating an unconditional
+- * interrupt.
+- */
+-#if 0
+- if (datain) {
+- cmd_datain[0] = 0x98080000;
+- cmd_datain[1] = 0x03ffd00d;
+- cmd_datain += 2;
+- }
+-#endif
+-
+-/*
+- * XXX - I'm undecided whether all of this nonsense is faster
+- * in the long run, or whether I should just go and implement a loop
+- * on the NCR chip using table indirect mode?
+- *
+- * In any case, this is how it _must_ be done for 53c700/700-66 chips,
+- * so this stays even when we come up with something better.
+- *
+- * When we're limited to 1 simultaneous command, no overlapping processing,
+- * we're seeing 630K/sec, with 7% CPU usage on a slow Syquest 45M
+- * drive.
+- *
+- * Not bad, not good. We'll see.
+- */
+-
+- tmp->bounce.len = 0; /* Assume aligned buffer */
+-
+- for (i = 0; cmd->use_sg ? (i < cmd->use_sg) : !i; cmd_datain += 4,
+- cmd_dataout += 4, ++i) {
+- u32 vbuf = cmd->use_sg
+- ? (u32)page_address(((struct scatterlist *)cmd->request_buffer)[i].page)+
+- ((struct scatterlist *)cmd->request_buffer)[i].offset
+- : (u32)(cmd->request_buffer);
+- u32 bbuf = virt_to_bus((void *)vbuf);
+- u32 count = cmd->use_sg ?
+- ((struct scatterlist *)cmd->request_buffer)[i].length :
+- cmd->request_bufflen;
+-
+- /*
+- * If we have buffers which are not aligned with 16 byte cache
+- * lines, then we just hope nothing accesses the other parts of
+- * those cache lines while the transfer is in progress. That would
+- * fill the cache, and subsequent reads of the dma data would pick
+- * up the wrong thing.
+- * XXX We need a bounce buffer to handle that correctly.
+- */
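+- /* Editor's illustration: a 200-byte single-segment read into a buffer
+- at an address ending in 0x08 fails both the (bbuf & 15) and the
+- (count & 15) tests, so it is bounced through tmp->bounce.buf. */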
+-
+- if (((bbuf & 15) || (count & 15)) && (datain || dataout))
+- {
+- /* Bounce buffer needed */
+- if (cmd->use_sg)
+- printk ("53c7xx: Non-aligned buffer with use_sg\n");
+- else if (datain && dataout)
+- printk ("53c7xx: Non-aligned buffer with datain && dataout\n");
+- else if (count > 256)
+- printk ("53c7xx: Non-aligned transfer > 256 bytes\n");
+- else
+- {
+- if (datain)
+- {
+- tmp->bounce.len = count;
+- tmp->bounce.addr = vbuf;
+- bbuf = virt_to_bus(tmp->bounce.buf);
+- tmp->bounce.buf[0] = 0xff;
+- tmp->bounce.buf[1] = 0xfe;
+- tmp->bounce.buf[2] = 0xfd;
+- tmp->bounce.buf[3] = 0xfc;
+- }
+- if (dataout)
+- {
+- memcpy ((void *)tmp->bounce.buf, (void *)vbuf, count);
+- bbuf = virt_to_bus(tmp->bounce.buf);
+- }
+- }
+- }
+-
+- if (datain) {
+- cache_clear(virt_to_phys((void *)vbuf), count);
+- /* CALL other_in, WHEN NOT DATA_IN */
+- cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL |
+- DCMD_TCI_IO) << 24) |
+- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+- cmd_datain[1] = virt_to_bus (hostdata->script) +
+- hostdata->E_other_in;
+- /* MOVE count, buf, WHEN DATA_IN */
+- cmd_datain[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | DCMD_BMI_IO)
+- << 24) | count;
+- cmd_datain[3] = bbuf;
+-#if 0
+- print_insn (host, cmd_datain, "dynamic ", 1);
+- print_insn (host, cmd_datain + 2, "dynamic ", 1);
+-#endif
+- }
+- if (dataout) {
+- cache_push(virt_to_phys((void *)vbuf), count);
+- /* CALL other_out, WHEN NOT DATA_OUT */
+- cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL) << 24) |
+- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+- cmd_dataout[1] = virt_to_bus(hostdata->script) +
+- hostdata->E_other_out;
+- /* MOVE count, buf, WHEN DATA_OUT */
+- cmd_dataout[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I) << 24)
+- | count;
+- cmd_dataout[3] = bbuf;
+-#if 0
+- print_insn (host, cmd_dataout, "dynamic ", 1);
+- print_insn (host, cmd_dataout + 2, "dynamic ", 1);
+-#endif
+- }
+- }
+-
+- /*
+- * Install JUMP instructions after the data transfer routines to return
+- * control to the do_other_transfer routines.
+- */
+-
+-
+- if (datain) {
+- cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) |
+- DBC_TCI_TRUE;
+- cmd_datain[1] = virt_to_bus(hostdata->script) +
+- hostdata->E_other_transfer;
+-#if 0
+- print_insn (host, cmd_datain, "dynamic jump ", 1);
+-#endif
+- cmd_datain += 2;
+- }
+-#if 0
+- if (datain) {
+- cmd_datain[0] = 0x98080000;
+- cmd_datain[1] = 0x03ffdeed;
+- cmd_datain += 2;
+- }
+-#endif
+- if (dataout) {
+- cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) |
+- DBC_TCI_TRUE;
+- cmd_dataout[1] = virt_to_bus(hostdata->script) +
+- hostdata->E_other_transfer;
+-#if 0
+- print_insn (host, cmd_dataout, "dynamic jump ", 1);
+-#endif
+- cmd_dataout += 2;
+- }
+-
+- return tmp;
+-}
+-
+-/*
+- * Function : int NCR53c7xx_queue_command (Scsi_Cmnd *cmd,
+- * void (*done)(Scsi_Cmnd *))
+- *
+- * Purpose : enqueues a SCSI command
+- *
+- * Inputs : cmd - SCSI command, done - function called on completion, with
+- * a pointer to the command descriptor.
+- *
+- * Returns : 0
+- *
+- * Side effects :
+- * cmd is added to the per instance driver issue_queue, with major
+- * twiddling done to the host specific fields of cmd. If the
+- * process_issue_queue coroutine isn't running, it is restarted.
+- *
+- * NOTE : we use the host_scribble field of the Scsi_Cmnd structure to
+- * hold our own data, and pervert the ptr field of the SCp field
+- * to create a linked list.
+- */
+-
+-int
+-NCR53c7xx_queue_command (Scsi_Cmnd *cmd, void (* done)(Scsi_Cmnd *)) {
+- struct Scsi_Host *host = cmd->device->host;
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- unsigned long flags;
+- Scsi_Cmnd *tmp;
+-
+- cmd->scsi_done = done;
+- cmd->host_scribble = NULL;
+- cmd->SCp.ptr = NULL;
+- cmd->SCp.buffer = NULL;
+-
+-#ifdef VALID_IDS
+- /* Ignore commands on invalid IDs */
+- if (!hostdata->valid_ids[cmd->device->id]) {
+- printk("scsi%d : ignoring target %d lun %d\n", host->host_no,
+- cmd->device->id, cmd->device->lun);
+- cmd->result = (DID_BAD_TARGET << 16);
+- done(cmd);
+- return 0;
+- }
+-#endif
+-
+- local_irq_save(flags);
+- if ((hostdata->options & (OPTION_DEBUG_INIT_ONLY|OPTION_DEBUG_PROBE_ONLY))
+- || ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) &&
+- !(hostdata->debug_lun_limit[cmd->device->id] & (1 << cmd->device->lun)))
+-#ifdef LINUX_1_2
+- || cmd->device->id > 7
+-#else
+- || cmd->device->id >= host->max_id
+-#endif
+- || cmd->device->id == host->this_id
+- || hostdata->state == STATE_DISABLED) {
+- printk("scsi%d : disabled or bad target %d lun %d\n", host->host_no,
+- cmd->device->id, cmd->device->lun);
+- cmd->result = (DID_BAD_TARGET << 16);
+- done(cmd);
+- local_irq_restore(flags);
+- return 0;
+- }
+-
+- if ((hostdata->options & OPTION_DEBUG_NCOMMANDS_LIMIT) &&
+- (hostdata->debug_count_limit == 0)) {
+- printk("scsi%d : maximum commands exceeded\n", host->host_no);
+- cmd->result = (DID_BAD_TARGET << 16);
+- done(cmd);
+- local_irq_restore(flags);
+- return 0;
+- }
+-
+- if (hostdata->options & OPTION_DEBUG_READ_ONLY) {
+- switch (cmd->cmnd[0]) {
+- case WRITE_6:
+- case WRITE_10:
+- printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n",
+- host->host_no);
+- cmd->result = (DID_BAD_TARGET << 16);
+- done(cmd);
+- local_irq_restore(flags);
+- return 0;
+- }
+- }
+-
+- if ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) &&
+- hostdata->debug_count_limit != -1)
+- --hostdata->debug_count_limit;
+-
+- cmd->result = 0xffff; /* The NCR will overwrite message
+- and status with valid data */
+- cmd->host_scribble = (unsigned char *) create_cmd (cmd);
+-
+- /*
+- * REQUEST SENSE commands are inserted at the head of the queue
+- * so that we do not clear the contingent allegiance condition
+- * they may be looking at.
+- */
+-
+- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
+- cmd->SCp.ptr = (unsigned char *) hostdata->issue_queue;
+- hostdata->issue_queue = cmd;
+- } else {
+- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp->SCp.ptr;
+- tmp = (Scsi_Cmnd *) tmp->SCp.ptr);
+- tmp->SCp.ptr = (unsigned char *) cmd;
+- }
+- local_irq_restore(flags);
+- run_process_issue_queue();
+- return 0;
+-}
+-
+-/*
+- * Function : void to_schedule_list (struct Scsi_Host *host,
+- * struct NCR53c7x0_hostdata *hostdata, struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : takes a SCSI command which was just removed from the
+- * issue queue, and deals with it by inserting it in the first
+- * free slot in the schedule list or by terminating it immediately.
+- *
+- * Inputs :
+- * host - SCSI host adapter; hostdata - hostdata structure for
+- * this adapter; cmd - a pointer to the command; should have
+- * the host_scribble field initialized to point to a valid
+- *
+- * Side effects :
+- * cmd is added to the per instance schedule list, with minor
+- * twiddling done to the host specific fields of cmd.
+- *
+- */
+-
+-static __inline__ void
+-to_schedule_list (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+- struct NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- Scsi_Cmnd *tmp = cmd->cmd;
+- unsigned long flags;
+- /* dsa start is negative, so subtraction is used */
+- volatile u32 *ncrcurrent;
+-
+- int i;
+- NCR53c7x0_local_setup(host);
+-#if 0
+- printk("scsi%d : new dsa is 0x%lx (virt 0x%p)\n", host->host_no,
+- virt_to_bus(hostdata->dsa), hostdata->dsa);
+-#endif
+-
+- local_irq_save(flags);
+-
+- /*
+- * Work around race condition : if an interrupt fired and we
+- * got disabled forget about this command.
+- */
+-
+- if (hostdata->state == STATE_DISABLED) {
+- printk("scsi%d : driver disabled\n", host->host_no);
+- tmp->result = (DID_BAD_TARGET << 16);
+- cmd->next = (struct NCR53c7x0_cmd *) hostdata->free;
+- hostdata->free = cmd;
+- tmp->scsi_done(tmp);
+- local_irq_restore(flags);
+- return;
+- }
+-
+- for (i = host->can_queue, ncrcurrent = hostdata->schedule;
+- i > 0 && ncrcurrent[0] != hostdata->NOP_insn;
+- --i, ncrcurrent += 2 /* JUMP instructions are two words */);
+-
+- if (i > 0) {
+- ++hostdata->busy[tmp->device->id][tmp->device->lun];
+- cmd->next = hostdata->running_list;
+- hostdata->running_list = cmd;
+-
+- /* Restore this instruction to a NOP once the command starts */
+- cmd->dsa [(hostdata->dsa_jump_dest - hostdata->dsa_start) /
+- sizeof(u32)] = (u32) virt_to_bus ((void *)ncrcurrent);
+- /* Replace the current jump operand. */
+- ncrcurrent[1] =
+- virt_to_bus ((void *) cmd->dsa) + hostdata->E_dsa_code_begin -
+- hostdata->E_dsa_code_template;
+- /* Replace the NOP instruction with a JUMP */
+- ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) |
+- DBC_TCI_TRUE;
+- } else {
+- printk ("scsi%d: no free slot\n", host->host_no);
+- disable(host);
+- tmp->result = (DID_ERROR << 16);
+- cmd->next = (struct NCR53c7x0_cmd *) hostdata->free;
+- hostdata->free = cmd;
+- tmp->scsi_done(tmp);
+- local_irq_restore(flags);
+- return;
+- }
+-
+- /*
+- * If the NCR chip is in an idle state, start it running the scheduler
+- * immediately. Otherwise, signal the chip to jump to schedule as
+- * soon as it is idle.
+- */
+-
+- if (hostdata->idle) {
+- hostdata->idle = 0;
+- hostdata->state = STATE_RUNNING;
+- NCR53c7x0_write32 (DSP_REG, virt_to_bus ((void *)hostdata->schedule));
+- if (hostdata->options & OPTION_DEBUG_TRACE)
+- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl |
+- DCNTL_SSM | DCNTL_STD);
+- } else {
+- NCR53c7x0_write8(hostdata->istat, ISTAT_10_SIGP);
+- }
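+-    /*
+-     * Note : SIGP ("signal process") is, as we understand the part,
+-     * polled by the running SCRIPTS code as a cue to revisit the
+-     * scheduler once the current nexus completes; only when the chip
+-     * is idle do we restart it by hand through DSP above.
+-     */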
+-
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Function : busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata
+- * *hostdata, Scsi_Cmnd *cmd)
+- *
+- * Purpose : decide if we can pass the given SCSI command on to the
+- * device in question or not.
+- *
+- * Returns : non-zero when we're busy, 0 when we aren't.
+- */
+-
+-static __inline__ int
+-busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+- Scsi_Cmnd *cmd) {
+- /* FIXME : in the future, this needs to accommodate SCSI-II tagged
+- queuing, and we may be able to play with fairness here a bit.
+- */
+- return hostdata->busy[cmd->device->id][cmd->device->lun];
+-}
+-
+-/*
+- * Function : process_issue_queue (void)
+- *
+- * Purpose : transfer commands from the issue queue to the NCR start queue
+- * of each NCR53c7/8xx in the system, avoiding kernel stack
+- * overflows when the scsi_done() function is invoked recursively.
+- *
+- * NOTE : process_issue_queue exits with interrupts *disabled*, so the
+- * caller must reenable them if it desires.
+- *
+- * NOTE : process_issue_queue should be called from both
+- * NCR53c7x0_queue_command() and from the interrupt handler
+- * after command completion in case NCR53c7x0_queue_command()
+- * isn't invoked again but we've freed up resources that are
+- * needed.
+- */
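+-
+-/*
+- * A rough sketch of the reentrancy guard this depends on; the
+- * run_process_issue_queue() wrapper (defined elsewhere in this
+- * driver) is assumed to look approximately like :
+- *
+- *	if (!process_issue_queue_running) {
+- *	    process_issue_queue_running = 1;
+- *	    process_issue_queue(flags);
+- *	}
+- *
+- * so recursive scsi_done() paths fall through instead of nesting.
+- */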
+-
+-static void
+-process_issue_queue (unsigned long flags) {
+- Scsi_Cmnd *tmp, *prev;
+- struct Scsi_Host *host;
+- struct NCR53c7x0_hostdata *hostdata;
+- int done;
+-
+- /*
+- * We run (with interrupts disabled) until we're sure that none of
+- * the host adapters have anything that can be done, at which point
+- * we set process_issue_queue_running to 0 and exit.
+- *
+-     * Interrupts are re-enabled once we've decided that we need to
+-     * run through the loop again, before the next pass begins.
+- *
+- */
+-
+- do {
+- local_irq_disable(); /* Freeze request queues */
+- done = 1;
+- for (host = first_host; host && host->hostt == the_template;
+- host = host->next) {
+- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- local_irq_disable();
+- if (hostdata->issue_queue) {
+- if (hostdata->state == STATE_DISABLED) {
+- tmp = (Scsi_Cmnd *) hostdata->issue_queue;
+- hostdata->issue_queue = (Scsi_Cmnd *) tmp->SCp.ptr;
+- tmp->result = (DID_BAD_TARGET << 16);
+- if (tmp->host_scribble) {
+- ((struct NCR53c7x0_cmd *)tmp->host_scribble)->next =
+- hostdata->free;
+- hostdata->free =
+- (struct NCR53c7x0_cmd *)tmp->host_scribble;
+- tmp->host_scribble = NULL;
+- }
+- tmp->scsi_done (tmp);
+- done = 0;
+- } else
+- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue,
+- prev = NULL; tmp; prev = tmp, tmp = (Scsi_Cmnd *)
+- tmp->SCp.ptr)
+- if (!tmp->host_scribble ||
+- !busyp (host, hostdata, tmp)) {
+- if (prev)
+- prev->SCp.ptr = tmp->SCp.ptr;
+- else
+- hostdata->issue_queue = (Scsi_Cmnd *)
+- tmp->SCp.ptr;
+- tmp->SCp.ptr = NULL;
+- if (tmp->host_scribble) {
+- if (hostdata->options & OPTION_DEBUG_QUEUES)
+- printk ("scsi%d : moving command for target %d lun %d to start list\n",
+- host->host_no, tmp->device->id, tmp->device->lun);
+-
+-
+- to_schedule_list (host, hostdata,
+- (struct NCR53c7x0_cmd *)
+- tmp->host_scribble);
+- } else {
+- if (((tmp->result & 0xff) == 0xff) ||
+- ((tmp->result & 0xff00) == 0xff00)) {
+- printk ("scsi%d : danger Will Robinson!\n",
+- host->host_no);
+- tmp->result = DID_ERROR << 16;
+- disable (host);
+- }
+- tmp->scsi_done(tmp);
+- }
+- done = 0;
+- } /* if target/lun is not busy */
+- } /* if hostdata->issue_queue */
+- if (!done)
+- local_irq_restore(flags);
+- } /* for host */
+- } while (!done);
+- process_issue_queue_running = 0;
+-}
+-
+-/*
+- * Function : static void intr_scsi (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle all SCSI interrupts, indicated by the setting
+- * of the SIP bit in the ISTAT register.
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * may be NULL.
+- */
+-
+-static void
+-intr_scsi (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- unsigned char sstat0_sist0, sist1, /* Registers */
+- fatal; /* Did a fatal interrupt
+- occur ? */
+-
+- NCR53c7x0_local_setup(host);
+-
+- fatal = 0;
+-
+- sstat0_sist0 = NCR53c7x0_read8(SSTAT0_REG);
+- sist1 = 0;
+-
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : SIST0 0x%0x, SIST1 0x%0x\n", host->host_no,
+- sstat0_sist0, sist1);
+-
+- /* 250ms selection timeout */
+- if (sstat0_sist0 & SSTAT0_700_STO) {
+- fatal = 1;
+- if (hostdata->options & OPTION_DEBUG_INTR) {
+- printk ("scsi%d : Selection Timeout\n", host->host_no);
+- if (cmd) {
+- printk("scsi%d : target %d, lun %d, command ",
+- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+- __scsi_print_command (cmd->cmd->cmnd);
+- printk("scsi%d : dsp = 0x%x (virt 0x%p)\n", host->host_no,
+- NCR53c7x0_read32(DSP_REG),
+- bus_to_virt(NCR53c7x0_read32(DSP_REG)));
+- } else {
+- printk("scsi%d : no command\n", host->host_no);
+- }
+- }
+-/*
+- * XXX - question : how do we want to handle the Illegal Instruction
+- * interrupt, which may occur before or after the Selection Timeout
+- * interrupt?
+- */
+-
+- if (1) {
+- hostdata->idle = 1;
+- hostdata->expecting_sto = 0;
+-
+- if (hostdata->test_running) {
+- hostdata->test_running = 0;
+- hostdata->test_completed = 3;
+- } else if (cmd) {
+- abnormal_finished(cmd, DID_BAD_TARGET << 16);
+- }
+-#if 0
+- hostdata->intrs = 0;
+-#endif
+- }
+- }
+-
+-/*
+- * FIXME : in theory, we can also get a UDC when a STO occurs.
+- */
+- if (sstat0_sist0 & SSTAT0_UDC) {
+- fatal = 1;
+- if (cmd) {
+- printk("scsi%d : target %d lun %d unexpected disconnect\n",
+- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+- print_lots (host);
+- abnormal_finished(cmd, DID_ERROR << 16);
+- } else
+- printk("scsi%d : unexpected disconnect (no command)\n",
+- host->host_no);
+-
+- hostdata->dsp = (u32 *) hostdata->schedule;
+- hostdata->dsp_changed = 1;
+- }
+-
+- /* SCSI PARITY error */
+- if (sstat0_sist0 & SSTAT0_PAR) {
+- fatal = 1;
+- if (cmd && cmd->cmd) {
+- printk("scsi%d : target %d lun %d parity error.\n",
+- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+- abnormal_finished (cmd, DID_PARITY << 16);
+- } else
+- printk("scsi%d : parity error\n", host->host_no);
+- /* Should send message out, parity error */
+-
+- /* XXX - Reduce synchronous transfer rate! */
+- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- /* SCSI GROSS error */
+- }
+-
+- if (sstat0_sist0 & SSTAT0_SGE) {
+- fatal = 1;
+- printk("scsi%d : gross error, saved2_dsa = 0x%x\n", host->host_no,
+- (unsigned int)hostdata->saved2_dsa);
+- print_lots (host);
+-
+- /*
+- * A SCSI gross error may occur when we have
+- *
+- * - A synchronous offset which causes the SCSI FIFO to be overwritten.
+- *
+- * - A REQ which causes the maximum synchronous offset programmed in
+- * the SXFER register to be exceeded.
+- *
+- * - A phase change with an outstanding synchronous offset.
+- *
+-     * - Residual data in the synchronous data FIFO when a transfer
+-     *   other than a synchronous receive is started.
+- */
+-
+-
+- /* XXX Should deduce synchronous transfer rate! */
+- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- /* Phase mismatch */
+- }
+-
+- if (sstat0_sist0 & SSTAT0_MA) {
+- fatal = 1;
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : SSTAT0_MA\n", host->host_no);
+- intr_phase_mismatch (host, cmd);
+- }
+-
+-#if 0
+- if (sstat0_sist0 & SIST0_800_RSL)
+- printk ("scsi%d : Oh no Mr. Bill!\n", host->host_no);
+-#endif
+-
+-/*
+- * If a fatal SCSI interrupt occurs, we must ensure that the DMA and
+- * SCSI FIFOs are flushed.
+- */
+-
+- if (fatal) {
+- if (!hostdata->dstat_valid) {
+- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+- hostdata->dstat_valid = 1;
+- }
+-
+- if (!(hostdata->dstat & DSTAT_DFE)) {
+- printk ("scsi%d : DMA FIFO not empty\n", host->host_no);
+- /*
+- * Really need to check this code for 710 RGH.
+-	     * Haven't seen any problems, but maybe we should FLUSH before
+- * clearing sometimes.
+- */
+- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF)
+- ;
+- hostdata->dstat |= DSTAT_DFE;
+- }
+- }
+-}
+-
+-#ifdef CYCLIC_TRACE
+-
+-/*
+- * The following implements a cyclic log of instructions executed, if you turn
+- * TRACE on. It will also print the log for you. Very useful when debugging
+- * 53c710 support, possibly not really needed any more.
+- */
+-
+-u32 insn_log[4096];
+-u32 insn_log_index = 0;
+-
+-void log1 (u32 i)
+-{
+- insn_log[insn_log_index++] = i;
+- if (insn_log_index == 4096)
+- insn_log_index = 0;
+-}
+-
+-void log_insn (u32 *ip)
+-{
+- log1 ((u32)ip);
+- log1 (*ip);
+- log1 (*(ip+1));
+- if (((*ip >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI)
+- log1 (*(ip+2));
+-}
+-
+-void dump_log(void)
+-{
+- int cnt = 0;
+- int i = insn_log_index;
+- int size;
+- struct Scsi_Host *host = first_host;
+-
+- while (cnt < 4096) {
+- printk ("%08x (+%6x): ", insn_log[i], (insn_log[i] - (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4);
+- if (++i == 4096)
+- i = 0;
+- cnt++;
+- if (((insn_log[i] >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI)
+- size = 3;
+- else
+- size = 2;
+- while (size--) {
+- printk ("%08x ", insn_log[i]);
+- if (++i == 4096)
+- i = 0;
+- cnt++;
+- }
+- printk ("\n");
+- }
+-}
+-#endif
+-
+-
+-/*
+- * Function : static void NCR53c7x0_intfly (struct Scsi_Host *host)
+- *
+- * Purpose : Scan command queue for specified host, looking for completed
+- * commands.
+- *
+- * Inputs : Scsi_Host pointer.
+- *
+- * This is called from the interrupt handler, when a simulated INTFLY
+- * interrupt occurs.
+- */
+-
+-static void
+-NCR53c7x0_intfly (struct Scsi_Host *host)
+-{
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */
+- struct NCR53c7x0_cmd *cmd, /* command which halted */
+- **cmd_prev_ptr;
+- unsigned long flags;
+- char search_found = 0; /* Got at least one ? */
+-
+- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- NCR53c7x0_local_setup(host);
+-
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : INTFLY\n", host->host_no);
+-
+- /*
+- * Traverse our list of running commands, and look
+-     * for those with valid (i.e. not 0xff) status and message
+- * bytes encoded in the result which signify command
+- * completion.
+- */
+-
+- local_irq_save(flags);
+-restart:
+- for (cmd_prev_ptr = (struct NCR53c7x0_cmd **)&(hostdata->running_list),
+- cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; cmd ;
+- cmd_prev_ptr = (struct NCR53c7x0_cmd **) &(cmd->next),
+- cmd = (struct NCR53c7x0_cmd *) cmd->next)
+- {
+- Scsi_Cmnd *tmp;
+-
+- if (!cmd) {
+- printk("scsi%d : very weird.\n", host->host_no);
+- break;
+- }
+-
+- if (!(tmp = cmd->cmd)) {
+- printk("scsi%d : weird. NCR53c7x0_cmd has no Scsi_Cmnd\n",
+- host->host_no);
+- continue;
+- }
+- /* Copy the result over now; may not be complete,
+- * but subsequent tests may as well be done on
+- * cached memory.
+- */
+- tmp->result = cmd->result;
+-
+- if (((tmp->result & 0xff) == 0xff) ||
+- ((tmp->result & 0xff00) == 0xff00))
+- continue;
+-
+- search_found = 1;
+-
+- if (cmd->bounce.len)
+- memcpy ((void *)cmd->bounce.addr,
+- (void *)cmd->bounce.buf, cmd->bounce.len);
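+-	/* Data for this command was staged in a bounce buffer; copy it
+-	   back to its real destination before completion is reported. */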
+-
+- /* Important - remove from list _before_ done is called */
+- if (cmd_prev_ptr)
+- *cmd_prev_ptr = (struct NCR53c7x0_cmd *) cmd->next;
+-
+- --hostdata->busy[tmp->device->id][tmp->device->lun];
+- cmd->next = hostdata->free;
+- hostdata->free = cmd;
+-
+- tmp->host_scribble = NULL;
+-
+- if (hostdata->options & OPTION_DEBUG_INTR) {
+- printk ("scsi%d : command complete : pid %lu, id %d,lun %d result 0x%x ",
+- host->host_no, tmp->pid, tmp->device->id, tmp->device->lun, tmp->result);
+- __scsi_print_command (tmp->cmnd);
+- }
+-
+- tmp->scsi_done(tmp);
+- goto restart;
+- }
+- local_irq_restore(flags);
+-
+- if (!search_found) {
+- printk ("scsi%d : WARNING : INTFLY with no completed commands.\n",
+- host->host_no);
+- } else {
+- run_process_issue_queue();
+- }
+- return;
+-}
+-
+-/*
+- * Function : static irqreturn_t NCR53c7x0_intr (int irq, void *dev_id)
+- *
+- * Purpose : handle NCR53c7x0 interrupts for all NCR devices sharing
+- * the same IRQ line.
+- *
+- * Inputs : Since we're using the IRQF_DISABLED interrupt handler
+- * semantics, irq indicates the interrupt which invoked
+- * this handler.
+- *
+- * On the 710 we simulate an INTFLY with a script interrupt, and the
+- * script interrupt handler will call back to this function.
+- */
+-
+-static irqreturn_t
+-NCR53c7x0_intr (int irq, void *dev_id)
+-{
+- NCR53c7x0_local_declare();
+- struct Scsi_Host *host; /* Host we are looking at */
+- unsigned char istat; /* Values of interrupt regs */
+- struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */
+- struct NCR53c7x0_cmd *cmd; /* command which halted */
+- u32 *dsa; /* DSA */
+- int handled = 0;
+-
+-#ifdef NCR_DEBUG
+- char buf[80]; /* Debugging sprintf buffer */
+- size_t buflen; /* Length of same */
+-#endif
+-
+- host = (struct Scsi_Host *)dev_id;
+- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- NCR53c7x0_local_setup(host);
+-
+- /*
+- * Only read istat once per loop, since reading it again will unstack
+- * interrupts
+- */
+-
+- while ((istat = NCR53c7x0_read8(hostdata->istat)) & (ISTAT_SIP|ISTAT_DIP)) {
+- handled = 1;
+- hostdata->dsp_changed = 0;
+- hostdata->dstat_valid = 0;
+- hostdata->state = STATE_HALTED;
+-
+- if (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK)
+- printk ("scsi%d : SCSI FIFO not empty\n", host->host_no);
+-
+- /*
+- * NCR53c700 and NCR53c700-66 change the current SCSI
+- * process, hostdata->ncrcurrent, in the Linux driver so
+- * cmd = hostdata->ncrcurrent.
+- *
+- * With other chips, we must look through the commands
+- * executing and find the command structure which
+- * corresponds to the DSA register.
+- */
+-
+- if (hostdata->options & OPTION_700) {
+- cmd = (struct NCR53c7x0_cmd *) hostdata->ncrcurrent;
+- } else {
+- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+- for (cmd = (struct NCR53c7x0_cmd *) hostdata->running_list;
+- cmd && (dsa + (hostdata->dsa_start / sizeof(u32))) != cmd->dsa;
+- cmd = (struct NCR53c7x0_cmd *)(cmd->next))
+- ;
+- }
+- if (hostdata->options & OPTION_DEBUG_INTR) {
+- if (cmd) {
+- printk("scsi%d : interrupt for pid %lu, id %d, lun %d ",
+- host->host_no, cmd->cmd->pid, (int) cmd->cmd->device->id,
+- (int) cmd->cmd->device->lun);
+- __scsi_print_command (cmd->cmd->cmnd);
+- } else {
+- printk("scsi%d : no active command\n", host->host_no);
+- }
+- }
+-
+- if (istat & ISTAT_SIP) {
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : ISTAT_SIP\n", host->host_no);
+- intr_scsi (host, cmd);
+- }
+-
+- if (istat & ISTAT_DIP) {
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : ISTAT_DIP\n", host->host_no);
+- intr_dma (host, cmd);
+- }
+-
+- if (!hostdata->dstat_valid) {
+- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+- hostdata->dstat_valid = 1;
+- }
+-
+- if (!(hostdata->dstat & DSTAT_DFE)) {
+- printk ("scsi%d : DMA FIFO not empty\n", host->host_no);
+- /* Really need to check this out for 710 RGH */
+- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF)
+- ;
+- hostdata->dstat |= DSTAT_DFE;
+- }
+-
+- if (!hostdata->idle && hostdata->state == STATE_HALTED) {
+- if (!hostdata->dsp_changed)
+- hostdata->dsp = (u32 *)bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-#if 0
+- printk("scsi%d : new dsp is 0x%lx (virt 0x%p)\n",
+- host->host_no, virt_to_bus(hostdata->dsp), hostdata->dsp);
+-#endif
+-
+- hostdata->state = STATE_RUNNING;
+- NCR53c7x0_write32 (DSP_REG, virt_to_bus(hostdata->dsp));
+- if (hostdata->options & OPTION_DEBUG_TRACE) {
+-#ifdef CYCLIC_TRACE
+- log_insn (hostdata->dsp);
+-#else
+- print_insn (host, hostdata->dsp, "t ", 1);
+-#endif
+- NCR53c7x0_write8 (DCNTL_REG,
+- hostdata->saved_dcntl | DCNTL_SSM | DCNTL_STD);
+- }
+- }
+- }
+- return IRQ_HANDLED;
+-}
+-
+-
+-/*
+- * Function : static int abort_connected (struct Scsi_Host *host)
+- *
+- * Purpose : Assuming that the NCR SCSI processor is currently
+- * halted, break the currently established nexus. Clean
+- * up of the NCR53c7x0_cmd and Scsi_Cmnd structures should
+- * be done on receipt of the abort interrupt.
+- *
+- * Inputs : host - SCSI host
+- *
+- */
+-
+-static int
+-abort_connected (struct Scsi_Host *host) {
+-#ifdef NEW_ABORT
+- NCR53c7x0_local_declare();
+-#endif
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+-/* FIXME : this probably should change for production kernels; at the
+- least, counter should move to a per-host structure. */
+- static int counter = 5;
+-#ifdef NEW_ABORT
+- int sstat, phase, offset;
+- u32 *script;
+- NCR53c7x0_local_setup(host);
+-#endif
+-
+- if (--counter <= 0) {
+- disable(host);
+- return 0;
+- }
+-
+- printk ("scsi%d : DANGER : abort_connected() called \n",
+- host->host_no);
+-
+-#ifdef NEW_ABORT
+-
+-/*
+- * New strategy : Rather than using a generic abort routine,
+- * we'll specifically try to source or sink the appropriate
+- * amount of data for the phase we're currently in (taking into
+- * account the current synchronous offset)
+- */
+-
+- sstat = NCR53c8x0_read8 (SSTAT2_REG);
+- offset = OFFSET (sstat & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT;
+- phase = sstat & SSTAT2_PHASE_MASK;
+-
+-/*
+- * SET ATN
+- * MOVE source_or_sink, WHEN CURRENT PHASE
+- * < repeat for each outstanding byte >
+- * JUMP send_abort_message
+- */
+-
+- script = hostdata->abort_script = kmalloc (
+- 8 /* instruction size */ * (
+- 1 /* set ATN */ +
+- (!offset ? 1 : offset) /* One transfer per outstanding byte */ +
+- 1 /* send abort message */),
+- GFP_ATOMIC);
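+-
+-/*
+- * Size arithmetic : each SCRIPTS instruction built here is two 32 bit
+- * words (8 bytes); one SET ATN, one MOVE per outstanding byte (at
+- * least one, even at zero offset), plus one JUMP to the abort message
+- * gives the instruction count used above.
+- */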
+-
+-
+-#else /* def NEW_ABORT */
+- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+- sizeof(u32);
+-#endif /* def NEW_ABORT */
+- hostdata->dsp_changed = 1;
+-
+-/* XXX - need to flag the command as aborted after the abort_connected
+- code runs
+- */
+- return 0;
+-}
+-
+-/*
+- * Function : static int datapath_residual (Scsi_Host *host)
+- *
+- * Purpose : return residual data count of what's in the chip.
+- *
+- * Inputs : host - SCSI host
+- */
+-
+-static int
+-datapath_residual (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- int count, synchronous, sstat;
+- unsigned int ddir;
+-
+- NCR53c7x0_local_setup(host);
+- /* COMPAT : the 700 and 700-66 need to use DFIFO_00_BO_MASK */
+- count = ((NCR53c7x0_read8 (DFIFO_REG) & DFIFO_10_BO_MASK) -
+- (NCR53c7x0_read32 (DBC_REG) & DFIFO_10_BO_MASK)) & DFIFO_10_BO_MASK;
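+-    /*
+-     * Worked example (hypothetical values, assuming the 7 bit byte
+-     * offset mask of the 710 family) : DFIFO = 0x12 and the low bits
+-     * of DBC = 0x10 give (0x12 - 0x10) & 0x7f = 2 bytes still
+-     * sitting in the DMA FIFO.
+-     */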
+- synchronous = NCR53c7x0_read8 (SXFER_REG) & SXFER_MO_MASK;
+- /* COMPAT : DDIR is elsewhere on non-'8xx chips. */
+- ddir = NCR53c7x0_read8 (CTEST0_REG_700) & CTEST0_700_DDIR;
+-
+- if (ddir) {
+- /* Receive */
+- if (synchronous)
+- count += (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT;
+- else
+- if (NCR53c7x0_read8 (SSTAT1_REG) & SSTAT1_ILF)
+- ++count;
+- } else {
+- /* Send */
+- sstat = NCR53c7x0_read8 (SSTAT1_REG);
+- if (sstat & SSTAT1_OLF)
+- ++count;
+- if (synchronous && (sstat & SSTAT1_ORF))
+- ++count;
+- }
+- return count;
+-}
+-
+-/*
+- * Function : static const char * sbcl_to_phase (int sbcl)
+- *
+- * Purpose : Convert SBCL register to user-parsable phase representation
+- *
+- * Inputs : sbcl - value of sbcl register
+- */
+-
+-
+-static const char *
+-sbcl_to_phase (int sbcl) {
+- switch (sbcl & SBCL_PHASE_MASK) {
+- case SBCL_PHASE_DATAIN:
+- return "DATAIN";
+- case SBCL_PHASE_DATAOUT:
+- return "DATAOUT";
+- case SBCL_PHASE_MSGIN:
+- return "MSGIN";
+- case SBCL_PHASE_MSGOUT:
+- return "MSGOUT";
+- case SBCL_PHASE_CMDOUT:
+- return "CMDOUT";
+- case SBCL_PHASE_STATIN:
+- return "STATUSIN";
+- default:
+- return "unknown";
+- }
+-}
+-
+-/*
+- * Function : static const char * sstat2_to_phase (int sstat)
+- *
+- * Purpose : Convert SSTAT2 register to user-parsable phase representation
+- *
+- * Inputs : sstat - value of sstat register
+- */
+-
+-
+-static const char *
+-sstat2_to_phase (int sstat) {
+- switch (sstat & SSTAT2_PHASE_MASK) {
+- case SSTAT2_PHASE_DATAIN:
+- return "DATAIN";
+- case SSTAT2_PHASE_DATAOUT:
+- return "DATAOUT";
+- case SSTAT2_PHASE_MSGIN:
+- return "MSGIN";
+- case SSTAT2_PHASE_MSGOUT:
+- return "MSGOUT";
+- case SSTAT2_PHASE_CMDOUT:
+- return "CMDOUT";
+- case SSTAT2_PHASE_STATIN:
+- return "STATUSIN";
+- default:
+- return "unknown";
+- }
+-}
+-
+-/*
+- * Function : static void intr_phase_mismatch (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : Handle phase mismatch interrupts
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * may be NULL.
+- *
+- * Side effects : The abort_connected() routine is called or the NCR chip
+- * is restarted, jumping to the command_complete entry point, or
+- * patching the address and transfer count of the current instruction
+- * and calling the msg_in entry point as appropriate.
+- */
+-
+-static void
+-intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- u32 dbc_dcmd, *dsp, *dsp_next;
+- unsigned char dcmd, sbcl;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int residual;
+- enum {ACTION_ABORT, ACTION_ABORT_PRINT, ACTION_CONTINUE} action =
+- ACTION_ABORT_PRINT;
+- const char *where = NULL;
+-
+- NCR53c7x0_local_setup(host);
+-
+- /*
+- * Corrective action is based on where in the SCSI SCRIPT(tm) the error
+- * occurred, as well as which SCSI phase we are currently in.
+- */
+- dsp_next = bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-
+- /*
+- * Fetch the current instruction, and remove the operands for easier
+- * interpretation.
+- */
+- dbc_dcmd = NCR53c7x0_read32(DBC_REG);
+- dcmd = (dbc_dcmd & 0xff000000) >> 24;
+- /*
+- * Like other processors, the NCR adjusts the instruction pointer before
+- * instruction decode. Set the DSP address back to what it should
+- * be for this instruction based on its size (2 or 3 32 bit words).
+- */
+- dsp = dsp_next - NCR53c7x0_insn_size(dcmd);
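+-    /*
+-     * E.g. a faulted two-word Block Move leaves DSP_REG pointing 8
+-     * bytes past the opcode; backing up by NCR53c7x0_insn_size()
+-     * u32 words recovers the address of the instruction itself.
+-     */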
+-
+-
+- /*
+- * Read new SCSI phase from the SBCL lines. Since all of our code uses
+- * a WHEN conditional instead of an IF conditional, we don't need to
+- * wait for a new REQ.
+- */
+- sbcl = NCR53c7x0_read8(SBCL_REG) & SBCL_PHASE_MASK;
+-
+- if (!cmd) {
+- action = ACTION_ABORT_PRINT;
+- where = "no current command";
+- /*
+- * The way my SCSI SCRIPTS(tm) are architected, recoverable phase
+- * mismatches should only occur where we're doing a multi-byte
+- * BMI instruction. Specifically, this means
+- *
+- * - select messages (a SCSI-I target may ignore additional messages
+- * after the IDENTIFY; any target may reject a SDTR or WDTR)
+- *
+- * - command out (targets may send a message to signal an error
+- * condition, or go into STATUSIN after they've decided
+-     *	 they don't like the command).
+- *
+- * - reply_message (targets may reject a multi-byte message in the
+- * middle)
+- *
+- * - data transfer routines (command completion with buffer space
+- * left, disconnect message, or error message)
+- */
+- } else if (((dsp >= cmd->data_transfer_start &&
+- dsp < cmd->data_transfer_end)) || dsp == (cmd->residual + 2)) {
+- if ((dcmd & (DCMD_TYPE_MASK|DCMD_BMI_OP_MASK|DCMD_BMI_INDIRECT|
+- DCMD_BMI_MSG|DCMD_BMI_CD)) == (DCMD_TYPE_BMI|
+- DCMD_BMI_OP_MOVE_I)) {
+- residual = datapath_residual (host);
+- if (hostdata->options & OPTION_DEBUG_DISCONNECT)
+- printk ("scsi%d : handling residual transfer (+ %d bytes from DMA FIFO)\n",
+- host->host_no, residual);
+-
+- /*
+- * The first instruction is a CALL to the alternate handler for
+- * this data transfer phase, so we can do calls to
+- * munge_msg_restart as we would if control were passed
+- * from normal dynamic code.
+- */
+- if (dsp != cmd->residual + 2) {
+- cmd->residual[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL |
+- ((dcmd & DCMD_BMI_IO) ? DCMD_TCI_IO : 0)) << 24) |
+- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+- cmd->residual[1] = virt_to_bus(hostdata->script)
+- + ((dcmd & DCMD_BMI_IO)
+- ? hostdata->E_other_in : hostdata->E_other_out);
+- }
+-
+- /*
+-	     * The second instruction is the data transfer block
+- * move instruction, reflecting the pointer and count at the
+- * time of the phase mismatch.
+- */
+- cmd->residual[2] = dbc_dcmd + residual;
+- cmd->residual[3] = NCR53c7x0_read32(DNAD_REG) - residual;
+-
+- /*
+- * The third and final instruction is a jump to the instruction
+- * which follows the instruction which had to be 'split'
+- */
+- if (dsp != cmd->residual + 2) {
+- cmd->residual[4] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP)
+- << 24) | DBC_TCI_TRUE;
+- cmd->residual[5] = virt_to_bus(dsp_next);
+- }
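+-
+-	    /*
+-	     * Sketch of the finished residual buffer : three SCRIPTS
+-	     * instructions, six u32 words in all -
+-	     *
+-	     *	residual[0..1]	CALL other_in / other_out, WHEN <phase>
+-	     *	residual[2..3]	MOVE <bytes left>, <address>, WHEN <phase>
+-	     *	residual[4..5]	JUMP <instruction after the split transfer>
+-	     */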
+-
+- /*
+- * For the sake of simplicity, transfer control to the
+- * conditional CALL at the start of the residual buffer.
+- */
+- hostdata->dsp = cmd->residual;
+- hostdata->dsp_changed = 1;
+- action = ACTION_CONTINUE;
+- } else {
+- where = "non-BMI dynamic DSA code";
+- action = ACTION_ABORT_PRINT;
+- }
+- } else if (dsp == (hostdata->script + hostdata->E_select_msgout / 4 + 2)) {
+- /* RGH 290697: Added +2 above, to compensate for the script
+- * instruction which disables the selection timer. */
+- /* Release ATN */
+- NCR53c7x0_write8 (SOCL_REG, 0);
+- switch (sbcl) {
+- /*
+- * Some devices (SQ555 come to mind) grab the IDENTIFY message
+- * sent on selection, and decide to go into COMMAND OUT phase
+- * rather than accepting the rest of the messages or rejecting
+- * them. Handle these devices gracefully.
+- */
+- case SBCL_PHASE_CMDOUT:
+- hostdata->dsp = dsp + 2 /* two _words_ */;
+- hostdata->dsp_changed = 1;
+- printk ("scsi%d : target %d ignored SDTR and went into COMMAND OUT\n",
+- host->host_no, cmd->cmd->device->id);
+- cmd->flags &= ~CMD_FLAG_SDTR;
+- action = ACTION_CONTINUE;
+- break;
+- case SBCL_PHASE_MSGIN:
+- hostdata->dsp = hostdata->script + hostdata->E_msg_in /
+- sizeof(u32);
+- hostdata->dsp_changed = 1;
+- action = ACTION_CONTINUE;
+- break;
+- default:
+- where="select message out";
+- action = ACTION_ABORT_PRINT;
+- }
+- /*
+- * Some SCSI devices will interpret a command as they read the bytes
+- * off the SCSI bus, and may decide that the command is Bogus before
+- * they've read the entire command off the bus.
+- */
+- } else if (dsp == hostdata->script + hostdata->E_cmdout_cmdout / sizeof
+- (u32)) {
+- hostdata->dsp = hostdata->script + hostdata->E_data_transfer /
+- sizeof (u32);
+- hostdata->dsp_changed = 1;
+- action = ACTION_CONTINUE;
+- /* FIXME : we need to handle message reject, etc. within msg_respond. */
+-#ifdef notyet
+- } else if (dsp == hostdata->script + hostdata->E_reply_message) {
+- switch (sbcl) {
+- /* Any other phase mismatches abort the currently executing command. */
+-#endif
+- } else {
+- where = "unknown location";
+- action = ACTION_ABORT_PRINT;
+- }
+-
+- /* Flush DMA FIFO */
+- if (!hostdata->dstat_valid) {
+- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+- hostdata->dstat_valid = 1;
+- }
+- if (!(hostdata->dstat & DSTAT_DFE)) {
+- /* Really need to check this out for 710 RGH */
+- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF);
+- hostdata->dstat |= DSTAT_DFE;
+- }
+-
+- switch (action) {
+- case ACTION_ABORT_PRINT:
+- printk("scsi%d : %s : unexpected phase %s.\n",
+- host->host_no, where ? where : "unknown location",
+- sbcl_to_phase(sbcl));
+- print_lots (host);
+- /* Fall through to ACTION_ABORT */
+- case ACTION_ABORT:
+- abort_connected (host);
+- break;
+- case ACTION_CONTINUE:
+- break;
+- }
+-
+-#if 0
+- if (hostdata->dsp_changed) {
+- printk("scsi%d: new dsp 0x%p\n", host->host_no, hostdata->dsp);
+- print_insn (host, hostdata->dsp, "", 1);
+- }
+-#endif
+-}
+-
+-/*
+- * Function : static void intr_bf (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle BUS FAULT interrupts
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * may be NULL.
+- */
+-
+-static void
+-intr_bf (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- u32 *dsp,
+- *next_dsp, /* Current dsp */
+- *dsa,
+- dbc_dcmd; /* DCMD (high eight bits) + DBC */
+- char *reason = NULL;
+- /* Default behavior is for a silent error, with a retry until we've
+- exhausted retries. */
+- enum {MAYBE, ALWAYS, NEVER} retry = MAYBE;
+- int report = 0;
+- NCR53c7x0_local_setup(host);
+-
+- dbc_dcmd = NCR53c7x0_read32 (DBC_REG);
+- next_dsp = bus_to_virt (NCR53c7x0_read32(DSP_REG));
+- dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff);
+-/* FIXME - check chip type */
+- dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-
+- /*
+- * Bus faults can be caused by either a Bad Address or
+- * Target Abort. We should check the Received Target Abort
+- * bit of the PCI status register and Master Abort Bit.
+- *
+- * - Master Abort bit indicates that no device claimed
+- * the address with DEVSEL within five clocks
+- *
+- * - Target Abort bit indicates that a target claimed it,
+- * but changed its mind once it saw the byte enables.
+- *
+- */
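+-
+-/*
+- * On a PCI hosted part those two bits live in the PCI status
+- * configuration register; the 53c710 handled below is not a PCI
+- * device, so report/reason are simply hardwired instead of being
+- * derived from such a check.
+- */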
+-
+- /* 53c710, not PCI system */
+- report = 1;
+- reason = "Unknown";
+-
+-#ifndef notyet
+- report = 1;
+-#endif
+- if (report && reason)
+- {
+- printk(KERN_ALERT "scsi%d : BUS FAULT reason = %s\n",
+- host->host_no, reason ? reason : "unknown");
+- print_lots (host);
+- }
+-
+-#ifndef notyet
+- retry = NEVER;
+-#endif
+-
+- /*
+- * TODO : we should attempt to recover from any spurious bus
+- * faults. After X retries, we should figure that things are
+- * sufficiently wedged, and call NCR53c7xx_reset.
+- *
+- * This code should only get executed once we've decided that we
+- * cannot retry.
+- */
+-
+- if (retry == NEVER) {
+- printk(KERN_ALERT " mail richard@sleepie.demon.co.uk\n");
+- FATAL (host);
+- }
+-}
+-
+-/*
+- * Function : static void intr_dma (struct Scsi_Host *host,
+- * struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle all DMA interrupts, indicated by the setting
+- * of the DIP bit in the ISTAT register.
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * may be NULL.
+- */
+-
+-static void
+-intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned char dstat; /* DSTAT */
+- u32 *dsp,
+- *next_dsp, /* Current dsp */
+- *dsa,
+- dbc_dcmd; /* DCMD (high eight bits) + DBC */
+- int tmp;
+- unsigned long flags;
+- NCR53c7x0_local_setup(host);
+-
+- if (!hostdata->dstat_valid) {
+- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+- hostdata->dstat_valid = 1;
+- }
+-
+- dstat = hostdata->dstat;
+-
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk("scsi%d : DSTAT=0x%x\n", host->host_no, (int) dstat);
+-
+- dbc_dcmd = NCR53c7x0_read32 (DBC_REG);
+- next_dsp = bus_to_virt(NCR53c7x0_read32(DSP_REG));
+- dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff);
+-/* XXX - check chip type */
+- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+-
+- /*
+- * DSTAT_ABRT is the aborted interrupt. This is set whenever the
+- * SCSI chip is aborted.
+- *
+- * With NCR53c700 and NCR53c700-66 style chips, we should only
+- * get this when the chip is currently running the accept
+- * reselect/select code and we have set the abort bit in the
+- * ISTAT register.
+- *
+- */
+-
+- if (dstat & DSTAT_ABRT) {
+-#if 0
+- /* XXX - add code here to deal with normal abort */
+- if ((hostdata->options & OPTION_700) && (hostdata->state ==
+- STATE_ABORTING)) {
+- } else
+-#endif
+- {
+- printk(KERN_ALERT "scsi%d : unexpected abort interrupt at\n"
+- " ", host->host_no);
+- print_insn (host, dsp, KERN_ALERT "s ", 1);
+- FATAL (host);
+- }
+- }
+-
+- /*
+- * DSTAT_SSI is the single step interrupt. Should be generated
+- * whenever we have single stepped or are tracing.
+- */
+-
+- if (dstat & DSTAT_SSI) {
+- if (hostdata->options & OPTION_DEBUG_TRACE) {
+- /* Don't print instr. until we write DSP at end of intr function */
+- } else if (hostdata->options & OPTION_DEBUG_SINGLE) {
+- print_insn (host, dsp, "s ", 0);
+- local_irq_save(flags);
+-/* XXX - should we do this, or can we get away with writing dsp? */
+-
+- NCR53c7x0_write8 (DCNTL_REG, (NCR53c7x0_read8(DCNTL_REG) &
+- ~DCNTL_SSM) | DCNTL_STD);
+- local_irq_restore(flags);
+- } else {
+- printk(KERN_ALERT "scsi%d : unexpected single step interrupt at\n"
+- " ", host->host_no);
+- print_insn (host, dsp, KERN_ALERT "", 1);
+- printk(KERN_ALERT " mail drew@PoohSticks.ORG\n");
+- FATAL (host);
+- }
+- }
+-
+- /*
+- * DSTAT_IID / DSTAT_OPC (same bit, same meaning, only the name
+- * is different) is generated whenever an illegal instruction is
+- * encountered.
+- *
+- * XXX - we may want to emulate INTFLY here, so we can use
+- * the same SCSI SCRIPT (tm) for NCR53c710 through NCR53c810
+- * chips.
+- */
+-
+- if (dstat & DSTAT_OPC) {
+- /*
+-	 * Ascertain if this IID interrupt occurred before or after a STO
+- * interrupt. Since the interrupt handling code now leaves
+- * DSP unmodified until _after_ all stacked interrupts have been
+- * processed, reading the DSP returns the original DSP register.
+- * This means that if dsp lies between the select code, and
+- * message out following the selection code (where the IID interrupt
+-	 * would have had to occur due to the implicit wait for REQ),
+- * we have an IID interrupt resulting from a STO condition and
+- * can ignore it.
+- */
+-
+- if (((dsp >= (hostdata->script + hostdata->E_select / sizeof(u32))) &&
+- (dsp <= (hostdata->script + hostdata->E_select_msgout /
+- sizeof(u32) + 8))) || (hostdata->test_running == 2)) {
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : ignoring DSTAT_IID for SSTAT_STO\n",
+- host->host_no);
+- if (hostdata->expecting_iid) {
+- hostdata->expecting_iid = 0;
+- hostdata->idle = 1;
+- if (hostdata->test_running == 2) {
+- hostdata->test_running = 0;
+- hostdata->test_completed = 3;
+- } else if (cmd)
+- abnormal_finished (cmd, DID_BAD_TARGET << 16);
+- } else {
+- hostdata->expecting_sto = 1;
+- }
+- /*
+- * We can't guarantee we'll be able to execute the WAIT DISCONNECT
+- * instruction within the 3.4us of bus free and arbitration delay
+- * that a target can RESELECT in and assert REQ after we've dropped
+- * ACK. If this happens, we'll get an illegal instruction interrupt.
+- * Doing away with the WAIT DISCONNECT instructions broke everything,
+- * so instead I'll settle for moving one WAIT DISCONNECT a few
+- * instructions closer to the CLEAR ACK before it to minimize the
+- * chances of this happening, and handle it if it occurs anyway.
+- *
+- * Simply continue with what we were doing, and control should
+- * be transferred to the schedule routine which will ultimately
+- * pass control onto the reselection or selection (not yet)
+- * code.
+- */
+- } else if (dbc_dcmd == 0x48000000 && (NCR53c7x0_read8 (SBCL_REG) &
+- SBCL_REQ)) {
+- if (!(hostdata->options & OPTION_NO_PRINT_RACE))
+- {
+- printk("scsi%d: REQ before WAIT DISCONNECT IID\n",
+- host->host_no);
+- hostdata->options |= OPTION_NO_PRINT_RACE;
+- }
+- } else {
+- printk(KERN_ALERT "scsi%d : invalid instruction\n", host->host_no);
+- print_lots (host);
+- printk(KERN_ALERT " mail Richard@sleepie.demon.co.uk with ALL\n"
+- " boot messages and diagnostic output\n");
+- FATAL (host);
+- }
+- }
+-
+- /*
+- * DSTAT_BF are bus fault errors. DSTAT_800_BF is valid for 710 also.
+- */
+-
+- if (dstat & DSTAT_800_BF) {
+- intr_bf (host, cmd);
+- }
+-
+-
+- /*
+- * DSTAT_SIR interrupts are generated by the execution of
+- * the INT instruction. Since the exact values available
+- * are determined entirely by the SCSI script running,
+- * and are local to a particular script, a unique handler
+- * is called for each script.
+- */
+-
+- if (dstat & DSTAT_SIR) {
+- if (hostdata->options & OPTION_DEBUG_INTR)
+- printk ("scsi%d : DSTAT_SIR\n", host->host_no);
+- switch ((tmp = hostdata->dstat_sir_intr (host, cmd))) {
+- case SPECIFIC_INT_NOTHING:
+- case SPECIFIC_INT_RESTART:
+- break;
+- case SPECIFIC_INT_ABORT:
+- abort_connected(host);
+- break;
+- case SPECIFIC_INT_PANIC:
+- printk(KERN_ALERT "scsi%d : failure at ", host->host_no);
+- print_insn (host, dsp, KERN_ALERT "", 1);
+- printk(KERN_ALERT " dstat_sir_intr() returned SPECIFIC_INT_PANIC\n");
+- FATAL (host);
+- break;
+- case SPECIFIC_INT_BREAK:
+- intr_break (host, cmd);
+- break;
+- default:
+- printk(KERN_ALERT "scsi%d : failure at ", host->host_no);
+- print_insn (host, dsp, KERN_ALERT "", 1);
+- printk(KERN_ALERT" dstat_sir_intr() returned unknown value %d\n",
+- tmp);
+- FATAL (host);
+- }
+- }
+-}
+-
+-/*
+- * Function : static int print_insn (struct Scsi_Host *host,
+- *	const u32 *insn, const char *prefix, int kernel)
+- *
+- * Purpose : print numeric representation of the instruction pointed
+- * to by insn to the debugging or kernel message buffer
+- * as appropriate.
+- *
+- * If desired, a user level program can interpret this
+- * information.
+- *
+- * Inputs : host, insn - host, pointer to instruction, prefix -
+- * string to prepend, kernel - use printk instead of debugging buffer.
+- *
+- * Returns : size, in u32s, of instruction printed.
+- */
+-
+-/*
+- * FIXME: should change kernel parameter so that it takes an ENUM
+- * specifying severity - either KERN_ALERT or KERN_PANIC so
+- * all panic messages are output with the same severity.
+- */
+-
+-static int
+-print_insn (struct Scsi_Host *host, const u32 *insn,
+- const char *prefix, int kernel) {
+-    char buf[160],		/* Temporary buffer and pointer.  ICKY
+-				   arbitrary length.  */
+-	*tmp;
+- unsigned char dcmd; /* dcmd register for *insn */
+- int size;
+-
+- /*
+- * Check to see if the instruction pointer is not bogus before
+- * indirecting through it; avoiding red-zone at start of
+- * memory.
+- *
+- * FIXME: icky magic needs to happen here on non-intel boxes which
+- * don't have kernel memory mapped in like this. Might be reasonable
+- * to use vverify()?
+- */
+-
+- if (virt_to_phys((void *)insn) < PAGE_SIZE ||
+- virt_to_phys((void *)(insn + 8)) > virt_to_phys(high_memory) ||
+- ((((dcmd = (insn[0] >> 24) & 0xff) & DCMD_TYPE_MMI) == DCMD_TYPE_MMI) &&
+- virt_to_phys((void *)(insn + 12)) > virt_to_phys(high_memory))) {
+- size = 0;
+- sprintf (buf, "%s%p: address out of range\n",
+- prefix, insn);
+- } else {
+-/*
+- * FIXME : (void *) cast in virt_to_bus should be unnecessary, because
+- * it should take const void * as argument.
+- */
+-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000)
+- sprintf(buf, "%s0x%lx (virt 0x%p) : 0x%08x 0x%08x (virt 0x%p)",
+- (prefix ? prefix : ""), virt_to_bus((void *) insn), insn,
+- insn[0], insn[1], bus_to_virt (insn[1]));
+-#else
+- /* Remove virtual addresses to reduce output, as they are the same */
+- sprintf(buf, "%s0x%x (+%x) : 0x%08x 0x%08x",
+- (prefix ? prefix : ""), (u32)insn, ((u32)insn -
+- (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4,
+- insn[0], insn[1]);
+-#endif
+- tmp = buf + strlen(buf);
+- if ((dcmd & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) {
+-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000)
+- sprintf (tmp, " 0x%08x (virt 0x%p)\n", insn[2],
+- bus_to_virt(insn[2]));
+-#else
+- /* Remove virtual addr to reduce output, as it is the same */
+- sprintf (tmp, " 0x%08x\n", insn[2]);
+-#endif
+- size = 3;
+- } else {
+- sprintf (tmp, "\n");
+- size = 2;
+- }
+- }
+-
+- if (kernel)
+- printk ("%s", buf);
+-#ifdef NCR_DEBUG
+- else {
+- size_t len = strlen(buf);
+- debugger_kernel_write(host, buf, len);
+- }
+-#endif
+- return size;
+-}
+-
+-/*
+- * Function : int NCR53c7xx_abort (Scsi_Cmnd *cmd)
+- *
+- * Purpose : Abort an errant SCSI command, doing all necessary
+- * cleanup of the issue_queue, running_list, shared Linux/NCR
+- * dsa issue and reconnect queues.
+- *
+- * Inputs : cmd - command to abort, code - entire result field
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-int
+-NCR53c7xx_abort (Scsi_Cmnd *cmd) {
+- NCR53c7x0_local_declare();
+- struct Scsi_Host *host = cmd->device->host;
+- struct NCR53c7x0_hostdata *hostdata = host ? (struct NCR53c7x0_hostdata *)
+- host->hostdata[0] : NULL;
+- unsigned long flags;
+- struct NCR53c7x0_cmd *curr, **prev;
+- Scsi_Cmnd *me, **last;
+-#if 0
+- static long cache_pid = -1;
+-#endif
+-
+-
+- if (!host) {
+- printk ("Bogus SCSI command pid %ld; no host structure\n",
+- cmd->pid);
+- return SCSI_ABORT_ERROR;
+- } else if (!hostdata) {
+- printk ("Bogus SCSI host %d; no hostdata\n", host->host_no);
+- return SCSI_ABORT_ERROR;
+- }
+- NCR53c7x0_local_setup(host);
+-
+-/*
+- * CHECK : I don't think that reading ISTAT will unstack any interrupts,
+- * since we need to write the INTF bit to clear it, and SCSI/DMA
+- * interrupts don't clear until we read SSTAT/SIST and DSTAT registers.
+- *
+- * See that this is the case. Appears to be correct on the 710, at least.
+- *
+- * I suspect that several of our failures may be coming from a new fatal
+- * interrupt (possibly due to a phase mismatch) happening after we've left
+- * the interrupt handler, but before the PIC has had the interrupt condition
+- * cleared.
+- */
+-
+- if (NCR53c7x0_read8(hostdata->istat) & (ISTAT_DIP|ISTAT_SIP)) {
+- printk ("scsi%d : dropped interrupt for command %ld\n", host->host_no,
+- cmd->pid);
+-	NCR53c7x0_intr (host->irq, host);
+- return SCSI_ABORT_BUSY;
+- }
+-
+- local_irq_save(flags);
+-#if 0
+- if (cache_pid == cmd->pid)
+- panic ("scsi%d : bloody fetus %d\n", host->host_no, cmd->pid);
+- else
+- cache_pid = cmd->pid;
+-#endif
+-
+-
+-/*
+- * The command could be hiding in the issue_queue. This would be very
+- * nice, as commands can't be moved from the high level driver's issue queue
+- * into the shared queue until an interrupt routine is serviced, and this
+- * moving is atomic.
+- *
+- * If this is the case, we don't have to worry about anything - we simply
+- * pull the command out of the old queue, and call it aborted.
+- */
+-
+- for (me = (Scsi_Cmnd *) hostdata->issue_queue,
+- last = (Scsi_Cmnd **) &(hostdata->issue_queue);
+- me && me != cmd; last = (Scsi_Cmnd **)&(me->SCp.ptr),
+- me = (Scsi_Cmnd *)me->SCp.ptr);
+-
+- if (me) {
+- *last = (Scsi_Cmnd *) me->SCp.ptr;
+- if (me->host_scribble) {
+- ((struct NCR53c7x0_cmd *)me->host_scribble)->next = hostdata->free;
+- hostdata->free = (struct NCR53c7x0_cmd *) me->host_scribble;
+- me->host_scribble = NULL;
+- }
+- cmd->result = DID_ABORT << 16;
+- cmd->scsi_done(cmd);
+- printk ("scsi%d : found command %ld in Linux issue queue\n",
+- host->host_no, me->pid);
+- local_irq_restore(flags);
+- run_process_issue_queue();
+- return SCSI_ABORT_SUCCESS;
+- }
+-
+-/*
+- * That failing, the command could be in our list of already executing
+- * commands. If this is the case, drastic measures are called for.
+- */
+-
+- for (curr = (struct NCR53c7x0_cmd *) hostdata->running_list,
+- prev = (struct NCR53c7x0_cmd **) &(hostdata->running_list);
+- curr && curr->cmd != cmd; prev = (struct NCR53c7x0_cmd **)
+- &(curr->next), curr = (struct NCR53c7x0_cmd *) curr->next);
+-
+- if (curr) {
+- if ((curr->result & 0xff) != 0xff && (curr->result & 0xff00) != 0xff00) {
+- cmd->result = curr->result;
+- if (prev)
+- *prev = (struct NCR53c7x0_cmd *) curr->next;
+- curr->next = (struct NCR53c7x0_cmd *) hostdata->free;
+- cmd->host_scribble = NULL;
+- hostdata->free = curr;
+- cmd->scsi_done(cmd);
+- printk ("scsi%d : found finished command %ld in running list\n",
+- host->host_no, cmd->pid);
+- local_irq_restore(flags);
+- return SCSI_ABORT_NOT_RUNNING;
+- } else {
+- printk ("scsi%d : DANGER : command running, can not abort.\n",
+- cmd->device->host->host_no);
+- local_irq_restore(flags);
+- return SCSI_ABORT_BUSY;
+- }
+- }
+-
+-/*
+- * And if we couldn't find it in any of our queues, it must have been
+- * a dropped interrupt.
+- */
+-
+- curr = (struct NCR53c7x0_cmd *) cmd->host_scribble;
+- if (curr) {
+- curr->next = hostdata->free;
+- hostdata->free = curr;
+- cmd->host_scribble = NULL;
+- }
+-
+- if (curr == NULL || ((curr->result & 0xff00) == 0xff00) ||
+- ((curr->result & 0xff) == 0xff)) {
+- printk ("scsi%d : did this command ever run?\n", host->host_no);
+- cmd->result = DID_ABORT << 16;
+- } else {
+- printk ("scsi%d : probably lost INTFLY, normal completion\n",
+- host->host_no);
+- cmd->result = curr->result;
+-/*
+- * FIXME : We need to add an additional flag which indicates if a
+- * command was ever counted as BUSY, so if we end up here we can
+- * decrement the busy count if and only if it is necessary.
+- */
+- --hostdata->busy[cmd->device->id][cmd->device->lun];
+- }
+- local_irq_restore(flags);
+- cmd->scsi_done(cmd);
+-
+-/*
+- * We need to run process_issue_queue since termination of this command
+- * may allow another queued command to execute first?
+- */
+- return SCSI_ABORT_NOT_RUNNING;
+-}
+-
+-/*
+- * Function : int NCR53c7xx_reset (Scsi_Cmnd *cmd)
+- *
+- * Purpose : perform a hard reset of the SCSI bus and NCR
+- * chip.
+- *
+- * Inputs : cmd - command which caused the SCSI RESET
+- *
+- * Returns : 0 on success.
+- */
+-
+-int
+-NCR53c7xx_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) {
+- NCR53c7x0_local_declare();
+- unsigned long flags;
+- int found = 0;
+- struct NCR53c7x0_cmd * c;
+- Scsi_Cmnd *tmp;
+- /*
+- * When we call scsi_done(), it's going to wake up anything sleeping on the
+- * resources which were in use by the aborted commands, and we'll start to
+- * get new commands.
+- *
+- * We can't let this happen until after we've re-initialized the driver
+- * structures, and can't reinitialize those structures until after we've
+- * dealt with their contents.
+- *
+- * So, we need to find all of the commands which were running, stick
+- * them on a linked list of completed commands (we'll use the host_scribble
+- * pointer), do our reinitialization, and then call the done function for
+- * each command.
+- */
+- Scsi_Cmnd *nuke_list = NULL;
+- struct Scsi_Host *host = cmd->device->host;
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+-
+- NCR53c7x0_local_setup(host);
+- local_irq_save(flags);
+- ncr_halt (host);
+- print_lots (host);
+- dump_events (host, 30);
+- ncr_scsi_reset (host);
+- for (tmp = nuke_list = return_outstanding_commands (host, 1 /* free */,
+- 0 /* issue */ ); tmp; tmp = (Scsi_Cmnd *) tmp->SCp.buffer)
+- if (tmp == cmd) {
+- found = 1;
+- break;
+- }
+-
+- /*
+- * If we didn't find the command which caused this reset in our running
+- * list, then we've lost it. See that it terminates normally anyway.
+- */
+- if (!found) {
+- c = (struct NCR53c7x0_cmd *) cmd->host_scribble;
+- if (c) {
+- cmd->host_scribble = NULL;
+- c->next = hostdata->free;
+- hostdata->free = c;
+- } else
+- printk ("scsi%d: lost command %ld\n", host->host_no, cmd->pid);
+- cmd->SCp.buffer = (struct scatterlist *) nuke_list;
+- nuke_list = cmd;
+- }
+-
+- NCR53c7x0_driver_init (host);
+- hostdata->soft_reset (host);
+- if (hostdata->resets == 0)
+- disable(host);
+- else if (hostdata->resets != -1)
+- --hostdata->resets;
+- local_irq_restore(flags);
+- for (; nuke_list; nuke_list = tmp) {
+- tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer;
+- nuke_list->result = DID_RESET << 16;
+- nuke_list->scsi_done (nuke_list);
+- }
+- local_irq_restore(flags);
+- return SCSI_RESET_SUCCESS;
+-}
+-
+-/*
+- * The NCR SDMS BIOS follows Annex A of the SCSI-CAM draft, and
+- * therefore shares the scsicam_bios_param function.
+- */
+-
+-/*
+- * Function : int insn_to_offset (Scsi_Cmnd *cmd, u32 *insn)
+- *
+- * Purpose : convert instructions stored at NCR pointer into data
+- * pointer offset.
+- *
+- * Inputs : cmd - SCSI command; insn - pointer to instruction. Either current
+- * DSP, or saved data pointer.
+- *
+- * Returns : offset on success, -1 on failure.
+- */
+-
+-
+-static int
+-insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) {
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) cmd->device->host->hostdata[0];
+- struct NCR53c7x0_cmd *ncmd =
+- (struct NCR53c7x0_cmd *) cmd->host_scribble;
+- int offset = 0, buffers;
+- struct scatterlist *segment;
+- char *ptr;
+- int found = 0;
+-
+-/*
+- * With the current code implementation, if the insn is inside dynamically
+- * generated code, the data pointer will be the instruction preceding
+- * the next transfer segment.
+- */
+-
+- if (!check_address ((unsigned long) ncmd, sizeof (struct NCR53c7x0_cmd)) &&
+- ((insn >= ncmd->data_transfer_start &&
+- insn < ncmd->data_transfer_end) ||
+- (insn >= ncmd->residual &&
+- insn < (ncmd->residual +
+- sizeof(ncmd->residual))))) {
+- ptr = bus_to_virt(insn[3]);
+-
+- if ((buffers = cmd->use_sg)) {
+- for (offset = 0,
+- segment = (struct scatterlist *) cmd->request_buffer;
+- buffers && !((found = ((ptr >= (char *)page_address(segment->page)+segment->offset) &&
+- (ptr < ((char *)page_address(segment->page)+segment->offset+segment->length)))));
+- --buffers, offset += segment->length, ++segment)
+-#if 0
+- printk("scsi%d: comparing 0x%p to 0x%p\n",
+- cmd->device->host->host_no, saved, page_address(segment->page+segment->offset));
+-#else
+- ;
+-#endif
+- offset += ptr - ((char *)page_address(segment->page)+segment->offset);
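+-	    /*
+-	     * offset now equals the lengths of all fully transferred
+-	     * segments plus the position of ptr within the current one
+-	     * (hypothetically : two completed 4096 byte segments and
+-	     * ptr 512 bytes into the third give 8704).
+-	     */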
+- } else {
+- found = 1;
+- offset = ptr - (char *) (cmd->request_buffer);
+- }
+- } else if ((insn >= hostdata->script +
+- hostdata->E_data_transfer / sizeof(u32)) &&
+- (insn <= hostdata->script +
+- hostdata->E_end_data_transfer / sizeof(u32))) {
+- found = 1;
+- offset = 0;
+- }
+- return found ? offset : -1;
+-}
+-
+-
+-
+-/*
+- * Function : void print_progress (Scsi_Cmnd *cmd)
+- *
+- * Purpose : print the current location of the saved data pointer
+- *
+- * Inputs : cmd - command we are interested in
+- *
+- */
+-
+-static void
+-print_progress (Scsi_Cmnd *cmd) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_cmd *ncmd =
+- (struct NCR53c7x0_cmd *) cmd->host_scribble;
+- int offset, i;
+- char *where;
+- u32 *ptr;
+- NCR53c7x0_local_setup (cmd->device->host);
+-
+- if (check_address ((unsigned long) ncmd,sizeof (struct NCR53c7x0_cmd)) == 0)
+- {
+- printk("\nNCR53c7x0_cmd fields:\n");
+- printk(" bounce.len=0x%x, addr=0x%0x, buf[]=0x%02x %02x %02x %02x\n",
+- ncmd->bounce.len, ncmd->bounce.addr, ncmd->bounce.buf[0],
+- ncmd->bounce.buf[1], ncmd->bounce.buf[2], ncmd->bounce.buf[3]);
+- printk(" result=%04x, cdb[0]=0x%02x\n", ncmd->result, ncmd->cmnd[0]);
+- }
+-
+- for (i = 0; i < 2; ++i) {
+- if (check_address ((unsigned long) ncmd,
+- sizeof (struct NCR53c7x0_cmd)) == -1)
+- continue;
+- if (!i) {
+- where = "saved";
+- ptr = bus_to_virt(ncmd->saved_data_pointer);
+- } else {
+- where = "active";
+- ptr = bus_to_virt (NCR53c7x0_read32 (DSP_REG) -
+- NCR53c7x0_insn_size (NCR53c7x0_read8 (DCMD_REG)) *
+- sizeof(u32));
+- }
+- offset = insn_to_offset (cmd, ptr);
+-
+- if (offset != -1)
+- printk ("scsi%d : %s data pointer at offset %d\n",
+- cmd->device->host->host_no, where, offset);
+- else {
+- int size;
+- printk ("scsi%d : can't determine %s data pointer offset\n",
+- cmd->device->host->host_no, where);
+- if (ncmd) {
+- size = print_insn (cmd->device->host,
+- bus_to_virt(ncmd->saved_data_pointer), "", 1);
+- print_insn (cmd->device->host,
+- bus_to_virt(ncmd->saved_data_pointer) + size * sizeof(u32),
+- "", 1);
+- }
+- }
+- }
+-}
+-
+-
+-static void
+-print_dsa (struct Scsi_Host *host, u32 *dsa, const char *prefix) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int i, len;
+- char *ptr;
+- Scsi_Cmnd *cmd;
+-
+- if (check_address ((unsigned long) dsa, hostdata->dsa_end -
+- hostdata->dsa_start) == -1) {
+- printk("scsi%d : bad dsa virt 0x%p\n", host->host_no, dsa);
+- return;
+- }
+- printk("%sscsi%d : dsa at phys 0x%lx (virt 0x%p)\n"
+- " + %d : dsa_msgout length = %u, data = 0x%x (virt 0x%p)\n" ,
+- prefix ? prefix : "",
+- host->host_no, virt_to_bus (dsa), dsa, hostdata->dsa_msgout,
+- dsa[hostdata->dsa_msgout / sizeof(u32)],
+- dsa[hostdata->dsa_msgout / sizeof(u32) + 1],
+- bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]));
+-
+- /*
+- * Only print messages if they're sane in length so we don't
+- * blow the kernel printk buffer on something which won't buy us
+- * anything.
+- */
+-
+- if (dsa[hostdata->dsa_msgout / sizeof(u32)] <
+- sizeof (hostdata->free->select))
+- for (i = dsa[hostdata->dsa_msgout / sizeof(u32)],
+- ptr = bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]);
+- i > 0 && !check_address ((unsigned long) ptr, 1);
+- ptr += len, i -= len) {
+- printk(" ");
+- len = spi_print_msg(ptr);
+- printk("\n");
+- if (!len)
+- break;
+- }
+-
+- printk(" + %d : select_indirect = 0x%x\n",
+- hostdata->dsa_select, dsa[hostdata->dsa_select / sizeof(u32)]);
+- cmd = (Scsi_Cmnd *) bus_to_virt(dsa[hostdata->dsa_cmnd / sizeof(u32)]);
+- printk(" + %d : dsa_cmnd = 0x%x ", hostdata->dsa_cmnd,
+- (u32) virt_to_bus(cmd));
+- /* XXX Maybe we should access cmd->host_scribble->result here. RGH */
+- if (cmd) {
+- printk(" result = 0x%x, target = %d, lun = %d, cmd = ",
+- cmd->result, cmd->device->id, cmd->device->lun);
+- __scsi_print_command(cmd->cmnd);
+- } else
+- printk("\n");
+- printk(" + %d : dsa_next = 0x%x\n", hostdata->dsa_next,
+- dsa[hostdata->dsa_next / sizeof(u32)]);
+- if (cmd) {
+- printk("scsi%d target %d : sxfer_sanity = 0x%x, scntl3_sanity = 0x%x\n"
+- " script : ",
+- host->host_no, cmd->device->id,
+- hostdata->sync[cmd->device->id].sxfer_sanity,
+- hostdata->sync[cmd->device->id].scntl3_sanity);
+- for (i = 0; i < (sizeof(hostdata->sync[cmd->device->id].script) / 4); ++i)
+- printk ("0x%x ", hostdata->sync[cmd->device->id].script[i]);
+- printk ("\n");
+- print_progress (cmd);
+- }
+-}
+-/*
+- * Function : void print_queues (Scsi_Host *host)
+- *
+- * Purpose : print the contents of the NCR issue and reconnect queues
+- *
+- * Inputs : host - SCSI host we are interested in
+- *
+- */
+-
+-static void
+-print_queues (struct Scsi_Host *host) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- u32 *dsa, *next_dsa;
+- volatile u32 *ncrcurrent;
+- int left;
+- Scsi_Cmnd *cmd, *next_cmd;
+- unsigned long flags;
+-
+- printk ("scsi%d : issue queue\n", host->host_no);
+-
+- for (left = host->can_queue, cmd = (Scsi_Cmnd *) hostdata->issue_queue;
+- left >= 0 && cmd;
+- cmd = next_cmd) {
+- next_cmd = (Scsi_Cmnd *) cmd->SCp.ptr;
+- local_irq_save(flags);
+- if (cmd->host_scribble) {
+- if (check_address ((unsigned long) (cmd->host_scribble),
+- sizeof (cmd->host_scribble)) == -1)
+- printk ("scsi%d: scsi pid %ld bad pointer to NCR53c7x0_cmd\n",
+- host->host_no, cmd->pid);
+- /* print_dsa does sanity check on address, no need to check */
+- else
+- print_dsa (host, ((struct NCR53c7x0_cmd *) cmd->host_scribble)
+- -> dsa, "");
+- } else
+- printk ("scsi%d : scsi pid %ld for target %d lun %d has no NCR53c7x0_cmd\n",
+- host->host_no, cmd->pid, cmd->device->id, cmd->device->lun);
+- local_irq_restore(flags);
+- }
+-
+- if (left <= 0) {
+- printk ("scsi%d : loop detected in issue queue\n",
+- host->host_no);
+- }
+-
+- /*
+- * Traverse the NCR reconnect and start DSA structures, printing out
+- * each element until we hit the end or detect a loop. Currently,
+- * the reconnect structure is a linked list; and the start structure
+- * is an array. Eventually, the reconnect structure will become a
+- * list as well, since this simplifies the code.
+- */
+-
+- printk ("scsi%d : schedule dsa array :\n", host->host_no);
+- for (left = host->can_queue, ncrcurrent = hostdata->schedule;
+- left > 0; ncrcurrent += 2, --left)
+- if (ncrcurrent[0] != hostdata->NOP_insn)
+-/* FIXME : convert pointer to dsa_begin to pointer to dsa. */
+- print_dsa (host, bus_to_virt (ncrcurrent[1] -
+- (hostdata->E_dsa_code_begin -
+- hostdata->E_dsa_code_template)), "");
+- printk ("scsi%d : end schedule dsa array\n", host->host_no);
+-
+- printk ("scsi%d : reconnect_dsa_head :\n", host->host_no);
+-
+- for (left = host->can_queue,
+- dsa = bus_to_virt (hostdata->reconnect_dsa_head);
+- left >= 0 && dsa;
+- dsa = next_dsa) {
+- local_irq_save(flags);
+- if (check_address ((unsigned long) dsa, sizeof(dsa)) == -1) {
+- printk ("scsi%d: bad DSA pointer 0x%p", host->host_no,
+- dsa);
+- next_dsa = NULL;
+- }
+- else
+- {
+- next_dsa = bus_to_virt(dsa[hostdata->dsa_next / sizeof(u32)]);
+- print_dsa (host, dsa, "");
+- }
+- local_irq_restore(flags);
+- }
+- printk ("scsi%d : end reconnect_dsa_head\n", host->host_no);
+- if (left < 0)
+- printk("scsi%d: possible loop in ncr reconnect list\n",
+- host->host_no);
+-}
+-
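+-/*
+- * A minimal sketch (not part of the original driver; list_is_looped and
+- * struct list_node are hypothetical) of the bounded-traversal idiom
+- * print_queues uses above : counting down from the known queue-depth
+- * bound guarantees that the dump terminates even when a corrupted queue
+- * has been linked into a cycle.
+- */
+-struct list_node { struct list_node *next; };
+-
+-static int
+-list_is_looped (struct list_node *head, int bound) {
+- struct list_node *p;
+- for (p = head; bound >= 0 && p; p = p->next, --bound)
+- ;
+- return p != NULL; /* nonzero : bound exhausted, probable cycle */
+-}
+-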
+-static void
+-print_lots (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- u32 *dsp_next, *dsp, *dsa, dbc_dcmd;
+- unsigned char dcmd, sbcl;
+- int i, size;
+- NCR53c7x0_local_setup(host);
+-
+- if ((dsp_next = bus_to_virt(NCR53c7x0_read32 (DSP_REG)))) {
+- dbc_dcmd = NCR53c7x0_read32(DBC_REG);
+- dcmd = (dbc_dcmd & 0xff000000) >> 24;
+- dsp = dsp_next - NCR53c7x0_insn_size(dcmd);
+- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+- sbcl = NCR53c7x0_read8 (SBCL_REG);
+-
+- /*
+- * For the 53c710, the following will report value 0 for SCNTL3
+- * and STEST0 - we don't have these registers.
+- */
+- printk ("scsi%d : DCMD|DBC=0x%x, DNAD=0x%x (virt 0x%p)\n"
+- " DSA=0x%lx (virt 0x%p)\n"
+- " DSPS=0x%x, TEMP=0x%x (virt 0x%p), DMODE=0x%x\n"
+- " SXFER=0x%x, SCNTL3=0x%x\n"
+- " %s%s%sphase=%s, %d bytes in SCSI FIFO\n"
+- " SCRATCH=0x%x, saved2_dsa=0x%0lx\n",
+- host->host_no, dbc_dcmd, NCR53c7x0_read32(DNAD_REG),
+- bus_to_virt(NCR53c7x0_read32(DNAD_REG)),
+- virt_to_bus(dsa), dsa,
+- NCR53c7x0_read32(DSPS_REG), NCR53c7x0_read32(TEMP_REG),
+- bus_to_virt (NCR53c7x0_read32(TEMP_REG)),
+- (int) NCR53c7x0_read8(hostdata->dmode),
+- (int) NCR53c7x0_read8(SXFER_REG),
+- ((hostdata->chip / 100) == 8) ?
+- (int) NCR53c7x0_read8(SCNTL3_REG_800) : 0,
+- (sbcl & SBCL_BSY) ? "BSY " : "",
+- (sbcl & SBCL_SEL) ? "SEL " : "",
+- (sbcl & SBCL_REQ) ? "REQ " : "",
+- sstat2_to_phase(NCR53c7x0_read8 (((hostdata->chip / 100) == 8) ?
+- SSTAT1_REG : SSTAT2_REG)),
+- (NCR53c7x0_read8 ((hostdata->chip / 100) == 8 ?
+- SSTAT1_REG : SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT,
+- ((hostdata->chip / 100) == 8) ? NCR53c7x0_read8 (STEST0_REG_800) :
+- NCR53c7x0_read32(SCRATCHA_REG_800),
+- hostdata->saved2_dsa);
+- printk ("scsi%d : DSP 0x%lx (virt 0x%p) ->\n", host->host_no,
+- virt_to_bus(dsp), dsp);
+- for (i = 6; i > 0; --i, dsp += size)
+- size = print_insn (host, dsp, "", 1);
+- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) {
+- if ((hostdata->chip / 100) == 8)
+- printk ("scsi%d : connected (SDID=0x%x, SSID=0x%x)\n",
+- host->host_no, NCR53c7x0_read8 (SDID_REG_800),
+- NCR53c7x0_read8 (SSID_REG_800));
+- else
+- printk ("scsi%d : connected (SDID=0x%x)\n",
+- host->host_no, NCR53c7x0_read8 (SDID_REG_700));
+- print_dsa (host, dsa, "");
+- }
+-
+-#if 1
+- print_queues (host);
+-#endif
+- }
+-}
+-
+-/*
+- * Function : static int shutdown (struct Scsi_Host *host)
+- *
+- * Purpose : does a clean (we hope) shutdown of the NCR SCSI
+- * chip. Use prior to dumping core, unloading the NCR driver, etc.
+- *
+- * Returns : 0 on success
+- */
+-static int
+-shutdown (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- unsigned long flags;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- NCR53c7x0_local_setup(host);
+- local_irq_save(flags);
+-/* Get in a state where we can reset the SCSI bus */
+- ncr_halt (host);
+- ncr_scsi_reset (host);
+- hostdata->soft_reset(host);
+-
+- disable (host);
+- local_irq_restore(flags);
+- return 0;
+-}
+-
+-/*
+- * Function : void ncr_scsi_reset (struct Scsi_Host *host)
+- *
+- * Purpose : reset the SCSI bus.
+- */
+-
+-static void
+-ncr_scsi_reset (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- unsigned long flags;
+- NCR53c7x0_local_setup(host);
+- local_irq_save(flags);
+- NCR53c7x0_write8(SCNTL1_REG, SCNTL1_RST);
+- udelay(25); /* Minimum amount of time to assert RST */
+- NCR53c7x0_write8(SCNTL1_REG, 0);
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Function : void hard_reset (struct Scsi_Host *host)
+- *
+- */
+-
+-static void
+-hard_reset (struct Scsi_Host *host) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned long flags;
+- local_irq_save(flags);
+- ncr_scsi_reset(host);
+- NCR53c7x0_driver_init (host);
+- if (hostdata->soft_reset)
+- hostdata->soft_reset (host);
+- local_irq_restore(flags);
+-}
+-
+-
+-/*
+- * Function : Scsi_Cmnd *return_outstanding_commands (struct Scsi_Host *host,
+- * int free, int issue)
+- *
+- * Purpose : return a linked list of Scsi_Cmnd structures that had
+- * propagated below the Linux issue queue level. The list is chained
+- * through the SCp.buffer field so that we don't perturb hostdata; we
+- * don't use a field of the NCR53c7x0_cmd structure since we may not
+- * have allocated one for the command causing the reset. If free is set,
+- * free the NCR53c7x0_cmd structures which are associated with
+- * the Scsi_Cmnd structures, and clean up any internal
+- * NCR lists that the commands were on. If issue is set,
+- * also return commands in the issue queue.
+- *
+- * Returns : linked list of commands
+- *
+- * NOTE : the caller should ensure that the NCR chip is halted
+- * if the free flag is set.
+- */
+-
+-static Scsi_Cmnd *
+-return_outstanding_commands (struct Scsi_Host *host, int free, int issue) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- struct NCR53c7x0_cmd *c;
+- int i;
+- u32 *ncrcurrent;
+- Scsi_Cmnd *list = NULL, *tmp;
+- for (c = (struct NCR53c7x0_cmd *) hostdata->running_list; c;
+- c = (struct NCR53c7x0_cmd *) c->next) {
+- if (!c->cmd) {
+- printk ("Duh? Bad things happening in the NCR driver\n");
+- break;
+- } else if (c->cmd->SCp.buffer) {
+- printk ("scsi%d : loop detected in running list!\n", host->host_no);
+- break;
+- }
+-
+- c->cmd->SCp.buffer = (struct scatterlist *) list;
+- list = c->cmd;
+- if (free) {
+- c->next = hostdata->free;
+- hostdata->free = c;
+- }
+- }
+-
+- if (free) {
+- for (i = 0, ncrcurrent = (u32 *) hostdata->schedule;
+- i < host->can_queue; ++i, ncrcurrent += 2) {
+- ncrcurrent[0] = hostdata->NOP_insn;
+- ncrcurrent[1] = 0xdeadbeef;
+- }
+- hostdata->ncrcurrent = NULL;
+- }
+-
+- if (issue) {
+- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp; tmp = tmp->next) {
+- if (tmp->SCp.buffer) {
+- printk ("scsi%d : loop detected in issue queue!\n",
+- host->host_no);
+- break;
+- }
+- tmp->SCp.buffer = (struct scatterlist *) list;
+- list = tmp;
+- }
+- if (free)
+- hostdata->issue_queue = NULL;
+-
+- }
+- return list;
+-}
+-
+-/*
+- * Function : static int disable (struct Scsi_Host *host)
+- *
+- * Purpose : disables the given NCR host, causing all commands
+- * to return a driver error. Call this so we can unload the
+- * module during development and try again. Eventually,
+- * we should be able to find clean workarounds for these
+- * problems.
+- *
+- * Inputs : host - hostadapter to twiddle
+- *
+- * Returns : 0 on success.
+- */
+-
+-static int
+-disable (struct Scsi_Host *host) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- unsigned long flags;
+- Scsi_Cmnd *nuke_list, *tmp;
+- local_irq_save(flags);
+- if (hostdata->state != STATE_HALTED)
+- ncr_halt (host);
+- nuke_list = return_outstanding_commands (host, 1 /* free */, 1 /* issue */);
+- hard_reset (host);
+- hostdata->state = STATE_DISABLED;
+- local_irq_restore(flags);
+- printk ("scsi%d : nuking commands\n", host->host_no);
+- for (; nuke_list; nuke_list = tmp) {
+- tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer;
+- nuke_list->result = DID_ERROR << 16;
+- nuke_list->scsi_done(nuke_list);
+- }
+- printk ("scsi%d : done. \n", host->host_no);
+- printk (KERN_ALERT "scsi%d : disabled. Unload and reload\n",
+- host->host_no);
+- return 0;
+-}
+-
+-/*
+- * Function : static int ncr_halt (struct Scsi_Host *host)
+- *
+- * Purpose : halts the SCSI SCRIPTS(tm) processor on the NCR chip
+- *
+- * Inputs : host - SCSI chip to halt
+- *
+- * Returns : 0 on success
+- */
+-
+-static int
+-ncr_halt (struct Scsi_Host *host) {
+- NCR53c7x0_local_declare();
+- unsigned long flags;
+- unsigned char istat, tmp;
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- int stage;
+- NCR53c7x0_local_setup(host);
+-
+- local_irq_save(flags);
+- /* Stage 0 : eat all interrupts
+- Stage 1 : set ABORT
+- Stage 2 : eat all but abort interrupts
+- Stage 3 : eat all interrupts
+- */
+- for (stage = 0;;) {
+- if (stage == 1) {
+- NCR53c7x0_write8(hostdata->istat, ISTAT_ABRT);
+- ++stage;
+- }
+- istat = NCR53c7x0_read8 (hostdata->istat);
+- if (istat & ISTAT_SIP) {
+- tmp = NCR53c7x0_read8(SSTAT0_REG);
+- } else if (istat & ISTAT_DIP) {
+- tmp = NCR53c7x0_read8(DSTAT_REG);
+- if (stage == 2) {
+- if (tmp & DSTAT_ABRT) {
+- NCR53c7x0_write8(hostdata->istat, 0);
+- ++stage;
+- } else {
+- printk(KERN_ALERT "scsi%d : could not halt NCR chip\n",
+- host->host_no);
+- disable (host);
+- }
+- }
+- }
+- if (!(istat & (ISTAT_SIP|ISTAT_DIP))) {
+- if (stage == 0)
+- ++stage;
+- else if (stage == 3)
+- break;
+- }
+- }
+- hostdata->state = STATE_HALTED;
+- local_irq_restore(flags);
+-#if 0
+- print_lots (host);
+-#endif
+- return 0;
+-}
+-
+-/*
+- * Function: event_name (int event)
+- *
+- * Purpose: map event enum into user-readable strings.
+- */
+-
+-static const char *
+-event_name (int event) {
+- switch (event) {
+- case EVENT_NONE: return "none";
+- case EVENT_ISSUE_QUEUE: return "to issue queue";
+- case EVENT_START_QUEUE: return "to start queue";
+- case EVENT_SELECT: return "selected";
+- case EVENT_DISCONNECT: return "disconnected";
+- case EVENT_RESELECT: return "reselected";
+- case EVENT_COMPLETE: return "completed";
+- case EVENT_IDLE: return "idle";
+- case EVENT_SELECT_FAILED: return "select failed";
+- case EVENT_BEFORE_SELECT: return "before select";
+- case EVENT_RESELECT_FAILED: return "reselect failed";
+- default: return "unknown";
+- }
+-}
+-
+-/*
+- * Function : void dump_events (struct Scsi_Host *host, int count)
+- *
+- * Purpose : print last count events which have occurred.
+- */
+-static void
+-dump_events (struct Scsi_Host *host, int count) {
+- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+- host->hostdata[0];
+- struct NCR53c7x0_event event;
+- int i;
+- unsigned long flags;
+- if (hostdata->events) {
+- if (count > hostdata->event_size)
+- count = hostdata->event_size;
+- for (i = hostdata->event_index; count > 0;
+- i = (i ? i - 1 : hostdata->event_size -1), --count) {
+-/*
+- * By copying the event we're currently examining with interrupts
+- * disabled, we can do multiple printk(), etc. operations and
+- * still be guaranteed that they're happening on the same
+- * event structure.
+- */
+- local_irq_save(flags);
+-#if 0
+- event = hostdata->events[i];
+-#else
+- memcpy ((void *) &event, (void *) &(hostdata->events[i]),
+- sizeof(event));
+-#endif
+-
+- local_irq_restore(flags);
+- printk ("scsi%d : %s event %d at %ld secs %ld usecs target %d lun %d\n",
+- host->host_no, event_name (event.event), count,
+- (long) event.time.tv_sec, (long) event.time.tv_usec,
+- event.target, event.lun);
+- if (event.dsa)
+- printk (" event for dsa 0x%lx (virt 0x%p)\n",
+- virt_to_bus(event.dsa), event.dsa);
+- if (event.pid != -1) {
+- printk (" event for pid %ld ", event.pid);
+- __scsi_print_command (event.cmnd);
+- }
+- }
+- }
+-}
+-
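+-/*
+- * The loop above reads the newest `count' entries by stepping the ring
+- * index backwards with wraparound. A minimal sketch of that step
+- * (ring_prev is a hypothetical helper, not part of the original driver) :
+- */
+-static inline int
+-ring_prev (int i, int size) {
+- return i ? i - 1 : size - 1;
+-}
+-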
+-/*
+- * Function: check_address
+- *
+- * Purpose: Check to see if a possibly corrupt pointer will fault the
+- * kernel.
+- *
+- * Inputs: addr - address; size - size of area
+- *
+- * Returns: 0 if area is OK, -1 on error.
+- *
+- * NOTES: should be implemented in terms of vverify on kernels
+- * that have it.
+- */
+-
+-static int
+-check_address (unsigned long addr, int size) {
+- return (virt_to_phys((void *)addr) < PAGE_SIZE ||
+- virt_to_phys((void *)(addr + size)) > virt_to_phys(high_memory))
+- ? -1 : 0;
+-}
+-
+-#ifdef MODULE
+-int
+-NCR53c7x0_release(struct Scsi_Host *host) {
+- struct NCR53c7x0_hostdata *hostdata =
+- (struct NCR53c7x0_hostdata *) host->hostdata[0];
+- struct NCR53c7x0_cmd *cmd, *tmp;
+- shutdown (host);
+- if (host->irq != SCSI_IRQ_NONE)
+- {
+- int irq_count;
+- struct Scsi_Host *tmp;
+- for (irq_count = 0, tmp = first_host; tmp; tmp = tmp->next)
+- if (tmp->hostt == the_template && tmp->irq == host->irq)
+- ++irq_count;
+- if (irq_count == 1)
+- free_irq(host->irq, NULL);
+- }
+- if (host->dma_channel != DMA_NONE)
+- free_dma(host->dma_channel);
+- if (host->io_port)
+- release_region(host->io_port, host->n_io_port);
+-
+- for (cmd = (struct NCR53c7x0_cmd *) hostdata->free; cmd; cmd = tmp,
+- --hostdata->num_cmds) {
+- tmp = (struct NCR53c7x0_cmd *) cmd->next;
+- /*
+- * If we're going to loop, try to stop it to get a more accurate
+- * count of the leaked commands.
+- */
+- cmd->next = NULL;
+- if (cmd->free)
+- cmd->free ((void *) cmd->real, cmd->size);
+- }
+- if (hostdata->num_cmds)
+- printk ("scsi%d : leaked %d NCR53c7x0_cmd structures\n",
+- host->host_no, hostdata->num_cmds);
+-
+- vfree(hostdata->events);
+-
+- /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which
+- * XXX may be invalid (CONFIG_060_WRITETHROUGH)
+- */
+- kernel_set_cachemode((void *)hostdata, 8192, IOMAP_FULL_CACHING);
+- free_pages ((u32)hostdata, 1);
+- return 1;
+-}
+-#endif /* def MODULE */
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.h linux-2.6.22-591/drivers/scsi/53c7xx.h
+--- linux-2.6.22-570/drivers/scsi/53c7xx.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c7xx.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,1608 +0,0 @@
+-/*
+- * 53c710 driver. Modified from Drew Eckhardt's driver
+- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+- *
+- * I have left the code for the 53c8xx family in here, because it didn't
+- * seem worth removing it. The possibility of IO_MAPPED chips rather
+- * than MEMORY_MAPPED remains, in case someone wants to add support for
+- * 53c710 chips on Intel PCs (some older machines have them on the
+- * motherboard).
+- *
+- * NOTE THERE MAY BE PROBLEMS WITH CASTS IN read8 AND Co.
+- */
+-
+-/*
+- * NCR 53c{7,8}0x0 driver, header file
+- *
+- * Sponsored by
+- * iX Multiuser Multitasking Magazine
+- * Hannover, Germany
+- * hm@ix.de
+- *
+- * Copyright 1993, 1994, 1995 Drew Eckhardt
+- * Visionary Computing
+- * (Unix and Linux consulting and custom programming)
+- * drew@PoohSticks.ORG
+- * +1 (303) 786-7975
+- *
+- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+- *
+- * PRE-ALPHA
+- *
+- * For more information, please consult
+- *
+- * NCR 53C700/53C700-66
+- * SCSI I/O Processor
+- * Data Manual
+- *
+- * NCR 53C810
+- * PCI-SCSI I/O Processor
+- * Data Manual
+- *
+- * NCR Microelectronics
+- * 1635 Aeroplaza Drive
+- * Colorado Springs, CO 80916
+- * +1 (719) 578-3400
+- *
+- * Toll free literature number
+- * +1 (800) 334-5454
+- *
+- */
+-
+-#ifndef NCR53c710_H
+-#define NCR53c710_H
+-
+-#ifndef HOSTS_C
+-
+-/* SCSI control 0 rw, default = 0xc0 */
+-#define SCNTL0_REG 0x00
+-#define SCNTL0_ARB1 0x80 /* 0 0 = simple arbitration */
+-#define SCNTL0_ARB2 0x40 /* 1 1 = full arbitration */
+-#define SCNTL0_STRT 0x20 /* Start Sequence */
+-#define SCNTL0_WATN 0x10 /* Select with ATN */
+-#define SCNTL0_EPC 0x08 /* Enable parity checking */
+-/* Bit 2 is reserved on 800 series chips */
+-#define SCNTL0_EPG_700 0x04 /* Enable parity generation */
+-#define SCNTL0_AAP 0x02 /* ATN/ on parity error */
+-#define SCNTL0_TRG 0x01 /* Target mode */
+-
+-/* SCSI control 1 rw, default = 0x00 */
+-
+-#define SCNTL1_REG 0x01
+-#define SCNTL1_EXC 0x80 /* Extra Clock Cycle of Data setup */
+-#define SCNTL1_ADB 0x40 /* contents of SODL on bus */
+-#define SCNTL1_ESR_700 0x20 /* Enable SIOP response to selection
+- and reselection */
+-#define SCNTL1_DHP_800 0x20 /* Disable halt on parity error or ATN
+- target mode only */
+-#define SCNTL1_CON 0x10 /* Connected */
+-#define SCNTL1_RST 0x08 /* SCSI RST/ */
+-#define SCNTL1_AESP 0x04 /* Force bad parity */
+-#define SCNTL1_SND_700 0x02 /* Start SCSI send */
+-#define SCNTL1_IARB_800 0x02 /* Immediate Arbitration, start
+- arbitration immediately after
+- busfree is detected */
+-#define SCNTL1_RCV_700 0x01 /* Start SCSI receive */
+-#define SCNTL1_SST_800 0x01 /* Start SCSI transfer */
+-
+-/* SCSI control 2 rw, */
+-
+-#define SCNTL2_REG_800 0x02
+-#define SCNTL2_800_SDU 0x80 /* SCSI disconnect unexpected */
+-
+-/* SCSI control 3 rw */
+-
+-#define SCNTL3_REG_800 0x03
+-#define SCNTL3_800_SCF_SHIFT 4
+-#define SCNTL3_800_SCF_MASK 0x70
+-#define SCNTL3_800_SCF2 0x40 /* Synchronous divisor */
+-#define SCNTL3_800_SCF1 0x20 /* 0x00 = SCLK/3 */
+-#define SCNTL3_800_SCF0 0x10 /* 0x10 = SCLK/1 */
+- /* 0x20 = SCLK/1.5
+- 0x30 = SCLK/2
+- 0x40 = SCLK/3 */
+-
+-#define SCNTL3_800_CCF_SHIFT 0
+-#define SCNTL3_800_CCF_MASK 0x07
+-#define SCNTL3_800_CCF2 0x04 /* 0x00 50.01 to 66 */
+-#define SCNTL3_800_CCF1 0x02 /* 0x01 16.67 to 25 */
+-#define SCNTL3_800_CCF0 0x01 /* 0x02 25.01 - 37.5
+- 0x03 37.51 - 50
+- 0x04 50.01 - 66 */
+-
+-/*
+- * SCSI destination ID rw - the appropriate bit is set for the selected
+- * target ID. This is written by the SCSI SCRIPTS processor.
+- * default = 0x00
+- */
+-#define SDID_REG_700 0x02
+-#define SDID_REG_800 0x06
+-
+-#define GP_REG_800 0x07 /* General purpose IO */
+-#define GP_800_IO1 0x02
+-#define GP_800_IO2 0x01
+-
+-/* SCSI interrupt enable rw, default = 0x00 */
+-#define SIEN_REG_700 0x03
+-#define SIEN0_REG_800 0x40
+-#define SIEN_MA 0x80 /* Phase mismatch (ini) or ATN (tgt) */
+-#define SIEN_FC 0x40 /* Function complete */
+-#define SIEN_700_STO 0x20 /* Selection or reselection timeout */
+-#define SIEN_800_SEL 0x20 /* Selected */
+-#define SIEN_700_SEL 0x10 /* Selected or reselected */
+-#define SIEN_800_RESEL 0x10 /* Reselected */
+-#define SIEN_SGE 0x08 /* SCSI gross error */
+-#define SIEN_UDC 0x04 /* Unexpected disconnect */
+-#define SIEN_RST 0x02 /* SCSI RST/ received */
+-#define SIEN_PAR 0x01 /* Parity error */
+-
+-/*
+- * SCSI chip ID rw
+- * NCR53c700 :
+- * When arbitrating, the highest bit is used, when reselection or selection
+- * occurs, the chip responds to all IDs for which a bit is set.
+- * default = 0x00
+- * NCR53c810 :
+- * Uses bit mapping
+- */
+-#define SCID_REG 0x04
+-/* Bit 7 is reserved on 800 series chips */
+-#define SCID_800_RRE 0x40 /* Enable response to reselection */
+-#define SCID_800_SRE 0x20 /* Enable response to selection */
+-/* Bits four and three are reserved on 800 series chips */
+-#define SCID_800_ENC_MASK 0x07 /* Encoded SCSI ID */
+-
+-/* SCSI transfer rw, default = 0x00 */
+-#define SXFER_REG 0x05
+-#define SXFER_DHP 0x80 /* Disable halt on parity */
+-
+-#define SXFER_TP2 0x40 /* Transfer period msb */
+-#define SXFER_TP1 0x20
+-#define SXFER_TP0 0x10 /* lsb */
+-#define SXFER_TP_MASK 0x70
+-/* FIXME : SXFER_TP_SHIFT == 5 is right for '8xx chips */
+-#define SXFER_TP_SHIFT 5
+-#define SXFER_TP_4 0x00 /* Divisors */
+-#define SXFER_TP_5 0x10<<1
+-#define SXFER_TP_6 0x20<<1
+-#define SXFER_TP_7 0x30<<1
+-#define SXFER_TP_8 0x40<<1
+-#define SXFER_TP_9 0x50<<1
+-#define SXFER_TP_10 0x60<<1
+-#define SXFER_TP_11 0x70<<1
+-
+-#define SXFER_MO3 0x08 /* Max offset msb */
+-#define SXFER_MO2 0x04
+-#define SXFER_MO1 0x02
+-#define SXFER_MO0 0x01 /* lsb */
+-#define SXFER_MO_MASK 0x0f
+-#define SXFER_MO_SHIFT 0
+-
+-/*
+- * SCSI output data latch rw
+- * The contents of this register are driven onto the SCSI bus when
+- * the Assert Data Bus bit of the SCNTL1 register is set and
+- * the CD, IO, and MSG bits of the SOCL register match the SCSI phase
+- */
+-#define SODL_REG_700 0x06
+-#define SODL_REG_800 0x54
+-
+-
+-/*
+- * SCSI output control latch rw, default = 0
+- * Note that when the chip is being manually programmed as an initiator,
+- * the MSG, CD, and IO bits must be set correctly for the phase the target
+- * is driving the bus in. Otherwise no data transfer will occur due to
+- * phase mismatch.
+- */
+-
+-#define SOCL_REG 0x07
+-#define SOCL_REQ 0x80 /* REQ */
+-#define SOCL_ACK 0x40 /* ACK */
+-#define SOCL_BSY 0x20 /* BSY */
+-#define SOCL_SEL 0x10 /* SEL */
+-#define SOCL_ATN 0x08 /* ATN */
+-#define SOCL_MSG 0x04 /* MSG */
+-#define SOCL_CD 0x02 /* C/D */
+-#define SOCL_IO 0x01 /* I/O */
+-
+-/*
+- * SCSI first byte received latch ro
+- * This register contains the first byte received during a block MOVE
+- * SCSI SCRIPTS instruction, including
+- *
+- * Initiator mode Target mode
+- * Message in Command
+- * Status Message out
+- * Data in Data out
+- *
+- * It also contains the selecting or reselecting device's ID and our
+- * ID.
+- *
+- * Note that this is the register the various IF conditionals can
+- * operate on.
+- */
+-#define SFBR_REG 0x08
+-
+-/*
+- * SCSI input data latch ro
+- * In initiator mode, data is latched into this register on the rising
+- * edge of REQ/. In target mode, data is latched on the rising edge of
+- * ACK/
+- */
+-#define SIDL_REG_700 0x09
+-#define SIDL_REG_800 0x50
+-
+-/*
+- * SCSI bus data lines ro
+- * This register reflects the instantaneous status of the SCSI data
+- * lines. Note that SCNTL0 must be set to disable parity checking,
+- * otherwise reading this register will latch new parity.
+- */
+-#define SBDL_REG_700 0x0a
+-#define SBDL_REG_800 0x58
+-
+-#define SSID_REG_800 0x0a
+-#define SSID_800_VAL 0x80 /* Exactly two bits asserted at sel */
+-#define SSID_800_ENCID_MASK 0x07 /* Device which performed operation */
+-
+-
+-/*
+- * SCSI bus control lines rw,
+- * instantaneous readout of control lines
+- */
+-#define SBCL_REG 0x0b
+-#define SBCL_REQ 0x80 /* REQ ro */
+-#define SBCL_ACK 0x40 /* ACK ro */
+-#define SBCL_BSY 0x20 /* BSY ro */
+-#define SBCL_SEL 0x10 /* SEL ro */
+-#define SBCL_ATN 0x08 /* ATN ro */
+-#define SBCL_MSG 0x04 /* MSG ro */
+-#define SBCL_CD 0x02 /* C/D ro */
+-#define SBCL_IO 0x01 /* I/O ro */
+-#define SBCL_PHASE_CMDOUT SBCL_CD
+-#define SBCL_PHASE_DATAIN SBCL_IO
+-#define SBCL_PHASE_DATAOUT 0
+-#define SBCL_PHASE_MSGIN (SBCL_CD|SBCL_IO|SBCL_MSG)
+-#define SBCL_PHASE_MSGOUT (SBCL_CD|SBCL_MSG)
+-#define SBCL_PHASE_STATIN (SBCL_CD|SBCL_IO)
+-#define SBCL_PHASE_MASK (SBCL_CD|SBCL_IO|SBCL_MSG)
+-/*
+- * Synchronous SCSI Clock Control bits
+- * 0 - set by DCNTL
+- * 1 - SCLK / 1.0
+- * 2 - SCLK / 1.5
+- * 3 - SCLK / 2.0
+- */
+-#define SBCL_SSCF1 0x02 /* wo, -66 only */
+-#define SBCL_SSCF0 0x01 /* wo, -66 only */
+-#define SBCL_SSCF_MASK 0x03
+-
+-/*
+- * XXX note : when reading the DSTAT and STAT registers to clear interrupts,
+- * ensure that 10 clocks elapse between the two
+- */
+-/* DMA status ro */
+-#define DSTAT_REG 0x0c
+-#define DSTAT_DFE 0x80 /* DMA FIFO empty */
+-#define DSTAT_800_MDPE 0x40 /* Master Data Parity Error */
+-#define DSTAT_800_BF 0x20 /* Bus Fault */
+-#define DSTAT_ABRT 0x10 /* Aborted - set on error */
+-#define DSTAT_SSI 0x08 /* SCRIPTS single step interrupt */
+-#define DSTAT_SIR 0x04 /* SCRIPTS interrupt received -
+- set when INT instruction is
+- executed */
+-#define DSTAT_WTD 0x02 /* Watchdog timeout detected */
+-#define DSTAT_OPC 0x01 /* Illegal instruction */
+-#define DSTAT_800_IID 0x01 /* Same thing, different name */
+-
+-
+-/* NCR53c800 moves this stuff into SIST0 */
+-#define SSTAT0_REG 0x0d /* SCSI status 0 ro */
+-#define SIST0_REG_800 0x42
+-#define SSTAT0_MA 0x80 /* ini : phase mismatch,
+- * tgt : ATN/ asserted
+- */
+-#define SSTAT0_CMP 0x40 /* function complete */
+-#define SSTAT0_700_STO 0x20 /* Selection or reselection timeout */
+-#define SIST0_800_SEL 0x20 /* Selected */
+-#define SSTAT0_700_SEL 0x10 /* Selected or reselected */
+-#define SIST0_800_RSL 0x10 /* Reselected */
+-#define SSTAT0_SGE 0x08 /* SCSI gross error */
+-#define SSTAT0_UDC 0x04 /* Unexpected disconnect */
+-#define SSTAT0_RST 0x02 /* SCSI RST/ received */
+-#define SSTAT0_PAR 0x01 /* Parity error */
+-
+-/* And uses SSTAT0 for what was SSTAT1 */
+-
+-#define SSTAT1_REG 0x0e /* SCSI status 1 ro */
+-#define SSTAT1_ILF 0x80 /* SIDL full */
+-#define SSTAT1_ORF 0x40 /* SODR full */
+-#define SSTAT1_OLF 0x20 /* SODL full */
+-#define SSTAT1_AIP 0x10 /* Arbitration in progress */
+-#define SSTAT1_LOA 0x08 /* Lost arbitration */
+-#define SSTAT1_WOA 0x04 /* Won arbitration */
+-#define SSTAT1_RST 0x02 /* Instant readout of RST/ */
+-#define SSTAT1_SDP 0x01 /* Instant readout of SDP/ */
+-
+-#define SSTAT2_REG 0x0f /* SCSI status 2 ro */
+-#define SSTAT2_FF3 0x80 /* number of bytes in synchronous */
+-#define SSTAT2_FF2 0x40 /* data FIFO */
+-#define SSTAT2_FF1 0x20
+-#define SSTAT2_FF0 0x10
+-#define SSTAT2_FF_MASK 0xf0
+-#define SSTAT2_FF_SHIFT 4
+-
+-/*
+- * Latched signals, latched on the leading edge of REQ/ for initiators,
+- * ACK/ for targets.
+- */
+-#define SSTAT2_SDP 0x08 /* SDP */
+-#define SSTAT2_MSG 0x04 /* MSG */
+-#define SSTAT2_CD 0x02 /* C/D */
+-#define SSTAT2_IO 0x01 /* I/O */
+-#define SSTAT2_PHASE_CMDOUT SSTAT2_CD
+-#define SSTAT2_PHASE_DATAIN SSTAT2_IO
+-#define SSTAT2_PHASE_DATAOUT 0
+-#define SSTAT2_PHASE_MSGIN (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG)
+-#define SSTAT2_PHASE_MSGOUT (SSTAT2_CD|SSTAT2_MSG)
+-#define SSTAT2_PHASE_STATIN (SSTAT2_CD|SSTAT2_IO)
+-#define SSTAT2_PHASE_MASK (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG)
+-
+-
+-/* NCR53c700-66 only */
+-#define SCRATCHA_REG_00 0x10 /* through 0x13 Scratch A rw */
+-/* NCR53c710 and higher */
+-#define DSA_REG 0x10 /* DATA structure address */
+-
+-#define CTEST0_REG_700 0x14 /* Chip test 0 ro */
+-#define CTEST0_REG_800 0x18 /* Chip test 0 rw, general purpose */
+-/* 0x80 - 0x04 are reserved */
+-#define CTEST0_700_RTRG 0x02 /* Real target mode */
+-#define CTEST0_700_DDIR 0x01 /* Data direction, 1 =
+- * SCSI bus to host, 0 =
+- * host to SCSI.
+- */
+-
+-#define CTEST1_REG_700 0x15 /* Chip test 1 ro */
+-#define CTEST1_REG_800 0x19 /* Chip test 1 ro */
+-#define CTEST1_FMT3 0x80 /* Identify which byte lanes are empty */
+-#define CTEST1_FMT2 0x40 /* in the DMA FIFO */
+-#define CTEST1_FMT1 0x20
+-#define CTEST1_FMT0 0x10
+-
+-#define CTEST1_FFL3 0x08 /* Identify which bytes lanes are full */
+-#define CTEST1_FFL2 0x04 /* in the DMA FIFO */
+-#define CTEST1_FFL1 0x02
+-#define CTEST1_FFL0 0x01
+-
+-#define CTEST2_REG_700 0x16 /* Chip test 2 ro */
+-#define CTEST2_REG_800 0x1a /* Chip test 2 ro */
+-
+-#define CTEST2_800_DDIR 0x80 /* 1 = SCSI->host */
+-#define CTEST2_800_SIGP 0x40 /* A copy of SIGP in ISTAT.
+- Reading this register clears */
+-#define CTEST2_800_CIO 0x20 /* Configured as IO */
+-#define CTEST2_800_CM 0x10 /* Configured as memory */
+-
+-/* 0x80 - 0x40 are reserved on 700 series chips */
+-#define CTEST2_700_SOFF 0x20 /* SCSI Offset Compare,
+- * As an initiator, this bit is
+- * one when the synchronous offset
+- * is zero, as a target this bit
+- * is one when the synchronous
+- * offset is at the maximum
+- * defined in SXFER
+- */
+-#define CTEST2_700_SFP 0x10 /* SCSI FIFO parity bit,
+- * reading CTEST3 unloads a byte
+- * from the FIFO and sets this
+- */
+-#define CTEST2_700_DFP 0x08 /* DMA FIFO parity bit,
+- * reading CTEST6 unloads a byte
+- * from the FIFO and sets this
+- */
+-#define CTEST2_TEOP 0x04 /* SCSI true end of process,
+- * indicates a totally finished
+- * transfer
+- */
+-#define CTEST2_DREQ 0x02 /* Data request signal */
+-/* 0x01 is reserved on 700 series chips */
+-#define CTEST2_800_DACK 0x01
+-
+-/*
+- * Chip test 3 ro
+- * Unloads the bottom byte of the eight deep SCSI synchronous FIFO,
+- * check SSTAT2 FIFO full bits to determine size. Note that a GROSS
+- * error results if a read is attempted on this register. Also note
+- * that 16 and 32 bit reads of this register will cause corruption.
+- */
+-#define CTEST3_REG_700 0x17
+-/* Chip test 3 rw */
+-#define CTEST3_REG_800 0x1b
+-#define CTEST3_800_V3 0x80 /* Chip revision */
+-#define CTEST3_800_V2 0x40
+-#define CTEST3_800_V1 0x20
+-#define CTEST3_800_V0 0x10
+-#define CTEST3_800_FLF 0x08 /* Flush DMA FIFO */
+-#define CTEST3_800_CLF 0x04 /* Clear DMA FIFO */
+-#define CTEST3_800_FM 0x02 /* Fetch mode pin */
+-/* bit 0 is reserved on 800 series chips */
+-
+-#define CTEST4_REG_700 0x18 /* Chip test 4 rw */
+-#define CTEST4_REG_800 0x21 /* Chip test 4 rw */
+-/* 0x80 is reserved on 700 series chips */
+-#define CTEST4_800_BDIS 0x80 /* Burst mode disable */
+-#define CTEST4_ZMOD 0x40 /* High impedance mode */
+-#define CTEST4_SZM 0x20 /* SCSI bus high impedance */
+-#define CTEST4_700_SLBE 0x10 /* SCSI loopback enabled */
+-#define CTEST4_800_SRTM 0x10 /* Shadow Register Test Mode */
+-#define CTEST4_700_SFWR 0x08 /* SCSI FIFO write enable,
+- * redirects writes from SODL
+- * to the SCSI FIFO.
+- */
+-#define CTEST4_800_MPEE 0x08 /* Enable parity checking
+- during master cycles on PCI
+- bus */
+-
+-/*
+- * These bits send the contents of the CTEST6 register to the appropriate
+- * byte lane of the 32 bit DMA FIFO. Normal operation is zero, otherwise
+- * the high bit means the low two bits select the byte lane.
+- */
+-#define CTEST4_FBL2 0x04
+-#define CTEST4_FBL1 0x02
+-#define CTEST4_FBL0 0x01
+-#define CTEST4_FBL_MASK 0x07
+-#define CTEST4_FBL_0 0x04 /* Select DMA FIFO byte lane 0 */
+-#define CTEST4_FBL_1 0x05 /* Select DMA FIFO byte lane 1 */
+-#define CTEST4_FBL_2 0x06 /* Select DMA FIFO byte lane 2 */
+-#define CTEST4_FBL_3 0x07 /* Select DMA FIFO byte lane 3 */
+-#define CTEST4_800_SAVE (CTEST4_800_BDIS)
+-
+-
+-#define CTEST5_REG_700 0x19 /* Chip test 5 rw */
+-#define CTEST5_REG_800 0x22 /* Chip test 5 rw */
+-/*
+- * Clock Address Incrementor. When set, it increments the
+- * DNAD register to the next bus size boundary. It automatically
+- * resets itself when the operation is complete.
+- */
+-#define CTEST5_ADCK 0x80
+-/*
+- * Clock Byte Counter. When set, it decrements the DBC register to
+- * the next bus size boundary.
+- */
+-#define CTEST5_BBCK 0x40
+-/*
+- * Reset SCSI Offset. Setting this bit to 1 clears the current offset
+- * pointer in the SCSI synchronous offset counter (SSTAT). This bit
+- * is set to 1 if a SCSI Gross Error Condition occurs. The offset should
+- * be cleared when a synchronous transfer fails. When written, it is
+- * automatically cleared after the SCSI synchronous offset counter is
+- * reset.
+- */
+-/* Bit 5 is reserved on 800 series chips */
+-#define CTEST5_700_ROFF 0x20
+-/*
+- * Master Control for Set or Reset pulses. When 1, causes the low
+- * four bits of register to set when set, 0 causes the low bits to
+- * clear when set.
+- */
+-#define CTEST5_MASR 0x10
+-#define CTEST5_DDIR 0x08 /* DMA direction */
+-/*
+- * Bits 2-0 are reserved on 800 series chips
+- */
+-#define CTEST5_700_EOP 0x04 /* End of process */
+-#define CTEST5_700_DREQ 0x02 /* Data request */
+-#define CTEST5_700_DACK 0x01 /* Data acknowledge */
+-
+-/*
+- * Chip test 6 rw - writing to this register writes to the byte
+- * lane in the DMA FIFO as determined by the FBL bits in the CTEST4
+- * register.
+- */
+-#define CTEST6_REG_700 0x1a
+-#define CTEST6_REG_800 0x23
+-
+-#define CTEST7_REG 0x1b /* Chip test 7 rw */
+-/* 0x80 - 0x40 are reserved on NCR53c700 and NCR53c700-66 chips */
+-#define CTEST7_10_CDIS 0x80 /* Cache burst disable */
+-#define CTEST7_10_SC1 0x40 /* Snoop control bits */
+-#define CTEST7_10_SC0 0x20
+-#define CTEST7_10_SC_MASK 0x60
+-/* 0x20 is reserved on the NCR53c700 */
+-#define CTEST7_0060_FM 0x20 /* Fetch mode */
+-#define CTEST7_STD 0x10 /* Selection timeout disable */
+-#define CTEST7_DFP 0x08 /* DMA FIFO parity bit for CTEST6 */
+-#define CTEST7_EVP 0x04 /* 1 = host bus even parity, 0 = odd */
+-#define CTEST7_10_TT1 0x02 /* Transfer type */
+-#define CTEST7_00_DC 0x02 /* Set to drive DC low during instruction
+- fetch */
+-#define CTEST7_DIFF 0x01 /* Differential mode */
+-
+-#define CTEST7_SAVE ( CTEST7_EVP | CTEST7_DIFF )
+-
+-
+-#define TEMP_REG 0x1c /* through 0x1f Temporary stack rw */
+-
+-#define DFIFO_REG 0x20 /* DMA FIFO rw */
+-/*
+- * 0x80 is reserved on the NCR53c710, the CLF and FLF bits have been
+- * moved into the CTEST8 register.
+- */
+-#define DFIFO_00_FLF 0x80 /* Flush DMA FIFO to memory */
+-#define DFIFO_00_CLF 0x40 /* Clear DMA and SCSI FIFOs */
+-#define DFIFO_BO6 0x40
+-#define DFIFO_BO5 0x20
+-#define DFIFO_BO4 0x10
+-#define DFIFO_BO3 0x08
+-#define DFIFO_BO2 0x04
+-#define DFIFO_BO1 0x02
+-#define DFIFO_BO0 0x01
+-#define DFIFO_10_BO_MASK 0x7f /* 7 bit counter */
+-#define DFIFO_00_BO_MASK 0x3f /* 6 bit counter */
+-
+-/*
+- * Interrupt status rw
+- * Note that this is the only register which can be read while SCSI
+- * SCRIPTS are being executed.
+- */
+-#define ISTAT_REG_700 0x21
+-#define ISTAT_REG_800 0x14
+-#define ISTAT_ABRT 0x80 /* Software abort, write
+- * 1 to abort, wait for interrupt. */
+-/* 0x40 and 0x20 are reserved on NCR53c700 and NCR53c700-66 chips */
+-#define ISTAT_10_SRST 0x40 /* software reset */
+-#define ISTAT_10_SIGP 0x20 /* signal script */
+-/* 0x10 is reserved on NCR53c700 series chips */
+-#define ISTAT_800_SEM 0x10 /* semaphore */
+-#define ISTAT_CON 0x08 /* 1 when connected */
+-#define ISTAT_800_INTF 0x04 /* Interrupt on the fly */
+-#define ISTAT_700_PRE 0x04 /* Pointer register empty.
+- * Set to 1 when DSPS and DSP
+- * registers are empty in pipeline
+- * mode, always set otherwise.
+- */
+-#define ISTAT_SIP 0x02 /* SCSI interrupt pending from
+- * SCSI portion of SIOP see
+- * SSTAT0
+- */
+-#define ISTAT_DIP 0x01 /* DMA interrupt pending
+- * see DSTAT
+- */
+-
+-/* NCR53c700-66 and NCR53c710 only */
+-#define CTEST8_REG 0x22 /* Chip test 8 rw */
+-#define CTEST8_0066_EAS 0x80 /* Enable alternate SCSI clock,
+- * ie read from SCLK/ rather than CLK/
+- */
+-#define CTEST8_0066_EFM 0x40 /* Enable fetch and master outputs */
+-#define CTEST8_0066_GRP 0x20 /* Generate Receive Parity for
+- * pass through. This ensures that
+- * bad parity won't reach the host
+- * bus.
+- */
+-#define CTEST8_0066_TE 0x10 /* TolerANT enable. Enable
+- * active negation, should only
+- * be used for slow SCSI
+- * non-differential.
+- */
+-#define CTEST8_0066_HSC 0x08 /* Halt SCSI clock */
+-#define CTEST8_0066_SRA 0x04 /* Shorten REQ/ACK filtering,
+- * must be set for fast SCSI-II
+- * speeds.
+- */
+-#define CTEST8_0066_DAS 0x02 /* Disable automatic target/initiator
+- * switching.
+- */
+-#define CTEST8_0066_LDE 0x01 /* Last disconnect enable.
+- * The status of pending
+- * disconnect is maintained by
+- * the core, eliminating
+- * the possibility of missing a
+- * selection or reselection
+- * while waiting to fetch a
+- * WAIT DISCONNECT opcode.
+- */
+-
+-#define CTEST8_10_V3 0x80 /* Chip revision */
+-#define CTEST8_10_V2 0x40
+-#define CTEST8_10_V1 0x20
+-#define CTEST8_10_V0 0x10
+-#define CTEST8_10_V_MASK 0xf0
+-#define CTEST8_10_FLF 0x08 /* Flush FIFOs */
+-#define CTEST8_10_CLF 0x04 /* Clear FIFOs */
+-#define CTEST8_10_FM 0x02 /* Fetch pin mode */
+-#define CTEST8_10_SM 0x01 /* Snoop pin mode */
+-
+-
+-/*
+- * The CTEST9 register may be used to differentiate between a
+- * NCR53c700 and a NCR53c710.
+- *
+- * Write 0xff to this register.
+- * Read it.
+- * If the contents are 0xff, it is a NCR53c700
+- * If the contents are 0x00, it is a NCR53c700-66 first revision
+- * If the contents are some other value, it is some other NCR53c700-66
+- */
+-#define CTEST9_REG_00 0x23 /* Chip test 9 ro */
+-#define LCRC_REG_10 0x23
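+-
+-/*
+- * A minimal sketch of the probe described above (illustrative, not part
+- * of the original header), assuming the usual NCR53c7x0_local_declare()/
+- * NCR53c7x0_local_setup() environment so the 8 bit accessors work :
+- *
+- * NCR53c7x0_write8 (CTEST9_REG_00, 0xff);
+- * revision = NCR53c7x0_read8 (CTEST9_REG_00);
+- * chip_is_700 = (revision == 0xff);
+- */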
+-
+-/*
+- * 0x24 through 0x27 are the DMA byte counter register. Instructions
+- * write their high 8 bits into the DCMD register, the low 24 bits into
+- * the DBC register.
+- *
+- * Function is dependent on the command type being executed.
+- */
+-
+-
+-#define DBC_REG 0x24
+-/*
+- * For Block Move Instructions, DBC is a 24 bit quantity representing
+- * the number of bytes to transfer.
+- * For Transfer Control Instructions, DBC is bit fielded as follows :
+- */
+-/* Bits 20 - 23 should be clear */
+-#define DBC_TCI_TRUE (1 << 19) /* Jump when true */
+-#define DBC_TCI_COMPARE_DATA (1 << 18) /* Compare data */
+-#define DBC_TCI_COMPARE_PHASE (1 << 17) /* Compare phase with DCMD field */
+-#define DBC_TCI_WAIT_FOR_VALID (1 << 16) /* Wait for REQ */
+-/* Bits 8 - 15 are reserved on some implementations ? */
+-#define DBC_TCI_MASK_MASK 0xff00 /* Mask for data compare */
+-#define DBC_TCI_MASK_SHIFT 8
+-#define DBC_TCI_DATA_MASK 0xff /* Data to be compared */
+-#define DBC_TCI_DATA_SHIFT 0
+-
+-#define DBC_RWRI_IMMEDIATE_MASK 0xff00 /* Immediate data */
+-#define DBC_RWRI_IMMEDIATE_SHIFT 8 /* Amount to shift */
+-#define DBC_RWRI_ADDRESS_MASK 0x3f0000 /* Register address */
+-#define DBC_RWRI_ADDRESS_SHIFT 16
+-
+-
+-/*
+- * DMA command r/w
+- */
+-#define DCMD_REG 0x27
+-#define DCMD_TYPE_MASK 0xc0 /* Masks off type */
+-#define DCMD_TYPE_BMI 0x00 /* Indicates a Block Move instruction */
+-#define DCMD_BMI_IO 0x01 /* I/O, CD, and MSG bits selecting */
+-#define DCMD_BMI_CD 0x02 /* the phase for the block MOVE */
+-#define DCMD_BMI_MSG 0x04 /* instruction */
+-
+-#define DCMD_BMI_OP_MASK 0x18 /* mask for opcode */
+-#define DCMD_BMI_OP_MOVE_T 0x00 /* MOVE */
+-#define DCMD_BMI_OP_MOVE_I 0x08 /* MOVE Initiator */
+-
+-#define DCMD_BMI_INDIRECT 0x20 /* Indirect addressing */
+-
+-#define DCMD_TYPE_TCI 0x80 /* Indicates a Transfer Control
+- instruction */
+-#define DCMD_TCI_IO 0x01 /* I/O, CD, and MSG bits selecting */
+-#define DCMD_TCI_CD 0x02 /* the phase for the block MOVE */
+-#define DCMD_TCI_MSG 0x04 /* instruction */
+-#define DCMD_TCI_OP_MASK 0x38 /* mask for opcode */
+-#define DCMD_TCI_OP_JUMP 0x00 /* JUMP */
+-#define DCMD_TCI_OP_CALL 0x08 /* CALL */
+-#define DCMD_TCI_OP_RETURN 0x10 /* RETURN */
+-#define DCMD_TCI_OP_INT 0x18 /* INT */
+-
+-#define DCMD_TYPE_RWRI 0x40 /* Indicates I/O or register Read/Write
+- instruction */
+-#define DCMD_RWRI_OPC_MASK 0x38 /* Opcode mask */
+-#define DCMD_RWRI_OPC_WRITE 0x28 /* Write SFBR to register */
+-#define DCMD_RWRI_OPC_READ 0x30 /* Read register to SFBR */
+-#define DCMD_RWRI_OPC_MODIFY 0x38 /* Modify in place */
+-
+-#define DCMD_RWRI_OP_MASK 0x07
+-#define DCMD_RWRI_OP_MOVE 0x00
+-#define DCMD_RWRI_OP_SHL 0x01
+-#define DCMD_RWRI_OP_OR 0x02
+-#define DCMD_RWRI_OP_XOR 0x03
+-#define DCMD_RWRI_OP_AND 0x04
+-#define DCMD_RWRI_OP_SHR 0x05
+-#define DCMD_RWRI_OP_ADD 0x06
+-#define DCMD_RWRI_OP_ADDC 0x07
+-
+-#define DCMD_TYPE_MMI 0xc0 /* Indicates a Memory Move instruction
+- (three words) */
+-
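+-/*
+- * Example (illustrative, not from the original header) : the first word
+- * of an instruction carries DCMD in the high byte and DBC in the low 24
+- * bits, so a "JUMP when phase == MSG IN, wait for a valid phase"
+- * Transfer Control Instruction could be composed as
+- *
+- * u32 insn0 = ((u32) (DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP | DCMD_TCI_MSG |
+- * DCMD_TCI_CD | DCMD_TCI_IO) << 24) |
+- * DBC_TCI_TRUE | DBC_TCI_COMPARE_PHASE | DBC_TCI_WAIT_FOR_VALID;
+- *
+- * with the jump address in the second instruction word.
+- */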
+-
+-#define DNAD_REG 0x28 /* through 0x2b DMA next address for
+- data */
+-#define DSP_REG 0x2c /* through 0x2f DMA SCRIPTS pointer rw */
+-#define DSPS_REG 0x30 /* through 0x33 DMA SCRIPTS pointer
+- save rw */
+-#define DMODE_REG_00 0x34 /* DMA mode rw */
+-#define DMODE_00_BL1 0x80 /* Burst length bits */
+-#define DMODE_00_BL0 0x40
+-#define DMODE_BL_MASK 0xc0
+-/* Burst lengths (800) */
+-#define DMODE_BL_2 0x00 /* 2 transfer */
+-#define DMODE_BL_4 0x40 /* 4 transfers */
+-#define DMODE_BL_8 0x80 /* 8 transfers */
+-#define DMODE_BL_16 0xc0 /* 16 transfers */
+-
+-#define DMODE_10_BL_1 0x00 /* 1 transfer */
+-#define DMODE_10_BL_2 0x40 /* 2 transfers */
+-#define DMODE_10_BL_4 0x80 /* 4 transfers */
+-#define DMODE_10_BL_8 0xc0 /* 8 transfers */
+-#define DMODE_10_FC2 0x20 /* Driven to FC2 pin */
+-#define DMODE_10_FC1 0x10 /* Driven to FC1 pin */
+-#define DMODE_710_PD 0x08 /* Program/data on FC0 pin */
+-#define DMODE_710_UO 0x02 /* User prog. output */
+-
+-#define DMODE_700_BW16 0x20 /* Host buswidth = 16 */
+-#define DMODE_700_286 0x10 /* 286 mode */
+-#define DMODE_700_IOM 0x08 /* Transfer to IO port */
+-#define DMODE_700_FAM 0x04 /* Fixed address mode */
+-#define DMODE_700_PIPE 0x02 /* Pipeline mode disables
+- * automatic fetch / exec
+- */
+-#define DMODE_MAN 0x01 /* Manual start mode,
+- * requires a 1 to be written
+- * to the start DMA bit in the DCNTL
+- * register to run scripts
+- */
+-
+-#define DMODE_700_SAVE ( DMODE_BL_MASK | DMODE_700_BW16 | DMODE_700_286 )
+-
+-/* NCR53c800 series only */
+-#define SCRATCHA_REG_800 0x34 /* through 0x37 Scratch A rw */
+-/* NCR53c710 only */
+-#define SCRATCHB_REG_10 0x34 /* through 0x37 scratch B rw */
+-
+-#define DMODE_REG_10 0x38 /* DMA mode rw, NCR53c710 and newer */
+-#define DMODE_800_SIOM 0x20 /* Source IO = 1 */
+-#define DMODE_800_DIOM 0x10 /* Destination IO = 1 */
+-#define DMODE_800_ERL 0x08 /* Enable Read Line */
+-
+-/* 35-38 are reserved on 700 and 700-66 series chips */
+-#define DIEN_REG 0x39 /* DMA interrupt enable rw */
+-/* 0x80, 0x40, and 0x20 are reserved on 700-series chips */
+-#define DIEN_800_MDPE 0x40 /* Master data parity error */
+-#define DIEN_800_BF 0x20 /* BUS fault */
+-#define DIEN_700_BF 0x20 /* BUS fault */
+-#define DIEN_ABRT 0x10 /* Enable aborted interrupt */
+-#define DIEN_SSI 0x08 /* Enable single step interrupt */
+-#define DIEN_SIR 0x04 /* Enable SCRIPTS INT command
+- * interrupt
+- */
+-/* 0x02 is reserved on 800 series chips */
+-#define DIEN_700_WTD 0x02 /* Enable watchdog timeout interrupt */
+-#define DIEN_700_OPC 0x01 /* Enable illegal instruction
+- * interrupt
+- */
+-#define DIEN_800_IID 0x01 /* Same meaning, different name */
+-
+-/*
+- * DMA watchdog timer rw
+- * set in 16 CLK input periods.
+- */
+-#define DWT_REG 0x3a
+-
+-/* DMA control rw */
+-#define DCNTL_REG 0x3b
+-#define DCNTL_700_CF1 0x80 /* Clock divisor bits */
+-#define DCNTL_700_CF0 0x40
+-#define DCNTL_700_CF_MASK 0xc0
+-/* Clock divisors Divisor SCLK range (MHZ) */
+-#define DCNTL_700_CF_2 0x00 /* 2.0 37.51-50.00 */
+-#define DCNTL_700_CF_1_5 0x40 /* 1.5 25.01-37.50 */
+-#define DCNTL_700_CF_1 0x80 /* 1.0 16.67-25.00 */
+-#define DCNTL_700_CF_3 0xc0 /* 3.0 50.01-66.67 (53c700-66) */
+-
+-#define DCNTL_700_S16 0x20 /* Load scripts 16 bits at a time */
+-#define DCNTL_SSM 0x10 /* Single step mode */
+-#define DCNTL_700_LLM 0x08 /* Low level mode, can only be set
+- * after selection */
+-#define DCNTL_800_IRQM 0x08 /* Totem pole IRQ pin */
+-#define DCNTL_STD 0x04 /* Start DMA / SCRIPTS */
+-/* 0x02 is reserved */
+-#define DCNTL_00_RST 0x01 /* Software reset, resets everything
+- * but 286 mode bit in DMODE. On the
+- * NCR53c710, this bit moved to CTEST8
+- */
+-#define DCNTL_10_COM 0x01 /* 700 software compatibility mode */
+-#define DCNTL_10_EA 0x20 /* Enable Ack - needed for MVME16x */
+-
+-#define DCNTL_700_SAVE ( DCNTL_700_CF_MASK | DCNTL_700_S16)
+-
+-
+-/* NCR53c700-66 only */
+-#define SCRATCHB_REG_00 0x3c /* through 0x3f scratch b rw */
+-#define SCRATCHB_REG_800 0x5c /* through 0x5f scratch b rw */
+-/* NCR53c710 only */
+-#define ADDER_REG_10 0x3c /* Adder, NCR53c710 only */
+-
+-#define SIEN1_REG_800 0x41
+-#define SIEN1_800_STO 0x04 /* selection/reselection timeout */
+-#define SIEN1_800_GEN 0x02 /* general purpose timer */
+-#define SIEN1_800_HTH 0x01 /* handshake to handshake */
+-
+-#define SIST1_REG_800 0x43
+-#define SIST1_800_STO 0x04 /* selection/reselection timeout */
+-#define SIST1_800_GEN 0x02 /* general purpose timer */
+-#define SIST1_800_HTH 0x01 /* handshake to handshake */
+-
+-#define SLPAR_REG_800 0x44 /* Parity */
+-
+-#define MACNTL_REG_800 0x46 /* Memory access control */
+-#define MACNTL_800_TYP3 0x80
+-#define MACNTL_800_TYP2 0x40
+-#define MACNTL_800_TYP1 0x20
+-#define MACNTL_800_TYP0 0x10
+-#define MACNTL_800_DWR 0x08
+-#define MACNTL_800_DRD 0x04
+-#define MACNTL_800_PSCPT 0x02
+-#define MACNTL_800_SCPTS 0x01
+-
+-#define GPCNTL_REG_800 0x47 /* General Purpose Pin Control */
+-
+-/* Timeouts are expressed such that 0=off, 1=100us, doubling after that */
+-#define STIME0_REG_800 0x48 /* SCSI Timer Register 0 */
+-#define STIME0_800_HTH_MASK 0xf0 /* Handshake to Handshake timeout */
+-#define STIME0_800_HTH_SHIFT 4
+-#define STIME0_800_SEL_MASK 0x0f /* Selection timeout */
+-#define STIME0_800_SEL_SHIFT 0
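+-
+-/*
+- * Example (illustrative) : a field value n in 1..15 selects a timeout of
+- * 100us * 2^(n-1), so n = 4 gives 800us; the register could be loaded as
+- *
+- * stime0 = ((hth << STIME0_800_HTH_SHIFT) & STIME0_800_HTH_MASK) |
+- * ((sel << STIME0_800_SEL_SHIFT) & STIME0_800_SEL_MASK);
+- */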
+-
+-#define STIME1_REG_800 0x49
+-#define STIME1_800_GEN_MASK 0x0f /* General purpose timer */
+-
+-#define RESPID_REG_800 0x4a /* Response ID, bit fielded. 8
+- bits on narrow chips, 16 on WIDE */
+-
+-#define STEST0_REG_800 0x4c
+-#define STEST0_800_SLT 0x08 /* Selection response logic test */
+-#define STEST0_800_ART 0x04 /* Arbitration priority encoder test */
+-#define STEST0_800_SOZ 0x02 /* Synchronous offset zero */
+-#define STEST0_800_SOM 0x01 /* Synchronous offset maximum */
+-
+-#define STEST1_REG_800 0x4d
+-#define STEST1_800_SCLK 0x80 /* Disable SCSI clock */
+-
+-#define STEST2_REG_800 0x4e
+-#define STEST2_800_SCE 0x80 /* Enable SOCL/SODL */
+-#define STEST2_800_ROF 0x40 /* Reset SCSI sync offset */
+-#define STEST2_800_SLB 0x10 /* Enable SCSI loopback mode */
+-#define STEST2_800_SZM 0x08 /* SCSI high impedance mode */
+-#define STEST2_800_EXT 0x02 /* Extend REQ/ACK filter 30 to 60ns */
+-#define STEST2_800_LOW 0x01 /* SCSI low level mode */
+-
+-#define STEST3_REG_800 0x4f
+-#define STEST3_800_TE 0x80 /* Enable active negation */
+-#define STEST3_800_STR 0x40 /* SCSI FIFO test read */
+-#define STEST3_800_HSC 0x20 /* Halt SCSI clock */
+-#define STEST3_800_DSI 0x10 /* Disable single initiator response */
+-#define STEST3_800_TTM 0x04 /* Time test mode */
+-#define STEST3_800_CSF 0x02 /* Clear SCSI FIFO */
+-#define STEST3_800_STW 0x01 /* SCSI FIFO test write */
+-
+-#define OPTION_PARITY 0x1 /* Enable parity checking */
+-#define OPTION_TAGGED_QUEUE 0x2 /* Enable SCSI-II tagged queuing */
+-#define OPTION_700 0x8 /* Always run NCR53c700 scripts */
+-#define OPTION_INTFLY 0x10 /* Use INTFLY interrupts */
+-#define OPTION_DEBUG_INTR 0x20 /* Debug interrupts */
+-#define OPTION_DEBUG_INIT_ONLY 0x40 /* Run initialization code and
+- simple test code, return
+- DID_NO_CONNECT if any SCSI
+- commands are attempted. */
+-#define OPTION_DEBUG_READ_ONLY 0x80 /* Return DID_ERROR if any
+- SCSI write is attempted */
+-#define OPTION_DEBUG_TRACE 0x100 /* Animated trace mode, print
+- each address and instruction
+- executed to debug buffer. */
+-#define OPTION_DEBUG_SINGLE 0x200 /* stop after executing one
+- instruction */
+-#define OPTION_SYNCHRONOUS 0x400 /* Enable sync SCSI. */
+-#define OPTION_MEMORY_MAPPED 0x800 /* NCR registers have valid
+- memory mapping */
+-#define OPTION_IO_MAPPED 0x1000 /* NCR registers have valid
+- I/O mapping */
+-#define OPTION_DEBUG_PROBE_ONLY 0x2000 /* Probe only, don't even init */
+-#define OPTION_DEBUG_TESTS_ONLY 0x4000 /* Probe, init, run selected tests */
+-#define OPTION_DEBUG_TEST0 0x08000 /* Run test 0 */
+-#define OPTION_DEBUG_TEST1 0x10000 /* Run test 1 */
+-#define OPTION_DEBUG_TEST2 0x20000 /* Run test 2 */
+-#define OPTION_DEBUG_DUMP 0x40000 /* Dump commands */
+-#define OPTION_DEBUG_TARGET_LIMIT 0x80000 /* Only talk to target+luns specified */
+-#define OPTION_DEBUG_NCOMMANDS_LIMIT 0x100000 /* Limit the number of commands */
+-#define OPTION_DEBUG_SCRIPT 0x200000 /* Print when checkpoints are passed */
+-#define OPTION_DEBUG_FIXUP 0x400000 /* print fixup values */
+-#define OPTION_DEBUG_DSA 0x800000
+-#define OPTION_DEBUG_CORRUPTION 0x1000000 /* Detect script corruption */
+-#define OPTION_DEBUG_SDTR 0x2000000 /* Debug SDTR problem */
+-#define OPTION_DEBUG_MISMATCH 0x4000000 /* Debug phase mismatches */
+-#define OPTION_DISCONNECT 0x8000000 /* Allow disconnect */
+-#define OPTION_DEBUG_DISCONNECT 0x10000000
+-#define OPTION_ALWAYS_SYNCHRONOUS 0x20000000 /* Negotiate sync. transfers
+- on power up */
+-#define OPTION_DEBUG_QUEUES 0x80000000
+-#define OPTION_DEBUG_ALLOCATION 0x100000000LL
+-#define OPTION_DEBUG_SYNCHRONOUS 0x200000000LL /* Sanity check SXFER and
+- SCNTL3 registers */
+-#define OPTION_NO_ASYNC 0x400000000LL /* Don't automagically send
+- SDTR for async transfers when
+- we haven't been told to do
+- a synchronous transfer. */
+-#define OPTION_NO_PRINT_RACE 0x800000000LL /* Don't print message when
+- the reselect/WAIT DISCONNECT
+- race condition hits */
+-#if !defined(PERM_OPTIONS)
+-#define PERM_OPTIONS 0
+-#endif
+-
+-/*
+- * Some data which is accessed by the NCR chip must be 4-byte aligned.
+- * For some hosts the default is less than that (e.g. 68K uses 2-byte).
+- * Alignment has only been forced where it is important; also if one
+- * 32 bit structure field is aligned then it is assumed that following
+- * 32 bit fields are also aligned. Take care when adding fields
+- * which are other than 32 bit.
+- */
+-
+-struct NCR53c7x0_synchronous {
+- u32 select_indirect /* Value used for indirect selection */
+- __attribute__ ((aligned (4)));
+- u32 sscf_710; /* Used to set SSCF bits for 710 */
+- u32 script[8]; /* Size ?? Script used when target is
+- reselected */
+- unsigned char synchronous_want[5]; /* Per target desired SDTR */
+-/*
+- * Set_synchronous programs these, select_indirect and current settings after
+- * int_debug_should show a match.
+- */
+- unsigned char sxfer_sanity, scntl3_sanity;
+-};
+-
+-#define CMD_FLAG_SDTR 1 /* Initiating synchronous
+- transfer negotiation */
+-#define CMD_FLAG_WDTR 2 /* Initiating wide transfer
+- negotiation */
+-#define CMD_FLAG_DID_SDTR 4 /* did SDTR */
+-#define CMD_FLAG_DID_WDTR 8 /* did WDTR */
+-
+-struct NCR53c7x0_table_indirect {
+- u32 count;
+- void *address;
+-};
+-
+-enum ncr_event {
+- EVENT_NONE = 0,
+-/*
+- * Order is IMPORTANT, since these must correspond to the event interrupts
+- * in 53c7,8xx.scr
+- */
+-
+- EVENT_ISSUE_QUEUE = 0x5000000, /* 0 Command was added to issue queue */
+- EVENT_START_QUEUE, /* 1 Command moved to start queue */
+- EVENT_SELECT, /* 2 Command completed selection */
+- EVENT_DISCONNECT, /* 3 Command disconnected */
+- EVENT_RESELECT, /* 4 Command reselected */
+- EVENT_COMPLETE, /* 5 Command completed */
+- EVENT_IDLE, /* 6 */
+- EVENT_SELECT_FAILED, /* 7 */
+- EVENT_BEFORE_SELECT, /* 8 */
+- EVENT_RESELECT_FAILED /* 9 */
+-};
+-
+-struct NCR53c7x0_event {
+- enum ncr_event event; /* What type of event */
+- unsigned char target;
+- unsigned char lun;
+- struct timeval time;
+- u32 *dsa; /* What's in the DSA register now (virt) */
+-/*
+- * A few things from that SCSI pid so we know what happened after
+- * the Scsi_Cmnd structure in question may have disappeared.
+- */
+- unsigned long pid; /* The SCSI PID which caused this
+- event */
+- unsigned char cmnd[12];
+-};
+-
+-/*
+- * Things in the NCR53c7x0_cmd structure are split into two parts :
+- *
+- * 1. A fixed portion, for things which are not accessed directly by static NCR
+- * code (ie, are referenced only by the Linux side of the driver,
+- * or only by dynamically generated code).
+- *
+- * 2. The DSA portion, for things which are accessed directly by static NCR
+- * code.
+- *
+- * This is a little ugly, but it
+- * 1. Avoids conflicts between the NCR code's picture of the structure, and
+- * Linux code's idea of what it looks like.
+- *
+- * 2. Minimizes the pain in the Linux side of the code needed
+- * to calculate real dsa locations for things, etc.
+- *
+- */
+-
+-struct NCR53c7x0_cmd {
+- void *real; /* Real, unaligned address for
+- free function */
+- void (* free)(void *, int); /* Command to deallocate; NULL
+- for structures allocated with
+- scsi_register, etc. */
+- Scsi_Cmnd *cmd; /* Associated Scsi_Cmnd
+- structure, Scsi_Cmnd points
+- at NCR53c7x0_cmd using
+- host_scribble structure */
+-
+- int size; /* scsi_malloc'd size of this
+- structure */
+-
+- int flags; /* CMD_* flags */
+-
+- unsigned char cmnd[12]; /* CDB, copied from Scsi_Cmnd */
+- int result; /* Copy to Scsi_Cmnd when done */
+-
+- struct { /* Private non-cached bounce buffer */
+- unsigned char buf[256];
+- u32 addr;
+- u32 len;
+- } bounce;
+-
+-/*
+- * SDTR and WIDE messages are an either/or affair
+- * in this message, since we will go into message out and send
+- * _the whole mess_ without dropping out of message out to
+- * let the target go into message in after sending the first
+- * message.
+- */
+-
+- unsigned char select[11]; /* Select message, includes
+- IDENTIFY
+- (optional) QUEUE TAG
+- (optional) SDTR or WDTR
+- */
+-
+-
+- volatile struct NCR53c7x0_cmd *next; /* Linux maintained lists (free,
+- running, eventually finished */
+-
+-
+- u32 *data_transfer_start; /* Start of data transfer routines */
+- u32 *data_transfer_end; /* Address after end of data transfer
+- routines */
+-/*
+- * The following three fields were moved from the DSA proper to here
+- * since only dynamically generated NCR code refers to them, meaning
+- * we don't need dsa_* absolutes, and it is simpler to let the
+- * host code refer to them directly.
+- */
+-
+-/*
+- * HARD CODED : residual and saved_residual need to agree with the sizes
+- * used in NCR53c7,8xx.scr.
+- *
+- * FIXME: we want to consider the case where we have odd-length
+- * scatter/gather buffers and a WIDE transfer, in which case
+- * we'll need to use the CHAIN MOVE instruction. Ick.
+- */
+- u32 residual[6] __attribute__ ((aligned (4)));
+- /* Residual data transfer which
+- allows pointer code to work
+- right.
+-
+- [0-1] : Conditional call to
+- appropriate other transfer
+- routine.
+- [2-3] : Residual block transfer
+- instruction.
+- [4-5] : Jump to instruction
+- after splice.
+- */
+- u32 saved_residual[6]; /* Copy of old residual, so we
+- can get another partial
+- transfer and still recover
+- */
+-
+- u32 saved_data_pointer; /* Saved data pointer */
+-
+- u32 dsa_next_addr; /* _Address_ of dsa_next field
+- in this dsa for RISCy
+- style constant. */
+-
+- u32 dsa_addr; /* Address of dsa; RISCy style
+- constant */
+-
+- u32 dsa[0]; /* Variable length (depending
+- on host type, number of scatter /
+- gather buffers, etc). */
+-};
+-
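+-/*
+- * Illustrative allocation sketch (hypothetical, not from the original
+- * header) : because dsa[] is a zero-length trailing array, a command and
+- * its variable-size DSA are carved from one block, e.g.
+- *
+- * struct NCR53c7x0_cmd *c =
+- * kmalloc (sizeof (struct NCR53c7x0_cmd) + hostdata->dsa_len,
+- * GFP_ATOMIC);
+- *
+- * after which c->dsa addresses dsa_len bytes of per-command DSA space.
+- */
+-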
+-struct NCR53c7x0_break {
+- u32 *address, old_instruction[2];
+- struct NCR53c7x0_break *next;
+- unsigned char old_size; /* Size of old instruction */
+-};
+-
+-/* Indicates that the NCR is not executing code */
+-#define STATE_HALTED 0
+-/*
+- * Indicates that the NCR is executing the wait for select / reselect
+- * script. Only used when running NCR53c700 compatible scripts, only
+- * state during which an ABORT is _not_ considered an error condition.
+- */
+-#define STATE_WAITING 1
+-/* Indicates that the NCR is executing other code. */
+-#define STATE_RUNNING 2
+-/*
+- * Indicates that the NCR was being aborted.
+- */
+-#define STATE_ABORTING 3
+-/* Indicates that the NCR was successfully aborted. */
+-#define STATE_ABORTED 4
+-/* Indicates that the NCR has been disabled due to a fatal error */
+-#define STATE_DISABLED 5
+-
+-/*
+- * Where knowledge of SCSI SCRIPT(tm) specified values are needed
+- * in an interrupt handler, an interrupt handler exists for each
+- * different SCSI script so we don't have name space problems.
+- *
+- * Return values of these handlers are as follows :
+- */
+-#define SPECIFIC_INT_NOTHING 0 /* don't even restart */
+-#define SPECIFIC_INT_RESTART 1 /* restart at the next instruction */
+-#define SPECIFIC_INT_ABORT 2 /* recoverable error, abort cmd */
+-#define SPECIFIC_INT_PANIC 3 /* unrecoverable error, panic */
+-#define SPECIFIC_INT_DONE 4 /* normal command completion */
+-#define SPECIFIC_INT_BREAK 5 /* break point encountered */
+-
+-struct NCR53c7x0_hostdata {
+- int size; /* Size of entire Scsi_Host
+- structure */
+- int board; /* set to board type, useful if
+- we have host specific things,
+- ie, a general purpose I/O
+- bit is being used to enable
+- termination, etc. */
+-
+- int chip; /* set to chip type; 700-66 is
+- 700-66, rest are last three
+- digits of part number */
+-
+- char valid_ids[8]; /* Valid SCSI ID's for adapter */
+-
+- u32 *dsp; /* dsp to restart with after
+- all stacked interrupts are
+- handled. */
+-
+- unsigned dsp_changed:1; /* Has dsp changed within this
+- set of stacked interrupts ? */
+-
+- unsigned char dstat; /* Most recent value of dstat */
+- unsigned dstat_valid:1;
+-
+- unsigned expecting_iid:1; /* Expect IID interrupt */
+- unsigned expecting_sto:1; /* Expect STO interrupt */
+-
+- /*
+- * The code stays cleaner if we use variables with function
+- * pointers and offsets that are unique for the different
+- * scripts rather than having a slew of switch(hostdata->chip)
+- * statements.
+- *
+- * It also means that the #defines from the SCSI SCRIPTS(tm)
+- * don't have to be visible outside of the script-specific
+- * instructions, preventing name space pollution.
+- */
+-
+- void (* init_fixup)(struct Scsi_Host *host);
+- void (* init_save_regs)(struct Scsi_Host *host);
+- void (* dsa_fixup)(struct NCR53c7x0_cmd *cmd);
+- void (* soft_reset)(struct Scsi_Host *host);
+- int (* run_tests)(struct Scsi_Host *host);
+-
+- /*
+- * Called when DSTAT_SIR is set, indicating an interrupt generated
+- * by the INT instruction, where values are unique for each SCSI
+- * script. Should return one of the SPEC_* values.
+- */
+-
+- int (* dstat_sir_intr)(struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd);
+-
+- int dsa_len; /* Size of DSA structure */
+-
+- /*
+- * Location of DSA fields for the SCSI SCRIPT corresponding to this
+- * chip.
+- */
+-
+- s32 dsa_start;
+- s32 dsa_end;
+- s32 dsa_next;
+- s32 dsa_prev;
+- s32 dsa_cmnd;
+- s32 dsa_select;
+- s32 dsa_msgout;
+- s32 dsa_cmdout;
+- s32 dsa_dataout;
+- s32 dsa_datain;
+- s32 dsa_msgin;
+- s32 dsa_msgout_other;
+- s32 dsa_write_sync;
+- s32 dsa_write_resume;
+- s32 dsa_check_reselect;
+- s32 dsa_status;
+- s32 dsa_saved_pointer;
+- s32 dsa_jump_dest;
+-
+- /*
+- * Important entry points that generic fixup code needs
+- * to know about, fixed up.
+- */
+-
+- s32 E_accept_message;
+- s32 E_command_complete;
+- s32 E_data_transfer;
+- s32 E_dsa_code_template;
+- s32 E_dsa_code_template_end;
+- s32 E_end_data_transfer;
+- s32 E_msg_in;
+- s32 E_initiator_abort;
+- s32 E_other_transfer;
+- s32 E_other_in;
+- s32 E_other_out;
+- s32 E_target_abort;
+- s32 E_debug_break;
+- s32 E_reject_message;
+- s32 E_respond_message;
+- s32 E_select;
+- s32 E_select_msgout;
+- s32 E_test_0;
+- s32 E_test_1;
+- s32 E_test_2;
+- s32 E_test_3;
+- s32 E_dsa_zero;
+- s32 E_cmdout_cmdout;
+- s32 E_wait_reselect;
+- s32 E_dsa_code_begin;
+-
+- long long options; /* Bitfielded set of options enabled */
+- volatile u32 test_completed; /* Test completed */
+- int test_running; /* Test currently running */
+- s32 test_source
+- __attribute__ ((aligned (4)));
+- volatile s32 test_dest;
+-
+- volatile int state; /* state of driver, only used for
+- OPTION_700 */
+-
+- unsigned char dmode; /*
+- * set to the address of the DMODE
+- * register for this chip.
+- */
+- unsigned char istat; /*
+- * set to the address of the ISTAT
+- * register for this chip.
+- */
+-
+- int scsi_clock; /*
+- * SCSI clock in HZ. 0 may be used
+- * for unknown, although this will
+- * disable synchronous negotiation.
+- */
+-
+- volatile int intrs; /* Number of interrupts */
+- volatile int resets; /* Number of SCSI resets */
+- unsigned char saved_dmode;
+- unsigned char saved_ctest4;
+- unsigned char saved_ctest7;
+- unsigned char saved_dcntl;
+- unsigned char saved_scntl3;
+-
+- unsigned char this_id_mask;
+-
+- /* Debugger information */
+- struct NCR53c7x0_break *breakpoints, /* Linked list of all break points */
+- *breakpoint_current; /* Current breakpoint being stepped
+- through, NULL if we are running
+- normally. */
+-#ifdef NCR_DEBUG
+- int debug_size; /* Size of debug buffer */
+- volatile int debug_count; /* Current data count */
+- volatile char *debug_buf; /* Output ring buffer */
+- volatile char *debug_write; /* Current write pointer */
+- volatile char *debug_read; /* Current read pointer */
+-#endif /* def NCR_DEBUG */
+-
+- /* XXX - primitive debugging junk, remove when working ? */
+- int debug_print_limit; /* Number of commands to print
+- out exhaustive debugging
+- information for if
+- OPTION_DEBUG_DUMP is set */
+-
+- unsigned char debug_lun_limit[16]; /* If OPTION_DEBUG_TARGET_LIMIT
+- set, puke if commands are sent
+- to other target/lun combinations */
+-
+- int debug_count_limit; /* Number of commands to execute
+- before puking to limit debugging
+- output */
+-
+-
+- volatile unsigned idle:1; /* set to 1 if idle */
+-
+- /*
+- * Table of synchronous+wide transfer parameters set on a per-target
+- * basis.
+- */
+-
+- volatile struct NCR53c7x0_synchronous sync[16]
+- __attribute__ ((aligned (4)));
+-
+- volatile Scsi_Cmnd *issue_queue
+- __attribute__ ((aligned (4)));
+- /* waiting to be issued by
+- Linux driver */
+- volatile struct NCR53c7x0_cmd *running_list;
+- /* commands running, maintained
+- by Linux driver */
+-
+- volatile struct NCR53c7x0_cmd *ncrcurrent; /* currently connected
+- nexus, ONLY valid for
+- NCR53c700/NCR53c700-66
+- */
+-
+- volatile struct NCR53c7x0_cmd *spare; /* pointer to spare,
+- allocated at probe time,
+- which we can use for
+- initialization */
+- volatile struct NCR53c7x0_cmd *free;
+- int max_cmd_size; /* Maximum size of NCR53c7x0_cmd
+- based on number of
+- scatter/gather segments, etc.
+- */
+- volatile int num_cmds; /* Number of commands
+- allocated */
+- volatile int extra_allocate;
+- volatile unsigned char cmd_allocated[16]; /* Have we allocated commands
+- for this target yet? If not,
+- do so ASAP */
+- volatile unsigned char busy[16][8]; /* number of commands
+- executing on each target
+- */
+- /*
+- * Eventually, I'll switch to a coroutine for calling
+- * cmd->done(cmd), etc. so that we can overlap interrupt
+- * processing with this code for maximum performance.
+- */
+-
+- volatile struct NCR53c7x0_cmd *finished_queue;
+-
+- /* Shared variables between SCRIPT and host driver */
+- volatile u32 *schedule
+- __attribute__ ((aligned (4))); /* Array of JUMPs to dsa_begin
+- routines of various DSAs.
+- When not in use, replace
+- with jump to next slot */
+-
+-
+- volatile unsigned char msg_buf[16]; /* buffer for messages
+- other than the command
+- complete message */
+-
+- /* Per-target default synchronous and WIDE messages */
+- volatile unsigned char synchronous_want[16][5];
+- volatile unsigned char wide_want[16][4];
+-
+- /* Bit fielded set of targets we want to speak synchronously with */
+- volatile u16 initiate_sdtr;
+- /* Bit fielded set of targets we want to speak wide with */
+- volatile u16 initiate_wdtr;
+- /* Bit fielded list of targets we've talked to. */
+- volatile u16 talked_to;
+-
+- /* Array of bit-fielded lun lists that we need to request_sense */
+- volatile unsigned char request_sense[16];
+-
+- u32 addr_reconnect_dsa_head
+- __attribute__ ((aligned (4))); /* RISCy style constant,
+- address of following */
+- volatile u32 reconnect_dsa_head;
+- /* Data identifying nexus we are trying to match during reselection */
+- volatile unsigned char reselected_identify; /* IDENTIFY message */
+- volatile unsigned char reselected_tag; /* second byte of queue tag
+- message or 0 */
+-
+- /* These were static variables before we moved them */
+-
+- s32 NCR53c7xx_zero
+- __attribute__ ((aligned (4)));
+- s32 NCR53c7xx_sink;
+- u32 NOP_insn;
+- char NCR53c7xx_msg_reject;
+- char NCR53c7xx_msg_abort;
+- char NCR53c7xx_msg_nop;
+-
+- /*
+- * Following item introduced by RGH to support the NCR 53c710, which is
+- * VERY brain-dead when it comes to memory moves
+- */
+-
+- /* DSA save area used only by the NCR chip */
+- volatile unsigned long saved2_dsa
+- __attribute__ ((aligned (4)));
+-
+- volatile unsigned long emulated_intfly
+- __attribute__ ((aligned (4)));
+-
+- volatile int event_size, event_index;
+- volatile struct NCR53c7x0_event *events;
+-
+- /* If we need to generate code to kill off the currently connected
+- command, this is where we do it. Should have a BMI instruction
+- to source or sink the current data, followed by a JUMP
+- to abort_connected */
+-
+- u32 *abort_script;
+-
+- int script_count; /* Size of script in words */
+- u32 script[0]; /* Relocated SCSI script */
+-
+-};
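The trailing script[0] member is the classic variable-length-tail idiom: the
hostdata and its relocated script are allocated as one block whose size is
decided at run time. A toy reproduction with a stand-in struct (not the
driver's types), using the modern C99 flexible array member:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct host {
        int script_count;
        unsigned int script[];      /* C99 form of the script[0] trick */
    };

    int main(void)
    {
        int words = 16;
        struct host *h = malloc(sizeof(*h) + words * sizeof(h->script[0]));

        if (!h)
            return 1;
        h->script_count = words;
        memset(h->script, 0, words * sizeof(h->script[0]));
        printf("allocated %d script words in-line\n", h->script_count);
        free(h);
        return 0;
    }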
+-
+-#define SCSI_IRQ_NONE 255
+-#define DMA_NONE 255
+-#define IRQ_AUTO 254
+-#define DMA_AUTO 254
+-
+-#define BOARD_GENERIC 0
+-
+-#define NCR53c7x0_insn_size(insn) \
+- (((insn) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI ? 3 : 2)
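The macro walks variable-length SCRIPTS instructions: memory-to-memory moves
occupy three 32-bit words, everything else two. A minimal sketch of stepping
through a script with it; the DCMD_TYPE_* values and the fake script words
here are illustrative stand-ins, not the driver's definitions:

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative stand-ins: the real DCMD_TYPE_MASK / DCMD_TYPE_MMI live
     * in the driver's register definitions and may differ. */
    #define DCMD_TYPE_MASK 0xc0000000u
    #define DCMD_TYPE_MMI  0xc0000000u

    #define NCR53c7x0_insn_size(insn) \
        (((insn) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI ? 3 : 2)

    int main(void)
    {
        /* Fake script: a memory-to-memory move (3 words), then a 2-word insn. */
        uint32_t script[] = { 0xc0000004u, 0, 0, 0x80000000u, 0 };
        size_t i = 0, n = sizeof(script) / sizeof(script[0]);

        while (i < n) {
            printf("insn at word %zu takes %d words\n",
                   i, NCR53c7x0_insn_size(script[i]));
            i += NCR53c7x0_insn_size(script[i]);
        }
        return 0;
    }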
+-
+-
+-#define NCR53c7x0_local_declare() \
+- volatile unsigned char *NCR53c7x0_address_memory; \
+- unsigned int NCR53c7x0_address_io; \
+- int NCR53c7x0_memory_mapped
+-
+-#define NCR53c7x0_local_setup(host) \
+- NCR53c7x0_address_memory = (void *) (host)->base; \
+- NCR53c7x0_address_io = (unsigned int) (host)->io_port; \
+- NCR53c7x0_memory_mapped = ((struct NCR53c7x0_hostdata *) \
+- host->hostdata[0])-> options & OPTION_MEMORY_MAPPED
+-
+-#ifdef BIG_ENDIAN
+-/* These could be more efficient, given that we are always memory mapped,
+- * but they don't give the same problems as the write macros, so leave
+- * them. */
+-#ifdef __mc68000__
+-#define NCR53c7x0_read8(address) \
+- ((unsigned int)raw_inb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) )
+-
+-#define NCR53c7x0_read16(address) \
+- ((unsigned int)raw_inw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)))
+-#else
+-#define NCR53c7x0_read8(address) \
+- (NCR53c7x0_memory_mapped ? \
+- (unsigned int)readb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) : \
+- inb(NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_read16(address) \
+- (NCR53c7x0_memory_mapped ? \
+- (unsigned int)readw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) : \
+- inw(NCR53c7x0_address_io + (address)))
+-#endif /* mc68000 */
+-#else
+-#define NCR53c7x0_read8(address) \
+- (NCR53c7x0_memory_mapped ? \
+- (unsigned int)readb((u32)NCR53c7x0_address_memory + (u32)(address)) : \
+- inb(NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_read16(address) \
+- (NCR53c7x0_memory_mapped ? \
+- (unsigned int)readw((u32)NCR53c7x0_address_memory + (u32)(address)) : \
+- inw(NCR53c7x0_address_io + (address)))
+-#endif
+-
+-#ifdef __mc68000__
+-#define NCR53c7x0_read32(address) \
+- ((unsigned int) raw_inl((u32)NCR53c7x0_address_memory + (u32)(address)))
+-#else
+-#define NCR53c7x0_read32(address) \
+- (NCR53c7x0_memory_mapped ? \
+- (unsigned int) readl((u32)NCR53c7x0_address_memory + (u32)(address)) : \
+- inl(NCR53c7x0_address_io + (address)))
+-#endif /* mc68000*/
+-
+-#ifdef BIG_ENDIAN
+-/* If we are big-endian, then we are not Intel, so we probably don't have
+- * an i/o map as well as a memory map. So, let's assume memory mapped.
+- * Also, I am having terrible problems trying to persuade the compiler
+- * not to lay down code which does a read after write for these macros.
+- * If you remove 'volatile' from writeb() and friends it is ok....
+- */
+-
+-#define NCR53c7x0_write8(address,value) \
+- *(volatile unsigned char *) \
+- ((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) = (value)
+-
+-#define NCR53c7x0_write16(address,value) \
+- *(volatile unsigned short *) \
+- ((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) = (value)
+-
+-#define NCR53c7x0_write32(address,value) \
+- *(volatile unsigned long *) \
+- ((u32)NCR53c7x0_address_memory + ((u32)(address))) = (value)
+-
+-#else
+-
+-#define NCR53c7x0_write8(address,value) \
+- (NCR53c7x0_memory_mapped ? \
+- ({writeb((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \
+- outb((value), NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_write16(address,value) \
+- (NCR53c7x0_memory_mapped ? \
+- ({writew((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \
+- outw((value), NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_write32(address,value) \
+- (NCR53c7x0_memory_mapped ? \
+- ({writel((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \
+- outl((value), NCR53c7x0_address_io + (address)))
+-
+-#endif
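A minimal standalone sketch of the dispatch pattern these macros implement:
one cached flag chooses between memory-mapped and port I/O on every access.
mmio_read8()/port_read8() are print-only stand-ins for readb()/inb(), and the
base addresses are made up:

    #include <stdio.h>
    #include <stdint.h>

    static int memory_mapped = 1;   /* analogue of OPTION_MEMORY_MAPPED */

    static unsigned int mmio_read8(uintptr_t addr)
    {
        printf("readb(0x%lx)\n", (unsigned long)addr);
        return 0;
    }

    static unsigned int port_read8(unsigned int port)
    {
        printf("inb(0x%x)\n", port);
        return 0;
    }

    static unsigned int ncr_read8(uintptr_t mem_base, unsigned int io_base,
                                  unsigned int reg)
    {
        return memory_mapped ? mmio_read8(mem_base + reg)
                             : port_read8(io_base + reg);
    }

    int main(void)
    {
        ncr_read8(0xfff47000u, 0x330, 0x14);    /* made-up base addresses */
        memory_mapped = 0;
        ncr_read8(0xfff47000u, 0x330, 0x14);
        return 0;
    }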
+-
+-/* Patch arbitrary 32 bit words in the script */
+-#define patch_abs_32(script, offset, symbol, value) \
+- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \
+- (u32)); ++i) { \
+- (script)[A_##symbol##_used[i] - (offset)] += (value); \
+- if (hostdata->options & OPTION_DEBUG_FIXUP) \
+- printk("scsi%d : %s reference %d at 0x%x in %s is now 0x%x\n",\
+- host->host_no, #symbol, i, A_##symbol##_used[i] - \
+- (int)(offset), #script, (script)[A_##symbol##_used[i] - \
+- (offset)]); \
+- }
+-
+-/* Patch read/write instruction immediate field */
+-#define patch_abs_rwri_data(script, offset, symbol, value) \
+- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \
+- (u32)); ++i) \
+- (script)[A_##symbol##_used[i] - (offset)] = \
+- ((script)[A_##symbol##_used[i] - (offset)] & \
+- ~DBC_RWRI_IMMEDIATE_MASK) | \
+- (((value) << DBC_RWRI_IMMEDIATE_SHIFT) & \
+- DBC_RWRI_IMMEDIATE_MASK)
+-
+-/* Patch transfer control instruction data field */
+-#define patch_abs_tci_data(script, offset, symbol, value) \
+- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \
+- (u32)); ++i) \
+- (script)[A_##symbol##_used[i] - (offset)] = \
+- ((script)[A_##symbol##_used[i] - (offset)] & \
+- ~DBC_TCI_DATA_MASK) | \
+- (((value) << DBC_TCI_DATA_SHIFT) & \
+- DBC_TCI_DATA_MASK)
+-
+-/* Patch field in dsa structure (assignment should be +=?) */
+-#define patch_dsa_32(dsa, symbol, word, value) \
+- { \
+-    (dsa)[(hostdata->symbol - hostdata->dsa_start) / sizeof(u32)	\
+- + (word)] = (value); \
+- if (hostdata->options & OPTION_DEBUG_DSA) \
+- printk("scsi : dsa %s symbol %s(%d) word %d now 0x%x\n", \
+-	    #dsa, #symbol, hostdata->symbol,				\
+- (word), (u32) (value)); \
+- }
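Conceptually, the script assembler emits, for each ABSOLUTE symbol, a table of
word offsets (the A_symbol_used arrays) naming every reference, and these
macros add or patch the run-time value at each offset. A runnable toy version
of the scheme, with made-up offsets and values:

    #include <stdio.h>
    #include <stdint.h>

    /* Made-up table: "symbol is referenced at script words 1 and 4". */
    static const uint32_t A_example_used[] = { 1, 4 };

    static void patch_abs(uint32_t *script, uint32_t value)
    {
        size_t i;

        for (i = 0; i < sizeof(A_example_used) / sizeof(uint32_t); i++)
            script[A_example_used[i]] += value;     /* relocate in place */
    }

    int main(void)
    {
        uint32_t script[6] = { 0 };

        patch_abs(script, 0x1000);
        printf("script[1]=0x%x script[4]=0x%x\n", script[1], script[4]);
        return 0;
    }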
+-
+-/* Paranoid people could use panic() here. */
+-#define FATAL(host) shutdown((host));
+-
+-extern int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip,
+- unsigned long base, int io_port, int irq, int dma,
+- long long options, int clock);
+-
+-#endif /* NCR53c710_C */
+-#endif /* NCR53c710_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.scr linux-2.6.22-591/drivers/scsi/53c7xx.scr
+--- linux-2.6.22-570/drivers/scsi/53c7xx.scr 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c7xx.scr 1969-12-31 19:00:00.000000000 -0500
+@@ -1,1591 +0,0 @@
+-#undef DEBUG
+-#undef EVENTS
+-#undef NO_SELECTION_TIMEOUT
+-#define BIG_ENDIAN
+-
+-; 53c710 driver. Modified from Drew Eckhardt's driver
+-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+-;
+-; I have left the script for the 53c8xx family in here, as it is likely
+-; to be useful to see what I changed when bug hunting.
+-
+-; NCR 53c810 driver, main script
+-; Sponsored by
+-; iX Multiuser Multitasking Magazine
+-; hm@ix.de
+-;
+-; Copyright 1993, 1994, 1995 Drew Eckhardt
+-; Visionary Computing
+-; (Unix and Linux consulting and custom programming)
+-; drew@PoohSticks.ORG
+-; +1 (303) 786-7975
+-;
+-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+-;
+-; PRE-ALPHA
+-;
+-; For more information, please consult
+-;
+-; NCR 53C810
+-; PCI-SCSI I/O Processor
+-; Data Manual
+-;
+-; NCR 53C710
+-; SCSI I/O Processor
+-; Programmers Guide
+-;
+-; NCR Microelectronics
+-; 1635 Aeroplaza Drive
+-; Colorado Springs, CO 80916
+-; 1+ (719) 578-3400
+-;
+-; Toll free literature number
+-; +1 (800) 334-5454
+-;
+-; IMPORTANT : This code is self modifying due to the limitations of
+-; the NCR53c7,8xx series chips. Persons debugging this code with
+-; the remote debugger should take this into account, and NOT set
+-; breakpoints in modified instructions.
+-;
+-; Design:
+-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard
+-; microcontroller using a simple instruction set.
+-;
+-; So, to minimize the effects of interrupt latency, and to maximize
+-; throughput, this driver offloads the practical maximum amount
+-; of processing to the SCSI chip while still maintaining a common
+-; structure.
+-;
+-; Where tradeoffs were needed between efficiency on the older
+-; chips and the newer NCR53c800 series, the NCR53c800 series
+-; was chosen.
+-;
+-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully
+-; automate SCSI transfers without host processor intervention, this
+-; isn't the case with the NCR53c710 and newer chips which allow
+-;
+-; - reads and writes to the internal registers from within the SCSI
+-; scripts, allowing the SCSI SCRIPTS(tm) code to save processor
+-; state so that multiple threads of execution are possible, and also
+-; provide an ALU for loop control, etc.
+-;
+-; - table indirect addressing for some instructions. This allows
+-; 	pointers to be located relative to the DSA (Data Structure
+-; Address) register.
+-;
+-; These features make it possible to implement a mailbox style interface,
+-; where the same piece of code is run to handle I/O for multiple threads
+-; at once, minimizing our need to relocate code. Since the NCR53c700/
+-; NCR53c800 series have a unique combination of features, making a
+-; standard ingoing/outgoing mailbox system costly, I've modified it.
+-;
+-; - Mailboxes are a mixture of code and data. This lets us greatly
+-; simplify the NCR53c810 code and do things that would otherwise
+-; not be possible.
+-;
+-; The saved data pointer is now implemented as follows :
+-;
+-; Control flow has been architected such that if control reaches
+-; munge_save_data_pointer, on a restore pointers message or
+-; reconnection, a jump to the address formerly in the TEMP register
+-; will allow the SCSI command to resume execution.
+-;
+-
+-;
+-; Note : the DSA structures must be aligned on 32 bit boundaries,
+-; since the source and destination of MOVE MEMORY instructions
+-; must share the same alignment and this is the alignment of the
+-; NCR registers.
+-;
+-
+-; For some systems (MVME166, for example) dmode is always the same, so don't
+-; waste time writing it
+-
+-#if 1
+-#define DMODE_MEMORY_TO_NCR
+-#define DMODE_MEMORY_TO_MEMORY
+-#define DMODE_NCR_TO_MEMORY
+-#else
+-#define DMODE_MEMORY_TO_NCR MOVE dmode_memory_to_ncr TO DMODE
+-#define DMODE_MEMORY_TO_MEMORY MOVE dmode_memory_to_memory TO DMODE
+-#define DMODE_NCR_TO_MEMORY MOVE dmode_ncr_to_memory TO DMODE
+-#endif
+-
+-ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa
+-ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa
+-ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address
+- ; for current dsa
+-ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target
+- ; sync routine
+-ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target
+- ; sscf value (53c710)
+-ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa
+-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command
+- ; saved data pointer
+-ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command
+- ; current residual code
+-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command
+- ; saved residual code
+-ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand
+-ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to
+-ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value
+-
+-;
+-; Once a device has initiated reselection, we need to compare it
+-; against the singly linked list of commands which have disconnected
+-; and are pending reselection. These commands are maintained in
+-; an unordered singly linked list of DSA structures, through the
+-; DSA pointers at their 'centers' headed by the reconnect_dsa_head
+-; pointer.
+-;
+-; To avoid complications in removing commands from the list,
+-; I minimize the amount of expensive (at eight operations per
+-; addition @ 500-600ns each) pointer operations which must
+-; be done in the NCR driver by precomputing them on the
+-; host processor during dsa structure generation.
+-;
+-; The fixed-up per DSA code knows how to recognize the nexus
+-; associated with the corresponding SCSI command, and modifies
+-; the source and destination pointers for the MOVE MEMORY
+-; instruction which is executed when reselected_ok is called
+-; to remove the command from the list. Similarly, DSA is
+-; loaded with the address of the next DSA structure and
+-; reselected_check_next is called if a failure occurs.
+-;
+-; Perhaps more concisely, the net effect of the mess is
+-;
+-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head,
+-; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) {
+-; src = &dsa->next;
+-; if (target_id == dsa->id && target_lun == dsa->lun) {
+-; *dest = *src;
+-; break;
+-; }
+-; }
+-;
+-; if (!dsa)
+-; error (int_err_unexpected_reselect);
+-; else
+-; longjmp (dsa->jump_resume, 0);
+-;
+-;
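A runnable C rendition of the walk above; the struct fields mirror the
pseudocode, while the IDs and the test data are invented:

    #include <stdio.h>

    struct dsa {
        struct dsa *next;
        int id, lun;
    };

    static struct dsa *reconnect_dsa_head;

    /* Find and unlink the nexus matching (id, lun); the *dest store is the
     * C equivalent of the precomputed MOVE MEMORY in reselected_ok_patch. */
    static struct dsa *reselect(int id, int lun)
    {
        struct dsa **dest, *dsa;

        for (dest = &reconnect_dsa_head; (dsa = *dest) != NULL;
             dest = &dsa->next)
            if (dsa->id == id && dsa->lun == lun) {
                *dest = dsa->next;          /* *dest = *src */
                return dsa;
            }
        return NULL;                        /* int_err_unexpected_reselect */
    }

    int main(void)
    {
        struct dsa a = { NULL, 2, 0 };
        struct dsa b = { &a, 5, 1 };

        reconnect_dsa_head = &b;
        printf("match found: %s\n", reselect(2, 0) ? "yes" : "no");
        printf("head unchanged: %s\n",
               reconnect_dsa_head == &b ? "yes" : "no");
        return 0;
    }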
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; Define DSA structure used for mailboxes
+-ENTRY dsa_code_template
+-dsa_code_template:
+-ENTRY dsa_code_begin
+-dsa_code_begin:
+-; RGH: Don't care about TEMP and DSA here
+- DMODE_MEMORY_TO_NCR
+- MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch
+- DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+- ; We are about to go and select the device, so must set SSCF bits
+- MOVE MEMORY 4, dsa_sscf_710, addr_scratch
+-#ifdef BIG_ENDIAN
+- MOVE SCRATCH3 TO SFBR
+-#else
+- MOVE SCRATCH0 TO SFBR
+-#endif
+- MOVE SFBR TO SBCL
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#else
+- CALL scratch_to_dsa
+-#endif
+- CALL select
+-; Handle the phase mismatch which may have resulted from the
+-; MOVE FROM dsa_msgout if we returned here. The CLEAR ATN
+-; may or may not be necessary, and we should update script_asm.pl
+-; to handle multiple pieces.
+- CLEAR ATN
+- CLEAR ACK
+-
+-; Replace second operand with address of JUMP instruction dest operand
+-; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c.
+-ENTRY dsa_code_fix_jump
+-dsa_code_fix_jump:
+- MOVE MEMORY 4, NOP_insn, 0
+- JUMP select_done
+-
+-; wrong_dsa loads the DSA register with the value of the dsa_next
+-; field.
+-;
+-wrong_dsa:
+-#if (CHIP == 710)
+-; NOTE DSA is corrupt when we arrive here!
+-#endif
+-; Patch the MOVE MEMORY INSTRUCTION such that
+-; the destination address is the address of the OLD
+-; next pointer.
+-;
+- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8
+- DMODE_MEMORY_TO_NCR
+-;
+-; Move the _contents_ of the next pointer into the DSA register as
+-; the next I_T_L or I_T_L_Q tuple to check against the established
+-; nexus.
+-;
+- MOVE MEMORY 4, dsa_temp_next, addr_scratch
+- DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#else
+- CALL scratch_to_dsa
+-#endif
+- JUMP reselected_check_next
+-
+-ABSOLUTE dsa_save_data_pointer = 0
+-ENTRY dsa_code_save_data_pointer
+-dsa_code_save_data_pointer:
+-#if (CHIP == 710)
+- ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt
+- ; We MUST return with DSA correct
+- MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+- CLEAR ACK
+-#ifdef DEBUG
+- INT int_debug_saved
+-#endif
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+- JUMP jump_temp
+-#else
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_temp, dsa_temp_addr_saved_pointer
+- DMODE_MEMORY_TO_MEMORY
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+- CLEAR ACK
+-#ifdef DEBUG
+- INT int_debug_saved
+-#endif
+- RETURN
+-#endif
+-ABSOLUTE dsa_restore_pointers = 0
+-ENTRY dsa_code_restore_pointers
+-dsa_code_restore_pointers:
+-#if (CHIP == 710)
+- ; TEMP and DSA are corrupt when we get here, but who cares!
+- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+- CLEAR ACK
+- ; Restore DSA, note we don't care about TEMP
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#ifdef DEBUG
+- INT int_debug_restored
+-#endif
+- JUMP jump_temp
+-#else
+- DMODE_MEMORY_TO_NCR
+- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, addr_temp
+- DMODE_MEMORY_TO_MEMORY
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+- CLEAR ACK
+-#ifdef DEBUG
+- INT int_debug_restored
+-#endif
+- RETURN
+-#endif
+-
+-ABSOLUTE dsa_check_reselect = 0
+-; dsa_check_reselect determines whether or not the current target and
+-; lun match the current DSA
+-ENTRY dsa_code_check_reselect
+-dsa_code_check_reselect:
+-#if (CHIP == 710)
+- /* Arrives here with DSA correct */
+- /* Assumes we are always ID 7 */
+- MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set
+- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80
+-#else
+- MOVE SSID TO SFBR ; SSID contains 3 bit target ID
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0xf8
+-#endif
+-;
+-; Hack - move to scratch first, since SFBR is not writeable
+-; via the CPU and hence a MOVE MEMORY instruction.
+-;
+- DMODE_MEMORY_TO_NCR
+- MOVE MEMORY 1, reselected_identify, addr_scratch
+- DMODE_MEMORY_TO_MEMORY
+-#ifdef BIG_ENDIAN
+- ; BIG ENDIAN ON MVME16x
+- MOVE SCRATCH3 TO SFBR
+-#else
+- MOVE SCRATCH0 TO SFBR
+-#endif
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+-; Are you sure about that? richard@sleepie.demon.co.uk
+- JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8
+-; Patch the MOVE MEMORY INSTRUCTION such that
+-; the source address is the address of this dsa's
+-; next pointer.
+- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4
+- CALL reselected_ok
+-#if (CHIP == 710)
+-; Restore DSA following memory moves in reselected_ok
+-; dsa_temp_sync doesn't really care about DSA, but it has an
+-; optional debug INT so a valid DSA is a good idea.
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+- CALL dsa_temp_sync
+-; Release ACK on the IDENTIFY message _after_ we've set the synchronous
+-; transfer parameters!
+- CLEAR ACK
+-; Implicitly restore pointers on reselection, so a RETURN
+-; will transfer control back to the right spot.
+- CALL REL (dsa_code_restore_pointers)
+- RETURN
+-ENTRY dsa_zero
+-dsa_zero:
+-ENTRY dsa_code_template_end
+-dsa_code_template_end:
+-
+-; Sanity check : dsa_fields_start should equal dsa_code_template_end -
+-; dsa_zero; if not, puke.
+-
+-ABSOLUTE dsa_fields_start = 0 ; Sanity marker
+- ; pad 48 bytes (fix this RSN)
+-ABSOLUTE dsa_next = 48 ; len 4 Next DSA
+- ; del 4 Previous DSA address
+-ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread.
+-ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for
+- ; table indirect select
+-ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for
+- ; select message
+-ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for
+- ; command
+-ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout
+-ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain
+-ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin
+-ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte
+-ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out
+- ; (Synchronous transfer negotiation, etc).
+-ABSOLUTE dsa_end = 112
+-
+-ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next),
+- ; terminated by a call to JUMP wait_reselect
+-
+-; Linked lists of DSA structures
+-ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect
+-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing
+- ; address of reconnect_dsa_head
+-
+-; These select the source and destination of a MOVE MEMORY instruction
+-ABSOLUTE dmode_memory_to_memory = 0x0
+-ABSOLUTE dmode_memory_to_ncr = 0x0
+-ABSOLUTE dmode_ncr_to_memory = 0x0
+-
+-ABSOLUTE addr_scratch = 0x0
+-ABSOLUTE addr_temp = 0x0
+-#if (CHIP == 710)
+-ABSOLUTE saved_dsa = 0x0
+-ABSOLUTE emulfly = 0x0
+-ABSOLUTE addr_dsa = 0x0
+-#endif
+-#endif /* CHIP != 700 && CHIP != 70066 */
+-
+-; Interrupts -
+-; MSB indicates type
+-; 0 handle error condition
+-; 1 handle message
+-; 2 handle normal condition
+-; 3 debugging interrupt
+-; 4 testing interrupt
+-; Next byte indicates specific error
+-
+-; XXX not yet implemented, I'm not sure if I want to -
+-; Next byte indicates the routine the error occurred in
+-; The LSB indicates the specific place the error occurred
+-
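Decoding one of these interrupt codes on the host side is a pair of shifts;
this standalone sketch uses int_msg_sdtr (0x01010000) from the table below as
its sample value, with the class names taken from the comment above:

    #include <stdio.h>
    #include <stdint.h>

    static const char *const int_class[] = {
        "error", "message", "normal", "debug", "test"
    };

    int main(void)
    {
        uint32_t dsps = 0x01010000;             /* int_msg_sdtr, from below */
        unsigned type = (dsps >> 24) & 0xff;
        unsigned detail = (dsps >> 16) & 0xff;

        if (type < sizeof(int_class) / sizeof(int_class[0]))
            printf("class=%s detail=%u\n", int_class[type], detail);
        return 0;
    }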
+-ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered
+-ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED)
+-ABSOLUTE int_err_unexpected_reselect = 0x00020000
+-ABSOLUTE int_err_check_condition = 0x00030000
+-ABSOLUTE int_err_no_phase = 0x00040000
+-ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received
+-ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received
+-ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message
+- ; received
+-
+-ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram
+- ; registers.
+-ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established
+-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete
+-ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected
+-ABSOLUTE int_norm_aborted =0x02040000 ; Aborted *dsa
+-ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset.
+-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly
+-ABSOLUTE int_debug_break = 0x03000000 ; Break point
+-#ifdef DEBUG
+-ABSOLUTE int_debug_scheduled = 0x03010000 ; new I/O scheduled
+-ABSOLUTE int_debug_idle = 0x03020000 ; scheduler is idle
+-ABSOLUTE int_debug_dsa_loaded = 0x03030000 ; dsa reloaded
+-ABSOLUTE int_debug_reselected = 0x03040000 ; NCR reselected
+-ABSOLUTE int_debug_head = 0x03050000 ; issue head overwritten
+-ABSOLUTE int_debug_disconnected = 0x03060000 ; disconnected
+-ABSOLUTE int_debug_disconnect_msg = 0x03070000 ; got message to disconnect
+-ABSOLUTE int_debug_dsa_schedule = 0x03080000 ; in dsa_schedule
+-ABSOLUTE int_debug_reselect_check = 0x03090000 ; Check for reselection of DSA
+-ABSOLUTE int_debug_reselected_ok = 0x030a0000 ; Reselection accepted
+-#endif
+-ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver
+-#ifdef DEBUG
+-ABSOLUTE int_debug_saved = 0x030c0000 ; save/restore pointers
+-ABSOLUTE int_debug_restored = 0x030d0000
+-ABSOLUTE int_debug_sync = 0x030e0000 ; Sanity check synchronous
+- ; parameters.
+-ABSOLUTE int_debug_datain = 0x030f0000 ; going into data in phase
+- ; now.
+-ABSOLUTE int_debug_check_dsa = 0x03100000 ; Sanity check DSA against
+- ; SDID.
+-#endif
+-
+-ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete
+-ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete
+-ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete
+-
+-
+-; These should start with 0x05000000, with low bits incrementing for
+-; each one.
+-
+-#ifdef EVENTS
+-ABSOLUTE int_EVENT_SELECT = 0
+-ABSOLUTE int_EVENT_DISCONNECT = 0
+-ABSOLUTE int_EVENT_RESELECT = 0
+-ABSOLUTE int_EVENT_COMPLETE = 0
+-ABSOLUTE int_EVENT_IDLE = 0
+-ABSOLUTE int_EVENT_SELECT_FAILED = 0
+-ABSOLUTE int_EVENT_BEFORE_SELECT = 0
+-ABSOLUTE int_EVENT_RESELECT_FAILED = 0
+-#endif
+-
+-ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message
+-ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message
+-ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source
+-ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in
+-ABSOLUTE NOP_insn = 0 ; NOP instruction
+-
+-; Pointer to message, potentially multi-byte
+-ABSOLUTE msg_buf = 0
+-
+-; Pointer to holding area for reselection information
+-ABSOLUTE reselected_identify = 0
+-ABSOLUTE reselected_tag = 0
+-
+-; Request sense command pointer, it's a 6 byte command, should
+-; be constant for all commands since we always want 16 bytes of
+-; sense and we don't need to change any fields as we did under
+-; SCSI-I when we actually cared about the LUN field.
+-;EXTERNAL NCR53c7xx_sense ; Request sense command
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; dsa_schedule
+-; PURPOSE : after a DISCONNECT message has been received, and pointers
+-; saved, insert the current DSA structure at the head of the
+-; disconnected queue and fall through to the scheduler.
+-;
+-; CALLS : OK
+-;
+-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list
+-; of disconnected commands
+-;
+-; MODIFIES : SCRATCH, reconnect_dsa_head
+-;
+-; EXITS : always passes control to schedule
+-
+-ENTRY dsa_schedule
+-dsa_schedule:
+-#ifdef DEBUG
+- INT int_debug_dsa_schedule
+-#endif
+-
+-;
+-; Calculate the address of the next pointer within the DSA
+-; structure of the command that is currently disconnecting
+-;
+-#if (CHIP == 710)
+- ; Read what should be the current DSA from memory - actual DSA
+- ; register is probably corrupt
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+- CALL dsa_to_scratch
+-#endif
+- MOVE SCRATCH0 + dsa_next TO SCRATCH0
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-; Point the next field of this DSA structure at the current disconnected
+-; list
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8
+- DMODE_MEMORY_TO_MEMORY
+-dsa_schedule_insert:
+- MOVE MEMORY 4, reconnect_dsa_head, 0
+-
+-; And update the head pointer.
+-#if (CHIP == 710)
+- ; Read what should be the current DSA from memory - actual DSA
+- ; register is probably corrupt
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+- CALL dsa_to_scratch
+-#endif
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, reconnect_dsa_head
+- DMODE_MEMORY_TO_MEMORY
+-/* Temporarily, see what happens. */
+-#ifndef ORIGINAL
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- CLEAR ACK
+-#endif
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+- WAIT DISCONNECT
+-#ifdef EVENTS
+- INT int_EVENT_DISCONNECT;
+-#endif
+-#ifdef DEBUG
+- INT int_debug_disconnected
+-#endif
+- JUMP schedule
+-#endif
+-
+-;
+-; select
+-;
+-; PURPOSE : establish a nexus for the SCSI command referenced by DSA.
+-; On success, the current DSA structure is removed from the issue
+-; queue. Usually, this is entered as a fall-through from schedule,
+-; although the contingent allegiance handling code will write
+-; the select entry address to the DSP to restart a command as a
+-; REQUEST SENSE. A message is sent (usually IDENTIFY, although
+-; additional SDTR or WDTR messages may be sent). COMMAND OUT
+-; is handled.
+-;
+-; INPUTS : DSA - SCSI command, issue_dsa_head
+-;
+-; CALLS : NOT OK
+-;
+-; MODIFIES : SCRATCH, issue_dsa_head
+-;
+-; EXITS : on reselection or selection, go to select_failed
+-; otherwise, RETURN so control is passed back to
+-; dsa_begin.
+-;
+-
+-ENTRY select
+-select:
+-
+-#ifdef EVENTS
+- INT int_EVENT_BEFORE_SELECT
+-#endif
+-
+-#ifdef DEBUG
+- INT int_debug_scheduled
+-#endif
+- CLEAR TARGET
+-
+-; XXX
+-;
+-; In effect, SELECTION operations are backgrounded, with execution
+-; continuing until code which waits for REQ or a fatal interrupt is
+-; encountered.
+-;
+-; So, for more performance, we could overlap the code which removes
+-; the command from the NCR's issue queue with the selection, but
+-; at this point I don't want to deal with the error recovery.
+-;
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-#if (CHIP == 710)
+- ; Enable selection timer
+-#ifdef NO_SELECTION_TIMEOUT
+- MOVE CTEST7 & 0xff TO CTEST7
+-#else
+- MOVE CTEST7 & 0xef TO CTEST7
+-#endif
+-#endif
+- SELECT ATN FROM dsa_select, select_failed
+- JUMP select_msgout, WHEN MSG_OUT
+-ENTRY select_msgout
+-select_msgout:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+- MOVE FROM dsa_msgout, WHEN MSG_OUT
+-#else
+-ENTRY select_msgout
+- SELECT ATN 0, select_failed
+-select_msgout:
+- MOVE 0, 0, WHEN MSGOUT
+-#endif
+-
+-#ifdef EVENTS
+- INT int_EVENT_SELECT
+-#endif
+- RETURN
+-
+-;
+-; select_done
+-;
+-; PURPOSE: continue on to normal data transfer; called as the exit
+-; point from dsa_begin.
+-;
+-; INPUTS: dsa
+-;
+-; CALLS: OK
+-;
+-;
+-
+-select_done:
+-#if (CHIP == 710)
+-; NOTE DSA is corrupt when we arrive here!
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-
+-#ifdef DEBUG
+-ENTRY select_check_dsa
+-select_check_dsa:
+- INT int_debug_check_dsa
+-#endif
+-
+-; After a successful selection, we should get either a CMD phase or
+-; some transfer request negotiation message.
+-
+- JUMP cmdout, WHEN CMD
+- INT int_err_unexpected_phase, WHEN NOT MSG_IN
+-
+-select_msg_in:
+- CALL msg_in, WHEN MSG_IN
+- JUMP select_msg_in, WHEN MSG_IN
+-
+-cmdout:
+- INT int_err_unexpected_phase, WHEN NOT CMD
+-#if (CHIP == 700)
+- INT int_norm_selected
+-#endif
+-ENTRY cmdout_cmdout
+-cmdout_cmdout:
+-#if (CHIP != 700) && (CHIP != 70066)
+- MOVE FROM dsa_cmdout, WHEN CMD
+-#else
+- MOVE 0, 0, WHEN CMD
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-;
+-; data_transfer
+-; other_out
+-; other_in
+-; other_transfer
+-;
+-; PURPOSE : handle the main data transfer for a SCSI command in
+-; several parts. In the first part, data_transfer, DATA_IN
+-; and DATA_OUT phases are allowed, with the user provided
+-; code (usually dynamically generated based on the scatter/gather
+-; list associated with a SCSI command) called to handle these
+-; phases.
+-;
+-; After control has passed to one of the user provided
+-; DATA_IN or DATA_OUT routines, back calls are made to
+-; other_transfer_in or other_transfer_out to handle non-DATA IN
+-; and DATA OUT phases respectively, with the state of the active
+-; data pointer being preserved in TEMP.
+-;
+-; On completion, the user code passes control to other_transfer
+-; which causes DATA_IN and DATA_OUT to result in unexpected_phase
+-; interrupts so that data overruns may be trapped.
+-;
+-; INPUTS : DSA - SCSI command
+-;
+-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in
+-; other_transfer
+-;
+-; MODIFIES : SCRATCH
+-;
+-; EXITS : if STATUS IN is detected, signifying command completion,
+-; the NCR jumps to command_complete. If MSG IN occurs, a
+-; CALL is made to msg_in. Otherwise, other_transfer runs in
+-; an infinite loop.
+-;
+-
+-ENTRY data_transfer
+-data_transfer:
+- JUMP cmdout_cmdout, WHEN CMD
+- CALL msg_in, WHEN MSG_IN
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+- JUMP do_dataout, WHEN DATA_OUT
+- JUMP do_datain, WHEN DATA_IN
+- JUMP command_complete, WHEN STATUS
+- JUMP data_transfer
+-ENTRY end_data_transfer
+-end_data_transfer:
+-
+-;
+-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain
+-; should be fixed up whenever the nexus changes so it can point to the
+-; correct routine for that command.
+-;
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; Nasty jump to dsa->dataout
+-do_dataout:
+-#if (CHIP == 710)
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+- CALL dsa_to_scratch
+-#endif
+- MOVE SCRATCH0 + dsa_dataout TO SCRATCH0
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4
+- DMODE_MEMORY_TO_MEMORY
+-dataout_to_jump:
+- MOVE MEMORY 4, 0, dataout_jump + 4
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-dataout_jump:
+- JUMP 0
+-
+-; Nasty jump to dsa->dsain
+-do_datain:
+-#if (CHIP == 710)
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+- CALL dsa_to_scratch
+-#endif
+- MOVE SCRATCH0 + dsa_datain TO SCRATCH0
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, datain_to_jump + 4
+- DMODE_MEMORY_TO_MEMORY
+-ENTRY datain_to_jump
+-datain_to_jump:
+- MOVE MEMORY 4, 0, datain_jump + 4
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-#ifdef DEBUG
+- INT int_debug_datain
+-#endif
+-datain_jump:
+- JUMP 0
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-
+-; Note that other_out and other_in loop until a non-data phase
+-; is discovered, so we only execute return statements when we
+-; can go on to the next data phase block move statement.
+-
+-ENTRY other_out
+-other_out:
+-#if 0
+- INT 0x03ffdead
+-#endif
+- INT int_err_unexpected_phase, WHEN CMD
+- JUMP msg_in_restart, WHEN MSG_IN
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+- INT int_err_unexpected_phase, WHEN DATA_IN
+- JUMP command_complete, WHEN STATUS
+- JUMP other_out, WHEN NOT DATA_OUT
+-#if (CHIP == 710)
+-; TEMP should be OK, as we got here from a call in the user dataout code.
+-#endif
+- RETURN
+-
+-ENTRY other_in
+-other_in:
+-#if 0
+- INT 0x03ffdead
+-#endif
+- INT int_err_unexpected_phase, WHEN CMD
+- JUMP msg_in_restart, WHEN MSG_IN
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+- INT int_err_unexpected_phase, WHEN DATA_OUT
+- JUMP command_complete, WHEN STATUS
+- JUMP other_in, WHEN NOT DATA_IN
+-#if (CHIP == 710)
+-; TEMP should be OK, as we got here from a call in the user datain code.
+-#endif
+- RETURN
+-
+-
+-ENTRY other_transfer
+-other_transfer:
+- INT int_err_unexpected_phase, WHEN CMD
+- CALL msg_in, WHEN MSG_IN
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+- INT int_err_unexpected_phase, WHEN DATA_OUT
+- INT int_err_unexpected_phase, WHEN DATA_IN
+- JUMP command_complete, WHEN STATUS
+- JUMP other_transfer
+-
+-;
+-; msg_in_restart
+-; msg_in
+-; munge_msg
+-;
+-; PURPOSE : process messages from a target. msg_in is called when the
+-; caller hasn't read the first byte of the message. munge_message
+-; is called when the caller has read the first byte of the message,
+-; and left it in SFBR. msg_in_restart is called when the caller
+-; hasn't read the first byte of the message, and wishes RETURN
+-; to transfer control back to the address of the conditional
+-; CALL instruction rather than to the instruction after it.
+-;
+-; Various int_* interrupts are generated when the host system
+-; needs to intervene, as is the case with SDTR, WDTR, and
+-; INITIATE RECOVERY messages.
+-;
+-; When the host system handles one of these interrupts,
+-; it can respond by reentering at reject_message,
+-; which rejects the message and returns control to
+-; the caller of msg_in or munge_msg, accept_message
+-; which clears ACK and returns control, or reply_message
+-; which sends the message pointed to by the DSA
+-; msgout_other table indirect field.
+-;
+-; DISCONNECT messages are handled by moving the command
+-; to the reconnect_dsa_queue.
+-#if (CHIP == 710)
+-; NOTE: DSA should be valid when we get here - we cannot save both it
+-; and TEMP in this routine.
+-#endif
+-;
+-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg
+-; only)
+-;
+-; CALLS : NO. The TEMP register isn't backed up to allow nested calls.
+-;
+-; MODIFIES : SCRATCH, DSA on DISCONNECT
+-;
+-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS,
+-; and normal return from message handlers running under
+-; Linux, control is returned to the caller. Receipt
+-; of DISCONNECT messages passes control to dsa_schedule.
+-;
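A host-eye C rendition of the first-byte dispatch that munge_msg performs
below, preserving the script's test order; the byte values are the SCSI-2
message codes the script compares against, the strings are just labels, and
the AND MASK 0xdf test is rendered as "bit 0x20 set" on the assumption that a
SCRIPTS mask names the bits excluded from the comparison:

    #include <stdio.h>

    static const char *munge(unsigned char msg)
    {
        if (msg == 0x01) return "EXTENDED MESSAGE (munge_extended)";
        if (msg & 0x20)  return "two byte message (munge_2)";
        switch (msg) {
        case 0x02: return "SAVE DATA POINTER";
        case 0x03: return "RESTORE POINTERS";
        case 0x04: return "DISCONNECT";
        case 0x07: return "MESSAGE REJECT (int_msg_1)";
        case 0x0f: return "INITIATE RECOVERY (int_msg_1)";
        default:   return "reject_message";
        }
    }

    int main(void)
    {
        printf("0x04 -> %s\n", munge(0x04));
        return 0;
    }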
+-ENTRY msg_in_restart
+-msg_in_restart:
+-; XXX - hackish
+-;
+-; Since it's easier to debug changes to the statically
+-; compiled code, rather than the dynamically generated
+-; stuff, such as
+-;
+-; MOVE x, y, WHEN data_phase
+-; CALL other_z, WHEN NOT data_phase
+-; MOVE x, y, WHEN data_phase
+-;
+-; I'd like to have certain routines (notably the message handler)
+-; restart on the conditional call rather than the next instruction.
+-;
+-; So, subtract 8 from the return address
+-
+- MOVE TEMP0 + 0xf8 TO TEMP0
+- MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY
+- MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY
+- MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY
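The four MOVEs above subtract 8 from the 32-bit return address in TEMP using
the chip's 8-bit ALU: 0xf8 is added to the low byte and 0xff-plus-carry
ripples through the remaining three. The same arithmetic, byte by byte, in a
standalone C sketch with an arbitrary sample address:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t temp = 0x00010002;     /* arbitrary sample return address */
        uint8_t b[4] = { temp & 0xff, (temp >> 8) & 0xff,
                         (temp >> 16) & 0xff, (temp >> 24) & 0xff };
        unsigned sum, carry;

        sum = b[0] + 0xf8;         b[0] = sum & 0xff; carry = sum >> 8;
        sum = b[1] + 0xff + carry; b[1] = sum & 0xff; carry = sum >> 8;
        sum = b[2] + 0xff + carry; b[2] = sum & 0xff; carry = sum >> 8;
        sum = b[3] + 0xff + carry; b[3] = sum & 0xff;

        printf("0x%08x\n", (unsigned)(b[0] | (b[1] << 8) |
                                      ((uint32_t)b[2] << 16) |
                                      ((uint32_t)b[3] << 24)));
        /* Prints 0x0000fffa, i.e. temp - 8. */
        return 0;
    }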
+-
+-ENTRY msg_in
+-msg_in:
+- MOVE 1, msg_buf, WHEN MSG_IN
+-
+-munge_msg:
+- JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE
+- JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message
+-;
+-; XXX - I've seen a handful of broken SCSI devices which fail to issue
+-; a SAVE POINTERS message before disconnecting in the middle of
+-; a transfer, assuming that the DATA POINTER will be implicitly
+-; restored.
+-;
+-; Historically, I've often done an implicit save when the DISCONNECT
+-; message is processed. We may want to consider having the option of
+-; doing that here.
+-;
+- JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER
+- JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS
+- JUMP munge_disconnect, IF 0x04 ; DISCONNECT
+- INT int_msg_1, IF 0x07 ; MESSAGE REJECT
+- INT int_msg_1, IF 0x0f ; INITIATE RECOVERY
+-#ifdef EVENTS
+- INT int_EVENT_SELECT_FAILED
+-#endif
+- JUMP reject_message
+-
+-munge_2:
+- JUMP reject_message
+-;
+-; The SCSI standard allows targets to recover from transient
+-; error conditions by backing up the data pointer with a
+-; RESTORE POINTERS message.
+-;
+-; So, we must save and restore the _residual_ code as well as
+-; the current instruction pointer. Because of this messiness,
+-; it is simpler to put dynamic code in the dsa for this and to
+-; just do a simple jump down there.
+-;
+-
+-munge_save_data_pointer:
+-#if (CHIP == 710)
+- ; We have something in TEMP here, so first we must save that
+- MOVE TEMP0 TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE TEMP1 TO SFBR
+- MOVE SFBR TO SCRATCH1
+- MOVE TEMP2 TO SFBR
+- MOVE SFBR TO SCRATCH2
+- MOVE TEMP3 TO SFBR
+- MOVE SFBR TO SCRATCH3
+- MOVE MEMORY 4, addr_scratch, jump_temp + 4
+- ; Now restore DSA
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+- MOVE DSA0 + dsa_save_data_pointer TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE DSA1 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH1
+- MOVE DSA2 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH2
+- MOVE DSA3 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH3
+-
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4
+- DMODE_MEMORY_TO_MEMORY
+-jump_dsa_save:
+- JUMP 0
+-
+-munge_restore_pointers:
+-#if (CHIP == 710)
+- ; The code at dsa_restore_pointers will RETURN, but we don't care
+- ; about TEMP here, as it will overwrite it anyway.
+-#endif
+- MOVE DSA0 + dsa_restore_pointers TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE DSA1 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH1
+- MOVE DSA2 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH2
+- MOVE DSA3 + 0xff TO SFBR WITH CARRY
+- MOVE SFBR TO SCRATCH3
+-
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4
+- DMODE_MEMORY_TO_MEMORY
+-jump_dsa_restore:
+- JUMP 0
+-
+-
+-munge_disconnect:
+-#ifdef DEBUG
+- INT int_debug_disconnect_msg
+-#endif
+-
+-/*
+- * Before, we overlapped processing with waiting for disconnect, but
+- * debugging was beginning to appear messy. Temporarily move things
+- * to just before the WAIT DISCONNECT.
+- */
+-
+-#ifdef ORIGINAL
+-#if (CHIP == 710)
+-; Following clears Unexpected Disconnect bit. What do we do?
+-#else
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- CLEAR ACK
+-#endif
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+- JUMP dsa_schedule
+-#else
+- WAIT DISCONNECT
+- INT int_norm_disconnected
+-#endif
+-
+-munge_extended:
+- CLEAR ACK
+- INT int_err_unexpected_phase, WHEN NOT MSG_IN
+- MOVE 1, msg_buf + 1, WHEN MSG_IN
+- JUMP munge_extended_2, IF 0x02
+- JUMP munge_extended_3, IF 0x03
+- JUMP reject_message
+-
+-munge_extended_2:
+- CLEAR ACK
+- MOVE 1, msg_buf + 2, WHEN MSG_IN
+- JUMP reject_message, IF NOT 0x02 ; Must be WDTR
+- CLEAR ACK
+- MOVE 1, msg_buf + 3, WHEN MSG_IN
+- INT int_msg_wdtr
+-
+-munge_extended_3:
+- CLEAR ACK
+- MOVE 1, msg_buf + 2, WHEN MSG_IN
+- JUMP reject_message, IF NOT 0x01 ; Must be SDTR
+- CLEAR ACK
+- MOVE 2, msg_buf + 3, WHEN MSG_IN
+- INT int_msg_sdtr
+-
+-ENTRY reject_message
+-reject_message:
+- SET ATN
+- CLEAR ACK
+- MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT
+- RETURN
+-
+-ENTRY accept_message
+-accept_message:
+- CLEAR ATN
+- CLEAR ACK
+- RETURN
+-
+-ENTRY respond_message
+-respond_message:
+- SET ATN
+- CLEAR ACK
+- MOVE FROM dsa_msgout_other, WHEN MSG_OUT
+- RETURN
+-
+-;
+-; command_complete
+-;
+-; PURPOSE : handle command termination when STATUS IN is detected by reading
+-; a status byte followed by a command termination message.
+-;
+-; Normal termination results in an INTFLY instruction, and
+-; the host system can pick out which command terminated by
+-; examining the MESSAGE and STATUS buffers of all currently
+-; executing commands;
+-;
+-; Abnormal (CHECK_CONDITION) termination results in an
+-; int_err_check_condition interrupt so that a REQUEST SENSE
+-; command can be issued out-of-order so that no other command
+-; clears the contingent allegiance condition.
+-;
+-;
+-; INPUTS : DSA - command
+-;
+-; CALLS : OK
+-;
+-; EXITS : On successful termination, control is passed to schedule.
+-; On abnormal termination, the user will usually modify the
+-; DSA fields and corresponding buffers and return control
+-; to select.
+-;
+-
+-ENTRY command_complete
+-command_complete:
+- MOVE FROM dsa_status, WHEN STATUS
+-#if (CHIP != 700) && (CHIP != 70066)
+- MOVE SFBR TO SCRATCH0 ; Save status
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-ENTRY command_complete_msgin
+-command_complete_msgin:
+- MOVE FROM dsa_msgin, WHEN MSG_IN
+-; Indicate that we should be expecting a disconnect
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#else
+- ; Above code cleared the Unexpected Disconnect bit, what do we do?
+-#endif
+- CLEAR ACK
+-#if (CHIP != 700) && (CHIP != 70066)
+- WAIT DISCONNECT
+-
+-;
+-; The SCSI specification states that when a UNIT ATTENTION condition
+-; is pending, as indicated by a CHECK CONDITION status message,
+-; the target shall revert to asynchronous transfers. Since
+-; synchronous transfers parameters are maintained on a per INITIATOR/TARGET
+-; basis, and returning control to our scheduler could work on a command
+-; running on another lun on that target using the old parameters, we must
+-; interrupt the host processor to get them changed, or change them ourselves.
+-;
+-; Once SCSI-II tagged queueing is implemented, things will be even more
+-; hairy, since contingent allegiance conditions exist on a per-target/lun
+-; basis, and issuing a new command with a different tag would clear it.
+-; In these cases, we must interrupt the host processor to get a request
+-; added to the HEAD of the queue with the request sense command, or we
+-; must automatically issue the request sense command.
+-
+-#if 0
+- MOVE SCRATCH0 TO SFBR
+- JUMP command_failed, IF 0x02
+-#endif
+-#if (CHIP == 710)
+-#if defined(MVME16x_INTFLY)
+-; For MVME16x (ie CHIP=710) we will force an INTFLY by triggering a software
+-; interrupt (SW7). We can use SCRATCH, as we are about to jump to
+-; schedule, which corrupts it anyway. Will probably remove this later,
+-; but want to check performance effects first.
+-
+-#define INTFLY_ADDR 0xfff40070
+-
+- MOVE 0 TO SCRATCH0
+- MOVE 0x80 TO SCRATCH1
+- MOVE 0 TO SCRATCH2
+- MOVE 0 TO SCRATCH3
+- MOVE MEMORY 4, addr_scratch, INTFLY_ADDR
+-#else
+- INT int_norm_emulateintfly
+-#endif
+-#else
+- INTFLY
+-#endif
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-#ifdef EVENTS
+- INT int_EVENT_COMPLETE
+-#endif
+-#if (CHIP != 700) && (CHIP != 70066)
+- JUMP schedule
+-command_failed:
+- INT int_err_check_condition
+-#else
+- INT int_norm_command_complete
+-#endif
+-
+-;
+-; wait_reselect
+-;
+-; PURPOSE : This is essentially the idle routine, where control lands
+-; when there are no new processes to schedule. wait_reselect
+-; waits for reselection, selection, and new commands.
+-;
+-; When a successful reselection occurs, with the aid
+-; of fixed up code in each DSA, wait_reselect walks the
+-; reconnect_dsa_queue, asking each dsa if the target ID
+-; and LUN match its own.
+-;
+-; If a match is found, a call is made back to reselected_ok,
+-; which, through the miracles of self-modifying code, extracts
+-; the found DSA from the reconnect_dsa_queue and then
+-; returns control to the DSA's thread of execution.
+-;
+-; INPUTS : NONE
+-;
+-; CALLS : OK
+-;
+-; MODIFIES : DSA,
+-;
+-; EXITS : On successful reselection, control is returned to the
+-; DSA which called reselected_ok. If the WAIT RESELECT
+-; was interrupted by a new command's arrival signaled by
+-; SIG_P, control is passed to schedule. If the NCR is
+-; selected, the host system is interrupted with an
+-; int_err_selected which is usually responded to by
+-; setting DSP to the target_abort address.
+-
+-ENTRY wait_reselect
+-wait_reselect:
+-#ifdef EVENTS
+- int int_EVENT_IDLE
+-#endif
+-#ifdef DEBUG
+- int int_debug_idle
+-#endif
+- WAIT RESELECT wait_reselect_failed
+-
+-reselected:
+-#ifdef EVENTS
+- int int_EVENT_RESELECT
+-#endif
+- CLEAR TARGET
+- DMODE_MEMORY_TO_MEMORY
+- ; Read all data needed to reestablish the nexus -
+- MOVE 1, reselected_identify, WHEN MSG_IN
+- ; We used to CLEAR ACK here.
+-#if (CHIP != 700) && (CHIP != 70066)
+-#ifdef DEBUG
+- int int_debug_reselected
+-#endif
+-
+- ; Point DSA at the current head of the disconnected queue.
+- DMODE_MEMORY_TO_NCR
+- MOVE MEMORY 4, reconnect_dsa_head, addr_scratch
+- DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+-#else
+- CALL scratch_to_dsa
+-#endif
+-
+- ; Fix the update-next pointer so that the reconnect_dsa_head
+- ; pointer is the one that will be updated if this DSA is a hit
+- ; and we remove it from the queue.
+-
+- MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-
+-ENTRY reselected_check_next
+-reselected_check_next:
+-#ifdef DEBUG
+- INT int_debug_reselect_check
+-#endif
+- ; Check for a NULL pointer.
+- MOVE DSA0 TO SFBR
+- JUMP reselected_not_end, IF NOT 0
+- MOVE DSA1 TO SFBR
+- JUMP reselected_not_end, IF NOT 0
+- MOVE DSA2 TO SFBR
+- JUMP reselected_not_end, IF NOT 0
+- MOVE DSA3 TO SFBR
+- JUMP reselected_not_end, IF NOT 0
+- INT int_err_unexpected_reselect
+-
+-reselected_not_end:
+- ;
+- ; XXX the ALU is only eight bits wide, and the assembler
+-	; won't do the dirty work for us. As long as dsa_check_reselect
+- ; is negative, we need to sign extend with 1 bits to the full
+- ; 32 bit width of the address.
+- ;
+- ; A potential work around would be to have a known alignment
+- ; of the DSA structure such that the base address plus
+- ; dsa_check_reselect doesn't require carrying from bytes
+- ; higher than the LSB.
+- ;
+-
+- MOVE DSA0 TO SFBR
+- MOVE SFBR + dsa_check_reselect TO SCRATCH0
+- MOVE DSA1 TO SFBR
+- MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY
+- MOVE DSA2 TO SFBR
+- MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY
+- MOVE DSA3 TO SFBR
+- MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY
+-
+- DMODE_NCR_TO_MEMORY
+- MOVE MEMORY 4, addr_scratch, reselected_check + 4
+- DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-reselected_check:
+- JUMP 0
+-
+-
+-;
+-;
+-#if (CHIP == 710)
+-; We have problems here - the memory move corrupts TEMP and DSA. This
+-; routine is called from DSA code, and patched from many places. Scratch
+-; is probably free when it is called.
+-; We have to:
+-; copy temp to scratch, one byte at a time
+-; write scratch to patch a jump in place of the return
+-; do the move memory
+-; jump to the patched in return address
+-; DSA is corrupt when we get here, and can be left corrupt
+-
+-ENTRY reselected_ok
+-reselected_ok:
+- MOVE TEMP0 TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE TEMP1 TO SFBR
+- MOVE SFBR TO SCRATCH1
+- MOVE TEMP2 TO SFBR
+- MOVE SFBR TO SCRATCH2
+- MOVE TEMP3 TO SFBR
+- MOVE SFBR TO SCRATCH3
+- MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4
+-reselected_ok_patch:
+- MOVE MEMORY 4, 0, 0
+-reselected_ok_jump:
+- JUMP 0
+-#else
+-ENTRY reselected_ok
+-reselected_ok:
+-reselected_ok_patch:
+- MOVE MEMORY 4, 0, 0 ; Patched : first word
+- ; is address of
+- ; successful dsa_next
+- ; Second word is last
+- ; unsuccessful dsa_next,
+- ; starting with
+- ; dsa_reconnect_head
+- ; We used to CLEAR ACK here.
+-#ifdef DEBUG
+- INT int_debug_reselected_ok
+-#endif
+-#ifdef DEBUG
+- INT int_debug_check_dsa
+-#endif
+- RETURN ; Return control to where
+-#endif
+-#else
+- INT int_norm_reselected
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-selected:
+- INT int_err_selected;
+-
+-;
+-; A select or reselect failure can be caused by one of the following conditions :
+-; 1. SIG_P was set. This will be the case if the user has written
+-; a new value to a previously NULL head of the issue queue.
+-;
+-; 2. The NCR53c810 was selected or reselected by another device.
+-;
+-; 3. The bus was already busy since we were selected or reselected
+-; before starting the command.
+-
+-wait_reselect_failed:
+-#ifdef EVENTS
+- INT int_EVENT_RESELECT_FAILED
+-#endif
+-; Check selected bit.
+-#if (CHIP == 710)
+- ; Must work out how to tell if we are selected....
+-#else
+- MOVE SIST0 & 0x20 TO SFBR
+- JUMP selected, IF 0x20
+-#endif
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+- JUMP schedule, IF 0x40
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+- MOVE ISTAT & 0x08 TO SFBR
+- JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+- JUMP schedule
+-#else
+- INT int_debug_panic
+-#endif
+-
+-
+-select_failed:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-#ifdef EVENTS
+- int int_EVENT_SELECT_FAILED
+-#endif
+-; Otherwise, mask the selected and reselected bits off SIST0
+-#if (CHIP ==710)
+- ; Let's assume we don't get selected for now
+- MOVE SSTAT0 & 0x10 TO SFBR
+-#else
+- MOVE SIST0 & 0x30 TO SFBR
+- JUMP selected, IF 0x20
+-#endif
+- JUMP reselected, IF 0x10
+-; If SIGP is set, the user just gave us another command, and
+-; we should restart or return to the scheduler.
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+- JUMP select, IF 0x40
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+-; FIXME: is this really necessary?
+- MOVE ISTAT & 0x08 TO SFBR
+- JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+- JUMP schedule
+-#else
+- INT int_debug_panic
+-#endif
+-
+-;
+-; test_1
+-; test_2
+-;
+-; PURPOSE : run some verification tests on the NCR. test_1
+-; copies test_src to test_dest and interrupts the host
+-; processor, testing for cache coherency and interrupt
+-; problems in the process.
+-;
+-; test_2 runs a command with offsets relative to the
+-; DSA on entry, and is useful for miscellaneous experimentation.
+-;
+-
+-; Verify that interrupts are working correctly and that we don't
+-; have a cache invalidation problem.
+-
+-ABSOLUTE test_src = 0, test_dest = 0
+-ENTRY test_1
+-test_1:
+- MOVE MEMORY 4, test_src, test_dest
+- INT int_test_1
+-
+-;
+-; Run arbitrary commands, with test code establishing a DSA
+-;
+-
+-ENTRY test_2
+-test_2:
+- CLEAR TARGET
+-#if (CHIP == 710)
+- ; Enable selection timer
+-#ifdef NO_SELECTION_TIMEOUT
+- MOVE CTEST7 & 0xff TO CTEST7
+-#else
+- MOVE CTEST7 & 0xef TO CTEST7
+-#endif
+-#endif
+- SELECT ATN FROM 0, test_2_fail
+- JUMP test_2_msgout, WHEN MSG_OUT
+-ENTRY test_2_msgout
+-test_2_msgout:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+- MOVE FROM 8, WHEN MSG_OUT
+- MOVE FROM 16, WHEN CMD
+- MOVE FROM 24, WHEN DATA_IN
+- MOVE FROM 32, WHEN STATUS
+- MOVE FROM 40, WHEN MSG_IN
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- CLEAR ACK
+- WAIT DISCONNECT
+-test_2_fail:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+- INT int_test_2
+-
+-ENTRY debug_break
+-debug_break:
+- INT int_debug_break
+-
+-;
+-; initiator_abort
+-; target_abort
+-;
+-; PURPOSE : Abort the currently established nexus from within initiator
+-; or target mode.
+-;
+-;
+-
+-ENTRY target_abort
+-target_abort:
+- SET TARGET
+- DISCONNECT
+- CLEAR TARGET
+- JUMP schedule
+-
+-ENTRY initiator_abort
+-initiator_abort:
+- SET ATN
+-;
+-; The SCSI-I specification says that targets may go into MSG out at
+-; their leisure upon receipt of the ATN signal. On all versions of the
+-; specification, we can't change phases until REQ transitions true->false,
+-; so we need to sink/source one byte of data to allow the transition.
+-;
+-; For the sake of safety, we'll only source one byte of data in all
+-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an
+-; arbitrary number of bytes.
+- JUMP spew_cmd, WHEN CMD
+- JUMP eat_msgin, WHEN MSG_IN
+- JUMP eat_datain, WHEN DATA_IN
+- JUMP eat_status, WHEN STATUS
+- JUMP spew_dataout, WHEN DATA_OUT
+- JUMP sated
+-spew_cmd:
+- MOVE 1, NCR53c7xx_zero, WHEN CMD
+- JUMP sated
+-eat_msgin:
+- MOVE 1, NCR53c7xx_sink, WHEN MSG_IN
+- JUMP eat_msgin, WHEN MSG_IN
+- JUMP sated
+-eat_status:
+- MOVE 1, NCR53c7xx_sink, WHEN STATUS
+- JUMP eat_status, WHEN STATUS
+- JUMP sated
+-eat_datain:
+- MOVE 1, NCR53c7xx_sink, WHEN DATA_IN
+- JUMP eat_datain, WHEN DATA_IN
+- JUMP sated
+-spew_dataout:
+- MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT
+-sated:
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT
+- WAIT DISCONNECT
+- INT int_norm_aborted
+-
+-#if (CHIP != 710)
+-;
+-; dsa_to_scratch
+-; scratch_to_dsa
+-;
+-; PURPOSE :
+-; The NCR chips cannot do a move memory instruction with the DSA register
+-; as the source or destination. So, we provide a couple of subroutines
+-; that let us switch between the DSA register and scratch register.
+-;
+-; Memory moves to/from the DSPS register also don't work, but we
+-; don't use them.
+-;
+-;
+-
+-
+-dsa_to_scratch:
+- MOVE DSA0 TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE DSA1 TO SFBR
+- MOVE SFBR TO SCRATCH1
+- MOVE DSA2 TO SFBR
+- MOVE SFBR TO SCRATCH2
+- MOVE DSA3 TO SFBR
+- MOVE SFBR TO SCRATCH3
+- RETURN
+-
+-scratch_to_dsa:
+- MOVE SCRATCH0 TO SFBR
+- MOVE SFBR TO DSA0
+- MOVE SCRATCH1 TO SFBR
+- MOVE SFBR TO DSA1
+- MOVE SCRATCH2 TO SFBR
+- MOVE SFBR TO DSA2
+- MOVE SCRATCH3 TO SFBR
+- MOVE SFBR TO DSA3
+- RETURN
+-#endif
+-
+-#if (CHIP == 710)
+-; Little patched jump, used to overcome problems with TEMP getting
+-; corrupted on memory moves.
+-
+-jump_temp:
+- JUMP 0
+-#endif
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped
+--- linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped 1969-12-31 19:00:00.000000000 -0500
+@@ -1,2874 +0,0 @@
+-/* DO NOT EDIT - Generated automatically by script_asm.pl */
+-static u32 SCRIPT[] = {
+-/*
+-
+-
+-
+-
+-
+-; 53c710 driver. Modified from Drew Eckhardt's driver
+-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+-;
+-; I have left the script for the 53c8xx family in here, as it is likely
+-; to be useful to see what I changed when bug hunting.
+-
+-; NCR 53c810 driver, main script
+-; Sponsored by
+-; iX Multiuser Multitasking Magazine
+-; hm@ix.de
+-;
+-; Copyright 1993, 1994, 1995 Drew Eckhardt
+-; Visionary Computing
+-; (Unix and Linux consulting and custom programming)
+-; drew@PoohSticks.ORG
+-; +1 (303) 786-7975
+-;
+-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+-;
+-; PRE-ALPHA
+-;
+-; For more information, please consult
+-;
+-; NCR 53C810
+-; PCI-SCSI I/O Processor
+-; Data Manual
+-;
+-; NCR 53C710
+-; SCSI I/O Processor
+-; Programmer's Guide
+-;
+-; NCR Microelectronics
+-; 1635 Aeroplaza Drive
+-; Colorado Springs, CO 80916
+-; +1 (719) 578-3400
+-;
+-; Toll free literature number
+-; +1 (800) 334-5454
+-;
+-; IMPORTANT : This code is self-modifying due to the limitations of
+-; the NCR53c7,8xx series chips. Persons debugging this code with
+-; the remote debugger should take this into account, and NOT set
+-; breakpoints in modified instructions.
+-;
+-; Design:
+-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard
+-; microcontroller using a simple instruction set.
+-;
+-; So, to minimize the effects of interrupt latency, and to maximize
+-; throughput, this driver offloads the practical maximum amount
+-; of processing to the SCSI chip while still maintaining a common
+-; structure.
+-;
+-; Where tradeoffs were needed between efficiency on the older
+-; chips and the newer NCR53c800 series, the NCR53c800 series
+-; was chosen.
+-;
+-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully
+-; automate SCSI transfers without host processor intervention, this
+-; isn't the case with the NCR53c710 and newer chips which allow
+-;
+-; - reads and writes to the internal registers from within the SCSI
+-; scripts, allowing the SCSI SCRIPTS(tm) code to save processor
+-; state so that multiple threads of execution are possible, and also
+-; provide an ALU for loop control, etc.
+-;
+-; - table indirect addressing for some instructions. This allows
+-; pointers to be located relative to the DSA (Data Structure
+-; Address) register.
+-;
+-; These features make it possible to implement a mailbox style interface,
+-; where the same piece of code is run to handle I/O for multiple threads
+-; at once, minimizing our need to relocate code. Since the NCR53c700/
+-; NCR53c800 series have a unique combination of features, making a
+-; standard ingoing/outgoing mailbox system costly, I've modified it.
+-;
+-; - Mailboxes are a mixture of code and data. This lets us greatly
+-; simplify the NCR53c810 code and do things that would otherwise
+-; not be possible.
+-;
+-; The saved data pointer is now implemented as follows :
+-;
+-; Control flow has been architected such that if control reaches
+-; munge_save_data_pointer, on a restore pointers message or
+-; reconnection, a jump to the address formerly in the TEMP register
+-; will allow the SCSI command to resume execution.
+-;
+-
+-;
+-; Note : the DSA structures must be aligned on 32 bit boundaries,
+-; since the source and destination of MOVE MEMORY instructions
+-; must share the same alignment and this is the alignment of the
+-; NCR registers.
+-;
+-
+-; For some systems (MVME166, for example) dmode is always the same, so don't
+-; waste time writing it
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa
+-ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa
+-ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address
+- ; for current dsa
+-ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target
+- ; sync routine
+-ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target
+- ; sscf value (53c710)
+-ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa
+-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command
+- ; saved data pointer
+-ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command
+- ; current residual code
+-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command
+- ; saved residual code
+-ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand
+-ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to
+-ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value
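All of the dsa_temp_* symbols above assemble to zero placeholders; the generated output records every instruction slot that references each symbol (the A_*_used arrays near the end of this file), so the host driver can rewrite those operands in each copy of the template. A minimal sketch of such a fixup, with an invented helper name (the driver's real patch code lives in 53c7,8xx.c):

typedef unsigned int u32;

/* Overwrite every recorded operand slot with the real per-command
 * value; 'uses' plays the role of one of the A_*_used arrays. */
static void patch_absolute(u32 *script, const u32 *uses, int nuses, u32 value)
{
    int i;
    for (i = 0; i < nuses; i++)
        script[uses[i]] = value;
}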
+-
+-;
+-; Once a device has initiated reselection, we need to compare it
+-; against the singly linked list of commands which have disconnected
+-; and are pending reselection. These commands are maintained in
+-; an unordered singly linked list of DSA structures, through the
+-; DSA pointers at their 'centers' headed by the reconnect_dsa_head
+-; pointer.
+-;
+-; To avoid complications in removing commands from the list,
+-; I minimize the number of expensive (at eight operations per
+-; addition @ 500-600ns each) pointer operations which must
+-; be done in the NCR driver by precomputing them on the
+-; host processor during dsa structure generation.
+-;
+-; The fixed-up per DSA code knows how to recognize the nexus
+-; associated with the corresponding SCSI command, and modifies
+-; the source and destination pointers for the MOVE MEMORY
+-; instruction which is executed when reselected_ok is called
+-; to remove the command from the list. Similarly, DSA is
+-; loaded with the address of the next DSA structure and
+-; reselected_check_next is called if a failure occurs.
+-;
+-; Perhaps more concisely, the net effect of the mess is
+-;
+-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head,
+-; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) {
+-; src = &dsa->next;
+-; if (target_id == dsa->id && target_lun == dsa->lun) {
+-; *dest = *src;
+-; break;
+-; }
+-; }
+-;
+-; if (!dsa)
+-; error (int_err_unexpected_reselect);
+-; else
+-; longjmp (dsa->jump_resume, 0);
+-;
+-;
+-
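The same walk as compilable C, for reference (struct layout and function name are illustrative; the on-chip version threads through the dsa_next field defined further down):

#include <stddef.h>

struct dsa {
    struct dsa *next;   /* dsa_next in the real structure */
    int id, lun;
    /* ... remainder of the per-command mailbox ... */
};

static struct dsa *reconnect_dsa_head;

/* Find the DSA matching the reselecting target/lun and unlink it --
 * the effect achieved on chip by the patched MOVE MEMORY. */
static struct dsa *find_and_unlink(int target_id, int target_lun)
{
    struct dsa **dest = &reconnect_dsa_head;
    struct dsa *dsa;

    for (dsa = reconnect_dsa_head; dsa; dest = &dsa->next, dsa = dsa->next) {
        if (target_id == dsa->id && target_lun == dsa->lun) {
            *dest = dsa->next;  /* the '*dest = *src' above */
            return dsa;         /* caller resumes its thread */
        }
    }
    return NULL;                /* int_err_unexpected_reselect case */
}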
+-
+-; Define DSA structure used for mailboxes
+-ENTRY dsa_code_template
+-dsa_code_template:
+-ENTRY dsa_code_begin
+-dsa_code_begin:
+-; RGH: Don't care about TEMP and DSA here
+-
+- MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch
+-
+-at 0x00000000 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x00000003 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+- ; We are about to go and select the device, so must set SSCF bits
+- MOVE MEMORY 4, dsa_sscf_710, addr_scratch
+-
+-at 0x00000006 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+- MOVE SCRATCH3 TO SFBR
+-
+-at 0x00000009 : */ 0x72370000,0x00000000,
+-/*
+-
+-
+-
+- MOVE SFBR TO SBCL
+-
+-at 0x0000000b : */ 0x6a0b0000,0x00000000,
+-/*
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000000d : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- CALL select
+-
+-at 0x00000010 : */ 0x88080000,0x000001f8,
+-/*
+-; Handle the phase mismatch which may have resulted from the
+-; MOVE FROM dsa_msgout if we returned here. The CLEAR ATN
+-; may or may not be necessary, and we should update script_asm.pl
+-; to handle multiple pieces.
+- CLEAR ATN
+-
+-at 0x00000012 : */ 0x60000008,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x00000014 : */ 0x60000040,0x00000000,
+-/*
+-
+-; Replace second operand with address of JUMP instruction dest operand
+-; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c.
+-ENTRY dsa_code_fix_jump
+-dsa_code_fix_jump:
+- MOVE MEMORY 4, NOP_insn, 0
+-
+-at 0x00000016 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+- JUMP select_done
+-
+-at 0x00000019 : */ 0x80080000,0x00000230,
+-/*
+-
+-; wrong_dsa loads the DSA register with the value of the dsa_next
+-; field.
+-;
+-wrong_dsa:
+-
+-; NOTE DSA is corrupt when we arrive here!
+-
+-; Patch the MOVE MEMORY INSTRUCTION such that
+-; the destination address is the address of the OLD
+-; next pointer.
+-;
+- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8
+-
+-at 0x0000001b : */ 0xc0000004,0x00000000,0x000007ec,
+-/*
+-
+-;
+-; Move the _contents_ of the next pointer into the DSA register as
+-; the next I_T_L or I_T_L_Q tuple to check against the established
+-; nexus.
+-;
+- MOVE MEMORY 4, dsa_temp_next, addr_scratch
+-
+-at 0x0000001e : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x00000021 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000024 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- JUMP reselected_check_next
+-
+-at 0x00000027 : */ 0x80080000,0x000006f0,
+-/*
+-
+-ABSOLUTE dsa_save_data_pointer = 0
+-ENTRY dsa_code_save_data_pointer
+-dsa_code_save_data_pointer:
+-
+- ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt
+- ; We MUST return with DSA correct
+- MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer
+-
+-at 0x00000029 : */ 0xc0000004,0x000009c8,0x00000000,
+-/*
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+-
+-at 0x0000002c : */ 0xc0000018,0x00000000,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x0000002f : */ 0x60000040,0x00000000,
+-/*
+-
+-
+-
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000031 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+- JUMP jump_temp
+-
+-at 0x00000034 : */ 0x80080000,0x000009c4,
+-/*
+-
+-ABSOLUTE dsa_restore_pointers = 0
+-ENTRY dsa_code_restore_pointers
+-dsa_code_restore_pointers:
+-
+- ; TEMP and DSA are corrupt when we get here, but who cares!
+- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4
+-
+-at 0x00000036 : */ 0xc0000004,0x00000000,0x000009c8,
+-/*
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+-
+-at 0x00000039 : */ 0xc0000018,0x00000000,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x0000003c : */ 0x60000040,0x00000000,
+-/*
+- ; Restore DSA, note we don't care about TEMP
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000003e : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- JUMP jump_temp
+-
+-at 0x00000041 : */ 0x80080000,0x000009c4,
+-/*
+-
+-
+-ABSOLUTE dsa_check_reselect = 0
+-; dsa_check_reselect determines whether or not the current target and
+-; lun match the current DSA
+-ENTRY dsa_code_check_reselect
+-dsa_code_check_reselect:
+-
+-
+-
+- MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set
+-
+-at 0x00000043 : */ 0x72230000,0x00000000,
+-/*
+- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80
+-
+-at 0x00000045 : */ 0x80848000,0x00ffff50,
+-/*
+-
+-
+-
+-
+-
+-;
+-; Hack - move to scratch first, since SFBR is not writeable
+-; via the CPU and hence a MOVE MEMORY instruction.
+-;
+-
+- MOVE MEMORY 1, reselected_identify, addr_scratch
+-
+-at 0x00000047 : */ 0xc0000001,0x00000000,0x00000000,
+-/*
+-
+-
+- ; BIG ENDIAN ON MVME16x
+- MOVE SCRATCH3 TO SFBR
+-
+-at 0x0000004a : */ 0x72370000,0x00000000,
+-/*
+-
+-
+-
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+-; Are you sure about that? richard@sleepie.demon.co.uk
+- JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8
+-
+-at 0x0000004c : */ 0x8084f800,0x00ffff34,
+-/*
+-; Patch the MOVE MEMORY INSTRUCTION such that
+-; the source address is the address of this dsa's
+-; next pointer.
+- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4
+-
+-at 0x0000004e : */ 0xc0000004,0x00000000,0x000007e8,
+-/*
+- CALL reselected_ok
+-
+-at 0x00000051 : */ 0x88080000,0x00000798,
+-/*
+-
+-; Restore DSA following memory moves in reselected_ok
+-; dsa_temp_sync doesn't really care about DSA, but it has an
+-; optional debug INT so a valid DSA is a good idea.
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000053 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+- CALL dsa_temp_sync
+-
+-at 0x00000056 : */ 0x88080000,0x00000000,
+-/*
+-; Release ACK on the IDENTIFY message _after_ we've set the synchronous
+-; transfer parameters!
+- CLEAR ACK
+-
+-at 0x00000058 : */ 0x60000040,0x00000000,
+-/*
+-; Implicitly restore pointers on reselection, so a RETURN
+-; will transfer control back to the right spot.
+- CALL REL (dsa_code_restore_pointers)
+-
+-at 0x0000005a : */ 0x88880000,0x00ffff68,
+-/*
+- RETURN
+-
+-at 0x0000005c : */ 0x90080000,0x00000000,
+-/*
+-ENTRY dsa_zero
+-dsa_zero:
+-ENTRY dsa_code_template_end
+-dsa_code_template_end:
+-
+-; Perform sanity check : if dsa_fields_start != dsa_code_template_end -
+-; dsa_zero, puke.
+-
+-ABSOLUTE dsa_fields_start = 0 ; Sanity marker
+- ; pad 48 bytes (fix this RSN)
+-ABSOLUTE dsa_next = 48 ; len 4 Next DSA
+- ; del 4 Previous DSA address
+-ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread.
+-ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for
+- ; table indirect select
+-ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for
+- ; select message
+-ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for
+- ; command
+-ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout
+-ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain
+-ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin
+-ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte
+-ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out
+- ; (Synchronous transfer negotiation, etc).
+-ABSOLUTE dsa_end = 112
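Read as a C structure, the offsets above imply the following layout (a sketch assuming 32-bit fields; the authoritative agreement is the hard-coded one with 53c7,8xx.h noted elsewhere in this script):

typedef unsigned char u8;
typedef unsigned int u32;

struct dsa_layout {
    u8  code[48];        /*   0: fixed-up template code / padding */
    u32 next;            /*  48: next DSA in the queue */
    u32 prev;            /*  52: previous DSA address */
    u32 cmnd;            /*  56: Scsi_Cmnd * for this thread */
    u32 select;          /*  60: ID, period, offset for table indirect select */
    u32 msgout[2];       /*  64: table indirect move, select message */
    u32 cmdout[2];       /*  72: table indirect move, command */
    u32 dataout;         /*  80: code pointer for dataout */
    u32 datain;          /*  84: code pointer for datain */
    u32 msgin[2];        /*  88: table indirect move, msgin */
    u32 status[2];       /*  96: table indirect move, status byte */
    u32 msgout_other[2]; /* 104: table indirect, other message out */
};                       /* sizeof == 112 == dsa_end */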
+-
+-ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next),
+- ; terminated by a call to JUMP wait_reselect
+-
+-; Linked lists of DSA structures
+-ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect
+-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing
+- ; address of reconnect_dsa_head
+-
+-; These select the source and destination of a MOVE MEMORY instruction
+-ABSOLUTE dmode_memory_to_memory = 0x0
+-ABSOLUTE dmode_memory_to_ncr = 0x0
+-ABSOLUTE dmode_ncr_to_memory = 0x0
+-
+-ABSOLUTE addr_scratch = 0x0
+-ABSOLUTE addr_temp = 0x0
+-
+-ABSOLUTE saved_dsa = 0x0
+-ABSOLUTE emulfly = 0x0
+-ABSOLUTE addr_dsa = 0x0
+-
+-
+-
+-; Interrupts -
+-; MSB indicates type
+-; 0 handle error condition
+-; 1 handle message
+-; 2 handle normal condition
+-; 3 debugging interrupt
+-; 4 testing interrupt
+-; Next byte indicates specific error
+-
+-; XXX not yet implemented, I'm not sure if I want to -
+-; Next byte indicates the routine the error occurred in
+-; The LSB indicates the specific place the error occurred
+-
+-ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered
+-ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED)
+-ABSOLUTE int_err_unexpected_reselect = 0x00020000
+-ABSOLUTE int_err_check_condition = 0x00030000
+-ABSOLUTE int_err_no_phase = 0x00040000
+-ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received
+-ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received
+-ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message
+- ; received
+-
+-ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram
+- ; registers.
+-ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established
+-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete
+-ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected
+-ABSOLUTE int_norm_aborted = 0x02040000 ; Aborted *dsa
+-ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset.
+-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly
+-ABSOLUTE int_debug_break = 0x03000000 ; Break point
+-
+-ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver
+-
+-
+-ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete
+-ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete
+-ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete
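Decoding follows directly from the scheme above (macro names invented here):

#define INT_TYPE(code)   (((code) >> 24) & 0xff) /* 0 err, 1 msg, 2 norm,
                                                    3 debug, 4 test */
#define INT_DETAIL(code) (((code) >> 16) & 0xff) /* condition within type */

/* e.g. int_debug_panic == 0x030b0000: type 3 (debug), detail 0x0b */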
+-
+-
+-; These should start with 0x05000000, with low bits incrementing for
+-; each one.
+-
+-
+-
+-ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message
+-ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message
+-ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source
+-ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in
+-ABSOLUTE NOP_insn = 0 ; NOP instruction
+-
+-; Pointer to message, potentially multi-byte
+-ABSOLUTE msg_buf = 0
+-
+-; Pointer to holding area for reselection information
+-ABSOLUTE reselected_identify = 0
+-ABSOLUTE reselected_tag = 0
+-
+-; Request sense command pointer. It's a 6 byte command and should
+-; be constant for all commands, since we always want 16 bytes of
+-; sense and we don't need to change any fields as we did under
+-; SCSI-I when we actually cared about the LUN field.
+-;EXTERNAL NCR53c7xx_sense ; Request sense command
+-
+-
+-; dsa_schedule
+-; PURPOSE : after a DISCONNECT message has been received, and pointers
+-; saved, insert the current DSA structure at the head of the
+-; disconnected queue and fall through to the scheduler.
+-;
+-; CALLS : OK
+-;
+-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list
+-; of disconnected commands
+-;
+-; MODIFIES : SCRATCH, reconnect_dsa_head
+-;
+-; EXITS : always passes control to schedule
+-
+-ENTRY dsa_schedule
+-dsa_schedule:
+-
+-
+-
+-
+-;
+-; Calculate the address of the next pointer within the DSA
+-; structure of the command that is currently disconnecting
+-;
+-
+- ; Read what should be the current DSA from memory - actual DSA
+- ; register is probably corrupt
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x0000005e : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- MOVE SCRATCH0 + dsa_next TO SCRATCH0
+-
+-at 0x00000061 : */ 0x7e343000,0x00000000,
+-/*
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+-
+-at 0x00000063 : */ 0x7f350000,0x00000000,
+-/*
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+-
+-at 0x00000065 : */ 0x7f360000,0x00000000,
+-/*
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-at 0x00000067 : */ 0x7f370000,0x00000000,
+-/*
+-
+-; Point the next field of this DSA structure at the current disconnected
+-; list
+-
+- MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8
+-
+-at 0x00000069 : */ 0xc0000004,0x00000000,0x000001b8,
+-/*
+-
+-dsa_schedule_insert:
+- MOVE MEMORY 4, reconnect_dsa_head, 0
+-
+-at 0x0000006c : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-; And update the head pointer.
+-
+- ; Read what should be the current DSA from memory - actual DSA
+- ; register is probably corrupt
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x0000006f : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+- MOVE MEMORY 4, addr_scratch, reconnect_dsa_head
+-
+-at 0x00000072 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-
+- CLEAR ACK
+-
+-at 0x00000075 : */ 0x60000040,0x00000000,
+-/*
+-
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000077 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+- WAIT DISCONNECT
+-
+-at 0x0000007a : */ 0x48000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-
+- JUMP schedule
+-
+-at 0x0000007c : */ 0x80080000,0x00000000,
+-/*
+-
+-
+-;
+-; select
+-;
+-; PURPOSE : establish a nexus for the SCSI command referenced by DSA.
+-; On success, the current DSA structure is removed from the issue
+-; queue. Usually, this is entered as a fall-through from schedule,
+-; although the contingent allegiance handling code will write
+-; the select entry address to the DSP to restart a command as a
+-; REQUEST SENSE. A message is sent (usually IDENTIFY, although
+-; additional SDTR or WDTR messages may be sent). COMMAND OUT
+-; is handled.
+-;
+-; INPUTS : DSA - SCSI command, issue_dsa_head
+-;
+-; CALLS : NOT OK
+-;
+-; MODIFIES : SCRATCH, issue_dsa_head
+-;
+-; EXITS : on reselection or selection, go to select_failed
+-; otherwise, RETURN so control is passed back to
+-; dsa_begin.
+-;
+-
+-ENTRY select
+-select:
+-
+-
+-
+-
+-
+-
+-
+-
+- CLEAR TARGET
+-
+-at 0x0000007e : */ 0x60000200,0x00000000,
+-/*
+-
+-; XXX
+-;
+-; In effect, SELECTION operations are backgrounded, with execution
+-; continuing until code which waits for REQ or a fatal interrupt is
+-; encountered.
+-;
+-; So, for more performance, we could overlap the code which removes
+-; the command from the NCR's issue queue with the selection, but
+-; at this point I don't want to deal with the error recovery.
+-;
+-
+-
+-
+- ; Enable selection timer
+-
+-
+-
+- MOVE CTEST7 & 0xef TO CTEST7
+-
+-at 0x00000080 : */ 0x7c1bef00,0x00000000,
+-/*
+-
+-
+- SELECT ATN FROM dsa_select, select_failed
+-
+-at 0x00000082 : */ 0x4300003c,0x00000828,
+-/*
+- JUMP select_msgout, WHEN MSG_OUT
+-
+-at 0x00000084 : */ 0x860b0000,0x00000218,
+-/*
+-ENTRY select_msgout
+-select_msgout:
+-
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000086 : */ 0x7a1b1000,0x00000000,
+-/*
+-
+- MOVE FROM dsa_msgout, WHEN MSG_OUT
+-
+-at 0x00000088 : */ 0x1e000000,0x00000040,
+-/*
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+- RETURN
+-
+-at 0x0000008a : */ 0x90080000,0x00000000,
+-/*
+-
+-;
+-; select_done
+-;
+-; PURPOSE: continue on to normal data transfer; called as the exit
+-; point from dsa_begin.
+-;
+-; INPUTS: dsa
+-;
+-; CALLS: OK
+-;
+-;
+-
+-select_done:
+-
+-; NOTE DSA is corrupt when we arrive here!
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000008c : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-
+-
+-
+-; After a successful selection, we should get either a CMD phase or
+-; some transfer request negotiation message.
+-
+- JUMP cmdout, WHEN CMD
+-
+-at 0x0000008f : */ 0x820b0000,0x0000025c,
+-/*
+- INT int_err_unexpected_phase, WHEN NOT MSG_IN
+-
+-at 0x00000091 : */ 0x9f030000,0x00000000,
+-/*
+-
+-select_msg_in:
+- CALL msg_in, WHEN MSG_IN
+-
+-at 0x00000093 : */ 0x8f0b0000,0x0000041c,
+-/*
+- JUMP select_msg_in, WHEN MSG_IN
+-
+-at 0x00000095 : */ 0x870b0000,0x0000024c,
+-/*
+-
+-cmdout:
+- INT int_err_unexpected_phase, WHEN NOT CMD
+-
+-at 0x00000097 : */ 0x9a030000,0x00000000,
+-/*
+-
+-
+-
+-ENTRY cmdout_cmdout
+-cmdout_cmdout:
+-
+- MOVE FROM dsa_cmdout, WHEN CMD
+-
+-at 0x00000099 : */ 0x1a000000,0x00000048,
+-/*
+-
+-
+-
+-
+-;
+-; data_transfer
+-; other_out
+-; other_in
+-; other_transfer
+-;
+-; PURPOSE : handle the main data transfer for a SCSI command in
+-; several parts. In the first part, data_transfer, DATA_IN
+-; and DATA_OUT phases are allowed, with the user provided
+-; code (usually dynamically generated based on the scatter/gather
+-; list associated with a SCSI command) called to handle these
+-; phases.
+-;
+-; After control has passed to one of the user provided
+-; DATA_IN or DATA_OUT routines, back calls are made to
+-; other_transfer_in or other_transfer_out to handle non-DATA IN
+-; and DATA OUT phases respectively, with the state of the active
+-; data pointer being preserved in TEMP.
+-;
+-; On completion, the user code passes control to other_transfer
+-; which causes DATA_IN and DATA_OUT to result in unexpected_phase
+-; interrupts so that data overruns may be trapped.
+-;
+-; INPUTS : DSA - SCSI command
+-;
+-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in
+-; other_transfer
+-;
+-; MODIFIES : SCRATCH
+-;
+-; EXITS : if STATUS IN is detected, signifying command completion,
+-; the NCR jumps to command_complete. If MSG IN occurs, a
+-; CALL is made to msg_in. Otherwise, other_transfer runs in
+-; an infinite loop.
+-;
+-
+-ENTRY data_transfer
+-data_transfer:
+- JUMP cmdout_cmdout, WHEN CMD
+-
+-at 0x0000009b : */ 0x820b0000,0x00000264,
+-/*
+- CALL msg_in, WHEN MSG_IN
+-
+-at 0x0000009d : */ 0x8f0b0000,0x0000041c,
+-/*
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x0000009f : */ 0x9e0b0000,0x00000000,
+-/*
+- JUMP do_dataout, WHEN DATA_OUT
+-
+-at 0x000000a1 : */ 0x800b0000,0x000002a4,
+-/*
+- JUMP do_datain, WHEN DATA_IN
+-
+-at 0x000000a3 : */ 0x810b0000,0x000002fc,
+-/*
+- JUMP command_complete, WHEN STATUS
+-
+-at 0x000000a5 : */ 0x830b0000,0x0000065c,
+-/*
+- JUMP data_transfer
+-
+-at 0x000000a7 : */ 0x80080000,0x0000026c,
+-/*
+-ENTRY end_data_transfer
+-end_data_transfer:
+-
+-;
+-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain
+-; should be fixed up whenever the nexus changes so it can point to the
+-; correct routine for that command.
+-;
+-
+-
+-; Nasty jump to dsa->dataout
+-do_dataout:
+-
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x000000a9 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- MOVE SCRATCH0 + dsa_dataout TO SCRATCH0
+-
+-at 0x000000ac : */ 0x7e345000,0x00000000,
+-/*
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+-
+-at 0x000000ae : */ 0x7f350000,0x00000000,
+-/*
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+-
+-at 0x000000b0 : */ 0x7f360000,0x00000000,
+-/*
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-at 0x000000b2 : */ 0x7f370000,0x00000000,
+-/*
+-
+- MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4
+-
+-at 0x000000b4 : */ 0xc0000004,0x00000000,0x000002e0,
+-/*
+-
+-dataout_to_jump:
+- MOVE MEMORY 4, 0, dataout_jump + 4
+-
+-at 0x000000b7 : */ 0xc0000004,0x00000000,0x000002f8,
+-/*
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000000ba : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-dataout_jump:
+- JUMP 0
+-
+-at 0x000000bd : */ 0x80080000,0x00000000,
+-/*
+-
+-; Nasty jump to dsa->dsain
+-do_datain:
+-
+- MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x000000bf : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+- MOVE SCRATCH0 + dsa_datain TO SCRATCH0
+-
+-at 0x000000c2 : */ 0x7e345400,0x00000000,
+-/*
+- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+-
+-at 0x000000c4 : */ 0x7f350000,0x00000000,
+-/*
+- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+-
+-at 0x000000c6 : */ 0x7f360000,0x00000000,
+-/*
+- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-at 0x000000c8 : */ 0x7f370000,0x00000000,
+-/*
+-
+- MOVE MEMORY 4, addr_scratch, datain_to_jump + 4
+-
+-at 0x000000ca : */ 0xc0000004,0x00000000,0x00000338,
+-/*
+-
+-ENTRY datain_to_jump
+-datain_to_jump:
+- MOVE MEMORY 4, 0, datain_jump + 4
+-
+-at 0x000000cd : */ 0xc0000004,0x00000000,0x00000350,
+-/*
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000000d0 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-datain_jump:
+- JUMP 0
+-
+-at 0x000000d3 : */ 0x80080000,0x00000000,
+-/*
+-
+-
+-
+-; Note that other_out and other_in loop until a non-data phase
+-; is discovered, so we only execute return statements when we
+-; can go on to the next data phase block move statement.
+-
+-ENTRY other_out
+-other_out:
+-
+-
+-
+- INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000d5 : */ 0x9a0b0000,0x00000000,
+-/*
+- JUMP msg_in_restart, WHEN MSG_IN
+-
+-at 0x000000d7 : */ 0x870b0000,0x000003fc,
+-/*
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000d9 : */ 0x9e0b0000,0x00000000,
+-/*
+- INT int_err_unexpected_phase, WHEN DATA_IN
+-
+-at 0x000000db : */ 0x990b0000,0x00000000,
+-/*
+- JUMP command_complete, WHEN STATUS
+-
+-at 0x000000dd : */ 0x830b0000,0x0000065c,
+-/*
+- JUMP other_out, WHEN NOT DATA_OUT
+-
+-at 0x000000df : */ 0x80030000,0x00000354,
+-/*
+-
+-; TEMP should be OK, as we got here from a call in the user dataout code.
+-
+- RETURN
+-
+-at 0x000000e1 : */ 0x90080000,0x00000000,
+-/*
+-
+-ENTRY other_in
+-other_in:
+-
+-
+-
+- INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000e3 : */ 0x9a0b0000,0x00000000,
+-/*
+- JUMP msg_in_restart, WHEN MSG_IN
+-
+-at 0x000000e5 : */ 0x870b0000,0x000003fc,
+-/*
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000e7 : */ 0x9e0b0000,0x00000000,
+-/*
+- INT int_err_unexpected_phase, WHEN DATA_OUT
+-
+-at 0x000000e9 : */ 0x980b0000,0x00000000,
+-/*
+- JUMP command_complete, WHEN STATUS
+-
+-at 0x000000eb : */ 0x830b0000,0x0000065c,
+-/*
+- JUMP other_in, WHEN NOT DATA_IN
+-
+-at 0x000000ed : */ 0x81030000,0x0000038c,
+-/*
+-
+-; TEMP should be OK, as we got here from a call in the user datain code.
+-
+- RETURN
+-
+-at 0x000000ef : */ 0x90080000,0x00000000,
+-/*
+-
+-
+-ENTRY other_transfer
+-other_transfer:
+- INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000f1 : */ 0x9a0b0000,0x00000000,
+-/*
+- CALL msg_in, WHEN MSG_IN
+-
+-at 0x000000f3 : */ 0x8f0b0000,0x0000041c,
+-/*
+- INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000f5 : */ 0x9e0b0000,0x00000000,
+-/*
+- INT int_err_unexpected_phase, WHEN DATA_OUT
+-
+-at 0x000000f7 : */ 0x980b0000,0x00000000,
+-/*
+- INT int_err_unexpected_phase, WHEN DATA_IN
+-
+-at 0x000000f9 : */ 0x990b0000,0x00000000,
+-/*
+- JUMP command_complete, WHEN STATUS
+-
+-at 0x000000fb : */ 0x830b0000,0x0000065c,
+-/*
+- JUMP other_transfer
+-
+-at 0x000000fd : */ 0x80080000,0x000003c4,
+-/*
+-
+-;
+-; msg_in_restart
+-; msg_in
+-; munge_msg
+-;
+-; PURPOSE : process messages from a target. msg_in is called when the
+-; caller hasn't read the first byte of the message. munge_msg
+-; is called when the caller has read the first byte of the message,
+-; and left it in SFBR. msg_in_restart is called when the caller
+-; hasn't read the first byte of the message, and wishes RETURN
+-; to transfer control back to the address of the conditional
+-; CALL instruction rather than to the instruction after it.
+-;
+-; Various int_* interrupts are generated when the host system
+-; needs to intervene, as is the case with SDTR, WDTR, and
+-; INITIATE RECOVERY messages.
+-;
+-; When the host system handles one of these interrupts,
+-; it can respond by reentering at reject_message,
+-; which rejects the message and returns control to
+-; the caller of msg_in or munge_msg, accept_message
+-; which clears ACK and returns control, or reply_message
+-; which sends the message pointed to by the DSA
+-; msgout_other table indirect field.
+-;
+-; DISCONNECT messages are handled by moving the command
+-; to the reconnect_dsa_queue.
+-
+-; NOTE: DSA should be valid when we get here - we cannot save both it
+-; and TEMP in this routine.
+-
+-;
+-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg
+-; only)
+-;
+-; CALLS : NO. The TEMP register isn't backed up to allow nested calls.
+-;
+-; MODIFIES : SCRATCH, DSA on DISCONNECT
+-;
+-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS,
+-; and normal return from message handlers running under
+-; Linux, control is returned to the caller. Receipt
+-; of DISCONNECT messages pass control to dsa_schedule.
+-;
+-ENTRY msg_in_restart
+-msg_in_restart:
+-; XXX - hackish
+-;
+-; Since it's easier to debug changes to the statically
+-; compiled code, rather than the dynamically generated
+-; stuff, such as
+-;
+-; MOVE x, y, WHEN data_phase
+-; CALL other_z, WHEN NOT data_phase
+-; MOVE x, y, WHEN data_phase
+-;
+-; I'd like to have certain routines (notably the message handler)
+-; restart on the conditional call rather than the next instruction.
+-;
+-; So, subtract 8 from the return address
+-
+- MOVE TEMP0 + 0xf8 TO TEMP0
+-
+-at 0x000000ff : */ 0x7e1cf800,0x00000000,
+-/*
+- MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY
+-
+-at 0x00000101 : */ 0x7f1dff00,0x00000000,
+-/*
+- MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY
+-
+-at 0x00000103 : */ 0x7f1eff00,0x00000000,
+-/*
+- MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY
+-
+-at 0x00000105 : */ 0x7f1fff00,0x00000000,
+-/*
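Since the SCRIPTS ALU is only eight bits wide, the four MOVEs above perform the 32-bit subtraction as byte-wise addition of two's-complement -8: 0xf8 into the low byte, then 0xff WITH CARRY into each higher byte. What they compute, in C:

typedef unsigned int u32;

static u32 temp_minus_8(u32 temp)
{
    unsigned b0 = (temp & 0xff) + 0xf8;
    unsigned b1 = ((temp >> 8)  & 0xff) + 0xff + (b0 >> 8);
    unsigned b2 = ((temp >> 16) & 0xff) + 0xff + (b1 >> 8);
    unsigned b3 = ((temp >> 24) & 0xff) + 0xff + (b2 >> 8);
    return (b0 & 0xff) | ((b1 & 0xff) << 8) |
           ((b2 & 0xff) << 16) | ((b3 & 0xff) << 24);
}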
+-
+-ENTRY msg_in
+-msg_in:
+- MOVE 1, msg_buf, WHEN MSG_IN
+-
+-at 0x00000107 : */ 0x0f000001,0x00000000,
+-/*
+-
+-munge_msg:
+- JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE
+-
+-at 0x00000109 : */ 0x800c0001,0x00000574,
+-/*
+- JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message
+-
+-at 0x0000010b : */ 0x800cdf20,0x00000464,
+-/*
+-;
+-; XXX - I've seen a handful of broken SCSI devices which fail to issue
+-; a SAVE POINTERS message before disconnecting in the middle of
+-; a transfer, assuming that the DATA POINTER will be implicitly
+-; restored.
+-;
+-; Historically, I've often done an implicit save when the DISCONNECT
+-; message is processed. We may want to consider having the option of
+-; doing that here.
+-;
+- JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER
+-
+-at 0x0000010d : */ 0x800c0002,0x0000046c,
+-/*
+- JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS
+-
+-at 0x0000010f : */ 0x800c0003,0x00000518,
+-/*
+- JUMP munge_disconnect, IF 0x04 ; DISCONNECT
+-
+-at 0x00000111 : */ 0x800c0004,0x0000056c,
+-/*
+- INT int_msg_1, IF 0x07 ; MESSAGE REJECT
+-
+-at 0x00000113 : */ 0x980c0007,0x01020000,
+-/*
+- INT int_msg_1, IF 0x0f ; INITIATE RECOVERY
+-
+-at 0x00000115 : */ 0x980c000f,0x01020000,
+-/*
+-
+-
+-
+- JUMP reject_message
+-
+-at 0x00000117 : */ 0x80080000,0x00000604,
+-/*
+-
+-munge_2:
+- JUMP reject_message
+-
+-at 0x00000119 : */ 0x80080000,0x00000604,
+-/*
+-;
+-; The SCSI standard allows targets to recover from transient
+-; error conditions by backing up the data pointer with a
+-; RESTORE POINTERS message.
+-;
+-; So, we must save and restore the _residual_ code as well as
+-; the current instruction pointer. Because of this messiness,
+-; it is simpler to put dynamic code in the dsa for this and to
+-; just do a simple jump down there.
+-;
+-
+-munge_save_data_pointer:
+-
+- ; We have something in TEMP here, so first we must save that
+- MOVE TEMP0 TO SFBR
+-
+-at 0x0000011b : */ 0x721c0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH0
+-
+-at 0x0000011d : */ 0x6a340000,0x00000000,
+-/*
+- MOVE TEMP1 TO SFBR
+-
+-at 0x0000011f : */ 0x721d0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH1
+-
+-at 0x00000121 : */ 0x6a350000,0x00000000,
+-/*
+- MOVE TEMP2 TO SFBR
+-
+-at 0x00000123 : */ 0x721e0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH2
+-
+-at 0x00000125 : */ 0x6a360000,0x00000000,
+-/*
+- MOVE TEMP3 TO SFBR
+-
+-at 0x00000127 : */ 0x721f0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH3
+-
+-at 0x00000129 : */ 0x6a370000,0x00000000,
+-/*
+- MOVE MEMORY 4, addr_scratch, jump_temp + 4
+-
+-at 0x0000012b : */ 0xc0000004,0x00000000,0x000009c8,
+-/*
+- ; Now restore DSA
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000012e : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+- MOVE DSA0 + dsa_save_data_pointer TO SFBR
+-
+-at 0x00000131 : */ 0x76100000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH0
+-
+-at 0x00000133 : */ 0x6a340000,0x00000000,
+-/*
+- MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-
+-at 0x00000135 : */ 0x7711ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH1
+-
+-at 0x00000137 : */ 0x6a350000,0x00000000,
+-/*
+- MOVE DSA2 + 0xff TO SFBR WITH CARRY
+-
+-at 0x00000139 : */ 0x7712ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH2
+-
+-at 0x0000013b : */ 0x6a360000,0x00000000,
+-/*
+- MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000013d : */ 0x7713ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH3
+-
+-at 0x0000013f : */ 0x6a370000,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4
+-
+-at 0x00000141 : */ 0xc0000004,0x00000000,0x00000514,
+-/*
+-
+-jump_dsa_save:
+- JUMP 0
+-
+-at 0x00000144 : */ 0x80080000,0x00000000,
+-/*
+-
+-munge_restore_pointers:
+-
+- ; The code at dsa_restore_pointers will RETURN, but we don't care
+- ; about TEMP here, as it will overwrite it anyway.
+-
+- MOVE DSA0 + dsa_restore_pointers TO SFBR
+-
+-at 0x00000146 : */ 0x76100000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH0
+-
+-at 0x00000148 : */ 0x6a340000,0x00000000,
+-/*
+- MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000014a : */ 0x7711ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH1
+-
+-at 0x0000014c : */ 0x6a350000,0x00000000,
+-/*
+- MOVE DSA2 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000014e : */ 0x7712ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH2
+-
+-at 0x00000150 : */ 0x6a360000,0x00000000,
+-/*
+- MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-
+-at 0x00000152 : */ 0x7713ff00,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH3
+-
+-at 0x00000154 : */ 0x6a370000,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4
+-
+-at 0x00000156 : */ 0xc0000004,0x00000000,0x00000568,
+-/*
+-
+-jump_dsa_restore:
+- JUMP 0
+-
+-at 0x00000159 : */ 0x80080000,0x00000000,
+-/*
+-
+-
+-munge_disconnect:
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+- JUMP dsa_schedule
+-
+-at 0x0000015b : */ 0x80080000,0x00000178,
+-/*
+-
+-
+-
+-
+-
+-munge_extended:
+- CLEAR ACK
+-
+-at 0x0000015d : */ 0x60000040,0x00000000,
+-/*
+- INT int_err_unexpected_phase, WHEN NOT MSG_IN
+-
+-at 0x0000015f : */ 0x9f030000,0x00000000,
+-/*
+- MOVE 1, msg_buf + 1, WHEN MSG_IN
+-
+-at 0x00000161 : */ 0x0f000001,0x00000001,
+-/*
+- JUMP munge_extended_2, IF 0x02
+-
+-at 0x00000163 : */ 0x800c0002,0x000005a4,
+-/*
+- JUMP munge_extended_3, IF 0x03
+-
+-at 0x00000165 : */ 0x800c0003,0x000005d4,
+-/*
+- JUMP reject_message
+-
+-at 0x00000167 : */ 0x80080000,0x00000604,
+-/*
+-
+-munge_extended_2:
+- CLEAR ACK
+-
+-at 0x00000169 : */ 0x60000040,0x00000000,
+-/*
+- MOVE 1, msg_buf + 2, WHEN MSG_IN
+-
+-at 0x0000016b : */ 0x0f000001,0x00000002,
+-/*
+- JUMP reject_message, IF NOT 0x02 ; Must be WDTR
+-
+-at 0x0000016d : */ 0x80040002,0x00000604,
+-/*
+- CLEAR ACK
+-
+-at 0x0000016f : */ 0x60000040,0x00000000,
+-/*
+- MOVE 1, msg_buf + 3, WHEN MSG_IN
+-
+-at 0x00000171 : */ 0x0f000001,0x00000003,
+-/*
+- INT int_msg_wdtr
+-
+-at 0x00000173 : */ 0x98080000,0x01000000,
+-/*
+-
+-munge_extended_3:
+- CLEAR ACK
+-
+-at 0x00000175 : */ 0x60000040,0x00000000,
+-/*
+- MOVE 1, msg_buf + 2, WHEN MSG_IN
+-
+-at 0x00000177 : */ 0x0f000001,0x00000002,
+-/*
+- JUMP reject_message, IF NOT 0x01 ; Must be SDTR
+-
+-at 0x00000179 : */ 0x80040001,0x00000604,
+-/*
+- CLEAR ACK
+-
+-at 0x0000017b : */ 0x60000040,0x00000000,
+-/*
+- MOVE 2, msg_buf + 3, WHEN MSG_IN
+-
+-at 0x0000017d : */ 0x0f000002,0x00000003,
+-/*
+- INT int_msg_sdtr
+-
+-at 0x0000017f : */ 0x98080000,0x01010000,
+-/*
+-
+-ENTRY reject_message
+-reject_message:
+- SET ATN
+-
+-at 0x00000181 : */ 0x58000008,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x00000183 : */ 0x60000040,0x00000000,
+-/*
+- MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT
+-
+-at 0x00000185 : */ 0x0e000001,0x00000000,
+-/*
+- RETURN
+-
+-at 0x00000187 : */ 0x90080000,0x00000000,
+-/*
+-
+-ENTRY accept_message
+-accept_message:
+- CLEAR ATN
+-
+-at 0x00000189 : */ 0x60000008,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x0000018b : */ 0x60000040,0x00000000,
+-/*
+- RETURN
+-
+-at 0x0000018d : */ 0x90080000,0x00000000,
+-/*
+-
+-ENTRY respond_message
+-respond_message:
+- SET ATN
+-
+-at 0x0000018f : */ 0x58000008,0x00000000,
+-/*
+- CLEAR ACK
+-
+-at 0x00000191 : */ 0x60000040,0x00000000,
+-/*
+- MOVE FROM dsa_msgout_other, WHEN MSG_OUT
+-
+-at 0x00000193 : */ 0x1e000000,0x00000068,
+-/*
+- RETURN
+-
+-at 0x00000195 : */ 0x90080000,0x00000000,
+-/*
+-
+-;
+-; command_complete
+-;
+-; PURPOSE : handle command termination when STATUS IN is detected by reading
+-; a status byte followed by a command termination message.
+-;
+-; Normal termination results in an INTFLY instruction, and
+-; the host system can pick out which command terminated by
+-; examining the MESSAGE and STATUS buffers of all currently
+-; executing commands;
+-;
+-; Abnormal (CHECK_CONDITION) termination results in an
+-; int_err_check_condition interrupt so that a REQUEST SENSE
+-; command can be issued out-of-order so that no other command
+-; clears the contingent allegiance condition.
+-;
+-;
+-; INPUTS : DSA - command
+-;
+-; CALLS : OK
+-;
+-; EXITS : On successful termination, control is passed to schedule.
+-; On abnormal termination, the user will usually modify the
+-; DSA fields and corresponding buffers and return control
+-; to select.
+-;
+-
+-ENTRY command_complete
+-command_complete:
+- MOVE FROM dsa_status, WHEN STATUS
+-
+-at 0x00000197 : */ 0x1b000000,0x00000060,
+-/*
+-
+- MOVE SFBR TO SCRATCH0 ; Save status
+-
+-at 0x00000199 : */ 0x6a340000,0x00000000,
+-/*
+-
+-ENTRY command_complete_msgin
+-command_complete_msgin:
+- MOVE FROM dsa_msgin, WHEN MSG_IN
+-
+-at 0x0000019b : */ 0x1f000000,0x00000058,
+-/*
+-; Indicate that we should be expecting a disconnect
+-
+-
+-
+-	; Above code cleared the Unexpected Disconnect bit; what do we do?
+-
+- CLEAR ACK
+-
+-at 0x0000019d : */ 0x60000040,0x00000000,
+-/*
+-
+- WAIT DISCONNECT
+-
+-at 0x0000019f : */ 0x48000000,0x00000000,
+-/*
+-
+-;
+-; The SCSI specification states that when a UNIT ATTENTION condition
+-; is pending, as indicated by a CHECK CONDITION status message,
+-; the target shall revert to asynchronous transfers. Since
+-; synchronous transfer parameters are maintained on a per INITIATOR/TARGET
+-; basis, and returning control to our scheduler could start a command
+-; running on another lun on that target using the old parameters, we must
+-; interrupt the host processor to get them changed, or change them ourselves.
+-;
+-; Once SCSI-II tagged queueing is implemented, things will be even more
+-; hairy, since contingent allegiance conditions exist on a per-target/lun
+-; basis, and issuing a new command with a different tag would clear it.
+-; In these cases, we must interrupt the host processor to get a request
+-; added to the HEAD of the queue with the request sense command, or we
+-; must automatically issue the request sense command.
+-
+-
+-
+-
+-
+-
+-
+- INT int_norm_emulateintfly
+-
+-at 0x000001a1 : */ 0x98080000,0x02060000,
+-/*
+-
+-
+-
+-
+-
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001a3 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+- JUMP schedule
+-
+-at 0x000001a6 : */ 0x80080000,0x00000000,
+-/*
+-command_failed:
+- INT int_err_check_condition
+-
+-at 0x000001a8 : */ 0x98080000,0x00030000,
+-/*
+-
+-
+-
+-
+-;
+-; wait_reselect
+-;
+-; PURPOSE : This is essentially the idle routine, where control lands
+-; when there are no new processes to schedule. wait_reselect
+-; waits for reselection, selection, and new commands.
+-;
+-; When a successful reselection occurs, with the aid
+-; of fixed up code in each DSA, wait_reselect walks the
+-; reconnect_dsa_queue, asking each dsa if the target ID
+-; and LUN match its own.
+-;
+-; If a match is found, a call is made back to reselected_ok,
+-; which, through the miracles of self-modifying code, extracts
+-; the found DSA from the reconnect_dsa_queue and then
+-; returns control to the DSA's thread of execution.
+-;
+-; INPUTS : NONE
+-;
+-; CALLS : OK
+-;
+-; MODIFIES : DSA
+-;
+-; EXITS : On successful reselection, control is returned to the
+-; DSA which called reselected_ok. If the WAIT RESELECT
+-; was interrupted by a new command's arrival signaled by
+-; SIG_P, control is passed to schedule. If the NCR is
+-; selected, the host system is interrupted with an
+-; int_err_selected which is usually responded to by
+-; setting DSP to the target_abort address.
+-
+-ENTRY wait_reselect
+-wait_reselect:
+-
+-
+-
+-
+-
+-
+- WAIT RESELECT wait_reselect_failed
+-
+-at 0x000001aa : */ 0x50000000,0x00000800,
+-/*
+-
+-reselected:
+-
+-
+-
+- CLEAR TARGET
+-
+-at 0x000001ac : */ 0x60000200,0x00000000,
+-/*
+-
+- ; Read all data needed to reestablish the nexus -
+- MOVE 1, reselected_identify, WHEN MSG_IN
+-
+-at 0x000001ae : */ 0x0f000001,0x00000000,
+-/*
+- ; We used to CLEAR ACK here.
+-
+-
+-
+-
+-
+- ; Point DSA at the current head of the disconnected queue.
+-
+- MOVE MEMORY 4, reconnect_dsa_head, addr_scratch
+-
+-at 0x000001b0 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x000001b3 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+- ; Fix the update-next pointer so that the reconnect_dsa_head
+- ; pointer is the one that will be updated if this DSA is a hit
+- ; and we remove it from the queue.
+-
+- MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8
+-
+-at 0x000001b6 : */ 0xc0000004,0x00000000,0x000007ec,
+-/*
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001b9 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-ENTRY reselected_check_next
+-reselected_check_next:
+-
+-
+-
+- ; Check for a NULL pointer.
+- MOVE DSA0 TO SFBR
+-
+-at 0x000001bc : */ 0x72100000,0x00000000,
+-/*
+- JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001be : */ 0x80040000,0x00000738,
+-/*
+- MOVE DSA1 TO SFBR
+-
+-at 0x000001c0 : */ 0x72110000,0x00000000,
+-/*
+- JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001c2 : */ 0x80040000,0x00000738,
+-/*
+- MOVE DSA2 TO SFBR
+-
+-at 0x000001c4 : */ 0x72120000,0x00000000,
+-/*
+- JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001c6 : */ 0x80040000,0x00000738,
+-/*
+- MOVE DSA3 TO SFBR
+-
+-at 0x000001c8 : */ 0x72130000,0x00000000,
+-/*
+- JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001ca : */ 0x80040000,0x00000738,
+-/*
+- INT int_err_unexpected_reselect
+-
+-at 0x000001cc : */ 0x98080000,0x00020000,
+-/*
+-
+-reselected_not_end:
+- ;
+- ; XXX the ALU is only eight bits wide, and the assembler
+-	; won't do the dirty work for us. As long as dsa_check_reselect
+- ; is negative, we need to sign extend with 1 bits to the full
+- ; 32 bit width of the address.
+- ;
+- ; A potential work around would be to have a known alignment
+- ; of the DSA structure such that the base address plus
+- ; dsa_check_reselect doesn't require carrying from bytes
+- ; higher than the LSB.
+- ;
+-
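Concretely, the sign extension being built by hand below is nothing more than this cast (a sketch; 'disp' stands for the patched-in, negative dsa_check_reselect byte, and the 0xff WITH CARRY adds supply its upper 1-bits):

typedef unsigned int u32;

static u32 sign_extend_8(unsigned char disp)
{
    return (u32)(int)(signed char)disp;  /* e.g. 0x90 -> 0xffffff90 */
}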
+- MOVE DSA0 TO SFBR
+-
+-at 0x000001ce : */ 0x72100000,0x00000000,
+-/*
+- MOVE SFBR + dsa_check_reselect TO SCRATCH0
+-
+-at 0x000001d0 : */ 0x6e340000,0x00000000,
+-/*
+- MOVE DSA1 TO SFBR
+-
+-at 0x000001d2 : */ 0x72110000,0x00000000,
+-/*
+- MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY
+-
+-at 0x000001d4 : */ 0x6f35ff00,0x00000000,
+-/*
+- MOVE DSA2 TO SFBR
+-
+-at 0x000001d6 : */ 0x72120000,0x00000000,
+-/*
+- MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY
+-
+-at 0x000001d8 : */ 0x6f36ff00,0x00000000,
+-/*
+- MOVE DSA3 TO SFBR
+-
+-at 0x000001da : */ 0x72130000,0x00000000,
+-/*
+- MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY
+-
+-at 0x000001dc : */ 0x6f37ff00,0x00000000,
+-/*
+-
+-
+- MOVE MEMORY 4, addr_scratch, reselected_check + 4
+-
+-at 0x000001de : */ 0xc0000004,0x00000000,0x00000794,
+-/*
+-
+-
+- ; Time to correct DSA following memory move
+- MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001e1 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-reselected_check:
+- JUMP 0
+-
+-at 0x000001e4 : */ 0x80080000,0x00000000,
+-/*
+-
+-
+-;
+-;
+-
+-; We have problems here - the memory move corrupts TEMP and DSA. This
+-; routine is called from DSA code, and patched from many places. Scratch
+-; is probably free when it is called.
+-; We have to:
+-; copy temp to scratch, one byte at a time
+-; write scratch to patch a jump in place of the return
+-; do the move memory
+-; jump to the patched in return address
+-; DSA is corrupt when we get here, and can be left corrupt
+-
+-ENTRY reselected_ok
+-reselected_ok:
+- MOVE TEMP0 TO SFBR
+-
+-at 0x000001e6 : */ 0x721c0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH0
+-
+-at 0x000001e8 : */ 0x6a340000,0x00000000,
+-/*
+- MOVE TEMP1 TO SFBR
+-
+-at 0x000001ea : */ 0x721d0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH1
+-
+-at 0x000001ec : */ 0x6a350000,0x00000000,
+-/*
+- MOVE TEMP2 TO SFBR
+-
+-at 0x000001ee : */ 0x721e0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH2
+-
+-at 0x000001f0 : */ 0x6a360000,0x00000000,
+-/*
+- MOVE TEMP3 TO SFBR
+-
+-at 0x000001f2 : */ 0x721f0000,0x00000000,
+-/*
+- MOVE SFBR TO SCRATCH3
+-
+-at 0x000001f4 : */ 0x6a370000,0x00000000,
+-/*
+- MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4
+-
+-at 0x000001f6 : */ 0xc0000004,0x00000000,0x000007f4,
+-/*
+-reselected_ok_patch:
+- MOVE MEMORY 4, 0, 0
+-
+-at 0x000001f9 : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+-reselected_ok_jump:
+- JUMP 0
+-
+-at 0x000001fc : */ 0x80080000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-selected:
+- INT int_err_selected;
+-
+-at 0x000001fe : */ 0x98080000,0x00010000,
+-/*
+-
+-;
+-; A select or reselect failure can be caused by one of three conditions :
+-; 1. SIG_P was set. This will be the case if the user has written
+-; a new value to a previously NULL head of the issue queue.
+-;
+-; 2. The NCR53c810 was selected or reselected by another device.
+-;
+-; 3. The bus was already busy since we were selected or reselected
+-; before starting the command.
+-
+-wait_reselect_failed:
+-
+-
+-
+-; Check selected bit.
+-
+- ; Must work out how to tell if we are selected....
+-
+-
+-
+-
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+-
+-at 0x00000200 : */ 0x74164000,0x00000000,
+-/*
+- JUMP schedule, IF 0x40
+-
+-at 0x00000202 : */ 0x800c0040,0x00000000,
+-/*
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+- MOVE ISTAT & 0x08 TO SFBR
+-
+-at 0x00000204 : */ 0x74210800,0x00000000,
+-/*
+- JUMP reselected, IF 0x08
+-
+-at 0x00000206 : */ 0x800c0008,0x000006b0,
+-/*
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-
+-
+-
+- INT int_debug_panic
+-
+-at 0x00000208 : */ 0x98080000,0x030b0000,
+-/*
+-
+-
+-
+-select_failed:
+-
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x0000020a : */ 0x7a1b1000,0x00000000,
+-/*
+-
+-
+-
+-
+-; Otherwise, mask the selected and reselected bits off SIST0
+-
+- ; Let's assume we don't get selected for now
+- MOVE SSTAT0 & 0x10 TO SFBR
+-
+-at 0x0000020c : */ 0x740d1000,0x00000000,
+-/*
+-
+-
+-
+-
+- JUMP reselected, IF 0x10
+-
+-at 0x0000020e : */ 0x800c0010,0x000006b0,
+-/*
+-; If SIGP is set, the user just gave us another command, and
+-; we should restart or return to the scheduler.
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+-
+-at 0x00000210 : */ 0x74164000,0x00000000,
+-/*
+- JUMP select, IF 0x40
+-
+-at 0x00000212 : */ 0x800c0040,0x000001f8,
+-/*
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+-; FIXME: is this really necessary?
+- MOVE ISTAT & 0x08 TO SFBR
+-
+-at 0x00000214 : */ 0x74210800,0x00000000,
+-/*
+- JUMP reselected, IF 0x08
+-
+-at 0x00000216 : */ 0x800c0008,0x000006b0,
+-/*
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-
+-
+-
+- INT int_debug_panic
+-
+-at 0x00000218 : */ 0x98080000,0x030b0000,
+-/*
+-
+-
+-;
+-; test_1
+-; test_2
+-;
+-; PURPOSE : run some verification tests on the NCR. test_1
+-; copies test_src to test_dest and interrupts the host
+-; processor, testing for cache coherency and interrupt
+-; problems in the process.
+-;
+-; test_2 runs a command with offsets relative to the
+-; DSA on entry, and is useful for miscellaneous experimentation.
+-;
+-
+-; Verify that interrupts are working correctly and that we don't
+-; have a cache invalidation problem.
+-
+-ABSOLUTE test_src = 0, test_dest = 0
+-ENTRY test_1
+-test_1:
+- MOVE MEMORY 4, test_src, test_dest
+-
+-at 0x0000021a : */ 0xc0000004,0x00000000,0x00000000,
+-/*
+- INT int_test_1
+-
+-at 0x0000021d : */ 0x98080000,0x04000000,
+-/*
+-
+-;
+-; Run arbitrary commands, with test code establishing a DSA
+-;
+-
+-ENTRY test_2
+-test_2:
+- CLEAR TARGET
+-
+-at 0x0000021f : */ 0x60000200,0x00000000,
+-/*
+-
+- ; Enable selection timer
+-
+-
+-
+- MOVE CTEST7 & 0xef TO CTEST7
+-
+-at 0x00000221 : */ 0x7c1bef00,0x00000000,
+-/*
+-
+-
+- SELECT ATN FROM 0, test_2_fail
+-
+-at 0x00000223 : */ 0x43000000,0x000008dc,
+-/*
+- JUMP test_2_msgout, WHEN MSG_OUT
+-
+-at 0x00000225 : */ 0x860b0000,0x0000089c,
+-/*
+-ENTRY test_2_msgout
+-test_2_msgout:
+-
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000227 : */ 0x7a1b1000,0x00000000,
+-/*
+-
+- MOVE FROM 8, WHEN MSG_OUT
+-
+-at 0x00000229 : */ 0x1e000000,0x00000008,
+-/*
+- MOVE FROM 16, WHEN CMD
+-
+-at 0x0000022b : */ 0x1a000000,0x00000010,
+-/*
+- MOVE FROM 24, WHEN DATA_IN
+-
+-at 0x0000022d : */ 0x19000000,0x00000018,
+-/*
+- MOVE FROM 32, WHEN STATUS
+-
+-at 0x0000022f : */ 0x1b000000,0x00000020,
+-/*
+- MOVE FROM 40, WHEN MSG_IN
+-
+-at 0x00000231 : */ 0x1f000000,0x00000028,
+-/*
+-
+-
+-
+- CLEAR ACK
+-
+-at 0x00000233 : */ 0x60000040,0x00000000,
+-/*
+- WAIT DISCONNECT
+-
+-at 0x00000235 : */ 0x48000000,0x00000000,
+-/*
+-test_2_fail:
+-
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000237 : */ 0x7a1b1000,0x00000000,
+-/*
+-
+- INT int_test_2
+-
+-at 0x00000239 : */ 0x98080000,0x04010000,
+-/*
+-
+-ENTRY debug_break
+-debug_break:
+- INT int_debug_break
+-
+-at 0x0000023b : */ 0x98080000,0x03000000,
+-/*
+-
+-;
+-; initiator_abort
+-; target_abort
+-;
+-; PURPOSE : Abort the currently established nexus from within initiator
+-; or target mode.
+-;
+-;
+-
+-ENTRY target_abort
+-target_abort:
+- SET TARGET
+-
+-at 0x0000023d : */ 0x58000200,0x00000000,
+-/*
+- DISCONNECT
+-
+-at 0x0000023f : */ 0x48000000,0x00000000,
+-/*
+- CLEAR TARGET
+-
+-at 0x00000241 : */ 0x60000200,0x00000000,
+-/*
+- JUMP schedule
+-
+-at 0x00000243 : */ 0x80080000,0x00000000,
+-/*
+-
+-ENTRY initiator_abort
+-initiator_abort:
+- SET ATN
+-
+-at 0x00000245 : */ 0x58000008,0x00000000,
+-/*
+-;
+-; The SCSI-I specification says that targets may go into MSG out at
+-; their leisure upon receipt of the ATN signal. On all versions of the
+-; specification, we can't change phases until REQ transitions true->false,
+-; so we need to sink/source one byte of data to allow the transition.
+-;
+-; For the sake of safety, we'll only source one byte of data in all
+-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an
+-; arbitrary number of bytes.
+- JUMP spew_cmd, WHEN CMD
+-
+-at 0x00000247 : */ 0x820b0000,0x0000094c,
+-/*
+- JUMP eat_msgin, WHEN MSG_IN
+-
+-at 0x00000249 : */ 0x870b0000,0x0000095c,
+-/*
+- JUMP eat_datain, WHEN DATA_IN
+-
+-at 0x0000024b : */ 0x810b0000,0x0000098c,
+-/*
+- JUMP eat_status, WHEN STATUS
+-
+-at 0x0000024d : */ 0x830b0000,0x00000974,
+-/*
+- JUMP spew_dataout, WHEN DATA_OUT
+-
+-at 0x0000024f : */ 0x800b0000,0x000009a4,
+-/*
+- JUMP sated
+-
+-at 0x00000251 : */ 0x80080000,0x000009ac,
+-/*
+-spew_cmd:
+- MOVE 1, NCR53c7xx_zero, WHEN CMD
+-
+-at 0x00000253 : */ 0x0a000001,0x00000000,
+-/*
+- JUMP sated
+-
+-at 0x00000255 : */ 0x80080000,0x000009ac,
+-/*
+-eat_msgin:
+- MOVE 1, NCR53c7xx_sink, WHEN MSG_IN
+-
+-at 0x00000257 : */ 0x0f000001,0x00000000,
+-/*
+- JUMP eat_msgin, WHEN MSG_IN
+-
+-at 0x00000259 : */ 0x870b0000,0x0000095c,
+-/*
+- JUMP sated
+-
+-at 0x0000025b : */ 0x80080000,0x000009ac,
+-/*
+-eat_status:
+- MOVE 1, NCR53c7xx_sink, WHEN STATUS
+-
+-at 0x0000025d : */ 0x0b000001,0x00000000,
+-/*
+- JUMP eat_status, WHEN STATUS
+-
+-at 0x0000025f : */ 0x830b0000,0x00000974,
+-/*
+- JUMP sated
+-
+-at 0x00000261 : */ 0x80080000,0x000009ac,
+-/*
+-eat_datain:
+- MOVE 1, NCR53c7xx_sink, WHEN DATA_IN
+-
+-at 0x00000263 : */ 0x09000001,0x00000000,
+-/*
+- JUMP eat_datain, WHEN DATA_IN
+-
+-at 0x00000265 : */ 0x810b0000,0x0000098c,
+-/*
+- JUMP sated
+-
+-at 0x00000267 : */ 0x80080000,0x000009ac,
+-/*
+-spew_dataout:
+- MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT
+-
+-at 0x00000269 : */ 0x08000001,0x00000000,
+-/*
+-sated:
+-
+-
+-
+- MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT
+-
+-at 0x0000026b : */ 0x0e000001,0x00000000,
+-/*
+- WAIT DISCONNECT
+-
+-at 0x0000026d : */ 0x48000000,0x00000000,
+-/*
+- INT int_norm_aborted
+-
+-at 0x0000026f : */ 0x98080000,0x02040000,
+-/*
+-
+-
+-
+-
+-; Little patched jump, used to overcome problems with TEMP getting
+-; corrupted on memory moves.
+-
+-jump_temp:
+- JUMP 0
+-
+-at 0x00000271 : */ 0x80080000,0x00000000,
+-};
+-
+-#define A_NCR53c7xx_msg_abort 0x00000000
+-static u32 A_NCR53c7xx_msg_abort_used[] __attribute((unused)) = {
+- 0x0000026c,
+-};
+-
+-#define A_NCR53c7xx_msg_reject 0x00000000
+-static u32 A_NCR53c7xx_msg_reject_used[] __attribute((unused)) = {
+- 0x00000186,
+-};
+-
+-#define A_NCR53c7xx_sink 0x00000000
+-static u32 A_NCR53c7xx_sink_used[] __attribute((unused)) = {
+- 0x00000258,
+- 0x0000025e,
+- 0x00000264,
+-};
+-
+-#define A_NCR53c7xx_zero 0x00000000
+-static u32 A_NCR53c7xx_zero_used[] __attribute((unused)) = {
+- 0x00000254,
+- 0x0000026a,
+-};
+-
+-#define A_NOP_insn 0x00000000
+-static u32 A_NOP_insn_used[] __attribute((unused)) = {
+- 0x00000017,
+-};
+-
+-#define A_addr_dsa 0x00000000
+-static u32 A_addr_dsa_used[] __attribute((unused)) = {
+- 0x0000000f,
+- 0x00000026,
+- 0x00000033,
+- 0x00000040,
+- 0x00000055,
+- 0x00000079,
+- 0x0000008e,
+- 0x000000bc,
+- 0x000000d2,
+- 0x00000130,
+- 0x000001a5,
+- 0x000001bb,
+- 0x000001e3,
+-};
+-
+-#define A_addr_reconnect_dsa_head 0x00000000
+-static u32 A_addr_reconnect_dsa_head_used[] __attribute((unused)) = {
+- 0x000001b7,
+-};
+-
+-#define A_addr_scratch 0x00000000
+-static u32 A_addr_scratch_used[] __attribute((unused)) = {
+- 0x00000002,
+- 0x00000004,
+- 0x00000008,
+- 0x00000020,
+- 0x00000022,
+- 0x00000049,
+- 0x00000060,
+- 0x0000006a,
+- 0x00000071,
+- 0x00000073,
+- 0x000000ab,
+- 0x000000b5,
+- 0x000000c1,
+- 0x000000cb,
+- 0x0000012c,
+- 0x00000142,
+- 0x00000157,
+- 0x000001b2,
+- 0x000001b4,
+- 0x000001df,
+- 0x000001f7,
+-};
+-
+-#define A_addr_temp 0x00000000
+-static u32 A_addr_temp_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_memory_to_memory 0x00000000
+-static u32 A_dmode_memory_to_memory_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_memory_to_ncr 0x00000000
+-static u32 A_dmode_memory_to_ncr_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_ncr_to_memory 0x00000000
+-static u32 A_dmode_ncr_to_memory_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_check_reselect 0x00000000
+-static u32 A_dsa_check_reselect_used[] __attribute((unused)) = {
+- 0x000001d0,
+-};
+-
+-#define A_dsa_cmdout 0x00000048
+-static u32 A_dsa_cmdout_used[] __attribute((unused)) = {
+- 0x0000009a,
+-};
+-
+-#define A_dsa_cmnd 0x00000038
+-static u32 A_dsa_cmnd_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_datain 0x00000054
+-static u32 A_dsa_datain_used[] __attribute((unused)) = {
+- 0x000000c2,
+-};
+-
+-#define A_dsa_dataout 0x00000050
+-static u32 A_dsa_dataout_used[] __attribute((unused)) = {
+- 0x000000ac,
+-};
+-
+-#define A_dsa_end 0x00000070
+-static u32 A_dsa_end_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_fields_start 0x00000000
+-static u32 A_dsa_fields_start_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_msgin 0x00000058
+-static u32 A_dsa_msgin_used[] __attribute((unused)) = {
+- 0x0000019c,
+-};
+-
+-#define A_dsa_msgout 0x00000040
+-static u32 A_dsa_msgout_used[] __attribute((unused)) = {
+- 0x00000089,
+-};
+-
+-#define A_dsa_msgout_other 0x00000068
+-static u32 A_dsa_msgout_other_used[] __attribute((unused)) = {
+- 0x00000194,
+-};
+-
+-#define A_dsa_next 0x00000030
+-static u32 A_dsa_next_used[] __attribute((unused)) = {
+- 0x00000061,
+-};
+-
+-#define A_dsa_restore_pointers 0x00000000
+-static u32 A_dsa_restore_pointers_used[] __attribute((unused)) = {
+- 0x00000146,
+-};
+-
+-#define A_dsa_save_data_pointer 0x00000000
+-static u32 A_dsa_save_data_pointer_used[] __attribute((unused)) = {
+- 0x00000131,
+-};
+-
+-#define A_dsa_select 0x0000003c
+-static u32 A_dsa_select_used[] __attribute((unused)) = {
+- 0x00000082,
+-};
+-
+-#define A_dsa_sscf_710 0x00000000
+-static u32 A_dsa_sscf_710_used[] __attribute((unused)) = {
+- 0x00000007,
+-};
+-
+-#define A_dsa_status 0x00000060
+-static u32 A_dsa_status_used[] __attribute((unused)) = {
+- 0x00000198,
+-};
+-
+-#define A_dsa_temp_addr_array_value 0x00000000
+-static u32 A_dsa_temp_addr_array_value_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_temp_addr_dsa_value 0x00000000
+-static u32 A_dsa_temp_addr_dsa_value_used[] __attribute((unused)) = {
+- 0x00000001,
+-};
+-
+-#define A_dsa_temp_addr_new_value 0x00000000
+-static u32 A_dsa_temp_addr_new_value_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_temp_addr_next 0x00000000
+-static u32 A_dsa_temp_addr_next_used[] __attribute((unused)) = {
+- 0x0000001c,
+- 0x0000004f,
+-};
+-
+-#define A_dsa_temp_addr_residual 0x00000000
+-static u32 A_dsa_temp_addr_residual_used[] __attribute((unused)) = {
+- 0x0000002d,
+- 0x0000003b,
+-};
+-
+-#define A_dsa_temp_addr_saved_pointer 0x00000000
+-static u32 A_dsa_temp_addr_saved_pointer_used[] __attribute((unused)) = {
+- 0x0000002b,
+- 0x00000037,
+-};
+-
+-#define A_dsa_temp_addr_saved_residual 0x00000000
+-static u32 A_dsa_temp_addr_saved_residual_used[] __attribute((unused)) = {
+- 0x0000002e,
+- 0x0000003a,
+-};
+-
+-#define A_dsa_temp_lun 0x00000000
+-static u32 A_dsa_temp_lun_used[] __attribute((unused)) = {
+- 0x0000004c,
+-};
+-
+-#define A_dsa_temp_next 0x00000000
+-static u32 A_dsa_temp_next_used[] __attribute((unused)) = {
+- 0x0000001f,
+-};
+-
+-#define A_dsa_temp_sync 0x00000000
+-static u32 A_dsa_temp_sync_used[] __attribute((unused)) = {
+- 0x00000057,
+-};
+-
+-#define A_dsa_temp_target 0x00000000
+-static u32 A_dsa_temp_target_used[] __attribute((unused)) = {
+- 0x00000045,
+-};
+-
+-#define A_emulfly 0x00000000
+-static u32 A_emulfly_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_debug_break 0x03000000
+-static u32 A_int_debug_break_used[] __attribute((unused)) = {
+- 0x0000023c,
+-};
+-
+-#define A_int_debug_panic 0x030b0000
+-static u32 A_int_debug_panic_used[] __attribute((unused)) = {
+- 0x00000209,
+- 0x00000219,
+-};
+-
+-#define A_int_err_check_condition 0x00030000
+-static u32 A_int_err_check_condition_used[] __attribute((unused)) = {
+- 0x000001a9,
+-};
+-
+-#define A_int_err_no_phase 0x00040000
+-static u32 A_int_err_no_phase_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_err_selected 0x00010000
+-static u32 A_int_err_selected_used[] __attribute((unused)) = {
+- 0x000001ff,
+-};
+-
+-#define A_int_err_unexpected_phase 0x00000000
+-static u32 A_int_err_unexpected_phase_used[] __attribute((unused)) = {
+- 0x00000092,
+- 0x00000098,
+- 0x000000a0,
+- 0x000000d6,
+- 0x000000da,
+- 0x000000dc,
+- 0x000000e4,
+- 0x000000e8,
+- 0x000000ea,
+- 0x000000f2,
+- 0x000000f6,
+- 0x000000f8,
+- 0x000000fa,
+- 0x00000160,
+-};
+-
+-#define A_int_err_unexpected_reselect 0x00020000
+-static u32 A_int_err_unexpected_reselect_used[] __attribute((unused)) = {
+- 0x000001cd,
+-};
+-
+-#define A_int_msg_1 0x01020000
+-static u32 A_int_msg_1_used[] __attribute((unused)) = {
+- 0x00000114,
+- 0x00000116,
+-};
+-
+-#define A_int_msg_sdtr 0x01010000
+-static u32 A_int_msg_sdtr_used[] __attribute((unused)) = {
+- 0x00000180,
+-};
+-
+-#define A_int_msg_wdtr 0x01000000
+-static u32 A_int_msg_wdtr_used[] __attribute((unused)) = {
+- 0x00000174,
+-};
+-
+-#define A_int_norm_aborted 0x02040000
+-static u32 A_int_norm_aborted_used[] __attribute((unused)) = {
+- 0x00000270,
+-};
+-
+-#define A_int_norm_command_complete 0x02020000
+-static u32 A_int_norm_command_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_disconnected 0x02030000
+-static u32 A_int_norm_disconnected_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_emulateintfly 0x02060000
+-static u32 A_int_norm_emulateintfly_used[] __attribute((unused)) = {
+- 0x000001a2,
+-};
+-
+-#define A_int_norm_reselect_complete 0x02010000
+-static u32 A_int_norm_reselect_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_reset 0x02050000
+-static u32 A_int_norm_reset_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_select_complete 0x02000000
+-static u32 A_int_norm_select_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_test_1 0x04000000
+-static u32 A_int_test_1_used[] __attribute((unused)) = {
+- 0x0000021e,
+-};
+-
+-#define A_int_test_2 0x04010000
+-static u32 A_int_test_2_used[] __attribute((unused)) = {
+- 0x0000023a,
+-};
+-
+-#define A_int_test_3 0x04020000
+-static u32 A_int_test_3_used[] __attribute((unused)) = {
+-};
+-
+-#define A_msg_buf 0x00000000
+-static u32 A_msg_buf_used[] __attribute((unused)) = {
+- 0x00000108,
+- 0x00000162,
+- 0x0000016c,
+- 0x00000172,
+- 0x00000178,
+- 0x0000017e,
+-};
+-
+-#define A_reconnect_dsa_head 0x00000000
+-static u32 A_reconnect_dsa_head_used[] __attribute((unused)) = {
+- 0x0000006d,
+- 0x00000074,
+- 0x000001b1,
+-};
+-
+-#define A_reselected_identify 0x00000000
+-static u32 A_reselected_identify_used[] __attribute((unused)) = {
+- 0x00000048,
+- 0x000001af,
+-};
+-
+-#define A_reselected_tag 0x00000000
+-static u32 A_reselected_tag_used[] __attribute((unused)) = {
+-};
+-
+-#define A_saved_dsa 0x00000000
+-static u32 A_saved_dsa_used[] __attribute((unused)) = {
+- 0x00000005,
+- 0x0000000e,
+- 0x00000023,
+- 0x00000025,
+- 0x00000032,
+- 0x0000003f,
+- 0x00000054,
+- 0x0000005f,
+- 0x00000070,
+- 0x00000078,
+- 0x0000008d,
+- 0x000000aa,
+- 0x000000bb,
+- 0x000000c0,
+- 0x000000d1,
+- 0x0000012f,
+- 0x000001a4,
+- 0x000001b5,
+- 0x000001ba,
+- 0x000001e2,
+-};
+-
+-#define A_schedule 0x00000000
+-static u32 A_schedule_used[] __attribute((unused)) = {
+- 0x0000007d,
+- 0x000001a7,
+- 0x00000203,
+- 0x00000244,
+-};
+-
+-#define A_test_dest 0x00000000
+-static u32 A_test_dest_used[] __attribute((unused)) = {
+- 0x0000021c,
+-};
+-
+-#define A_test_src 0x00000000
+-static u32 A_test_src_used[] __attribute((unused)) = {
+- 0x0000021b,
+-};
+-
+-#define Ent_accept_message 0x00000624
+-#define Ent_cmdout_cmdout 0x00000264
+-#define Ent_command_complete 0x0000065c
+-#define Ent_command_complete_msgin 0x0000066c
+-#define Ent_data_transfer 0x0000026c
+-#define Ent_datain_to_jump 0x00000334
+-#define Ent_debug_break 0x000008ec
+-#define Ent_dsa_code_begin 0x00000000
+-#define Ent_dsa_code_check_reselect 0x0000010c
+-#define Ent_dsa_code_fix_jump 0x00000058
+-#define Ent_dsa_code_restore_pointers 0x000000d8
+-#define Ent_dsa_code_save_data_pointer 0x000000a4
+-#define Ent_dsa_code_template 0x00000000
+-#define Ent_dsa_code_template_end 0x00000178
+-#define Ent_dsa_schedule 0x00000178
+-#define Ent_dsa_zero 0x00000178
+-#define Ent_end_data_transfer 0x000002a4
+-#define Ent_initiator_abort 0x00000914
+-#define Ent_msg_in 0x0000041c
+-#define Ent_msg_in_restart 0x000003fc
+-#define Ent_other_in 0x0000038c
+-#define Ent_other_out 0x00000354
+-#define Ent_other_transfer 0x000003c4
+-#define Ent_reject_message 0x00000604
+-#define Ent_reselected_check_next 0x000006f0
+-#define Ent_reselected_ok 0x00000798
+-#define Ent_respond_message 0x0000063c
+-#define Ent_select 0x000001f8
+-#define Ent_select_msgout 0x00000218
+-#define Ent_target_abort 0x000008f4
+-#define Ent_test_1 0x00000868
+-#define Ent_test_2 0x0000087c
+-#define Ent_test_2_msgout 0x0000089c
+-#define Ent_wait_reselect 0x000006a8
+-static u32 LABELPATCHES[] __attribute((unused)) = {
+- 0x00000011,
+- 0x0000001a,
+- 0x0000001d,
+- 0x00000028,
+- 0x0000002a,
+- 0x00000035,
+- 0x00000038,
+- 0x00000042,
+- 0x00000050,
+- 0x00000052,
+- 0x0000006b,
+- 0x00000083,
+- 0x00000085,
+- 0x00000090,
+- 0x00000094,
+- 0x00000096,
+- 0x0000009c,
+- 0x0000009e,
+- 0x000000a2,
+- 0x000000a4,
+- 0x000000a6,
+- 0x000000a8,
+- 0x000000b6,
+- 0x000000b9,
+- 0x000000cc,
+- 0x000000cf,
+- 0x000000d8,
+- 0x000000de,
+- 0x000000e0,
+- 0x000000e6,
+- 0x000000ec,
+- 0x000000ee,
+- 0x000000f4,
+- 0x000000fc,
+- 0x000000fe,
+- 0x0000010a,
+- 0x0000010c,
+- 0x0000010e,
+- 0x00000110,
+- 0x00000112,
+- 0x00000118,
+- 0x0000011a,
+- 0x0000012d,
+- 0x00000143,
+- 0x00000158,
+- 0x0000015c,
+- 0x00000164,
+- 0x00000166,
+- 0x00000168,
+- 0x0000016e,
+- 0x0000017a,
+- 0x000001ab,
+- 0x000001b8,
+- 0x000001bf,
+- 0x000001c3,
+- 0x000001c7,
+- 0x000001cb,
+- 0x000001e0,
+- 0x000001f8,
+- 0x00000207,
+- 0x0000020f,
+- 0x00000213,
+- 0x00000217,
+- 0x00000224,
+- 0x00000226,
+- 0x00000248,
+- 0x0000024a,
+- 0x0000024c,
+- 0x0000024e,
+- 0x00000250,
+- 0x00000252,
+- 0x00000256,
+- 0x0000025a,
+- 0x0000025c,
+- 0x00000260,
+- 0x00000262,
+- 0x00000266,
+- 0x00000268,
+-};
+-
+-static struct {
+- u32 offset;
+- void *address;
+-} EXTERNAL_PATCHES[] __attribute((unused)) = {
+-};
+-
+-static u32 INSTRUCTIONS __attribute((unused)) = 290;
+-static u32 PATCHES __attribute((unused)) = 78;
+-static u32 EXTERNAL_PATCHES_LEN __attribute((unused)) = 0;
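The tables above are the script_asm.pl output consumed by the (now removed) 53c7xx driver: SCRIPT[] holds the assembled NCR instructions, each A_*_used[] array records where an absolute symbol is referenced, LABELPATCHES lists every instruction word that holds a script-relative address, and INSTRUCTIONS/PATCHES give the totals. A minimal sketch of how such a label table can be used to relocate the script; relocate() and its parameters are illustrative, not the driver's actual fixup code:

#include <linux/types.h>

/* Sketch only: `script` is a writable copy of SCRIPT[], `base` is the
 * bus address it will run from.  Each LABELPATCHES[] entry indexes a
 * 32-bit word that holds a script-relative jump target; adding the
 * base address makes it absolute for the chip. */
static void relocate(u32 *script, u32 base)
{
	u32 i;

	for (i = 0; i < PATCHES; i++)
		script[LABELPATCHES[i]] += base;
}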
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped
+--- linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped 1969-12-31 19:00:00.000000000 -0500
+@@ -1,102 +0,0 @@
+-#undef A_NCR53c7xx_msg_abort
+-#undef A_NCR53c7xx_msg_reject
+-#undef A_NCR53c7xx_sink
+-#undef A_NCR53c7xx_zero
+-#undef A_NOP_insn
+-#undef A_addr_dsa
+-#undef A_addr_reconnect_dsa_head
+-#undef A_addr_scratch
+-#undef A_addr_temp
+-#undef A_dmode_memory_to_memory
+-#undef A_dmode_memory_to_ncr
+-#undef A_dmode_ncr_to_memory
+-#undef A_dsa_check_reselect
+-#undef A_dsa_cmdout
+-#undef A_dsa_cmnd
+-#undef A_dsa_datain
+-#undef A_dsa_dataout
+-#undef A_dsa_end
+-#undef A_dsa_fields_start
+-#undef A_dsa_msgin
+-#undef A_dsa_msgout
+-#undef A_dsa_msgout_other
+-#undef A_dsa_next
+-#undef A_dsa_restore_pointers
+-#undef A_dsa_save_data_pointer
+-#undef A_dsa_select
+-#undef A_dsa_sscf_710
+-#undef A_dsa_status
+-#undef A_dsa_temp_addr_array_value
+-#undef A_dsa_temp_addr_dsa_value
+-#undef A_dsa_temp_addr_new_value
+-#undef A_dsa_temp_addr_next
+-#undef A_dsa_temp_addr_residual
+-#undef A_dsa_temp_addr_saved_pointer
+-#undef A_dsa_temp_addr_saved_residual
+-#undef A_dsa_temp_lun
+-#undef A_dsa_temp_next
+-#undef A_dsa_temp_sync
+-#undef A_dsa_temp_target
+-#undef A_emulfly
+-#undef A_int_debug_break
+-#undef A_int_debug_panic
+-#undef A_int_err_check_condition
+-#undef A_int_err_no_phase
+-#undef A_int_err_selected
+-#undef A_int_err_unexpected_phase
+-#undef A_int_err_unexpected_reselect
+-#undef A_int_msg_1
+-#undef A_int_msg_sdtr
+-#undef A_int_msg_wdtr
+-#undef A_int_norm_aborted
+-#undef A_int_norm_command_complete
+-#undef A_int_norm_disconnected
+-#undef A_int_norm_emulateintfly
+-#undef A_int_norm_reselect_complete
+-#undef A_int_norm_reset
+-#undef A_int_norm_select_complete
+-#undef A_int_test_1
+-#undef A_int_test_2
+-#undef A_int_test_3
+-#undef A_msg_buf
+-#undef A_reconnect_dsa_head
+-#undef A_reselected_identify
+-#undef A_reselected_tag
+-#undef A_saved_dsa
+-#undef A_schedule
+-#undef A_test_dest
+-#undef A_test_src
+-#undef Ent_accept_message
+-#undef Ent_cmdout_cmdout
+-#undef Ent_command_complete
+-#undef Ent_command_complete_msgin
+-#undef Ent_data_transfer
+-#undef Ent_datain_to_jump
+-#undef Ent_debug_break
+-#undef Ent_dsa_code_begin
+-#undef Ent_dsa_code_check_reselect
+-#undef Ent_dsa_code_fix_jump
+-#undef Ent_dsa_code_restore_pointers
+-#undef Ent_dsa_code_save_data_pointer
+-#undef Ent_dsa_code_template
+-#undef Ent_dsa_code_template_end
+-#undef Ent_dsa_schedule
+-#undef Ent_dsa_zero
+-#undef Ent_end_data_transfer
+-#undef Ent_initiator_abort
+-#undef Ent_msg_in
+-#undef Ent_msg_in_restart
+-#undef Ent_other_in
+-#undef Ent_other_out
+-#undef Ent_other_transfer
+-#undef Ent_reject_message
+-#undef Ent_reselected_check_next
+-#undef Ent_reselected_ok
+-#undef Ent_respond_message
+-#undef Ent_select
+-#undef Ent_select_msgout
+-#undef Ent_target_abort
+-#undef Ent_test_1
+-#undef Ent_test_2
+-#undef Ent_test_2_msgout
+-#undef Ent_wait_reselect
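The _u.h file removed above is the second half of the generator's output: it #undefs every A_* and Ent_* macro that the matching 53c7xx_d.h defines. A hedged illustration of why the pair exists; the include pattern below is an assumption, not code from this tree:

/* Illustrative only: the _d.h/_u.h pair lets one translation unit
 * include more than one generated script without macro collisions. */
#include "53c7xx_d.h"	/* defines A_* and Ent_* for this script */
/* ... use SCRIPT[], LABELPATCHES[], the Ent_* offsets ... */
#include "53c7xx_u.h"	/* #undefs them all, so another generated
			 * header can be included cleanly afterwards */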
+diff -Nurb linux-2.6.22-570/drivers/scsi/BusLogic.c linux-2.6.22-591/drivers/scsi/BusLogic.c
+--- linux-2.6.22-570/drivers/scsi/BusLogic.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/BusLogic.c 2007-12-21 15:36:12.000000000 -0500
+@@ -304,16 +304,8 @@
+ static void BusLogic_DeallocateCCB(struct BusLogic_CCB *CCB)
+ {
+ struct BusLogic_HostAdapter *HostAdapter = CCB->HostAdapter;
+- struct scsi_cmnd *cmd = CCB->Command;
+
+- if (cmd->use_sg != 0) {
+- pci_unmap_sg(HostAdapter->PCI_Device,
+- (struct scatterlist *)cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- } else if (cmd->request_bufflen != 0) {
+- pci_unmap_single(HostAdapter->PCI_Device, CCB->DataPointer,
+- CCB->DataLength, cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(CCB->Command);
+ pci_unmap_single(HostAdapter->PCI_Device, CCB->SenseDataPointer,
+ CCB->SenseDataLength, PCI_DMA_FROMDEVICE);
+
+@@ -2648,7 +2640,8 @@
+ */
+ if (CCB->CDB[0] == INQUIRY && CCB->CDB[1] == 0 && CCB->HostAdapterStatus == BusLogic_CommandCompletedNormally) {
+ struct BusLogic_TargetFlags *TargetFlags = &HostAdapter->TargetFlags[CCB->TargetID];
+- struct SCSI_Inquiry *InquiryResult = (struct SCSI_Inquiry *) Command->request_buffer;
++ struct SCSI_Inquiry *InquiryResult =
++ (struct SCSI_Inquiry *) scsi_sglist(Command);
+ TargetFlags->TargetExists = true;
+ TargetFlags->TaggedQueuingSupported = InquiryResult->CmdQue;
+ TargetFlags->WideTransfersSupported = InquiryResult->WBus16;
+@@ -2819,9 +2812,8 @@
+ int CDB_Length = Command->cmd_len;
+ int TargetID = Command->device->id;
+ int LogicalUnit = Command->device->lun;
+- void *BufferPointer = Command->request_buffer;
+- int BufferLength = Command->request_bufflen;
+- int SegmentCount = Command->use_sg;
++ int BufferLength = scsi_bufflen(Command);
++ int Count;
+ struct BusLogic_CCB *CCB;
+ /*
+ SCSI REQUEST_SENSE commands will be executed automatically by the Host
+@@ -2851,36 +2843,35 @@
+ return 0;
+ }
+ }
++
+ /*
+ Initialize the fields in the BusLogic Command Control Block (CCB).
+ */
+- if (SegmentCount == 0 && BufferLength != 0) {
+- CCB->Opcode = BusLogic_InitiatorCCB;
+- CCB->DataLength = BufferLength;
+- CCB->DataPointer = pci_map_single(HostAdapter->PCI_Device,
+- BufferPointer, BufferLength,
+- Command->sc_data_direction);
+- } else if (SegmentCount != 0) {
+- struct scatterlist *ScatterList = (struct scatterlist *) BufferPointer;
+- int Segment, Count;
++ Count = scsi_dma_map(Command);
++ BUG_ON(Count < 0);
++ if (Count) {
++ struct scatterlist *sg;
++ int i;
+
+- Count = pci_map_sg(HostAdapter->PCI_Device, ScatterList, SegmentCount,
+- Command->sc_data_direction);
+ CCB->Opcode = BusLogic_InitiatorCCB_ScatterGather;
+ CCB->DataLength = Count * sizeof(struct BusLogic_ScatterGatherSegment);
+ if (BusLogic_MultiMasterHostAdapterP(HostAdapter))
+ CCB->DataPointer = (unsigned int) CCB->DMA_Handle + ((unsigned long) &CCB->ScatterGatherList - (unsigned long) CCB);
+ else
+ CCB->DataPointer = Virtual_to_32Bit_Virtual(CCB->ScatterGatherList);
+- for (Segment = 0; Segment < Count; Segment++) {
+- CCB->ScatterGatherList[Segment].SegmentByteCount = sg_dma_len(ScatterList + Segment);
+- CCB->ScatterGatherList[Segment].SegmentDataPointer = sg_dma_address(ScatterList + Segment);
++
++ scsi_for_each_sg(Command, sg, Count, i) {
++ CCB->ScatterGatherList[i].SegmentByteCount =
++ sg_dma_len(sg);
++ CCB->ScatterGatherList[i].SegmentDataPointer =
++ sg_dma_address(sg);
+ }
+- } else {
++ } else if (!Count) {
+ CCB->Opcode = BusLogic_InitiatorCCB;
+ CCB->DataLength = BufferLength;
+ CCB->DataPointer = 0;
+ }
++
+ switch (CDB[0]) {
+ case READ_6:
+ case READ_10:
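The BusLogic hunks above are a straight conversion to the SCSI data-buffer accessors: scsi_dma_unmap()/scsi_dma_map() replace the hand-rolled use_sg/request_buffer branching, and scsi_for_each_sg() walks the mapped segments. A minimal sketch of the resulting queuecommand-side pattern, assuming a hypothetical program_segment() helper that fills one hardware S/G descriptor:

#include <scsi/scsi_cmnd.h>
#include <linux/scatterlist.h>

void program_segment(int i, dma_addr_t addr, unsigned int len);
						/* hypothetical */

static int my_map_command(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i, count;

	count = scsi_dma_map(cmd);	/* 0 = no data, <0 = error */
	if (count < 0)
		return count;

	scsi_for_each_sg(cmd, sg, count, i)
		program_segment(i, sg_dma_address(sg), sg_dma_len(sg));
	return 0;
}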
+diff -Nurb linux-2.6.22-570/drivers/scsi/Kconfig linux-2.6.22-591/drivers/scsi/Kconfig
+--- linux-2.6.22-570/drivers/scsi/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -739,7 +739,7 @@
+
+ config SCSI_IBMMCA
+ tristate "IBMMCA SCSI support"
+- depends on MCA_LEGACY && SCSI
++ depends on MCA && SCSI
+ ---help---
+ This is support for the IBM SCSI adapter found in many of the PS/2
+ series computers. These machines have an MCA bus, so you need to
+@@ -1007,6 +1007,11 @@
+ To compile this driver as a module, choose M here: the
+ module will be called stex.
+
++config 53C700_BE_BUS
++ bool
++ depends on SCSI_A4000T || SCSI_ZORRO7XX || MVME16x_SCSI || BVME6000_SCSI
++ default y
++
+ config SCSI_SYM53C8XX_2
+ tristate "SYM53C8XX Version 2 SCSI support"
+ depends on PCI && SCSI
+@@ -1611,13 +1616,25 @@
+ If you have the Phase5 Fastlane Z3 SCSI controller, or plan to use
+ one in the near future, say Y to this question. Otherwise, say N.
+
+-config SCSI_AMIGA7XX
+- bool "Amiga NCR53c710 SCSI support (EXPERIMENTAL)"
+- depends on AMIGA && SCSI && EXPERIMENTAL && BROKEN
++config SCSI_A4000T
++ tristate "A4000T NCR53c710 SCSI support (EXPERIMENTAL)"
++ depends on AMIGA && SCSI && EXPERIMENTAL
++ select SCSI_SPI_ATTRS
+ help
+- Support for various NCR53c710-based SCSI controllers on the Amiga.
++ If you have an Amiga 4000T and have SCSI devices connected to the
++ built-in SCSI controller, say Y. Otherwise, say N.
++
++ To compile this driver as a module, choose M here: the
++ module will be called a4000t.
++
++config SCSI_ZORRO7XX
++ tristate "Zorro NCR53c710 SCSI support (EXPERIMENTAL)"
++ depends on ZORRO && SCSI && EXPERIMENTAL
++ select SCSI_SPI_ATTRS
++ help
++ Support for various NCR53c710-based SCSI controllers on Zorro
++ expansion boards for the Amiga.
+ This includes:
+- - the builtin SCSI controller on the Amiga 4000T,
+ - the Amiga 4091 Zorro III SCSI-2 controller,
+ - the MacroSystem Development's WarpEngine Amiga SCSI-2 controller
+ (info at
+@@ -1625,10 +1642,6 @@
+ - the SCSI controller on the Phase5 Blizzard PowerUP 603e+
+ accelerator card for the Amiga 1200,
+ - the SCSI controller on the GVP Turbo 040/060 accelerator.
+- Note that all of the above SCSI controllers, except for the builtin
+- SCSI controller on the Amiga 4000T, reside on the Zorro expansion
+- bus, so you also have to enable Zorro bus support if you want to use
+- them.
+
+ config OKTAGON_SCSI
+ tristate "BSC Oktagon SCSI support (EXPERIMENTAL)"
+@@ -1712,8 +1725,8 @@
+ single-board computer.
+
+ config MVME16x_SCSI
+- bool "NCR53C710 SCSI driver for MVME16x"
+- depends on MVME16x && SCSI && BROKEN
++ tristate "NCR53C710 SCSI driver for MVME16x"
++ depends on MVME16x && SCSI
+ select SCSI_SPI_ATTRS
+ help
+ The Motorola MVME162, 166, 167, 172 and 177 boards use the NCR53C710
+@@ -1721,22 +1734,14 @@
+ will want to say Y to this question.
+
+ config BVME6000_SCSI
+- bool "NCR53C710 SCSI driver for BVME6000"
+- depends on BVME6000 && SCSI && BROKEN
++ tristate "NCR53C710 SCSI driver for BVME6000"
++ depends on BVME6000 && SCSI
+ select SCSI_SPI_ATTRS
+ help
+ The BVME4000 and BVME6000 boards from BVM Ltd use the NCR53C710
+ SCSI controller chip. Almost everyone using one of these boards
+ will want to say Y to this question.
+
+-config SCSI_NCR53C7xx_FAST
+- bool "allow FAST-SCSI [10MHz]"
+- depends on SCSI_AMIGA7XX || MVME16x_SCSI || BVME6000_SCSI
+- help
+- This will enable 10MHz FAST-SCSI transfers with your host
+- adapter. Some systems have problems with that speed, so it's safest
+- to say N here.
+-
+ config SUN3_SCSI
+ tristate "Sun3 NCR5380 SCSI"
+ depends on SUN3 && SCSI
+diff -Nurb linux-2.6.22-570/drivers/scsi/Makefile linux-2.6.22-591/drivers/scsi/Makefile
+--- linux-2.6.22-570/drivers/scsi/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -37,7 +37,8 @@
+
+ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o
+ obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o
+-obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o
++obj-$(CONFIG_SCSI_A4000T) += 53c700.o a4000t.o
++obj-$(CONFIG_SCSI_ZORRO7XX) += 53c700.o zorro7xx.o
+ obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o
+ obj-$(CONFIG_A2091_SCSI) += a2091.o wd33c93.o
+ obj-$(CONFIG_GVP11_SCSI) += gvp11.o wd33c93.o
+@@ -53,8 +54,8 @@
+ obj-$(CONFIG_MAC_SCSI) += mac_scsi.o
+ obj-$(CONFIG_SCSI_MAC_ESP) += mac_esp.o NCR53C9x.o
+ obj-$(CONFIG_SUN3_SCSI) += sun3_scsi.o sun3_scsi_vme.o
+-obj-$(CONFIG_MVME16x_SCSI) += mvme16x.o 53c7xx.o
+-obj-$(CONFIG_BVME6000_SCSI) += bvme6000.o 53c7xx.o
++obj-$(CONFIG_MVME16x_SCSI) += 53c700.o mvme16x_scsi.o
++obj-$(CONFIG_BVME6000_SCSI) += 53c700.o bvme6000_scsi.o
+ obj-$(CONFIG_SCSI_SIM710) += 53c700.o sim710.o
+ obj-$(CONFIG_SCSI_ADVANSYS) += advansys.o
+ obj-$(CONFIG_SCSI_PSI240I) += psi240i.o
+@@ -168,10 +169,8 @@
+ oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o
+
+ # Files generated that shall be removed upon make clean
+-clean-files := 53c7xx_d.h 53c700_d.h \
+- 53c7xx_u.h 53c700_u.h
++clean-files := 53c700_d.h 53c700_u.h
+
+-$(obj)/53c7xx.o: $(obj)/53c7xx_d.h $(obj)/53c7xx_u.h
+ $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
+
+ # If you want to play with the firmware, uncomment
+@@ -179,11 +178,6 @@
+
+ ifdef GENERATE_FIRMWARE
+
+-$(obj)/53c7xx_d.h: $(src)/53c7xx.scr $(src)/script_asm.pl
+- $(CPP) -traditional -DCHIP=710 - < $< | grep -v '^#' | $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h)
+-
+-$(obj)/53c7xx_u.h: $(obj)/53c7xx_d.h
+-
+ $(obj)/53c700_d.h: $(src)/53c700.scr $(src)/script_asm.pl
+ $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h) < $<
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.c linux-2.6.22-591/drivers/scsi/NCR5380.c
+--- linux-2.6.22-570/drivers/scsi/NCR5380.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/NCR5380.c 2007-12-21 15:36:12.000000000 -0500
+@@ -347,7 +347,7 @@
+ if((r & bit) == val)
+ return 0;
+ if(!in_interrupt())
+- yield();
++ cond_resched();
+ else
+ cpu_relax();
+ }
+@@ -357,7 +357,7 @@
+ static struct {
+ unsigned char value;
+ const char *name;
+-} phases[] = {
++} phases[] __maybe_unused = {
+ {PHASE_DATAOUT, "DATAOUT"},
+ {PHASE_DATAIN, "DATAIN"},
+ {PHASE_CMDOUT, "CMDOUT"},
+@@ -575,7 +575,8 @@
+ * Locks: none, irqs must be enabled on entry
+ */
+
+-static int __init NCR5380_probe_irq(struct Scsi_Host *instance, int possible)
++static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
++ int possible)
+ {
+ NCR5380_local_declare();
+ struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+@@ -629,7 +630,8 @@
+ * Locks: none
+ */
+
+-static void __init NCR5380_print_options(struct Scsi_Host *instance)
++static void __init __maybe_unused
++NCR5380_print_options(struct Scsi_Host *instance)
+ {
+ printk(" generic options"
+ #ifdef AUTOPROBE_IRQ
+@@ -703,8 +705,8 @@
+ static
+ char *lprint_opcode(int opcode, char *pos, char *buffer, int length);
+
+-static
+-int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, off_t offset, int length, int inout)
++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance,
++ char *buffer, char **start, off_t offset, int length, int inout)
+ {
+ char *pos = buffer;
+ struct NCR5380_hostdata *hostdata;
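Two independent cleanups sit in this NCR5380.c hunk set: the busy-wait helper now calls cond_resched() instead of yield(), and several statics gain __maybe_unused because NCR5380.c is included by board-specific wrappers that reference only some of them. A minimal illustration of the annotation; debug_dump() is invented for the example:

#include <linux/compiler.h>

/* Referenced only by some of the files that include this one;
 * __maybe_unused stops gcc warning "defined but not used" in the
 * configurations that never call it. */
static void __maybe_unused debug_dump(void)
{
	/* ... */
}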
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.h linux-2.6.22-591/drivers/scsi/NCR5380.h
+--- linux-2.6.22-570/drivers/scsi/NCR5380.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/NCR5380.h 2007-12-21 15:36:12.000000000 -0500
+@@ -299,7 +299,7 @@
+ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
+ #endif
+ static void NCR5380_main(struct work_struct *work);
+-static void NCR5380_print_options(struct Scsi_Host *instance);
++static void __maybe_unused NCR5380_print_options(struct Scsi_Host *instance);
+ #ifdef NDEBUG
+ static void NCR5380_print_phase(struct Scsi_Host *instance);
+ static void NCR5380_print(struct Scsi_Host *instance);
+@@ -307,8 +307,8 @@
+ static int NCR5380_abort(Scsi_Cmnd * cmd);
+ static int NCR5380_bus_reset(Scsi_Cmnd * cmd);
+ static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *));
+-static int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start,
+-off_t offset, int length, int inout);
++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance,
++ char *buffer, char **start, off_t offset, int length, int inout);
+
+ static void NCR5380_reselect(struct Scsi_Host *instance);
+ static int NCR5380_select(struct Scsi_Host *instance, Scsi_Cmnd * cmd, int tag);
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR53c406a.c linux-2.6.22-591/drivers/scsi/NCR53c406a.c
+--- linux-2.6.22-570/drivers/scsi/NCR53c406a.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/NCR53c406a.c 2007-12-21 15:36:12.000000000 -0500
+@@ -698,7 +698,7 @@
+ int i;
+
+ VDEB(printk("NCR53c406a_queue called\n"));
+- DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, SCpnt->request_bufflen));
++ DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, scsi_bufflen(SCpnt)));
+
+ #if 0
+ VDEB(for (i = 0; i < SCpnt->cmd_len; i++)
+@@ -785,8 +785,8 @@
+ unsigned char status, int_reg;
+ #if USE_PIO
+ unsigned char pio_status;
+- struct scatterlist *sglist;
+- unsigned int sgcount;
++ struct scatterlist *sg;
++ int i;
+ #endif
+
+ VDEB(printk("NCR53c406a_intr called\n"));
+@@ -866,21 +866,17 @@
+ current_SC->SCp.phase = data_out;
+ VDEB(printk("NCR53c406a: Data-Out phase\n"));
+ outb(FLUSH_FIFO, CMD_REG);
+- LOAD_DMA_COUNT(current_SC->request_bufflen); /* Max transfer size */
++ LOAD_DMA_COUNT(scsi_bufflen(current_SC)); /* Max transfer size */
+ #if USE_DMA /* No s/g support for DMA */
+- NCR53c406a_dma_write(current_SC->request_buffer, current_SC->request_bufflen);
++ NCR53c406a_dma_write(scsi_sglist(current_SC),
++ scsi_bufflen(current_SC));
++
+ #endif /* USE_DMA */
+ outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+- if (!current_SC->use_sg) /* Don't use scatter-gather */
+- NCR53c406a_pio_write(current_SC->request_buffer, current_SC->request_bufflen);
+- else { /* use scatter-gather */
+- sgcount = current_SC->use_sg;
+- sglist = current_SC->request_buffer;
+- while (sgcount--) {
+- NCR53c406a_pio_write(page_address(sglist->page) + sglist->offset, sglist->length);
+- sglist++;
+- }
++ scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++ NCR53c406a_pio_write(page_address(sg->page) + sg->offset,
++ sg->length);
+ }
+ REG0;
+ #endif /* USE_PIO */
+@@ -893,21 +889,16 @@
+ current_SC->SCp.phase = data_in;
+ VDEB(printk("NCR53c406a: Data-In phase\n"));
+ outb(FLUSH_FIFO, CMD_REG);
+- LOAD_DMA_COUNT(current_SC->request_bufflen); /* Max transfer size */
++ LOAD_DMA_COUNT(scsi_bufflen(current_SC)); /* Max transfer size */
+ #if USE_DMA /* No s/g support for DMA */
+- NCR53c406a_dma_read(current_SC->request_buffer, current_SC->request_bufflen);
++ NCR53c406a_dma_read(scsi_sglist(current_SC),
++ scsi_bufflen(current_SC));
+ #endif /* USE_DMA */
+ outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+- if (!current_SC->use_sg) /* Don't use scatter-gather */
+- NCR53c406a_pio_read(current_SC->request_buffer, current_SC->request_bufflen);
+- else { /* Use scatter-gather */
+- sgcount = current_SC->use_sg;
+- sglist = current_SC->request_buffer;
+- while (sgcount--) {
+- NCR53c406a_pio_read(page_address(sglist->page) + sglist->offset, sglist->length);
+- sglist++;
+- }
++ scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++ NCR53c406a_pio_read(page_address(sg->page) + sg->offset,
++ sg->length);
+ }
+ REG0;
+ #endif /* USE_PIO */
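The NCR53c406a conversion above collapses the separate single-buffer and scatter-gather paths into one scsi_for_each_sg() walk; in 2.6.22 a scatterlist entry still carries a struct page pointer, so each segment is addressed through page_address(). A reduced sketch, with pio_write() standing in for the driver's port I/O helper:

#include <scsi/scsi_cmnd.h>
#include <linux/scatterlist.h>
#include <linux/mm.h>

void pio_write(void *buf, unsigned int len);	/* hypothetical */

static void my_pio_out(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i;

	scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i)
		pio_write(page_address(sg->page) + sg->offset,
			  sg->length);
}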
+diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.c linux-2.6.22-591/drivers/scsi/a100u2w.c
+--- linux-2.6.22-570/drivers/scsi/a100u2w.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/a100u2w.c 2007-12-21 15:36:12.000000000 -0500
+@@ -19,27 +19,6 @@
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions, and the following disclaimer,
+- * without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- * derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -75,6 +54,8 @@
+ * 9/28/04 Christoph Hellwig <hch@lst.de>
+ * - merge the two source files
+ * - remove internal queueing code
++ * 14/06/07 Alan Cox <alan@redhat.com>
++ * - Grand cleanup and Linuxisation
+ */
+
+ #include <linux/module.h>
+@@ -102,14 +83,12 @@
+ #include "a100u2w.h"
+
+
+-#define JIFFIES_TO_MS(t) ((t) * 1000 / HZ)
+-#define MS_TO_JIFFIES(j) ((j * HZ) / 1000)
++static struct orc_scb *__orc_alloc_scb(struct orc_host * host);
++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb);
+
+-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp);
+-static void inia100SCBPost(BYTE * pHcb, BYTE * pScb);
++static struct orc_nvram nvram, *nvramp = &nvram;
+
+-static NVRAM nvram, *nvramp = &nvram;
+-static UCHAR dftNvRam[64] =
++static u8 default_nvram[64] =
+ {
+ /*----------header -------------*/
+ 0x01, /* 0x00: Sub System Vendor ID 0 */
+@@ -158,823 +137,882 @@
+ };
+
+
+-/***************************************************************************/
+-static void waitForPause(unsigned amount)
+-{
+- ULONG the_time = jiffies + MS_TO_JIFFIES(amount);
+- while (time_before_eq(jiffies, the_time))
+- cpu_relax();
+-}
+-
+-/***************************************************************************/
+-static UCHAR waitChipReady(ORC_HCS * hcsp)
++static u8 wait_chip_ready(struct orc_host * host)
+ {
+ int i;
+
+ for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */
+- if (ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */
++ if (inb(host->base + ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */
+ return 1;
+- waitForPause(100); /* wait 100ms before try again */
++ mdelay(100);
+ }
+ return 0;
+ }
+
+-/***************************************************************************/
+-static UCHAR waitFWReady(ORC_HCS * hcsp)
++static u8 wait_firmware_ready(struct orc_host * host)
+ {
+ int i;
+
+ for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */
+- if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) /* Wait READY set */
++ if (inb(host->base + ORC_HSTUS) & RREADY) /* Wait READY set */
+ return 1;
+- waitForPause(100); /* wait 100ms before try again */
++ mdelay(100); /* wait 100ms before try again */
+ }
+ return 0;
+ }
+
+ /***************************************************************************/
+-static UCHAR waitSCSIRSTdone(ORC_HCS * hcsp)
++static u8 wait_scsi_reset_done(struct orc_host * host)
+ {
+ int i;
+
+ for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */
+- if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & SCSIRST)) /* Wait SCSIRST done */
++ if (!(inb(host->base + ORC_HCTRL) & SCSIRST)) /* Wait SCSIRST done */
+ return 1;
+- waitForPause(100); /* wait 100ms before try again */
++ mdelay(100); /* wait 100ms before try again */
+ }
+ return 0;
+ }
+
+ /***************************************************************************/
+-static UCHAR waitHDOoff(ORC_HCS * hcsp)
++static u8 wait_HDO_off(struct orc_host * host)
+ {
+ int i;
+
+ for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */
+- if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HDO)) /* Wait HDO off */
++ if (!(inb(host->base + ORC_HCTRL) & HDO)) /* Wait HDO off */
+ return 1;
+- waitForPause(100); /* wait 100ms before try again */
++ mdelay(100); /* wait 100ms before try again */
+ }
+ return 0;
+ }
+
+ /***************************************************************************/
+-static UCHAR waitHDIset(ORC_HCS * hcsp, UCHAR * pData)
++static u8 wait_hdi_set(struct orc_host * host, u8 * data)
+ {
+ int i;
+
+ for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */
+- if ((*pData = ORC_RD(hcsp->HCS_Base, ORC_HSTUS)) & HDI)
++ if ((*data = inb(host->base + ORC_HSTUS)) & HDI)
+ return 1; /* Wait HDI set */
+- waitForPause(100); /* wait 100ms before try again */
++ mdelay(100); /* wait 100ms before try again */
+ }
+ return 0;
+ }
+
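All of the wait_* helpers in this hunk share one shape: poll a status bit for up to a second, busy-waiting in 100 ms mdelay() steps (the old waitForPause() jiffies loop did the same). Reduced to a common form, illustrative only:

#include <linux/delay.h>
#include <linux/io.h>
#include <linux/types.h>

/* Sketch of the shared shape of wait_chip_ready() and friends:
 * poll one bit of an I/O port for up to ~1s in 100ms steps. */
static int poll_bit(unsigned long port, u8 mask, int want_set)
{
	int i;

	for (i = 0; i < 10; i++) {
		if (!!(inb(port) & mask) == want_set)
			return 1;	/* condition met */
		mdelay(100);		/* busy-wait; note the FIXME
					 * later about held locks */
	}
	return 0;			/* timed out */
}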
+ /***************************************************************************/
+-static unsigned short get_FW_version(ORC_HCS * hcsp)
++static unsigned short orc_read_fwrev(struct orc_host * host)
+ {
+- UCHAR bData;
+- union {
+- unsigned short sVersion;
+- unsigned char cVersion[2];
+- } Version;
++ u16 version;
++ u8 data;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_VERSION);
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(ORC_CMD_VERSION, host->base + ORC_HDATA);
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */
++ if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */
+ return 0;
+- Version.cVersion[0] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+- ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */
++ version = inb(host->base + ORC_HDATA);
++ outb(data, host->base + ORC_HSTUS); /* Clear HDI */
+
+- if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */
++ if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */
+ return 0;
+- Version.cVersion[1] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+- ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */
++ version |= inb(host->base + ORC_HDATA) << 8;
++ outb(data, host->base + ORC_HSTUS); /* Clear HDI */
+
+- return (Version.sVersion);
++ return version;
+ }
+
+ /***************************************************************************/
+-static UCHAR set_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char value)
++static u8 orc_nv_write(struct orc_host * host, unsigned char address, unsigned char value)
+ {
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_SET_NVM); /* Write command */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(ORC_CMD_SET_NVM, host->base + ORC_HDATA); /* Write command */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(address, host->base + ORC_HDATA); /* Write address */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, value); /* Write value */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(value, host->base + ORC_HDATA); /* Write value */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+ return 1;
+ }
+
+ /***************************************************************************/
+-static UCHAR get_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char *pDataIn)
++static u8 orc_nv_read(struct orc_host * host, u8 address, u8 *ptr)
+ {
+- unsigned char bData;
++ unsigned char data;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_GET_NVM); /* Write command */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(ORC_CMD_GET_NVM, host->base + ORC_HDATA); /* Write command */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(address, host->base + ORC_HDATA); /* Write address */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */
++ if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */
+ return 0;
+- *pDataIn = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+- ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */
++ *ptr = inb(host->base + ORC_HDATA);
++ outb(data, host->base + ORC_HSTUS); /* Clear HDI */
+
+ return 1;
++
+ }
+
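Every firmware command in this driver uses the HDATA/HCTRL handshake that orc_nv_write(), orc_nv_read() and orc_read_fwrev() spell out longhand: write a byte to HDATA and strobe HDO, wait for HDO to clear, then collect each reply byte when HDI sets and acknowledge it by writing the status back. A hedged distillation; mbox_send()/mbox_recv() are invented names, while the registers and protocol come from the code above:

#include <linux/io.h>
#include <linux/types.h>

static int mbox_send(struct orc_host *host, u8 byte)
{
	outb(byte, host->base + ORC_HDATA);	/* present the byte */
	outb(HDO, host->base + ORC_HCTRL);	/* strobe it */
	return wait_HDO_off(host);		/* 0 on timeout */
}

static int mbox_recv(struct orc_host *host, u8 *byte)
{
	u8 status;

	if (wait_hdi_set(host, &status) == 0)	/* reply ready? */
		return 0;
	*byte = inb(host->base + ORC_HDATA);
	outb(status, host->base + ORC_HSTUS);	/* ack: clear HDI */
	return 1;
}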
+-/***************************************************************************/
+-static void orc_exec_scb(ORC_HCS * hcsp, ORC_SCB * scbp)
++/**
++ * orc_exec_scb - Queue an SCB with the host adapter
++ * @host: host adapter the SCB belongs to
++ * @scb: SCB to queue for execution
++ */
++
++static void orc_exec_scb(struct orc_host * host, struct orc_scb * scb)
+ {
+- scbp->SCB_Status = ORCSCB_POST;
+- ORC_WR(hcsp->HCS_Base + ORC_PQUEUE, scbp->SCB_ScbIdx);
+- return;
++ scb->status = ORCSCB_POST;
++ outb(scb->scbidx, host->base + ORC_PQUEUE);
+ }
+
+
+-/***********************************************************************
+- Read SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-static int se2_rd_all(ORC_HCS * hcsp)
++/**
++ * se2_rd_all - read SCSI parameters from EEPROM
++ * @host: Host whose EEPROM is being loaded
++ *
++ * Read SCSI H/A configuration parameters from serial EEPROM
++ */
++
++static int se2_rd_all(struct orc_host * host)
+ {
+ int i;
+- UCHAR *np, chksum = 0;
++ u8 *np, chksum = 0;
+
+- np = (UCHAR *) nvramp;
++ np = (u8 *) nvramp;
+ for (i = 0; i < 64; i++, np++) { /* <01> */
+- if (get_NVRAM(hcsp, (unsigned char) i, np) == 0)
++ if (orc_nv_read(host, (u8) i, np) == 0)
+ return -1;
+-// *np++ = get_NVRAM(hcsp, (unsigned char ) i);
+ }
+
+-/*------ Is ckecksum ok ? ------*/
+- np = (UCHAR *) nvramp;
++ /*------ Is checksum ok? ------*/
++ np = (u8 *) nvramp;
+ for (i = 0; i < 63; i++)
+ chksum += *np++;
+
+- if (nvramp->CheckSum != (UCHAR) chksum)
++ if (nvramp->CheckSum != (u8) chksum)
+ return -1;
+ return 1;
+ }
+
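The NVRAM layout that se2_rd_all() validates is simple: byte 63 must equal the 8-bit sum of bytes 0 through 62, and se2_update_all() computes the same sum before writing the defaults back. As a standalone sketch:

#include <linux/types.h>

static int nvram_checksum_ok(const u8 *nv)
{
	u8 sum = 0;
	int i;

	for (i = 0; i < 63; i++)
		sum += nv[i];
	return nv[63] == sum;	/* byte 63 holds the checksum */
}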
+-/************************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-*************************************************************************/
+-static void se2_update_all(ORC_HCS * hcsp)
++/**
++ * se2_update_all - update the EEPROM
++ * @host: Host whose EEPROM is being updated
++ *
++ * Update changed bytes in the EEPROM image.
++ */
++
++static void se2_update_all(struct orc_host * host)
+ { /* setup default pattern */
+ int i;
+- UCHAR *np, *np1, chksum = 0;
++ u8 *np, *np1, chksum = 0;
+
+ /* Calculate checksum first */
+- np = (UCHAR *) dftNvRam;
++ np = (u8 *) default_nvram;
+ for (i = 0; i < 63; i++)
+ chksum += *np++;
+ *np = chksum;
+
+- np = (UCHAR *) dftNvRam;
+- np1 = (UCHAR *) nvramp;
++ np = (u8 *) default_nvram;
++ np1 = (u8 *) nvramp;
+ for (i = 0; i < 64; i++, np++, np1++) {
+- if (*np != *np1) {
+- set_NVRAM(hcsp, (unsigned char) i, *np);
+- }
++ if (*np != *np1)
++ orc_nv_write(host, (u8) i, *np);
+ }
+- return;
+ }
+
+-/*************************************************************************
+- Function name : read_eeprom
+-**************************************************************************/
+-static void read_eeprom(ORC_HCS * hcsp)
+-{
+- if (se2_rd_all(hcsp) != 1) {
+- se2_update_all(hcsp); /* setup default pattern */
+- se2_rd_all(hcsp); /* load again */
++/**
++ * read_eeprom - load EEPROM
++ * @host: Host EEPROM to read
++ *
++ * Read the EEPROM for a given host. If it is invalid or fails,
++ * restore the defaults and use them.
++ */
++
++static void read_eeprom(struct orc_host * host)
++{
++ if (se2_rd_all(host) != 1) {
++ se2_update_all(host); /* setup default pattern */
++ se2_rd_all(host); /* load again */
+ }
+ }
+
+
+-/***************************************************************************/
+-static UCHAR load_FW(ORC_HCS * hcsp)
++/**
++ * orc_load_firmware - initialise firmware
++ * @host: Host to set up
++ *
++ * Load the firmware from the EEPROM into controller SRAM. This
++ * is basically a 4K block copy and then a 4K block read to check
++ * correctness. The rest is convoluted by the indirect interfaces
++ * in the hardware.
++ */
++
++static u8 orc_load_firmware(struct orc_host * host)
+ {
+- U32 dData;
+- USHORT wBIOSAddress;
+- USHORT i;
+- UCHAR *pData, bData;
+-
+-
+- bData = ORC_RD(hcsp->HCS_Base, ORC_GCFG);
+- ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData | EEPRG); /* Enable EEPROM programming */
+- ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, 0x00);
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x00);
+- if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0x55) {
+- ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */
+- return 0;
+- }
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x01);
+- if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0xAA) {
+- ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */
+- return 0;
+- }
+- ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD); /* Enable SRAM programming */
+- pData = (UCHAR *) & dData;
+- dData = 0; /* Initial FW address to 0 */
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x10);
+- *pData = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x11);
+- *(pData + 1) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x12);
+- *(pData + 2) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */
+- ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, *(pData + 2));
+- ORC_WRLONG(hcsp->HCS_Base + ORC_FWBASEADR, dData); /* Write FW address */
++ u32 data32;
++ u16 bios_addr;
++ u16 i;
++ u8 *data32_ptr, data;
++
++
++ /* Set up the EEPROM for access */
+
+- wBIOSAddress = (USHORT) dData; /* FW code locate at BIOS address + ? */
+- for (i = 0, pData = (UCHAR *) & dData; /* Download the code */
++ data = inb(host->base + ORC_GCFG);
++ outb(data | EEPRG, host->base + ORC_GCFG); /* Enable EEPROM programming */
++ outb(0x00, host->base + ORC_EBIOSADR2);
++ outw(0x0000, host->base + ORC_EBIOSADR0);
++ if (inb(host->base + ORC_EBIOSDATA) != 0x55) {
++ outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */
++ return 0;
++ }
++ outw(0x0001, host->base + ORC_EBIOSADR0);
++ if (inb(host->base + ORC_EBIOSDATA) != 0xAA) {
++ outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */
++ return 0;
++ }
++
++ outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL); /* Enable SRAM programming */
++ data32_ptr = (u8 *) & data32;
++ data32 = 0; /* Initial FW address to 0 */
++ outw(0x0010, host->base + ORC_EBIOSADR0);
++ *data32_ptr = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */
++ outw(0x0011, host->base + ORC_EBIOSADR0);
++ *(data32_ptr + 1) = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */
++ outw(0x0012, host->base + ORC_EBIOSADR0);
++ *(data32_ptr + 2) = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */
++ outw(*(data32_ptr + 2), host->base + ORC_EBIOSADR2);
++ outl(data32, host->base + ORC_FWBASEADR); /* Write FW address */
++
++ /* Copy the code from the BIOS to the SRAM */
++
++ bios_addr = (u16) data32; /* FW code locate at BIOS address + ? */
++ for (i = 0, data32_ptr = (u8 *) & data32; /* Download the code */
+ i < 0x1000; /* Firmware code size = 4K */
+- i++, wBIOSAddress++) {
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+- *pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */
++ i++, bios_addr++) {
++ outw(bios_addr, host->base + ORC_EBIOSADR0);
++ *data32_ptr++ = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */
+ if ((i % 4) == 3) {
+- ORC_WRLONG(hcsp->HCS_Base + ORC_RISCRAM, dData); /* Write every 4 bytes */
+- pData = (UCHAR *) & dData;
++ outl(data32, host->base + ORC_RISCRAM); /* Write every 4 bytes */
++ data32_ptr = (u8 *) & data32;
+ }
+ }
+
+- ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD); /* Reset program count 0 */
+- wBIOSAddress -= 0x1000; /* Reset the BIOS adddress */
+- for (i = 0, pData = (UCHAR *) & dData; /* Check the code */
++ /* Go back and check they match */
++
++ outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL); /* Reset program count 0 */
++ bios_addr -= 0x1000; /* Reset the BIOS address */
++ for (i = 0, data32_ptr = (u8 *) & data32; /* Check the code */
+ i < 0x1000; /* Firmware code size = 4K */
+- i++, wBIOSAddress++) {
+- ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+- *pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA); /* Read from BIOS */
++ i++, bios_addr++) {
++ outw(bios_addr, host->base + ORC_EBIOSADR0);
++ *data32_ptr++ = inb(host->base + ORC_EBIOSDATA); /* Read from BIOS */
+ if ((i % 4) == 3) {
+- if (ORC_RDLONG(hcsp->HCS_Base, ORC_RISCRAM) != dData) {
+- ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST); /* Reset program to 0 */
+- ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /*Disable EEPROM programming */
++ if (inl(host->base + ORC_RISCRAM) != data32) {
++ outb(PRGMRST, host->base + ORC_RISCCTL); /* Reset program to 0 */
++ outb(data, host->base + ORC_GCFG); /*Disable EEPROM programming */
+ return 0;
+ }
+- pData = (UCHAR *) & dData;
++ data32_ptr = (u8 *) & data32;
+ }
+ }
+- ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST); /* Reset program to 0 */
+- ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData); /* Disable EEPROM programming */
++
++ /* Success */
++ outb(PRGMRST, host->base + ORC_RISCCTL); /* Reset program to 0 */
++ outb(data, host->base + ORC_GCFG); /* Disable EEPROM programming */
+ return 1;
+ }
+
+ /***************************************************************************/
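At its core orc_load_firmware() is a byte-serial copy of a 4 KB image followed by a read-back compare; the EBIOSADR/EBIOSDATA and RISCRAM indirection hides that shape. Stripped of the port plumbing (read_src(), write_ram() and read_ram() are hypothetical stand-ins) and assuming the little-endian byte assembly the original pointer arithmetic relies on:

#include <linux/types.h>

u8 read_src(u16 addr);		/* hypothetical EBIOS read */
void write_ram(u32 word);	/* hypothetical RISCRAM write */
u32 read_ram(void);		/* hypothetical RISCRAM read */

static int copy_and_verify_4k(u16 src)
{
	u32 word = 0;
	u16 i;

	for (i = 0; i < 0x1000; i++) {		/* download pass */
		word = (word >> 8) | ((u32)read_src(src + i) << 24);
		if ((i % 4) == 3)
			write_ram(word);	/* one word per 4 bytes */
	}
	for (i = 0; i < 0x1000; i++) {		/* verify pass */
		word = (word >> 8) | ((u32)read_src(src + i) << 24);
		if ((i % 4) == 3 && read_ram() != word)
			return 0;		/* mismatch */
	}
	return 1;
}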
+-static void setup_SCBs(ORC_HCS * hcsp)
++static void setup_SCBs(struct orc_host * host)
+ {
+- ORC_SCB *pVirScb;
++ struct orc_scb *scb;
+ int i;
+- ESCB *pVirEscb;
+- dma_addr_t pPhysEscb;
++ struct orc_extended_scb *escb;
++ dma_addr_t escb_phys;
+
+- /* Setup SCB HCS_Base and SCB Size registers */
+- ORC_WR(hcsp->HCS_Base + ORC_SCBSIZE, ORC_MAXQUEUE); /* Total number of SCBs */
+- /* SCB HCS_Base address 0 */
+- ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE0, hcsp->HCS_physScbArray);
+- /* SCB HCS_Base address 1 */
+- ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE1, hcsp->HCS_physScbArray);
++ /* Setup SCB base and SCB Size registers */
++ outb(ORC_MAXQUEUE, host->base + ORC_SCBSIZE); /* Total number of SCBs */
++ /* SCB base address 0 */
++ outl(host->scb_phys, host->base + ORC_SCBBASE0);
++ /* SCB base address 1 */
++ outl(host->scb_phys, host->base + ORC_SCBBASE1);
+
+ /* setup scatter list address with one buffer */
+- pVirScb = hcsp->HCS_virScbArray;
+- pVirEscb = hcsp->HCS_virEscbArray;
++ scb = host->scb_virt;
++ escb = host->escb_virt;
+
+ for (i = 0; i < ORC_MAXQUEUE; i++) {
+- pPhysEscb = (hcsp->HCS_physEscbArray + (sizeof(ESCB) * i));
+- pVirScb->SCB_SGPAddr = (U32) pPhysEscb;
+- pVirScb->SCB_SensePAddr = (U32) pPhysEscb;
+- pVirScb->SCB_EScb = pVirEscb;
+- pVirScb->SCB_ScbIdx = i;
+- pVirScb++;
+- pVirEscb++;
++ escb_phys = (host->escb_phys + (sizeof(struct orc_extended_scb) * i));
++ scb->sg_addr = (u32) escb_phys;
++ scb->sense_addr = (u32) escb_phys;
++ scb->escb = escb;
++ scb->scbidx = i;
++ scb++;
++ escb++;
+ }
+-
+- return;
+ }
+
+-/***************************************************************************/
+-static void initAFlag(ORC_HCS * hcsp)
++/**
++ * init_alloc_map - initialise allocation map
++ * @host: host map to configure
++ *
++ * Initialise the allocation maps for this device. If the device
++ * is not quiescent, the caller must hold the allocation lock.
++ */
++
++static void init_alloc_map(struct orc_host * host)
+ {
+- UCHAR i, j;
++ u8 i, j;
+
+ for (i = 0; i < MAX_CHANNELS; i++) {
+ for (j = 0; j < 8; j++) {
+- hcsp->BitAllocFlag[i][j] = 0xffffffff;
++ host->allocation_map[i][j] = 0xffffffff;
+ }
+ }
+ }
+
+-/***************************************************************************/
+-static int init_orchid(ORC_HCS * hcsp)
++/**
++ * init_orchid - initialise the host adapter
++ * @host:host adapter to initialise
++ *
++ * Initialise the controller and, if necessary, load the firmware.
++ *
++ * Returns -1 if the initialisation fails.
++ */
++
++static int init_orchid(struct orc_host * host)
+ {
+- UBYTE *readBytep;
+- USHORT revision;
+- UCHAR i;
+-
+- initAFlag(hcsp);
+- ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFF); /* Disable all interrupt */
+- if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) { /* Orchid is ready */
+- revision = get_FW_version(hcsp);
++ u8 *ptr;
++ u16 revision;
++ u8 i;
++
++ init_alloc_map(host);
++ outb(0xFF, host->base + ORC_GIMSK); /* Disable all interrupts */
++
++ if (inb(host->base + ORC_HSTUS) & RREADY) { /* Orchid is ready */
++ revision = orc_read_fwrev(host);
+ if (revision == 0xFFFF) {
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST); /* Reset Host Adapter */
+- if (waitChipReady(hcsp) == 0)
+- return (-1);
+- load_FW(hcsp); /* Download FW */
+- setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, 0); /* clear HOSTSTOP */
+- if (waitFWReady(hcsp) == 0)
+- return (-1);
++ outb(DEVRST, host->base + ORC_HCTRL); /* Reset Host Adapter */
++ if (wait_chip_ready(host) == 0)
++ return -1;
++ orc_load_firmware(host); /* Download FW */
++ setup_SCBs(host); /* Setup SCB base and SCB Size registers */
++ outb(0x00, host->base + ORC_HCTRL); /* clear HOSTSTOP */
++ if (wait_firmware_ready(host) == 0)
++ return -1;
+ /* Wait for firmware ready */
+ } else {
+- setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */
++ setup_SCBs(host); /* Setup SCB base and SCB Size registers */
+ }
+ } else { /* Orchid is not Ready */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST); /* Reset Host Adapter */
+- if (waitChipReady(hcsp) == 0)
+- return (-1);
+- load_FW(hcsp); /* Download FW */
+- setup_SCBs(hcsp); /* Setup SCB HCS_Base and SCB Size registers */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); /* Do Hardware Reset & */
++ outb(DEVRST, host->base + ORC_HCTRL); /* Reset Host Adapter */
++ if (wait_chip_ready(host) == 0)
++ return -1;
++ orc_load_firmware(host); /* Download FW */
++ setup_SCBs(host); /* Setup SCB base and SCB Size registers */
++ outb(HDO, host->base + ORC_HCTRL); /* Do Hardware Reset & */
+
+ /* clear HOSTSTOP */
+- if (waitFWReady(hcsp) == 0) /* Wait for firmware ready */
+- return (-1);
++ if (wait_firmware_ready(host) == 0) /* Wait for firmware ready */
++ return -1;
+ }
+
+-/*------------- get serial EEProm settting -------*/
++ /* Load an EEProm copy into RAM */
++ /* Assumes single threaded at this point */
++ read_eeprom(host);
+
+- read_eeprom(hcsp);
+-
+- if (nvramp->Revision != 1)
+- return (-1);
+-
+- hcsp->HCS_SCSI_ID = nvramp->SCSI0Id;
+- hcsp->HCS_BIOS = nvramp->BIOSConfig1;
+- hcsp->HCS_MaxTar = MAX_TARGETS;
+- readBytep = (UCHAR *) & (nvramp->Target00Config);
+- for (i = 0; i < 16; readBytep++, i++) {
+- hcsp->TargetFlag[i] = *readBytep;
+- hcsp->MaximumTags[i] = ORC_MAXTAGS;
+- } /* for */
++ if (nvramp->revision != 1)
++ return -1;
+
+- if (nvramp->SCSI0Config & NCC_BUSRESET) { /* Reset SCSI bus */
+- hcsp->HCS_Flags |= HCF_SCSI_RESET;
++ host->scsi_id = nvramp->scsi_id;
++ host->BIOScfg = nvramp->BIOSConfig1;
++ host->max_targets = MAX_TARGETS;
++ ptr = (u8 *) & (nvramp->Target00Config);
++ for (i = 0; i < 16; ptr++, i++) {
++ host->target_flag[i] = *ptr;
++ host->max_tags[i] = ORC_MAXTAGS;
+ }
+- ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFB); /* enable RP FIFO interrupt */
+- return (0);
++
++ if (nvramp->SCSI0Config & NCC_BUSRESET)
++ host->flags |= HCF_SCSI_RESET;
++ outb(0xFB, host->base + ORC_GIMSK); /* enable RP FIFO interrupt */
++ return 0;
+ }
+
+-/*****************************************************************************
+- Function name : orc_reset_scsi_bus
+- Description : Reset registers, reset a hanging bus and
+- kill active and disconnected commands for target w/o soft reset
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_reset_scsi_bus(ORC_HCS * pHCB)
++/**
++ * orc_reset_scsi_bus - perform bus reset
++ * @host: host being reset
++ *
++ * Perform a full bus reset on the adapter.
++ */
++
++static int orc_reset_scsi_bus(struct orc_host * host)
+ { /* I need Host Control Block Information */
+- ULONG flags;
++ unsigned long flags;
+
+- spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags);
++ spin_lock_irqsave(&host->allocation_lock, flags);
+
+- initAFlag(pHCB);
++ init_alloc_map(host);
+ /* reset scsi bus */
+- ORC_WR(pHCB->HCS_Base + ORC_HCTRL, SCSIRST);
+- if (waitSCSIRSTdone(pHCB) == 0) {
+- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++ outb(SCSIRST, host->base + ORC_HCTRL);
++ /* FIXME: We can spend up to a second with the lock held and
++ interrupts off here */
++ if (wait_scsi_reset_done(host) == 0) {
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
+ return FAILED;
+ } else {
+- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
+ return SUCCESS;
+ }
+ }
+
+-/*****************************************************************************
+- Function name : orc_device_reset
+- Description : Reset registers, reset a hanging bus and
+- kill active and disconnected commands for target w/o soft reset
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_device_reset(ORC_HCS * pHCB, struct scsi_cmnd *SCpnt, unsigned int target)
++/**
++ * orc_device_reset - device reset handler
++ * @host: host to reset
++ * @cmd: command causing the reset
++ * @target: target device
++ *
++ * Reset registers, reset a hanging bus and kill active and disconnected
++ * commands for target w/o soft reset
++ */
++
++static int orc_device_reset(struct orc_host * host, struct scsi_cmnd *cmd, unsigned int target)
+ { /* I need Host Control Block Information */
+- ORC_SCB *pScb;
+- ESCB *pVirEscb;
+- ORC_SCB *pVirScb;
+- UCHAR i;
+- ULONG flags;
+-
+- spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags);
+- pScb = (ORC_SCB *) NULL;
+- pVirEscb = (ESCB *) NULL;
++ struct orc_scb *scb;
++ struct orc_extended_scb *escb;
++ struct orc_scb *host_scb;
++ u8 i;
++ unsigned long flags;
++
++ spin_lock_irqsave(&(host->allocation_lock), flags);
++ scb = (struct orc_scb *) NULL;
++ escb = (struct orc_extended_scb *) NULL;
+
+ /* setup scatter list address with one buffer */
+- pVirScb = pHCB->HCS_virScbArray;
++ host_scb = host->scb_virt;
+
+- initAFlag(pHCB);
+- /* device reset */
++ /* FIXME: is this safe if we then fail to issue the reset or race
++ a completion ? */
++ init_alloc_map(host);
++
++ /* Find the scb corresponding to the command */
+ for (i = 0; i < ORC_MAXQUEUE; i++) {
+- pVirEscb = pVirScb->SCB_EScb;
+- if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt))
++ escb = host_scb->escb;
++ if (host_scb->status && escb->srb == cmd)
+ break;
+- pVirScb++;
++ host_scb++;
+ }
+
+ if (i == ORC_MAXQUEUE) {
+- printk("Unable to Reset - No SCB Found\n");
+- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++ printk(KERN_ERR "Unable to Reset - No SCB Found\n");
++ spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ return FAILED;
+ }
+- if ((pScb = orc_alloc_scb(pHCB)) == NULL) {
+- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++ /* Allocate a new SCB for the reset command to the firmware */
++ if ((scb = __orc_alloc_scb(host)) == NULL) {
++ /* Can't happen.. */
++ spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ return FAILED;
+ }
+- pScb->SCB_Opcode = ORC_BUSDEVRST;
+- pScb->SCB_Target = target;
+- pScb->SCB_HaStat = 0;
+- pScb->SCB_TaStat = 0;
+- pScb->SCB_Status = 0x0;
+- pScb->SCB_Link = 0xFF;
+- pScb->SCB_Reserved0 = 0;
+- pScb->SCB_Reserved1 = 0;
+- pScb->SCB_XferLen = 0;
+- pScb->SCB_SGLen = 0;
+-
+- pVirEscb->SCB_Srb = NULL;
+- pVirEscb->SCB_Srb = SCpnt;
+- orc_exec_scb(pHCB, pScb); /* Start execute SCB */
+- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++ /* The device reset is handled by the firmware: we fill in an SCB and
++ fire it at the controller, which does the rest */
++ scb->opcode = ORC_BUSDEVRST;
++ scb->target = target;
++ scb->hastat = 0;
++ scb->tastat = 0;
++ scb->status = 0x0;
++ scb->link = 0xFF;
++ scb->reserved0 = 0;
++ scb->reserved1 = 0;
++ scb->xferlen = 0;
++ scb->sg_len = 0;
++
++ escb->srb = NULL;
++ escb->srb = cmd;
++ orc_exec_scb(host, scb); /* Start execute SCB */
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
+ return SUCCESS;
+ }
+
++/**
++ * __orc_alloc_scb - allocate an SCB
++ * @host: host to allocate from
++ *
++ * Allocate an SCB and return a pointer to the SCB object. NULL
++ * is returned if no SCB is free. The caller must already hold
++ * the allocator lock at this point.
++ */
+
+-/***************************************************************************/
+-static ORC_SCB *__orc_alloc_scb(ORC_HCS * hcsp)
++
++static struct orc_scb *__orc_alloc_scb(struct orc_host * host)
+ {
+- ORC_SCB *pTmpScb;
+- UCHAR Ch;
+- ULONG idx;
+- UCHAR index;
+- UCHAR i;
++ u8 channel;
++ unsigned long idx;
++ u8 index;
++ u8 i;
+
+- Ch = hcsp->HCS_Index;
++ channel = host->index;
+ for (i = 0; i < 8; i++) {
+ for (index = 0; index < 32; index++) {
+- if ((hcsp->BitAllocFlag[Ch][i] >> index) & 0x01) {
+- hcsp->BitAllocFlag[Ch][i] &= ~(1 << index);
++ if ((host->allocation_map[channel][i] >> index) & 0x01) {
++ host->allocation_map[channel][i] &= ~(1 << index);
+ break;
+ }
+ }
+ idx = index + 32 * i;
+- pTmpScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (idx * sizeof(ORC_SCB)));
+- return (pTmpScb);
++ /* Translate the index to a structure instance */
++ return (struct orc_scb *) ((unsigned long) host->scb_virt + (idx * sizeof(struct orc_scb)));
+ }
+- return (NULL);
++ return NULL;
+ }
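++
++/*
++ * Illustrative sketch, not part of this patch: note that the scan above
++ * returns unconditionally at the end of the first outer iteration, so
++ * only allocation_map[channel][0] (32 SCBs) is ever examined, and when
++ * that word has no free bit a pointer whose allocation bit was never
++ * claimed is returned instead of NULL. A scan that covers all eight
++ * words might look like:
++ */
++static struct orc_scb *orc_scan_allocation_map(struct orc_host *host)
++{
++	u8 channel = host->index;
++	u8 i, index;
++
++	for (i = 0; i < 8; i++) {
++		for (index = 0; index < 32; index++) {
++			if ((host->allocation_map[channel][i] >> index) & 0x01) {
++				host->allocation_map[channel][i] &= ~(1 << index);
++				/* Return only once a free bit was claimed */
++				return &host->scb_virt[index + 32 * i];
++			}
++		}
++	}
++	return NULL;	/* All SCBs are in use */
++}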
+
+-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp)
++/**
++ * orc_alloc_scb - allocate an SCB
++ * @host: host to allocate from
++ *
++ * Allocate an SCB and return a pointer to the SCB object. NULL
++ * is returned if no SCB is free.
++ */
++
++static struct orc_scb *orc_alloc_scb(struct orc_host * host)
+ {
+- ORC_SCB *pTmpScb;
+- ULONG flags;
++ struct orc_scb *scb;
++ unsigned long flags;
+
+- spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+- pTmpScb = __orc_alloc_scb(hcsp);
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+- return (pTmpScb);
++ spin_lock_irqsave(&host->allocation_lock, flags);
++ scb = __orc_alloc_scb(host);
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
++ return scb;
+ }
+
++/**
++ * orc_release_scb - release an SCB
++ * @host: host owning the SCB
++ * @scb: SCB that is now free
++ *
++ * Called to return a completed SCB to the allocation pool. Before
++ * calling the SCB must be out of use on both the host and the HA.
++ */
+
+-/***************************************************************************/
+-static void orc_release_scb(ORC_HCS * hcsp, ORC_SCB * scbp)
++static void orc_release_scb(struct orc_host *host, struct orc_scb *scb)
+ {
+- ULONG flags;
+- UCHAR Index;
+- UCHAR i;
+- UCHAR Ch;
+-
+- spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+- Ch = hcsp->HCS_Index;
+- Index = scbp->SCB_ScbIdx;
+- i = Index / 32;
+- Index %= 32;
+- hcsp->BitAllocFlag[Ch][i] |= (1 << Index);
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++ unsigned long flags;
++ u8 index, i, channel;
++
++ spin_lock_irqsave(&(host->allocation_lock), flags);
++ channel = host->index; /* Channel */
++ index = scb->scbidx;
++ i = index / 32;
++ index %= 32;
++ host->allocation_map[channel][i] |= (1 << index);
++ spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ }
+
+-/*****************************************************************************
+- Function name : abort_SCB
+- Description : Abort a queued command.
+- (commands that are on the bus can't be aborted easily)
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int abort_SCB(ORC_HCS * hcsp, ORC_SCB * pScb)
++/**
++ * orchid_abort_scb - abort a command
++ * @host: host owning the command
++ * @scb: SCB to abort
++ *
++ * Abort a queued command that has been passed to the firmware layer
++ * if possible. This is all handled by the firmware: we ask the firmware
++ * to abort the command and it either does so or fails.
++ */
++
++static int orchid_abort_scb(struct orc_host * host, struct orc_scb * scb)
+ {
+- unsigned char bData, bStatus;
++ unsigned char data, status;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_ABORT_SCB); /* Write command */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(ORC_CMD_ABORT_SCB, host->base + ORC_HDATA); /* Write command */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- ORC_WR(hcsp->HCS_Base + ORC_HDATA, pScb->SCB_ScbIdx); /* Write address */
+- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */
++ outb(scb->scbidx, host->base + ORC_HDATA); /* Write address */
++ outb(HDO, host->base + ORC_HCTRL);
++ if (wait_HDO_off(host) == 0) /* Wait HDO off */
+ return 0;
+
+- if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */
++ if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */
+ return 0;
+- bStatus = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+- ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */
++ status = inb(host->base + ORC_HDATA);
++ outb(data, host->base + ORC_HSTUS); /* Clear HDI */
+
+- if (bStatus == 1) /* 0 - Successfully */
++ if (status == 1) /* 0 - Successfully */
+ return 0; /* 1 - Fail */
+ return 1;
+ }
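++
++/*
++ * Illustrative sketch, not part of this patch: the abort above follows
++ * the controller's generic mailbox handshake: write a byte to HDATA,
++ * raise HDO, then wait for the firmware to lower HDO again. Factored
++ * out, the two writes above could share a helper such as:
++ */
++static int orc_mbox_write(struct orc_host *host, u8 value)
++{
++	outb(value, host->base + ORC_HDATA);	/* Byte for the firmware */
++	outb(HDO, host->base + ORC_HCTRL);	/* Ask the firmware to fetch it */
++	return wait_HDO_off(host);		/* 0 on timeout */
++}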
+
+-/*****************************************************************************
+- Function name : inia100_abort
+- Description : Abort a queued command.
+- (commands that are on the bus can't be aborted easily)
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_abort_srb(ORC_HCS * hcsp, struct scsi_cmnd *SCpnt)
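++/**
++ * inia100_abort_cmd - abort a queued command
++ * @host: host owning the command
++ * @cmd: command to abort
++ *
++ * Abort a queued command if possible. Commands that are already on
++ * the bus can't be aborted easily; untagged commands are refused and
++ * tagged ones are passed to the firmware for abortion.
++ */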
++static int inia100_abort_cmd(struct orc_host * host, struct scsi_cmnd *cmd)
+ {
+- ESCB *pVirEscb;
+- ORC_SCB *pVirScb;
+- UCHAR i;
+- ULONG flags;
+-
+- spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+-
+- pVirScb = hcsp->HCS_virScbArray;
+-
+- for (i = 0; i < ORC_MAXQUEUE; i++, pVirScb++) {
+- pVirEscb = pVirScb->SCB_EScb;
+- if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt)) {
+- if (pVirScb->SCB_TagMsg == 0) {
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+- return FAILED;
++ struct orc_extended_scb *escb;
++ struct orc_scb *scb;
++ u8 i;
++ unsigned long flags;
++
++ spin_lock_irqsave(&(host->allocation_lock), flags);
++
++ scb = host->scb_virt;
++
++ /* Walk the queue until we find the SCB that belongs to the command
++ block. This isn't a performance-critical path, so a walk in the park
++ here does no harm */
++
++ for (i = 0; i < ORC_MAXQUEUE; i++, scb++) {
++ escb = scb->escb;
++ if (scb->status && escb->srb == cmd) {
++ if (scb->tag_msg == 0) {
++ goto out;
+ } else {
+- if (abort_SCB(hcsp, pVirScb)) {
+- pVirEscb->SCB_Srb = NULL;
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++ /* Issue an ABORT to the firmware */
++ if (orchid_abort_scb(host, scb)) {
++ escb->srb = NULL;
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
+ return SUCCESS;
+- } else {
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+- return FAILED;
+- }
++ } else
++ goto out;
+ }
+ }
+ }
+- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++out:
++ spin_unlock_irqrestore(&host->allocation_lock, flags);
+ return FAILED;
+ }
+
+-/***********************************************************************
+- Routine Description:
+- This is the interrupt service routine for the Orchid SCSI adapter.
+- It reads the interrupt register to determine if the adapter is indeed
+- the source of the interrupt and clears the interrupt at the device.
+- Arguments:
+- HwDeviceExtension - HBA miniport driver's adapter data storage
+- Return Value:
+-***********************************************************************/
+-static void orc_interrupt(
+- ORC_HCS * hcsp
+-)
++/**
++ * orc_interrupt - IRQ processing
++ * @host: Host causing the interrupt
++ *
++ * This function is called from the IRQ handler and protected
++ * by the host lock. While the controller reports that there are
++ * scb's for processing we pull them off the controller, turn the
++ * index into a host address pointer to the scb and call the scb
++ * handler.
++ *
++ * Returns IRQ_HANDLED if any SCBs were processed, IRQ_NONE otherwise
++ */
++
++static irqreturn_t orc_interrupt(struct orc_host * host)
+ {
+- BYTE bScbIdx;
+- ORC_SCB *pScb;
++ u8 scb_index;
++ struct orc_scb *scb;
+
+- if (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT) == 0) {
+- return; // 0;
++ /* Check if we have an SCB queued for servicing */
++ if (inb(host->base + ORC_RQUEUECNT) == 0)
++ return IRQ_NONE;
+
+- }
+ do {
+- bScbIdx = ORC_RD(hcsp->HCS_Base, ORC_RQUEUE);
+-
+- pScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (ULONG) (sizeof(ORC_SCB) * bScbIdx));
+- pScb->SCB_Status = 0x0;
+-
+- inia100SCBPost((BYTE *) hcsp, (BYTE *) pScb);
+- } while (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT));
+- return; //1;
++ /* Get the SCB index of the SCB to service */
++ scb_index = inb(host->base + ORC_RQUEUE);
+
++ /* Translate it back to a host pointer */
++ scb = (struct orc_scb *) ((unsigned long) host->scb_virt + (unsigned long) (sizeof(struct orc_scb) * scb_index));
++ scb->status = 0x0;
++ /* Process the SCB */
++ inia100_scb_handler(host, scb);
++ } while (inb(host->base + ORC_RQUEUECNT));
++ return IRQ_HANDLED;
+ } /* End of I1060Interrupt() */
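++
++/*
++ * Aside, not part of this patch: because scb_virt is already typed as
++ * struct orc_scb *, the cast arithmetic above is equivalent to plain
++ * array indexing:
++ *
++ *	scb = &host->scb_virt[scb_index];
++ */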
+
+-/*****************************************************************************
+- Function name : inia100BuildSCB
+- Description :
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static void inia100BuildSCB(ORC_HCS * pHCB, ORC_SCB * pSCB, struct scsi_cmnd * SCpnt)
++/**
++ * inia100_build_scb - build SCB
++ * @host: host owning the control block
++ * @scb: control block to use
++ * @cmd: Mid layer command
++ *
++ * Build a host adapter control block from the SCSI mid layer command
++ */
++
++static void inia100_build_scb(struct orc_host * host, struct orc_scb * scb, struct scsi_cmnd * cmd)
+ { /* Create corresponding SCB */
+- struct scatterlist *pSrbSG;
+- ORC_SG *pSG; /* Pointer to SG list */
++ struct scatterlist *sg;
++ struct orc_sgent *sgent; /* Pointer to SG list */
+ int i, count_sg;
+- ESCB *pEScb;
++ struct orc_extended_scb *escb;
+
+- pEScb = pSCB->SCB_EScb;
+- pEScb->SCB_Srb = SCpnt;
+- pSG = NULL;
+-
+- pSCB->SCB_Opcode = ORC_EXECSCSI;
+- pSCB->SCB_Flags = SCF_NO_DCHK; /* Clear done bit */
+- pSCB->SCB_Target = SCpnt->device->id;
+- pSCB->SCB_Lun = SCpnt->device->lun;
+- pSCB->SCB_Reserved0 = 0;
+- pSCB->SCB_Reserved1 = 0;
+- pSCB->SCB_SGLen = 0;
+-
+- if ((pSCB->SCB_XferLen = (U32) SCpnt->request_bufflen)) {
+- pSG = (ORC_SG *) & pEScb->ESCB_SGList[0];
+- if (SCpnt->use_sg) {
+- pSrbSG = (struct scatterlist *) SCpnt->request_buffer;
+- count_sg = pci_map_sg(pHCB->pdev, pSrbSG, SCpnt->use_sg,
+- SCpnt->sc_data_direction);
+- pSCB->SCB_SGLen = (U32) (count_sg * 8);
+- for (i = 0; i < count_sg; i++, pSG++, pSrbSG++) {
+- pSG->SG_Ptr = (U32) sg_dma_address(pSrbSG);
+- pSG->SG_Len = (U32) sg_dma_len(pSrbSG);
+- }
+- } else if (SCpnt->request_bufflen != 0) {/* Non SG */
+- pSCB->SCB_SGLen = 0x8;
+- SCpnt->SCp.dma_handle = pci_map_single(pHCB->pdev,
+- SCpnt->request_buffer,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- pSG->SG_Ptr = (U32) SCpnt->SCp.dma_handle;
+- pSG->SG_Len = (U32) SCpnt->request_bufflen;
++ /* Links between the escb, scb and Linux scsi midlayer cmd */
++ escb = scb->escb;
++ escb->srb = cmd;
++ sgent = NULL;
++
++ /* Set up the SCB to do a SCSI command block */
++ scb->opcode = ORC_EXECSCSI;
++ scb->flags = SCF_NO_DCHK; /* Clear done bit */
++ scb->target = cmd->device->id;
++ scb->lun = cmd->device->lun;
++ scb->reserved0 = 0;
++ scb->reserved1 = 0;
++ scb->sg_len = 0;
++
++ scb->xferlen = (u32) scsi_bufflen(cmd);
++ sgent = (struct orc_sgent *) & escb->sglist[0];
++
++ count_sg = scsi_dma_map(cmd);
++ BUG_ON(count_sg < 0);
++
++ /* Build the scatter gather lists */
++ if (count_sg) {
++ scb->sg_len = (u32) (count_sg * 8);
++ scsi_for_each_sg(cmd, sg, count_sg, i) {
++ sgent->base = (u32) sg_dma_address(sg);
++ sgent->length = (u32) sg_dma_len(sg);
++ sgent++;
++ }
+ } else {
+- pSCB->SCB_SGLen = 0;
+- pSG->SG_Ptr = 0;
+- pSG->SG_Len = 0;
+- }
+- }
+- pSCB->SCB_SGPAddr = (U32) pSCB->SCB_SensePAddr;
+- pSCB->SCB_HaStat = 0;
+- pSCB->SCB_TaStat = 0;
+- pSCB->SCB_Link = 0xFF;
+- pSCB->SCB_SenseLen = SENSE_SIZE;
+- pSCB->SCB_CDBLen = SCpnt->cmd_len;
+- if (pSCB->SCB_CDBLen >= IMAX_CDB) {
+- printk("max cdb length= %x\b", SCpnt->cmd_len);
+- pSCB->SCB_CDBLen = IMAX_CDB;
+- }
+- pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW;
+- if (SCpnt->device->tagged_supported) { /* Tag Support */
+- pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */
++ scb->sg_len = 0;
++ sgent->base = 0;
++ sgent->length = 0;
++ }
++ scb->sg_addr = (u32) scb->sense_addr;
++ scb->hastat = 0;
++ scb->tastat = 0;
++ scb->link = 0xFF;
++ scb->sense_len = SENSE_SIZE;
++ scb->cdb_len = cmd->cmd_len;
++ if (scb->cdb_len >= IMAX_CDB) {
++ printk("max cdb length= %x\b", cmd->cmd_len);
++ scb->cdb_len = IMAX_CDB;
++ }
++ scb->ident = cmd->device->lun | DISC_ALLOW;
++ if (cmd->device->tagged_supported) { /* Tag Support */
++ scb->tag_msg = SIMPLE_QUEUE_TAG; /* Do simple tag only */
+ } else {
+- pSCB->SCB_TagMsg = 0; /* No tag support */
++ scb->tag_msg = 0; /* No tag support */
+ }
+- memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, pSCB->SCB_CDBLen);
+- return;
++ memcpy(&scb->cdb[0], &cmd->cmnd, scb->cdb_len);
+ }
+
+-/*****************************************************************************
+- Function name : inia100_queue
+- Description : Queue a command and setup interrupts for a free bus.
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int inia100_queue(struct scsi_cmnd * SCpnt, void (*done) (struct scsi_cmnd *))
++/**
++ * inia100_queue - queue command with host
++ * @cmd: Command block
++ * @done: Completion function
++ *
++ * Called by the mid layer to queue a command. Process the command
++ * block, build the host-specific SCB structures and, if there is room,
++ * queue the command down to the controller.
++ */
++
++static int inia100_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
+ {
+- register ORC_SCB *pSCB;
+- ORC_HCS *pHCB; /* Point to Host adapter control block */
++ struct orc_scb *scb;
++ struct orc_host *host; /* Point to Host adapter control block */
+
+- pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+- SCpnt->scsi_done = done;
++ host = (struct orc_host *) cmd->device->host->hostdata;
++ cmd->scsi_done = done;
+ /* Get free SCSI control block */
+- if ((pSCB = orc_alloc_scb(pHCB)) == NULL)
++ if ((scb = orc_alloc_scb(host)) == NULL)
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+- inia100BuildSCB(pHCB, pSCB, SCpnt);
+- orc_exec_scb(pHCB, pSCB); /* Start execute SCB */
+-
+- return (0);
++ inia100_build_scb(host, scb, cmd);
++ orc_exec_scb(host, scb); /* Start execute SCB */
++ return 0;
+ }
+
+ /*****************************************************************************
+ Function name : inia100_abort
+ Description : Abort a queued command.
+ (commands that are on the bus can't be aborted easily)
+- Input : pHCB - Pointer to host adapter structure
++ Input : host - Pointer to host adapter structure
+ Output : None.
+ Return : pSRB - Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_abort(struct scsi_cmnd * SCpnt)
++static int inia100_abort(struct scsi_cmnd * cmd)
+ {
+- ORC_HCS *hcsp;
++ struct orc_host *host;
+
+- hcsp = (ORC_HCS *) SCpnt->device->host->hostdata;
+- return orc_abort_srb(hcsp, SCpnt);
++ host = (struct orc_host *) cmd->device->host->hostdata;
++ return inia100_abort_cmd(host, cmd);
+ }
+
+ /*****************************************************************************
+ Function name : inia100_reset
+ Description : Reset registers, reset a hanging bus and
+ kill active and disconnected commands for target w/o soft reset
+- Input : pHCB - Pointer to host adapter structure
++ Input : host - Pointer to host adapter structure
+ Output : None.
+ Return : pSRB - Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_bus_reset(struct scsi_cmnd * SCpnt)
++static int inia100_bus_reset(struct scsi_cmnd * cmd)
+ { /* I need Host Control Block Information */
+- ORC_HCS *pHCB;
+- pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+- return orc_reset_scsi_bus(pHCB);
++ struct orc_host *host;
++ host = (struct orc_host *) cmd->device->host->hostdata;
++ return orc_reset_scsi_bus(host);
+ }
+
+ /*****************************************************************************
+ Function name : inia100_device_reset
+ Description : Reset the device
+- Input : pHCB - Pointer to host adapter structure
++ Input : host - Pointer to host adapter structure
+ Output : None.
+ Return : pSRB - Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_device_reset(struct scsi_cmnd * SCpnt)
++static int inia100_device_reset(struct scsi_cmnd * cmd)
+ { /* I need Host Control Block Information */
+- ORC_HCS *pHCB;
+- pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+- return orc_device_reset(pHCB, SCpnt, scmd_id(SCpnt));
++ struct orc_host *host;
++ host = (struct orc_host *) cmd->device->host->hostdata;
++ return orc_device_reset(host, cmd, scmd_id(cmd));
+
+ }
+
+-/*****************************************************************************
+- Function name : inia100SCBPost
+- Description : This is callback routine be called when orc finish one
+- SCSI command.
+- Input : pHCB - Pointer to host adapter control block.
+- pSCB - Pointer to SCSI control block.
+- Output : None.
+- Return : None.
+-*****************************************************************************/
+-static void inia100SCBPost(BYTE * pHcb, BYTE * pScb)
++/**
++ * inia100_scb_handler - interrupt callback
++ * @host: Host causing the interrupt
++ * @scb: SCB the controller returned as needing processing
++ *
++ * Perform completion processing on a control block. Do the conversions
++ * from host to SCSI midlayer error coding, save any sense data and
++ * then complete with the midlayer and recycle the SCB.
++ */
++
++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb)
+ {
+- struct scsi_cmnd *pSRB; /* Pointer to SCSI request block */
+- ORC_HCS *pHCB;
+- ORC_SCB *pSCB;
+- ESCB *pEScb;
+-
+- pHCB = (ORC_HCS *) pHcb;
+- pSCB = (ORC_SCB *) pScb;
+- pEScb = pSCB->SCB_EScb;
+- if ((pSRB = (struct scsi_cmnd *) pEScb->SCB_Srb) == 0) {
+- printk("inia100SCBPost: SRB pointer is empty\n");
+- orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */
++ struct scsi_cmnd *cmd; /* Pointer to SCSI request block */
++ struct orc_extended_scb *escb;
++
++ escb = scb->escb;
++ if ((cmd = (struct scsi_cmnd *) escb->srb) == NULL) {
++ printk(KERN_ERR "inia100_scb_handler: SRB pointer is empty\n");
++ orc_release_scb(host, scb); /* Release SCB for current channel */
+ return;
+ }
+- pEScb->SCB_Srb = NULL;
++ escb->srb = NULL;
+
+- switch (pSCB->SCB_HaStat) {
++ switch (scb->hastat) {
+ case 0x0:
+ case 0xa: /* Linked command complete without error and linked normally */
+ case 0xb: /* Linked command complete without error interrupt generated */
+- pSCB->SCB_HaStat = 0;
++ scb->hastat = 0;
+ break;
+
+ case 0x11: /* Selection time out-The initiator selection or target
+ reselection was not complete within the SCSI Time out period */
+- pSCB->SCB_HaStat = DID_TIME_OUT;
++ scb->hastat = DID_TIME_OUT;
+ break;
+
+ case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus
+ phase sequence was requested by the target. The host adapter
+ will generate a SCSI Reset Condition, notifying the host with
+ a SCRD interrupt */
+- pSCB->SCB_HaStat = DID_RESET;
++ scb->hastat = DID_RESET;
+ break;
+
+ case 0x1a: /* SCB Aborted. 07/21/98 */
+- pSCB->SCB_HaStat = DID_ABORT;
++ scb->hastat = DID_ABORT;
+ break;
+
+ case 0x12: /* Data overrun/underrun-The target attempted to transfer more data
+@@ -984,46 +1022,41 @@
+ case 0x16: /* Invalid CCB Operation Code-The first byte of the CCB was invalid. */
+
+ default:
+- printk("inia100: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat);
+- pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */
++ printk(KERN_DEBUG "inia100: %x %x\n", scb->hastat, scb->tastat);
++ scb->hastat = DID_ERROR; /* Couldn't find any better */
+ break;
+ }
+
+- if (pSCB->SCB_TaStat == 2) { /* Check condition */
+- memcpy((unsigned char *) &pSRB->sense_buffer[0],
+- (unsigned char *) &pEScb->ESCB_SGList[0], SENSE_SIZE);
+- }
+- pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16);
+-
+- if (pSRB->use_sg) {
+- pci_unmap_sg(pHCB->pdev,
+- (struct scatterlist *)pSRB->request_buffer,
+- pSRB->use_sg, pSRB->sc_data_direction);
+- } else if (pSRB->request_bufflen != 0) {
+- pci_unmap_single(pHCB->pdev, pSRB->SCp.dma_handle,
+- pSRB->request_bufflen,
+- pSRB->sc_data_direction);
+- }
+-
+- pSRB->scsi_done(pSRB); /* Notify system DONE */
+-
+- orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */
++ if (scb->tastat == 2) { /* Check condition */
++ memcpy((unsigned char *) &cmd->sense_buffer[0],
++ (unsigned char *) &escb->sglist[0], SENSE_SIZE);
++ }
++ cmd->result = scb->tastat | (scb->hastat << 16);
++ scsi_dma_unmap(cmd);
++ cmd->scsi_done(cmd); /* Notify system DONE */
++ orc_release_scb(host, scb); /* Release SCB for current channel */
+ }
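++
++/*
++ * Aside, not part of this patch: cmd->result packs the target status in
++ * the low byte and the host code (DID_*) in bits 16-23, so the midlayer
++ * can recover scb->hastat with host_byte(cmd->result).
++ */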
+
+-/*
+- * Interrupt handler (main routine of the driver)
++/**
++ * inia100_intr - interrupt handler
++ * @irqno: Interrupt value
++ * @devid: Host adapter
++ *
++ * Entry point for IRQ handling. All the real work is performed
++ * by orc_interrupt.
+ */
+ static irqreturn_t inia100_intr(int irqno, void *devid)
+ {
+- struct Scsi_Host *host = (struct Scsi_Host *)devid;
+- ORC_HCS *pHcb = (ORC_HCS *)host->hostdata;
++ struct Scsi_Host *shost = (struct Scsi_Host *)devid;
++ struct orc_host *host = (struct orc_host *)shost->hostdata;
+ unsigned long flags;
++ irqreturn_t res;
+
+- spin_lock_irqsave(host->host_lock, flags);
+- orc_interrupt(pHcb);
+- spin_unlock_irqrestore(host->host_lock, flags);
++ spin_lock_irqsave(shost->host_lock, flags);
++ res = orc_interrupt(host);
++ spin_unlock_irqrestore(shost->host_lock, flags);
+
+- return IRQ_HANDLED;
++ return res;
+ }
+
+ static struct scsi_host_template inia100_template = {
+@@ -1044,12 +1077,12 @@
+ const struct pci_device_id *id)
+ {
+ struct Scsi_Host *shost;
+- ORC_HCS *pHCB;
++ struct orc_host *host;
+ unsigned long port, bios;
+ int error = -ENODEV;
+ u32 sz;
+- unsigned long dBiosAdr;
+- char *pbBiosAdr;
++ unsigned long biosaddr;
++ char *bios_phys;
+
+ if (pci_enable_device(pdev))
+ goto out;
+@@ -1068,55 +1101,55 @@
+ }
+
+ /* <02> read from base address + 0x50 offset to get the bios value. */
+- bios = ORC_RDWORD(port, 0x50);
++ bios = inw(port + 0x50);
+
+
+- shost = scsi_host_alloc(&inia100_template, sizeof(ORC_HCS));
++ shost = scsi_host_alloc(&inia100_template, sizeof(struct orc_host));
+ if (!shost)
+ goto out_release_region;
+
+- pHCB = (ORC_HCS *)shost->hostdata;
+- pHCB->pdev = pdev;
+- pHCB->HCS_Base = port;
+- pHCB->HCS_BIOS = bios;
+- spin_lock_init(&pHCB->BitAllocFlagLock);
++ host = (struct orc_host *)shost->hostdata;
++ host->pdev = pdev;
++ host->base = port;
++ host->BIOScfg = bios;
++ spin_lock_init(&host->allocation_lock);
+
+ /* Get total memory needed for SCB */
+- sz = ORC_MAXQUEUE * sizeof(ORC_SCB);
+- pHCB->HCS_virScbArray = pci_alloc_consistent(pdev, sz,
+- &pHCB->HCS_physScbArray);
+- if (!pHCB->HCS_virScbArray) {
++ sz = ORC_MAXQUEUE * sizeof(struct orc_scb);
++ host->scb_virt = pci_alloc_consistent(pdev, sz,
++ &host->scb_phys);
++ if (!host->scb_virt) {
+ printk("inia100: SCB memory allocation error\n");
+ goto out_host_put;
+ }
+- memset(pHCB->HCS_virScbArray, 0, sz);
++ memset(host->scb_virt, 0, sz);
+
+ /* Get total memory needed for ESCB */
+- sz = ORC_MAXQUEUE * sizeof(ESCB);
+- pHCB->HCS_virEscbArray = pci_alloc_consistent(pdev, sz,
+- &pHCB->HCS_physEscbArray);
+- if (!pHCB->HCS_virEscbArray) {
++ sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb);
++ host->escb_virt = pci_alloc_consistent(pdev, sz,
++ &host->escb_phys);
++ if (!host->escb_virt) {
+ printk("inia100: ESCB memory allocation error\n");
+ goto out_free_scb_array;
+ }
+- memset(pHCB->HCS_virEscbArray, 0, sz);
++ memset(host->escb_virt, 0, sz);
+
+- dBiosAdr = pHCB->HCS_BIOS;
+- dBiosAdr = (dBiosAdr << 4);
+- pbBiosAdr = phys_to_virt(dBiosAdr);
+- if (init_orchid(pHCB)) { /* Initialize orchid chip */
++ biosaddr = host->BIOScfg;
++ biosaddr = (biosaddr << 4);
++ bios_phys = phys_to_virt(biosaddr);
++ if (init_orchid(host)) { /* Initialize orchid chip */
+ printk("inia100: initial orchid fail!!\n");
+ goto out_free_escb_array;
+ }
+
+- shost->io_port = pHCB->HCS_Base;
++ shost->io_port = host->base;
+ shost->n_io_port = 0xff;
+ shost->can_queue = ORC_MAXQUEUE;
+ shost->unique_id = shost->io_port;
+- shost->max_id = pHCB->HCS_MaxTar;
++ shost->max_id = host->max_targets;
+ shost->max_lun = 16;
+- shost->irq = pHCB->HCS_Intr = pdev->irq;
+- shost->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */
++ shost->irq = pdev->irq;
++ shost->this_id = host->scsi_id; /* Assign HCS index */
+ shost->sg_tablesize = TOTAL_SG_ENTRY;
+
+ /* Initial orc chip */
+@@ -1137,36 +1170,36 @@
+ scsi_scan_host(shost);
+ return 0;
+
+- out_free_irq:
++out_free_irq:
+ free_irq(shost->irq, shost);
+- out_free_escb_array:
+- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB),
+- pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray);
+- out_free_scb_array:
+- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB),
+- pHCB->HCS_virScbArray, pHCB->HCS_physScbArray);
+- out_host_put:
++out_free_escb_array:
++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb),
++ host->escb_virt, host->escb_phys);
++out_free_scb_array:
++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb),
++ host->scb_virt, host->scb_phys);
++out_host_put:
+ scsi_host_put(shost);
+- out_release_region:
++out_release_region:
+ release_region(port, 256);
+- out_disable_device:
++out_disable_device:
+ pci_disable_device(pdev);
+- out:
++out:
+ return error;
+ }
+
+ static void __devexit inia100_remove_one(struct pci_dev *pdev)
+ {
+ struct Scsi_Host *shost = pci_get_drvdata(pdev);
+- ORC_HCS *pHCB = (ORC_HCS *)shost->hostdata;
++ struct orc_host *host = (struct orc_host *)shost->hostdata;
+
+ scsi_remove_host(shost);
+
+ free_irq(shost->irq, shost);
+- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB),
+- pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray);
+- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB),
+- pHCB->HCS_virScbArray, pHCB->HCS_physScbArray);
++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb),
++ host->escb_virt, host->escb_phys);
++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb),
++ host->scb_virt, host->scb_phys);
+ release_region(shost->io_port, 256);
+
+ scsi_host_put(shost);
+diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.h linux-2.6.22-591/drivers/scsi/a100u2w.h
+--- linux-2.6.22-570/drivers/scsi/a100u2w.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/a100u2w.h 2007-12-21 15:36:12.000000000 -0500
+@@ -18,27 +18,6 @@
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions, and the following disclaimer,
+- * without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- * derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -50,30 +29,19 @@
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+- */
+-
+-/*
++ *
+ * Revision History:
+ * 06/18/98 HL, Initial production Version 1.02
+ * 12/19/98 bv, Use spinlocks for 2.1.95 and up
+ * 06/25/02 Doug Ledford <dledford@redhat.com>
+ * - This and the i60uscsi.h file are almost identical,
+ * merged them into a single header used by both .c files.
++ * 14/06/07 Alan Cox <alan@redhat.com>
++ * - Grand cleanup and Linuxisation
+ */
+
+ #define inia100_REVID "Initio INI-A100U2W SCSI device driver; Revision: 1.02d"
+
+-#define ULONG unsigned long
+-#define USHORT unsigned short
+-#define UCHAR unsigned char
+-#define BYTE unsigned char
+-#define WORD unsigned short
+-#define DWORD unsigned long
+-#define UBYTE unsigned char
+-#define UWORD unsigned short
+-#define UDWORD unsigned long
+-#define U32 u32
+-
+ #if 1
+ #define ORC_MAXQUEUE 245
+ #define ORC_MAXTAGS 64
+@@ -90,10 +58,10 @@
+ /************************************************************************/
+ /* Scatter-Gather Element Structure */
+ /************************************************************************/
+-typedef struct ORC_SG_Struc {
+- U32 SG_Ptr; /* Data Pointer */
+- U32 SG_Len; /* Data Length */
+-} ORC_SG;
++struct orc_sgent {
++ u32 base; /* Data Pointer */
++ u32 length; /* Data Length */
++};
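++
++/*
++ * Illustrative sketch, not part of this patch: the controller consumes
++ * scatter-gather entries as raw 8-byte records, which is why
++ * inia100_build_scb() sets scb->sg_len to count_sg * 8. A build-time
++ * guard for that layout assumption could read:
++ */
++static inline void orc_sgent_layout_check(void)
++{
++	BUILD_BUG_ON(sizeof(struct orc_sgent) != 8);
++}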
+
+ /* SCSI related definition */
+ #define DISC_NOT_ALLOW 0x80 /* Disconnect is not allowed */
+@@ -165,42 +133,45 @@
+ #define ORC_PRGMCTR1 0xE3 /* RISC program counter */
+ #define ORC_RISCRAM 0xEC /* RISC RAM data port 4 bytes */
+
+-typedef struct orc_extended_scb { /* Extended SCB */
+- ORC_SG ESCB_SGList[TOTAL_SG_ENTRY]; /*0 Start of SG list */
+- struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */
+-} ESCB;
++struct orc_extended_scb { /* Extended SCB */
++ struct orc_sgent sglist[TOTAL_SG_ENTRY]; /*0 Start of SG list */
++ struct scsi_cmnd *srb; /*50 SRB Pointer */
++};
+
+ /***********************************************************************
+ SCSI Control Block
++
++ 0x40 bytes long, the last 8 are user bytes
+ ************************************************************************/
+-typedef struct orc_scb { /* Scsi_Ctrl_Blk */
+- UBYTE SCB_Opcode; /*00 SCB command code&residual */
+- UBYTE SCB_Flags; /*01 SCB Flags */
+- UBYTE SCB_Target; /*02 Target Id */
+- UBYTE SCB_Lun; /*03 Lun */
+- U32 SCB_Reserved0; /*04 Reserved for ORCHID must 0 */
+- U32 SCB_XferLen; /*08 Data Transfer Length */
+- U32 SCB_Reserved1; /*0C Reserved for ORCHID must 0 */
+- U32 SCB_SGLen; /*10 SG list # * 8 */
+- U32 SCB_SGPAddr; /*14 SG List Buf physical Addr */
+- U32 SCB_SGPAddrHigh; /*18 SG Buffer high physical Addr */
+- UBYTE SCB_HaStat; /*1C Host Status */
+- UBYTE SCB_TaStat; /*1D Target Status */
+- UBYTE SCB_Status; /*1E SCB status */
+- UBYTE SCB_Link; /*1F Link pointer, default 0xFF */
+- UBYTE SCB_SenseLen; /*20 Sense Allocation Length */
+- UBYTE SCB_CDBLen; /*21 CDB Length */
+- UBYTE SCB_Ident; /*22 Identify */
+- UBYTE SCB_TagMsg; /*23 Tag Message */
+- UBYTE SCB_CDB[IMAX_CDB]; /*24 SCSI CDBs */
+- UBYTE SCB_ScbIdx; /*3C Index for this ORCSCB */
+- U32 SCB_SensePAddr; /*34 Sense Buffer physical Addr */
+-
+- ESCB *SCB_EScb; /*38 Extended SCB Pointer */
+-#ifndef ALPHA
+- UBYTE SCB_Reserved2[4]; /*3E Reserved for Driver use */
++struct orc_scb { /* Scsi_Ctrl_Blk */
++ u8 opcode; /*00 SCB command code&residual */
++ u8 flags; /*01 SCB Flags */
++ u8 target; /*02 Target Id */
++ u8 lun; /*03 Lun */
++ u32 reserved0; /*04 Reserved for ORCHID must 0 */
++ u32 xferlen; /*08 Data Transfer Length */
++ u32 reserved1; /*0C Reserved for ORCHID must 0 */
++ u32 sg_len; /*10 SG list # * 8 */
++ u32 sg_addr; /*14 SG List Buf physical Addr */
++ u32 sg_addrhigh; /*18 SG Buffer high physical Addr */
++ u8 hastat; /*1C Host Status */
++ u8 tastat; /*1D Target Status */
++ u8 status; /*1E SCB status */
++ u8 link; /*1F Link pointer, default 0xFF */
++ u8 sense_len; /*20 Sense Allocation Length */
++ u8 cdb_len; /*21 CDB Length */
++ u8 ident; /*22 Identify */
++ u8 tag_msg; /*23 Tag Message */
++ u8 cdb[IMAX_CDB]; /*24 SCSI CDBs */
++ u8 scbidx; /*3C Index for this ORCSCB */
++ u32 sense_addr; /*34 Sense Buffer physical Addr */
++
++ struct orc_extended_scb *escb; /*38 Extended SCB Pointer */
++ /* 64bit pointer or 32bit pointer + reserved ? */
++#ifndef CONFIG_64BIT
++ u8 reserved2[4]; /*3E Reserved for Driver use */
+ #endif
+-} ORC_SCB;
++};
+
+ /* Opcodes of ORCSCB_Opcode */
+ #define ORC_EXECSCSI 0x00 /* SCSI initiator command with residual */
+@@ -239,13 +210,13 @@
+ Target Device Control Structure
+ **********************************************************************/
+
+-typedef struct ORC_Tar_Ctrl_Struc {
+- UBYTE TCS_DrvDASD; /* 6 */
+- UBYTE TCS_DrvSCSI; /* 7 */
+- UBYTE TCS_DrvHead; /* 8 */
+- UWORD TCS_DrvFlags; /* 4 */
+- UBYTE TCS_DrvSector; /* 7 */
+-} ORC_TCS;
++struct orc_target {
++ u8 TCS_DrvDASD; /* 6 */
++ u8 TCS_DrvSCSI; /* 7 */
++ u8 TCS_DrvHead; /* 8 */
++ u16 TCS_DrvFlags; /* 4 */
++ u8 TCS_DrvSector; /* 7 */
++};
+
+ /* Bit Definition for TCF_DrvFlags */
+ #define TCS_DF_NODASD_SUPT 0x20 /* Suppress OS/2 DASD Mgr support */
+@@ -255,32 +226,23 @@
+ /***********************************************************************
+ Host Adapter Control Structure
+ ************************************************************************/
+-typedef struct ORC_Ha_Ctrl_Struc {
+- USHORT HCS_Base; /* 00 */
+- UBYTE HCS_Index; /* 02 */
+- UBYTE HCS_Intr; /* 04 */
+- UBYTE HCS_SCSI_ID; /* 06 H/A SCSI ID */
+- UBYTE HCS_BIOS; /* 07 BIOS configuration */
+-
+- UBYTE HCS_Flags; /* 0B */
+- UBYTE HCS_HAConfig1; /* 1B SCSI0MAXTags */
+- UBYTE HCS_MaxTar; /* 1B SCSI0MAXTags */
+-
+- USHORT HCS_Units; /* Number of units this adapter */
+- USHORT HCS_AFlags; /* Adapter info. defined flags */
+- ULONG HCS_Timeout; /* Adapter timeout value */
+- ORC_SCB *HCS_virScbArray; /* 28 Virtual Pointer to SCB array */
+- dma_addr_t HCS_physScbArray; /* Scb Physical address */
+- ESCB *HCS_virEscbArray; /* Virtual pointer to ESCB Scatter list */
+- dma_addr_t HCS_physEscbArray; /* scatter list Physical address */
+- UBYTE TargetFlag[16]; /* 30 target configuration, TCF_EN_TAG */
+- UBYTE MaximumTags[16]; /* 40 ORC_MAX_SCBS */
+- UBYTE ActiveTags[16][16]; /* 50 */
+- ORC_TCS HCS_Tcs[16]; /* 28 */
+- U32 BitAllocFlag[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */
+- spinlock_t BitAllocFlagLock;
++struct orc_host {
++ unsigned long base; /* Base address */
++ u8 index; /* Index (Channel)*/
++ u8 scsi_id; /* H/A SCSI ID */
++ u8 BIOScfg; /* BIOS configuration */
++ u8 flags;
++ u8 max_targets; /* SCSI0MAXTags */
++ struct orc_scb *scb_virt; /* Virtual Pointer to SCB array */
++ dma_addr_t scb_phys; /* Scb Physical address */
++ struct orc_extended_scb *escb_virt; /* Virtual pointer to ESCB Scatter list */
++ dma_addr_t escb_phys; /* scatter list Physical address */
++ u8 target_flag[16]; /* target configuration, TCF_EN_TAG */
++ u8 max_tags[16]; /* ORC_MAX_SCBS */
++ u32 allocation_map[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */
++ spinlock_t allocation_lock;
+ struct pci_dev *pdev;
+-} ORC_HCS;
++};
+
+ /* Bit Definition for HCS_Flags */
+
+@@ -301,79 +263,79 @@
+ #define HCS_AF_DISABLE_RESET 0x10 /* Adapter disable reset */
+ #define HCS_AF_DISABLE_ADPT 0x80 /* Adapter disable */
+
+-typedef struct _NVRAM {
++struct orc_nvram {
+ /*----------header ---------------*/
+- UCHAR SubVendorID0; /* 00 - Sub Vendor ID */
+- UCHAR SubVendorID1; /* 00 - Sub Vendor ID */
+- UCHAR SubSysID0; /* 02 - Sub System ID */
+- UCHAR SubSysID1; /* 02 - Sub System ID */
+- UCHAR SubClass; /* 04 - Sub Class */
+- UCHAR VendorID0; /* 05 - Vendor ID */
+- UCHAR VendorID1; /* 05 - Vendor ID */
+- UCHAR DeviceID0; /* 07 - Device ID */
+- UCHAR DeviceID1; /* 07 - Device ID */
+- UCHAR Reserved0[2]; /* 09 - Reserved */
+- UCHAR Revision; /* 0B - Revision of data structure */
++ u8 SubVendorID0; /* 00 - Sub Vendor ID */
++ u8 SubVendorID1; /* 00 - Sub Vendor ID */
++ u8 SubSysID0; /* 02 - Sub System ID */
++ u8 SubSysID1; /* 02 - Sub System ID */
++ u8 SubClass; /* 04 - Sub Class */
++ u8 VendorID0; /* 05 - Vendor ID */
++ u8 VendorID1; /* 05 - Vendor ID */
++ u8 DeviceID0; /* 07 - Device ID */
++ u8 DeviceID1; /* 07 - Device ID */
++ u8 Reserved0[2]; /* 09 - Reserved */
++ u8 revision; /* 0B - revision of data structure */
+ /* ----Host Adapter Structure ---- */
+- UCHAR NumOfCh; /* 0C - Number of SCSI channel */
+- UCHAR BIOSConfig1; /* 0D - BIOS configuration 1 */
+- UCHAR BIOSConfig2; /* 0E - BIOS boot channel&target ID */
+- UCHAR BIOSConfig3; /* 0F - BIOS configuration 3 */
++ u8 NumOfCh; /* 0C - Number of SCSI channel */
++ u8 BIOSConfig1; /* 0D - BIOS configuration 1 */
++ u8 BIOSConfig2; /* 0E - BIOS boot channel&target ID */
++ u8 BIOSConfig3; /* 0F - BIOS configuration 3 */
+ /* ----SCSI channel Structure ---- */
+ /* from "CTRL-I SCSI Host Adapter SetUp menu " */
+- UCHAR SCSI0Id; /* 10 - Channel 0 SCSI ID */
+- UCHAR SCSI0Config; /* 11 - Channel 0 SCSI configuration */
+- UCHAR SCSI0MaxTags; /* 12 - Channel 0 Maximum tags */
+- UCHAR SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */
+- UCHAR ReservedforChannel0[2]; /* 14 - Reserved */
++ u8 scsi_id; /* 10 - Channel 0 SCSI ID */
++ u8 SCSI0Config; /* 11 - Channel 0 SCSI configuration */
++ u8 SCSI0MaxTags; /* 12 - Channel 0 Maximum tags */
++ u8 SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */
++ u8 ReservedforChannel0[2]; /* 14 - Reserved */
+
+ /* ----SCSI target Structure ---- */
+ /* from "CTRL-I SCSI device SetUp menu " */
+- UCHAR Target00Config; /* 16 - Channel 0 Target 0 config */
+- UCHAR Target01Config; /* 17 - Channel 0 Target 1 config */
+- UCHAR Target02Config; /* 18 - Channel 0 Target 2 config */
+- UCHAR Target03Config; /* 19 - Channel 0 Target 3 config */
+- UCHAR Target04Config; /* 1A - Channel 0 Target 4 config */
+- UCHAR Target05Config; /* 1B - Channel 0 Target 5 config */
+- UCHAR Target06Config; /* 1C - Channel 0 Target 6 config */
+- UCHAR Target07Config; /* 1D - Channel 0 Target 7 config */
+- UCHAR Target08Config; /* 1E - Channel 0 Target 8 config */
+- UCHAR Target09Config; /* 1F - Channel 0 Target 9 config */
+- UCHAR Target0AConfig; /* 20 - Channel 0 Target A config */
+- UCHAR Target0BConfig; /* 21 - Channel 0 Target B config */
+- UCHAR Target0CConfig; /* 22 - Channel 0 Target C config */
+- UCHAR Target0DConfig; /* 23 - Channel 0 Target D config */
+- UCHAR Target0EConfig; /* 24 - Channel 0 Target E config */
+- UCHAR Target0FConfig; /* 25 - Channel 0 Target F config */
+-
+- UCHAR SCSI1Id; /* 26 - Channel 1 SCSI ID */
+- UCHAR SCSI1Config; /* 27 - Channel 1 SCSI configuration */
+- UCHAR SCSI1MaxTags; /* 28 - Channel 1 Maximum tags */
+- UCHAR SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */
+- UCHAR ReservedforChannel1[2]; /* 2A - Reserved */
++ u8 Target00Config; /* 16 - Channel 0 Target 0 config */
++ u8 Target01Config; /* 17 - Channel 0 Target 1 config */
++ u8 Target02Config; /* 18 - Channel 0 Target 2 config */
++ u8 Target03Config; /* 19 - Channel 0 Target 3 config */
++ u8 Target04Config; /* 1A - Channel 0 Target 4 config */
++ u8 Target05Config; /* 1B - Channel 0 Target 5 config */
++ u8 Target06Config; /* 1C - Channel 0 Target 6 config */
++ u8 Target07Config; /* 1D - Channel 0 Target 7 config */
++ u8 Target08Config; /* 1E - Channel 0 Target 8 config */
++ u8 Target09Config; /* 1F - Channel 0 Target 9 config */
++ u8 Target0AConfig; /* 20 - Channel 0 Target A config */
++ u8 Target0BConfig; /* 21 - Channel 0 Target B config */
++ u8 Target0CConfig; /* 22 - Channel 0 Target C config */
++ u8 Target0DConfig; /* 23 - Channel 0 Target D config */
++ u8 Target0EConfig; /* 24 - Channel 0 Target E config */
++ u8 Target0FConfig; /* 25 - Channel 0 Target F config */
++
++ u8 SCSI1Id; /* 26 - Channel 1 SCSI ID */
++ u8 SCSI1Config; /* 27 - Channel 1 SCSI configuration */
++ u8 SCSI1MaxTags; /* 28 - Channel 1 Maximum tags */
++ u8 SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */
++ u8 ReservedforChannel1[2]; /* 2A - Reserved */
+
+ /* ----SCSI target Structure ---- */
+ /* from "CTRL-I SCSI device SetUp menu " */
+- UCHAR Target10Config; /* 2C - Channel 1 Target 0 config */
+- UCHAR Target11Config; /* 2D - Channel 1 Target 1 config */
+- UCHAR Target12Config; /* 2E - Channel 1 Target 2 config */
+- UCHAR Target13Config; /* 2F - Channel 1 Target 3 config */
+- UCHAR Target14Config; /* 30 - Channel 1 Target 4 config */
+- UCHAR Target15Config; /* 31 - Channel 1 Target 5 config */
+- UCHAR Target16Config; /* 32 - Channel 1 Target 6 config */
+- UCHAR Target17Config; /* 33 - Channel 1 Target 7 config */
+- UCHAR Target18Config; /* 34 - Channel 1 Target 8 config */
+- UCHAR Target19Config; /* 35 - Channel 1 Target 9 config */
+- UCHAR Target1AConfig; /* 36 - Channel 1 Target A config */
+- UCHAR Target1BConfig; /* 37 - Channel 1 Target B config */
+- UCHAR Target1CConfig; /* 38 - Channel 1 Target C config */
+- UCHAR Target1DConfig; /* 39 - Channel 1 Target D config */
+- UCHAR Target1EConfig; /* 3A - Channel 1 Target E config */
+- UCHAR Target1FConfig; /* 3B - Channel 1 Target F config */
+- UCHAR reserved[3]; /* 3C - Reserved */
++ u8 Target10Config; /* 2C - Channel 1 Target 0 config */
++ u8 Target11Config; /* 2D - Channel 1 Target 1 config */
++ u8 Target12Config; /* 2E - Channel 1 Target 2 config */
++ u8 Target13Config; /* 2F - Channel 1 Target 3 config */
++ u8 Target14Config; /* 30 - Channel 1 Target 4 config */
++ u8 Target15Config; /* 31 - Channel 1 Target 5 config */
++ u8 Target16Config; /* 32 - Channel 1 Target 6 config */
++ u8 Target17Config; /* 33 - Channel 1 Target 7 config */
++ u8 Target18Config; /* 34 - Channel 1 Target 8 config */
++ u8 Target19Config; /* 35 - Channel 1 Target 9 config */
++ u8 Target1AConfig; /* 36 - Channel 1 Target A config */
++ u8 Target1BConfig; /* 37 - Channel 1 Target B config */
++ u8 Target1CConfig; /* 38 - Channel 1 Target C config */
++ u8 Target1DConfig; /* 39 - Channel 1 Target D config */
++ u8 Target1EConfig; /* 3A - Channel 1 Target E config */
++ u8 Target1FConfig; /* 3B - Channel 1 Target F config */
++ u8 reserved[3]; /* 3C - Reserved */
+ /* ---------- CheckSum ---------- */
+- UCHAR CheckSum; /* 3F - Checksum of NVRam */
+-} NVRAM, *PNVRAM;
++ u8 CheckSum; /* 3F - Checksum of NVRam */
++};
+
+ /* Bios Configuration for nvram->BIOSConfig1 */
+ #define NBC_BIOSENABLE 0x01 /* BIOS enable */
+@@ -407,10 +369,3 @@
+ #define NCC_RESET_TIME 0x0A /* SCSI RESET recovering time */
+ #define NTC_DEFAULT (NTC_1GIGA | NTC_NO_WIDESYNC | NTC_DISC_ENABLE)
+
+-#define ORC_RD(x,y) (UCHAR)(inb( (int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-#define ORC_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-#define ORC_RDLONG(x,y) (long)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-
+-#define ORC_WR( adr,data) outb( (UCHAR)(data), (int)(adr))
+-#define ORC_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr))
+-#define ORC_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr))
+diff -Nurb linux-2.6.22-570/drivers/scsi/a4000t.c linux-2.6.22-591/drivers/scsi/a4000t.c
+--- linux-2.6.22-570/drivers/scsi/a4000t.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/a4000t.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,143 @@
++/*
++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
++ * Amiga Technologies A4000T SCSI controller.
++ *
++ * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
++ * plus modifications of the 53c7xx.c driver to support the Amiga.
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/amigahw.h>
++#include <asm/amigaints.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++
++static struct scsi_host_template a4000t_scsi_driver_template = {
++ .name = "A4000T builtin SCSI",
++ .proc_name = "A4000t",
++ .this_id = 7,
++ .module = THIS_MODULE,
++};
++
++static struct platform_device *a4000t_scsi_device;
++
++#define A4000T_SCSI_ADDR 0xdd0040
++
++static int __devinit a4000t_probe(struct device *dev)
++{
++ struct Scsi_Host * host = NULL;
++ struct NCR_700_Host_Parameters *hostdata;
++
++ if (!(MACH_IS_AMIGA && AMIGAHW_PRESENT(A4000_SCSI)))
++ goto out;
++
++ if (!request_mem_region(A4000T_SCSI_ADDR, 0x1000,
++ "A4000T builtin SCSI"))
++ goto out;
++
++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++ if (hostdata == NULL) {
++ printk(KERN_ERR "a4000t-scsi: Failed to allocate host data\n");
++ goto out_release;
++ }
++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++ /* Fill in the required pieces of hostdata */
++ hostdata->base = (void __iomem *)ZTWO_VADDR(A4000T_SCSI_ADDR);
++ hostdata->clock = 50;
++ hostdata->chip710 = 1;
++ hostdata->dmode_extra = DMODE_FC2;
++ hostdata->dcntl_extra = EA_710;
++
++ /* and register the chip */
++ host = NCR_700_detect(&a4000t_scsi_driver_template, hostdata, dev);
++ if (!host) {
++ printk(KERN_ERR "a4000t-scsi: No host detected; "
++ "board configuration problem?\n");
++ goto out_free;
++ }
++
++ host->this_id = 7;
++ host->base = A4000T_SCSI_ADDR;
++ host->irq = IRQ_AMIGA_PORTS;
++
++ if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "a4000t-scsi",
++ host)) {
++ printk(KERN_ERR "a4000t-scsi: request_irq failed\n");
++ goto out_put_host;
++ }
++
++ scsi_scan_host(host);
++
++ return 0;
++
++ out_put_host:
++ scsi_host_put(host);
++ out_free:
++ kfree(hostdata);
++ out_release:
++ release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++ out:
++ return -ENODEV;
++}
++
++static __devexit int a4000t_device_remove(struct device *dev)
++{
++ struct Scsi_Host *host = dev_to_shost(dev);
++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++ scsi_remove_host(host);
++
++ NCR_700_release(host);
++ kfree(hostdata);
++ free_irq(host->irq, host);
++ release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++
++ return 0;
++}
++
++static struct device_driver a4000t_scsi_driver = {
++ .name = "a4000t-scsi",
++ .bus = &platform_bus_type,
++ .probe = a4000t_probe,
++ .remove = __devexit_p(a4000t_device_remove),
++};
++
++static int __init a4000t_scsi_init(void)
++{
++ int err;
++
++ err = driver_register(&a4000t_scsi_driver);
++ if (err)
++ return err;
++
++ a4000t_scsi_device = platform_device_register_simple("a4000t-scsi",
++ -1, NULL, 0);
++ if (IS_ERR(a4000t_scsi_device)) {
++ driver_unregister(&a4000t_scsi_driver);
++ return PTR_ERR(a4000t_scsi_device);
++ }
++
++ return err;
++}
++
++static void __exit a4000t_scsi_exit(void)
++{
++ platform_device_unregister(a4000t_scsi_device);
++ driver_unregister(&a4000t_scsi_driver);
++}
++
++module_init(a4000t_scsi_init);
++module_exit(a4000t_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aachba.c linux-2.6.22-591/drivers/scsi/aacraid/aachba.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/aachba.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aacraid/aachba.c 2007-12-21 15:36:12.000000000 -0500
+@@ -169,6 +169,18 @@
+ module_param(acbsize, int, S_IRUGO|S_IWUSR);
+ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
+
++int update_interval = 30 * 60;
++module_param(update_interval, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter.");
++
++int check_interval = 24 * 60 * 60;
++module_param(check_interval, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks.");
++
++int check_reset = 1;
++module_param(check_reset, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter.");
++
+ int expose_physicals = -1;
+ module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
+ MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on");
+@@ -312,11 +324,10 @@
+
+ if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS)
+ maximum_num_containers = MAXIMUM_NUM_CONTAINERS;
+- fsa_dev_ptr = kmalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers,
++ fsa_dev_ptr = kzalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers,
+ GFP_KERNEL);
+ if (!fsa_dev_ptr)
+ return -ENOMEM;
+- memset(fsa_dev_ptr, 0, sizeof(*fsa_dev_ptr) * maximum_num_containers);
+
+ dev->fsa_dev = fsa_dev_ptr;
+ dev->maximum_num_containers = maximum_num_containers;
+@@ -344,20 +355,15 @@
+ {
+ void *buf;
+ int transfer_len;
+- struct scatterlist *sg = scsicmd->request_buffer;
++ struct scatterlist *sg = scsi_sglist(scsicmd);
+
+- if (scsicmd->use_sg) {
+ buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ transfer_len = min(sg->length, len + offset);
+- } else {
+- buf = scsicmd->request_buffer;
+- transfer_len = min(scsicmd->request_bufflen, len + offset);
+- }
++
+ transfer_len -= offset;
+ if (buf && transfer_len > 0)
+ memcpy(buf + offset, data, transfer_len);
+
+- if (scsicmd->use_sg)
+ kunmap_atomic(buf - sg->offset, KM_IRQ0);
+
+ }
+@@ -451,7 +457,7 @@
+ {
+ struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
+
+- if (fsa_dev_ptr[scmd_id(scsicmd)].valid)
++ if ((fsa_dev_ptr[scmd_id(scsicmd)].valid & 1))
+ return aac_scsi_cmd(scsicmd);
+
+ scsicmd->result = DID_NO_CONNECT << 16;
+@@ -459,18 +465,18 @@
+ return 0;
+ }
+
+-static int _aac_probe_container2(void * context, struct fib * fibptr)
++static void _aac_probe_container2(void * context, struct fib * fibptr)
+ {
+ struct fsa_dev_info *fsa_dev_ptr;
+ int (*callback)(struct scsi_cmnd *);
+ struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context;
+
+- if (!aac_valid_context(scsicmd, fibptr))
+- return 0;
+
+- fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
++ if (!aac_valid_context(scsicmd, fibptr))
++ return;
+
+ scsicmd->SCp.Status = 0;
++ fsa_dev_ptr = fibptr->dev->fsa_dev;
+ if (fsa_dev_ptr) {
+ struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr);
+ fsa_dev_ptr += scmd_id(scsicmd);
+@@ -493,10 +499,11 @@
+ aac_fib_free(fibptr);
+ callback = (int (*)(struct scsi_cmnd *))(scsicmd->SCp.ptr);
+ scsicmd->SCp.ptr = NULL;
+- return (*callback)(scsicmd);
++ (*callback)(scsicmd);
++ return;
+ }
+
+-static int _aac_probe_container1(void * context, struct fib * fibptr)
++static void _aac_probe_container1(void * context, struct fib * fibptr)
+ {
+ struct scsi_cmnd * scsicmd;
+ struct aac_mount * dresp;
+@@ -506,13 +513,14 @@
+ dresp = (struct aac_mount *) fib_data(fibptr);
+ dresp->mnt[0].capacityhigh = 0;
+ if ((le32_to_cpu(dresp->status) != ST_OK) ||
+- (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE))
+- return _aac_probe_container2(context, fibptr);
++ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
++ _aac_probe_container2(context, fibptr);
++ return;
++ }
+ scsicmd = (struct scsi_cmnd *) context;
+- scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
+
+ if (!aac_valid_context(scsicmd, fibptr))
+- return 0;
++ return;
+
+ aac_fib_init(fibptr);
+
+@@ -527,21 +535,18 @@
+ sizeof(struct aac_query_mount),
+ FsaNormal,
+ 0, 1,
+- (fib_callback) _aac_probe_container2,
++ _aac_probe_container2,
+ (void *) scsicmd);
+ /*
+ * Check that the command queued to the controller
+ */
+- if (status == -EINPROGRESS) {
++ if (status == -EINPROGRESS)
+ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE;
+- return 0;
+- }
+- if (status < 0) {
++ else if (status < 0) {
+ /* Inherit results from VM_NameServe, if any */
+ dresp->status = cpu_to_le32(ST_OK);
+- return _aac_probe_container2(context, fibptr);
++ _aac_probe_container2(context, fibptr);
+ }
+- return 0;
+ }
+
+ static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(struct scsi_cmnd *))
+@@ -566,7 +571,7 @@
+ sizeof(struct aac_query_mount),
+ FsaNormal,
+ 0, 1,
+- (fib_callback) _aac_probe_container1,
++ _aac_probe_container1,
+ (void *) scsicmd);
+ /*
+ * Check that the command queued to the controller
+@@ -620,7 +625,7 @@
+ return -ENOMEM;
+ }
+ scsicmd->list.next = NULL;
+- scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))_aac_probe_container1;
++ scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))aac_probe_container_callback1;
+
+ scsicmd->device = scsidev;
+ scsidev->sdev_state = 0;
+@@ -825,7 +830,7 @@
+ readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
+ readcmd->count = cpu_to_le32(count<<9);
+ readcmd->cid = cpu_to_le16(scmd_id(cmd));
+- readcmd->flags = cpu_to_le16(1);
++ readcmd->flags = cpu_to_le16(IO_TYPE_READ);
+ readcmd->bpTotal = 0;
+ readcmd->bpComplete = 0;
+
+@@ -904,7 +909,7 @@
+ (void *) cmd);
+ }
+
+-static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ u16 fibsize;
+ struct aac_raw_io *writecmd;
+@@ -914,7 +919,9 @@
+ writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
+ writecmd->count = cpu_to_le32(count<<9);
+ writecmd->cid = cpu_to_le16(scmd_id(cmd));
+- writecmd->flags = 0;
++ writecmd->flags = fua ?
++ cpu_to_le16(IO_TYPE_WRITE|IO_SUREWRITE) :
++ cpu_to_le16(IO_TYPE_WRITE);
+ writecmd->bpTotal = 0;
+ writecmd->bpComplete = 0;
+
+@@ -933,7 +940,7 @@
+ (void *) cmd);
+ }
+
+-static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ u16 fibsize;
+ struct aac_write64 *writecmd;
+@@ -964,7 +971,7 @@
+ (void *) cmd);
+ }
+
+-static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ u16 fibsize;
+ struct aac_write *writecmd;
+@@ -1041,7 +1048,7 @@
+ struct aac_srb * srbcmd = aac_scsi_common(fib, cmd);
+
+ aac_build_sg64(cmd, (struct sgmap64*) &srbcmd->sg);
+- srbcmd->count = cpu_to_le32(cmd->request_bufflen);
++ srbcmd->count = cpu_to_le32(scsi_bufflen(cmd));
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len);
+@@ -1069,7 +1076,7 @@
+ struct aac_srb * srbcmd = aac_scsi_common(fib, cmd);
+
+ aac_build_sg(cmd, (struct sgmap*)&srbcmd->sg);
+- srbcmd->count = cpu_to_le32(cmd->request_bufflen);
++ srbcmd->count = cpu_to_le32(scsi_bufflen(cmd));
+
+ memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len);
+@@ -1172,6 +1179,7 @@
+ }
+
+ if (!dev->in_reset) {
++ char buffer[16];
+ tmp = le32_to_cpu(dev->adapter_info.kernelrev);
+ printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n",
+ dev->name,
+@@ -1192,16 +1200,23 @@
+ dev->name, dev->id,
+ tmp>>24,(tmp>>16)&0xff,tmp&0xff,
+ le32_to_cpu(dev->adapter_info.biosbuild));
+- if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+- printk(KERN_INFO "%s%d: serial %x\n",
+- dev->name, dev->id,
+- le32_to_cpu(dev->adapter_info.serial[0]));
++ buffer[0] = '\0';
++ if (aac_show_serial_number(
++ shost_to_class(dev->scsi_host_ptr), buffer))
++ printk(KERN_INFO "%s%d: serial %s",
++ dev->name, dev->id, buffer);
+ if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) {
+ printk(KERN_INFO "%s%d: TSID %.*s\n",
+ dev->name, dev->id,
+ (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid),
+ dev->supplement_adapter_info.VpdInfo.Tsid);
+ }
++ if (!check_reset ||
++ (dev->supplement_adapter_info.SupportedOptions2 &
++ le32_to_cpu(AAC_OPTION_IGNORE_RESET))) {
++ printk(KERN_INFO "%s%d: Reset Adapter Ignored\n",
++ dev->name, dev->id);
++ }
+ }
+
+ dev->nondasd_support = 0;
+@@ -1332,7 +1347,7 @@
+ if (!aac_valid_context(scsicmd, fibptr))
+ return;
+
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
++ dev = fibptr->dev;
+ cid = scmd_id(scsicmd);
+
+ if (nblank(dprintk(x))) {
+@@ -1372,15 +1387,8 @@
+
+ BUG_ON(fibptr == NULL);
+
+- if(scsicmd->use_sg)
+- pci_unmap_sg(dev->pdev,
+- (struct scatterlist *)scsicmd->request_buffer,
+- scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+- else if(scsicmd->request_bufflen)
+- pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle,
+- scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
++ scsi_dma_unmap(scsicmd);
++
+ readreply = (struct aac_read_reply *)fib_data(fibptr);
+ if (le32_to_cpu(readreply->status) == ST_OK)
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+@@ -1498,6 +1506,7 @@
+ {
+ u64 lba;
+ u32 count;
++ int fua;
+ int status;
+ struct aac_dev *dev;
+ struct fib * cmd_fibcontext;
+@@ -1512,6 +1521,7 @@
+ count = scsicmd->cmnd[4];
+ if (count == 0)
+ count = 256;
++ fua = 0;
+ } else if (scsicmd->cmnd[0] == WRITE_16) { /* 16 byte command */
+ dprintk((KERN_DEBUG "aachba: received a write(16) command on id %d.\n", scmd_id(scsicmd)));
+
+@@ -1524,6 +1534,7 @@
+ (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
+ count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16) |
+ (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13];
++ fua = scsicmd->cmnd[1] & 0x8;
+ } else if (scsicmd->cmnd[0] == WRITE_12) { /* 12 byte command */
+ dprintk((KERN_DEBUG "aachba: received a write(12) command on id %d.\n", scmd_id(scsicmd)));
+
+@@ -1531,10 +1542,12 @@
+ | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
+ | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++ fua = scsicmd->cmnd[1] & 0x8;
+ } else {
+ dprintk((KERN_DEBUG "aachba: received a write(10) command on id %d.\n", scmd_id(scsicmd)));
+ lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
++ fua = scsicmd->cmnd[1] & 0x8;
+ }
+ dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n",
+ smp_processor_id(), (unsigned long long)lba, jiffies));
+@@ -1549,7 +1562,7 @@
+ return 0;
+ }
+
+- status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count);
++ status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua);
+
+ /*
+ * Check that the command queued to the controller
+@@ -1592,7 +1605,7 @@
+ COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ else {
+ struct scsi_device *sdev = cmd->device;
+- struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
++ struct aac_dev *dev = fibptr->dev;
+ u32 cid = sdev_id(sdev);
+ printk(KERN_WARNING
+ "synchronize_callback: synchronize failed, status = %d\n",
+@@ -1699,7 +1712,7 @@
+
+ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
+ {
+- u32 cid = 0;
++ u32 cid;
+ struct Scsi_Host *host = scsicmd->device->host;
+ struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev;
+@@ -1711,15 +1724,15 @@
+ * Test does not apply to ID 16, the pseudo id for the controller
+ * itself.
+ */
+- if (scmd_id(scsicmd) != host->this_id) {
+- if ((scmd_channel(scsicmd) == CONTAINER_CHANNEL)) {
+- if((scmd_id(scsicmd) >= dev->maximum_num_containers) ||
++ cid = scmd_id(scsicmd);
++ if (cid != host->this_id) {
++ if (scmd_channel(scsicmd) == CONTAINER_CHANNEL) {
++ if((cid >= dev->maximum_num_containers) ||
+ (scsicmd->device->lun != 0)) {
+ scsicmd->result = DID_NO_CONNECT << 16;
+ scsicmd->scsi_done(scsicmd);
+ return 0;
+ }
+- cid = scmd_id(scsicmd);
+
+ /*
+ * If the target container doesn't exist, it may have
+@@ -1782,7 +1795,7 @@
+ {
+ struct inquiry_data inq_data;
+
+- dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scmd_id(scsicmd)));
++ dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", cid));
+ memset(&inq_data, 0, sizeof (struct inquiry_data));
+
+ inq_data.inqd_ver = 2; /* claim compliance to SCSI-2 */
+@@ -1794,7 +1807,7 @@
+ * Set the Vendor, Product, and Revision Level
+ * see: <vendor>.c i.e. aac.c
+ */
+- if (scmd_id(scsicmd) == host->this_id) {
++ if (cid == host->this_id) {
+ setinqstr(dev, (void *) (inq_data.inqd_vid), ARRAY_SIZE(container_types));
+ inq_data.inqd_pdt = INQD_PDT_PROC; /* Processor device */
+ aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data));
+@@ -1886,15 +1899,29 @@
+
+ case MODE_SENSE:
+ {
+- char mode_buf[4];
++ char mode_buf[7];
++ int mode_buf_length = 4;
+
+ dprintk((KERN_DEBUG "MODE SENSE command.\n"));
+ mode_buf[0] = 3; /* Mode data length */
+ mode_buf[1] = 0; /* Medium type - default */
+- mode_buf[2] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */
++ mode_buf[2] = 0; /* Device-specific param,
++ bit 7: 0/1 = write enabled/protected
++ bit 4: 0/1 = FUA enabled */
++ if (dev->raw_io_interface)
++ mode_buf[2] = 0x10;
+ mode_buf[3] = 0; /* Block descriptor length */
+-
+- aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf));
++ if (((scsicmd->cmnd[2] & 0x3f) == 8) ||
++ ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) {
++ mode_buf[0] = 6;
++ mode_buf[4] = 8;
++ mode_buf[5] = 1;
++ mode_buf[6] = 0x04; /* WCE */
++ mode_buf_length = 7;
++ if (mode_buf_length > scsicmd->cmnd[4])
++ mode_buf_length = scsicmd->cmnd[4];
++ }
++ aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length);
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ scsicmd->scsi_done(scsicmd);
+
+@@ -1902,18 +1929,33 @@
+ }
+ case MODE_SENSE_10:
+ {
+- char mode_buf[8];
++ char mode_buf[11];
++ int mode_buf_length = 8;
+
+ dprintk((KERN_DEBUG "MODE SENSE 10 byte command.\n"));
+ mode_buf[0] = 0; /* Mode data length (MSB) */
+ mode_buf[1] = 6; /* Mode data length (LSB) */
+ mode_buf[2] = 0; /* Medium type - default */
+- mode_buf[3] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */
++ mode_buf[3] = 0; /* Device-specific param,
++ bit 7: 0/1 = write enabled/protected
++ bit 4: 0/1 = FUA enabled */
++ if (dev->raw_io_interface)
++ mode_buf[3] = 0x10;
+ mode_buf[4] = 0; /* reserved */
+ mode_buf[5] = 0; /* reserved */
+ mode_buf[6] = 0; /* Block descriptor length (MSB) */
+ mode_buf[7] = 0; /* Block descriptor length (LSB) */
+- aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf));
++ if (((scsicmd->cmnd[2] & 0x3f) == 8) ||
++ ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) {
++ mode_buf[1] = 9;
++ mode_buf[8] = 8;
++ mode_buf[9] = 1;
++ mode_buf[10] = 0x04; /* WCE */
++ mode_buf_length = 11;
++ if (mode_buf_length > scsicmd->cmnd[8])
++ mode_buf_length = scsicmd->cmnd[8];
++ }
++ aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length);
+
+ scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ scsicmd->scsi_done(scsicmd);
+@@ -2136,10 +2178,10 @@
+ if (!aac_valid_context(scsicmd, fibptr))
+ return;
+
+- dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+-
+ BUG_ON(fibptr == NULL);
+
++ dev = fibptr->dev;
++
+ srbreply = (struct aac_srb_reply *) fib_data(fibptr);
+
+ scsicmd->sense_buffer[0] = '\0'; /* Initialize sense valid flag to false */
+@@ -2147,17 +2189,10 @@
+ * Calculate resid for sg
+ */
+
+- scsicmd->resid = scsicmd->request_bufflen -
+- le32_to_cpu(srbreply->data_xfer_length);
++ scsi_set_resid(scsicmd, scsi_bufflen(scsicmd)
++ - le32_to_cpu(srbreply->data_xfer_length));
+
+- if(scsicmd->use_sg)
+- pci_unmap_sg(dev->pdev,
+- (struct scatterlist *)scsicmd->request_buffer,
+- scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+- else if(scsicmd->request_bufflen)
+- pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
++ scsi_dma_unmap(scsicmd);
+
+ /*
+ * First check the fib status
+@@ -2233,7 +2268,7 @@
+ break;
+
+ case SRB_STATUS_BUSY:
+- scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
++ scsicmd->result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+ break;
+
+ case SRB_STATUS_BUS_RESET:
+@@ -2343,34 +2378,33 @@
+ {
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
++ int nseg;
+
+ dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ // Get rid of old data
+ psg->count = 0;
+ psg->sg[0].addr = 0;
+ psg->sg[0].count = 0;
+- if (scsicmd->use_sg) {
++
++ nseg = scsi_dma_map(scsicmd);
++ BUG_ON(nseg < 0);
++ if (nseg) {
+ struct scatterlist *sg;
+ int i;
+- int sg_count;
+- sg = (struct scatterlist *) scsicmd->request_buffer;
+
+- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+- psg->count = cpu_to_le32(sg_count);
++ psg->count = cpu_to_le32(nseg);
+
+- for (i = 0; i < sg_count; i++) {
++ scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg));
+ psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ byte_count += sg_dma_len(sg);
+- sg++;
+ }
+ /* hba wants the size to be exact */
+- if(byte_count > scsicmd->request_bufflen){
++ if (byte_count > scsi_bufflen(scsicmd)) {
+ u32 temp = le32_to_cpu(psg->sg[i-1].count) -
+- (byte_count - scsicmd->request_bufflen);
++ (byte_count - scsi_bufflen(scsicmd));
+ psg->sg[i-1].count = cpu_to_le32(temp);
+- byte_count = scsicmd->request_bufflen;
++ byte_count = scsi_bufflen(scsicmd);
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2378,18 +2412,6 @@
+ byte_count, scsicmd->underflow);
+ }
+ }
+- else if(scsicmd->request_bufflen) {
+- u32 addr;
+- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+- scsicmd->request_buffer,
+- scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
+- addr = scsicmd->SCp.dma_handle;
+- psg->count = cpu_to_le32(1);
+- psg->sg[0].addr = cpu_to_le32(addr);
+- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+- byte_count = scsicmd->request_bufflen;
+- }
+ return byte_count;
+ }
+
+@@ -2399,6 +2421,7 @@
+ struct aac_dev *dev;
+ unsigned long byte_count = 0;
+ u64 addr;
++ int nseg;
+
+ dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ // Get rid of old data
+@@ -2406,31 +2429,28 @@
+ psg->sg[0].addr[0] = 0;
+ psg->sg[0].addr[1] = 0;
+ psg->sg[0].count = 0;
+- if (scsicmd->use_sg) {
++
++ nseg = scsi_dma_map(scsicmd);
++ BUG_ON(nseg < 0);
++ if (nseg) {
+ struct scatterlist *sg;
+ int i;
+- int sg_count;
+- sg = (struct scatterlist *) scsicmd->request_buffer;
+-
+- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+
+- for (i = 0; i < sg_count; i++) {
++ scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ int count = sg_dma_len(sg);
+ addr = sg_dma_address(sg);
+ psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff);
+ psg->sg[i].addr[1] = cpu_to_le32(addr>>32);
+ psg->sg[i].count = cpu_to_le32(count);
+ byte_count += count;
+- sg++;
+ }
+- psg->count = cpu_to_le32(sg_count);
++ psg->count = cpu_to_le32(nseg);
+ /* hba wants the size to be exact */
+- if(byte_count > scsicmd->request_bufflen){
++ if (byte_count > scsi_bufflen(scsicmd)) {
+ u32 temp = le32_to_cpu(psg->sg[i-1].count) -
+- (byte_count - scsicmd->request_bufflen);
++ (byte_count - scsi_bufflen(scsicmd));
+ psg->sg[i-1].count = cpu_to_le32(temp);
+- byte_count = scsicmd->request_bufflen;
++ byte_count = scsi_bufflen(scsicmd);
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2438,26 +2458,13 @@
+ byte_count, scsicmd->underflow);
+ }
+ }
+- else if(scsicmd->request_bufflen) {
+- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+- scsicmd->request_buffer,
+- scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
+- addr = scsicmd->SCp.dma_handle;
+- psg->count = cpu_to_le32(1);
+- psg->sg[0].addr[0] = cpu_to_le32(addr & 0xffffffff);
+- psg->sg[0].addr[1] = cpu_to_le32(addr >> 32);
+- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);
+- byte_count = scsicmd->request_bufflen;
+- }
+ return byte_count;
+ }
+
+ static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg)
+ {
+- struct Scsi_Host *host = scsicmd->device->host;
+- struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ unsigned long byte_count = 0;
++ int nseg;
+
+ // Get rid of old data
+ psg->count = 0;
+@@ -2467,16 +2474,14 @@
+ psg->sg[0].addr[1] = 0;
+ psg->sg[0].count = 0;
+ psg->sg[0].flags = 0;
+- if (scsicmd->use_sg) {
++
++ nseg = scsi_dma_map(scsicmd);
++ BUG_ON(nseg < 0);
++ if (nseg) {
+ struct scatterlist *sg;
+ int i;
+- int sg_count;
+- sg = (struct scatterlist *) scsicmd->request_buffer;
+
+- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+- scsicmd->sc_data_direction);
+-
+- for (i = 0; i < sg_count; i++) {
++ scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ int count = sg_dma_len(sg);
+ u64 addr = sg_dma_address(sg);
+ psg->sg[i].next = 0;
+@@ -2486,15 +2491,14 @@
+ psg->sg[i].count = cpu_to_le32(count);
+ psg->sg[i].flags = 0;
+ byte_count += count;
+- sg++;
+ }
+- psg->count = cpu_to_le32(sg_count);
++ psg->count = cpu_to_le32(nseg);
+ /* hba wants the size to be exact */
+- if(byte_count > scsicmd->request_bufflen){
++ if (byte_count > scsi_bufflen(scsicmd)) {
+ u32 temp = le32_to_cpu(psg->sg[i-1].count) -
+- (byte_count - scsicmd->request_bufflen);
++ (byte_count - scsi_bufflen(scsicmd));
+ psg->sg[i-1].count = cpu_to_le32(temp);
+- byte_count = scsicmd->request_bufflen;
++ byte_count = scsi_bufflen(scsicmd);
+ }
+ /* Check for command underflow */
+ if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2502,24 +2506,6 @@
+ byte_count, scsicmd->underflow);
+ }
+ }
+- else if(scsicmd->request_bufflen) {
+- int count;
+- u64 addr;
+- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+- scsicmd->request_buffer,
+- scsicmd->request_bufflen,
+- scsicmd->sc_data_direction);
+- addr = scsicmd->SCp.dma_handle;
+- count = scsicmd->request_bufflen;
+- psg->count = cpu_to_le32(1);
+- psg->sg[0].next = 0;
+- psg->sg[0].prev = 0;
+- psg->sg[0].addr[1] = cpu_to_le32((u32)(addr>>32));
+- psg->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
+- psg->sg[0].count = cpu_to_le32(count);
+- psg->sg[0].flags = 0;
+- byte_count = scsicmd->request_bufflen;
+- }
+ return byte_count;
+ }
+
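The dominant change in aachba.c is mechanical: every use_sg/request_buffer dual path becomes a single scatterlist path through the scsi_dma_map()/scsi_dma_unmap() accessors (introduced upstream around 2.6.23 and assumed by this patch); the same transformation recurs in aha1740, aic79xx and aic7xxx below. A condensed sketch of the target idiom, with build_sg() as a hypothetical stand-in for a driver's hardware S/G table builder:

/*
 * A minimal sketch, not from the patch; build_sg() and the descriptor
 * writes are hypothetical.
 */
#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

static int build_sg(struct scsi_cmnd *cmd)
{
        struct scatterlist *sg;
        int i, nseg;

        nseg = scsi_dma_map(cmd);       /* 0 = no data, < 0 = mapping failure */
        if (nseg < 0)
                return nseg;

        scsi_for_each_sg(cmd, sg, nseg, i) {
                dma_addr_t addr = sg_dma_address(sg);
                unsigned int len = sg_dma_len(sg);

                /* ... program one hardware S/G element with addr/len ... */
                (void)addr;
                (void)len;
        }
        return 0;
}

static void complete_cmd(struct scsi_cmnd *cmd)
{
        /* one call replaces both the pci_unmap_sg and pci_unmap_single paths */
        scsi_dma_unmap(cmd);
}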
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h
+--- linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h 2007-12-21 15:36:12.000000000 -0500
+@@ -12,8 +12,8 @@
+ *----------------------------------------------------------------------------*/
+
+ #ifndef AAC_DRIVER_BUILD
+-# define AAC_DRIVER_BUILD 2437
+-# define AAC_DRIVER_BRANCH "-mh4"
++# define AAC_DRIVER_BUILD 2447
++# define AAC_DRIVER_BRANCH "-ms"
+ #endif
+ #define MAXIMUM_NUM_CONTAINERS 32
+
+@@ -464,12 +464,12 @@
+ int (*adapter_restart)(struct aac_dev *dev, int bled);
+ /* Transport operations */
+ int (*adapter_ioremap)(struct aac_dev * dev, u32 size);
+- irqreturn_t (*adapter_intr)(int irq, void *dev_id);
++ irq_handler_t adapter_intr;
+ /* Packet operations */
+ int (*adapter_deliver)(struct fib * fib);
+ int (*adapter_bounds)(struct aac_dev * dev, struct scsi_cmnd * cmd, u64 lba);
+ int (*adapter_read)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count);
+- int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count);
++ int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua);
+ int (*adapter_scsi)(struct fib * fib, struct scsi_cmnd * cmd);
+ /* Administrative operations */
+ int (*adapter_comm)(struct aac_dev * dev, int comm);
+@@ -860,10 +860,12 @@
+ __le32 FlashFirmwareBootBuild;
+ u8 MfgPcbaSerialNo[12];
+ u8 MfgWWNName[8];
+- __le32 MoreFeatureBits;
++ __le32 SupportedOptions2;
+ __le32 ReservedGrowth[1];
+ };
+ #define AAC_FEATURE_FALCON 0x00000010
++#define AAC_OPTION_MU_RESET 0x00000001
++#define AAC_OPTION_IGNORE_RESET 0x00000002
+ #define AAC_SIS_VERSION_V3 3
+ #define AAC_SIS_SLOT_UNKNOWN 0xFF
+
+@@ -1054,8 +1056,8 @@
+ #define aac_adapter_read(fib,cmd,lba,count) \
+ ((fib)->dev)->a_ops.adapter_read(fib,cmd,lba,count)
+
+-#define aac_adapter_write(fib,cmd,lba,count) \
+- ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count)
++#define aac_adapter_write(fib,cmd,lba,count,fua) \
++ ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count,fua)
+
+ #define aac_adapter_scsi(fib,cmd) \
+ ((fib)->dev)->a_ops.adapter_scsi(fib,cmd)
+@@ -1213,6 +1215,9 @@
+ __le32 block;
+ __le16 pad;
+ __le16 flags;
++#define IO_TYPE_WRITE 0x00000000
++#define IO_TYPE_READ 0x00000001
++#define IO_SUREWRITE 0x00000008
+ struct sgmap64 sg; // Must be last in struct because it is variable
+ };
+ struct aac_write_reply
+@@ -1257,6 +1262,19 @@
+ u8 data[16];
+ };
+
++#define CT_PAUSE_IO 65
++#define CT_RELEASE_IO 66
++struct aac_pause {
++ __le32 command; /* VM_ContainerConfig */
++ __le32 type; /* CT_PAUSE_IO */
++ __le32 timeout; /* 10ms ticks */
++ __le32 min;
++ __le32 noRescan;
++ __le32 parm3;
++ __le32 parm4;
++ __le32 count; /* sizeof(((struct aac_pause_reply *)NULL)->data) */
++};
++
+ struct aac_srb
+ {
+ __le32 function;
+@@ -1804,6 +1822,10 @@
+ int aac_get_containers(struct aac_dev *dev);
+ int aac_scsi_cmd(struct scsi_cmnd *cmd);
+ int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
++#ifndef shost_to_class
++#define shost_to_class(shost) &shost->shost_classdev
++#endif
++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf);
+ int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg);
+ int aac_rx_init(struct aac_dev *dev);
+ int aac_rkt_init(struct aac_dev *dev);
+@@ -1813,6 +1835,7 @@
+ unsigned int aac_response_normal(struct aac_queue * q);
+ unsigned int aac_command_normal(struct aac_queue * q);
+ unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
++int aac_reset_adapter(struct aac_dev * dev, int forced);
+ int aac_check_health(struct aac_dev * dev);
+ int aac_command_thread(void *data);
+ int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
+@@ -1832,3 +1855,6 @@
+ extern int expose_physicals;
+ extern int aac_reset_devices;
+ extern int aac_commit;
++extern int update_interval;
++extern int check_interval;
++extern int check_reset;
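The update_interval/check_interval/check_reset tunables added in aachba.c and declared extern here follow the standard split: one definition with module_param()/MODULE_PARM_DESC() in a .c file, plus an extern declaration in the shared header. A minimal hypothetical example of that plumbing:

/*
 * A minimal sketch, not from the patch; foo_interval is hypothetical.
 * S_IRUGO|S_IWUSR exposes the value world-readable and root-writable
 * under /sys/module/<module>/parameters/foo_interval.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/stat.h>

int foo_interval = 30 * 60;     /* seconds, like update_interval above */
module_param(foo_interval, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(foo_interval, "Interval in seconds between foo updates.");

/* in the shared header: extern int foo_interval; */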
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/commsup.c linux-2.6.22-591/drivers/scsi/aacraid/commsup.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/commsup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aacraid/commsup.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1021,7 +1021,7 @@
+
+ }
+
+-static int _aac_reset_adapter(struct aac_dev *aac)
++static int _aac_reset_adapter(struct aac_dev *aac, int forced)
+ {
+ int index, quirks;
+ int retval;
+@@ -1029,25 +1029,32 @@
+ struct scsi_device *dev;
+ struct scsi_cmnd *command;
+ struct scsi_cmnd *command_list;
++ int jafo = 0;
+
+ /*
+ * Assumptions:
+- * - host is locked.
++ * - host is locked, unless called by the aacraid thread.
++ * (a matter of convenience, due to legacy issues surrounding
++ * eh_host_adapter_reset).
+ * - in_reset is asserted, so no new i/o is getting to the
+ * card.
+- * - The card is dead.
++ * - The card is dead, or will be very shortly ;-/ so no new
++ * commands are completing in the interrupt service.
+ */
+ host = aac->scsi_host_ptr;
+ scsi_block_requests(host);
+ aac_adapter_disable_int(aac);
++ if (aac->thread->pid != current->pid) {
+ spin_unlock_irq(host->host_lock);
+ kthread_stop(aac->thread);
++ jafo = 1;
++ }
+
+ /*
+ * If a positive health, means in a known DEAD PANIC
+ * state and the adapter could be reset to `try again'.
+ */
+- retval = aac_adapter_restart(aac, aac_adapter_check_health(aac));
++ retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac));
+
+ if (retval)
+ goto out;
+@@ -1104,11 +1111,13 @@
+ if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
+ if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
+ goto out;
++ if (jafo) {
+ aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+ if (IS_ERR(aac->thread)) {
+ retval = PTR_ERR(aac->thread);
+ goto out;
+ }
++ }
+ (void)aac_get_adapter_info(aac);
+ quirks = aac_get_driver_ident(index)->quirks;
+ if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) {
+@@ -1150,7 +1159,98 @@
+ out:
+ aac->in_reset = 0;
+ scsi_unblock_requests(host);
++ if (jafo) {
+ spin_lock_irq(host->host_lock);
++ }
++ return retval;
++}
++
++int aac_reset_adapter(struct aac_dev * aac, int forced)
++{
++ unsigned long flagv = 0;
++ int retval;
++ struct Scsi_Host * host;
++
++ if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
++ return -EBUSY;
++
++ if (aac->in_reset) {
++ spin_unlock_irqrestore(&aac->fib_lock, flagv);
++ return -EBUSY;
++ }
++ aac->in_reset = 1;
++ spin_unlock_irqrestore(&aac->fib_lock, flagv);
++
++ /*
++ * Wait for all commands to complete to this specific
++ * target (block maximum 60 seconds). Although not necessary,
++ * it does make us a good storage citizen.
++ */
++ host = aac->scsi_host_ptr;
++ scsi_block_requests(host);
++ if (forced < 2) for (retval = 60; retval; --retval) {
++ struct scsi_device * dev;
++ struct scsi_cmnd * command;
++ int active = 0;
++
++ __shost_for_each_device(dev, host) {
++ spin_lock_irqsave(&dev->list_lock, flagv);
++ list_for_each_entry(command, &dev->cmd_list, list) {
++ if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
++ active++;
++ break;
++ }
++ }
++ spin_unlock_irqrestore(&dev->list_lock, flagv);
++ if (active)
++ break;
++
++ }
++ /*
++ * We can exit if all the commands are complete
++ */
++ if (active == 0)
++ break;
++ ssleep(1);
++ }
++
++ /* Quiesce build, flush cache, write through mode */
++ aac_send_shutdown(aac);
++ spin_lock_irqsave(host->host_lock, flagv);
++ retval = _aac_reset_adapter(aac, forced);
++ spin_unlock_irqrestore(host->host_lock, flagv);
++
++ if (retval == -ENODEV) {
++ /* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */
++ struct fib * fibctx = aac_fib_alloc(aac);
++ if (fibctx) {
++ struct aac_pause *cmd;
++ int status;
++
++ aac_fib_init(fibctx);
++
++ cmd = (struct aac_pause *) fib_data(fibctx);
++
++ cmd->command = cpu_to_le32(VM_ContainerConfig);
++ cmd->type = cpu_to_le32(CT_PAUSE_IO);
++ cmd->timeout = cpu_to_le32(1);
++ cmd->min = cpu_to_le32(1);
++ cmd->noRescan = cpu_to_le32(1);
++ cmd->count = cpu_to_le32(0);
++
++ status = aac_fib_send(ContainerCommand,
++ fibctx,
++ sizeof(struct aac_pause),
++ FsaNormal,
++ -2 /* Timeout silently */, 1,
++ NULL, NULL);
++
++ if (status >= 0)
++ aac_fib_complete(fibctx);
++ aac_fib_free(fibctx);
++ }
++ }
++
+ return retval;
+ }
+
+@@ -1270,9 +1370,14 @@
+
+ printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
+
++ if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 &
++ le32_to_cpu(AAC_OPTION_IGNORE_RESET)))
++ goto out;
+ host = aac->scsi_host_ptr;
++ if (aac->thread->pid != current->pid)
+ spin_lock_irqsave(host->host_lock, flagv);
+- BlinkLED = _aac_reset_adapter(aac);
++ BlinkLED = _aac_reset_adapter(aac, 0);
++ if (aac->thread->pid != current->pid)
+ spin_unlock_irqrestore(host->host_lock, flagv);
+ return BlinkLED;
+
+@@ -1300,6 +1405,9 @@
+ struct aac_fib_context *fibctx;
+ unsigned long flags;
+ DECLARE_WAITQUEUE(wait, current);
++ unsigned long next_jiffies = jiffies + HZ;
++ unsigned long next_check_jiffies = next_jiffies;
++ long difference = HZ;
+
+ /*
+ * We can only have one thread per adapter for AIF's.
+@@ -1507,11 +1615,79 @@
+ * There are no more AIF's
+ */
+ spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags);
+- schedule();
+
+- if (kthread_should_stop())
++ /*
++ * Background activity
++ */
++ if ((time_before(next_check_jiffies,next_jiffies))
++ && ((difference = next_check_jiffies - jiffies) <= 0)) {
++ next_check_jiffies = next_jiffies;
++ if (aac_check_health(dev) == 0) {
++ difference = ((long)(unsigned)check_interval)
++ * HZ;
++ next_check_jiffies = jiffies + difference;
++ } else if (!dev->queues)
+ break;
++ }
++ if (!time_before(next_check_jiffies,next_jiffies)
++ && ((difference = next_jiffies - jiffies) <= 0)) {
++ struct timeval now;
++ int ret;
++
++ /* Don't even try to talk to the adapter if it's sick */
++ ret = aac_check_health(dev);
++ if (!ret && !dev->queues)
++ break;
++ next_check_jiffies = jiffies
++ + ((long)(unsigned)check_interval)
++ * HZ;
++ do_gettimeofday(&now);
++
++ /* Synchronize our watches */
++ if (((1000000 - (1000000 / HZ)) > now.tv_usec)
++ && (now.tv_usec > (1000000 / HZ)))
++ difference = (((1000000 - now.tv_usec) * HZ)
++ + 500000) / 1000000;
++ else if (ret == 0) {
++ struct fib *fibptr;
++
++ if ((fibptr = aac_fib_alloc(dev))) {
++ u32 * info;
++
++ aac_fib_init(fibptr);
++
++ info = (u32 *) fib_data(fibptr);
++ if (now.tv_usec > 500000)
++ ++now.tv_sec;
++
++ *info = cpu_to_le32(now.tv_sec);
++
++ (void)aac_fib_send(SendHostTime,
++ fibptr,
++ sizeof(*info),
++ FsaNormal,
++ 1, 1,
++ NULL,
++ NULL);
++ aac_fib_complete(fibptr);
++ aac_fib_free(fibptr);
++ }
++ difference = (long)(unsigned)update_interval*HZ;
++ } else {
++ /* retry shortly */
++ difference = 10 * HZ;
++ }
++ next_jiffies = jiffies + difference;
++ if (time_before(next_check_jiffies,next_jiffies))
++ difference = next_check_jiffies - jiffies;
++ }
++ if (difference <= 0)
++ difference = 1;
+ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(difference);
++
++ if (kthread_should_stop())
++ break;
+ }
+ if (dev->queues)
+ remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait);
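The reworked command thread above replaces a bare schedule() with deadline bookkeeping: it tracks the jiffies of the next time sync and the next health check, sleeps only until the nearest deadline, and re-arms whichever job ran. Stripped of the aacraid specifics, the pattern looks roughly like this (periodic_work() is hypothetical, and one deadline is shown instead of aacraid's two):

/*
 * A simplified sketch of the deadline-driven kthread loop used above;
 * not part of the patch.
 */
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int periodic_work(void *data)
{
        unsigned long next = jiffies + HZ;
        long delay;

        while (!kthread_should_stop()) {
                if (time_after_eq(jiffies, next)) {
                        /* ... run the periodic job (health check, time sync) ... */
                        next = jiffies + 30 * HZ;       /* re-arm the deadline */
                }

                delay = next - jiffies;
                if (delay <= 0)
                        delay = 1;

                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(delay);
        }
        return 0;
}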
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/linit.c linux-2.6.22-591/drivers/scsi/aacraid/linit.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/linit.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/aacraid/linit.c 2007-12-21 15:36:12.000000000 -0500
+@@ -39,10 +39,8 @@
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+-#include <linux/dma-mapping.h>
+ #include <linux/syscalls.h>
+ #include <linux/delay.h>
+-#include <linux/smp_lock.h>
+ #include <linux/kthread.h>
+ #include <asm/semaphore.h>
+
+@@ -223,12 +221,12 @@
+ { aac_rx_init, "percraid", "DELL ", "PERC 320/DC ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Perc 320/DC*/
+ { aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
+ { aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
+- { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell PERC2/QC */
++ { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_34SG }, /* Dell PERC2/QC */
+ { aac_sa_init, "hpnraid", "HP ", "NetRAID ", 4, AAC_QUIRK_34SG }, /* HP NetRAID-4M */
+
+ { aac_rx_init, "aacraid", "DELL ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell Catchall */
+ { aac_rx_init, "aacraid", "Legend ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend Catchall */
+- { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec Catch All */
++ { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2 }, /* Adaptec Catch All */
+ { aac_rkt_init, "aacraid", "ADAPTEC ", "RAID ", 2 }, /* Adaptec Rocket Catch All */
+ { aac_nark_init, "aacraid", "ADAPTEC ", "RAID ", 2 } /* Adaptec NEMER/ARK Catch All */
+ };
+@@ -403,10 +401,6 @@
+
+ static int aac_slave_configure(struct scsi_device *sdev)
+ {
+- if (sdev_channel(sdev) == CONTAINER_CHANNEL) {
+- sdev->skip_ms_page_8 = 1;
+- sdev->skip_ms_page_3f = 1;
+- }
+ if ((sdev->type == TYPE_DISK) &&
+ (sdev_channel(sdev) != CONTAINER_CHANNEL)) {
+ if (expose_physicals == 0)
+@@ -450,6 +444,43 @@
+ return 0;
+ }
+
++/**
++ * aac_change_queue_depth - alter queue depths
++ * @sdev: SCSI device we are considering
++ * @depth: desired queue depth
++ *
++ * Alters queue depths for the target device based on the host adapter's
++ * total capacity and the queue depth supported by the target device.
++ */
++
++static int aac_change_queue_depth(struct scsi_device *sdev, int depth)
++{
++ if (sdev->tagged_supported && (sdev->type == TYPE_DISK) &&
++ (sdev_channel(sdev) == CONTAINER_CHANNEL)) {
++ struct scsi_device * dev;
++ struct Scsi_Host *host = sdev->host;
++ unsigned num = 0;
++
++ __shost_for_each_device(dev, host) {
++ if (dev->tagged_supported && (dev->type == TYPE_DISK) &&
++ (sdev_channel(dev) == CONTAINER_CHANNEL))
++ ++num;
++ ++num;
++ }
++ if (num >= host->can_queue)
++ num = host->can_queue - 1;
++ if (depth > (host->can_queue - num))
++ depth = host->can_queue - num;
++ if (depth > 256)
++ depth = 256;
++ else if (depth < 2)
++ depth = 2;
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
++ } else
++ scsi_adjust_queue_depth(sdev, 0, 1);
++ return sdev->queue_depth;
++}
++
+ static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
+ {
+ struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
+@@ -548,6 +579,14 @@
+ ssleep(1);
+ }
+ printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
++ /*
++ * This adapter needs a blind reset; only do so for adapters that
++ * support a register-based, rather than a commanded, reset.
++ */
++ if ((aac->supplement_adapter_info.SupportedOptions2 &
++ le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) ==
++ le32_to_cpu(AAC_OPTION_MU_RESET))
++ aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */
+ return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
+ }
+
+@@ -735,15 +774,21 @@
+ return len;
+ }
+
+-static ssize_t aac_show_serial_number(struct class_device *class_dev,
+- char *buf)
++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf)
+ {
+ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
+ int len = 0;
+
+ if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+- len = snprintf(buf, PAGE_SIZE, "%x\n",
++ len = snprintf(buf, PAGE_SIZE, "%06X\n",
+ le32_to_cpu(dev->adapter_info.serial[0]));
++ if (len &&
++ !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[
++ sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)+2-len],
++ buf, len))
++ len = snprintf(buf, PAGE_SIZE, "%.*s\n",
++ (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo),
++ dev->supplement_adapter_info.MfgPcbaSerialNo);
+ return len;
+ }
+
+@@ -759,6 +804,31 @@
+ class_to_shost(class_dev)->max_id);
+ }
+
++static ssize_t aac_store_reset_adapter(struct class_device *class_dev,
++ const char *buf, size_t count)
++{
++ int retval = -EACCES;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return retval;
++ retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!');
++ if (retval >= 0)
++ retval = count;
++ return retval;
++}
++
++static ssize_t aac_show_reset_adapter(struct class_device *class_dev,
++ char *buf)
++{
++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++ int len, tmp;
++
++ tmp = aac_adapter_check_health(dev);
++ if ((tmp == 0) && dev->in_reset)
++ tmp = -EBUSY;
++ len = snprintf(buf, PAGE_SIZE, "0x%x", tmp);
++ return len;
++}
+
+ static struct class_device_attribute aac_model = {
+ .attr = {
+@@ -816,6 +886,14 @@
+ },
+ .show = aac_show_max_id,
+ };
++static struct class_device_attribute aac_reset = {
++ .attr = {
++ .name = "reset_host",
++ .mode = S_IWUSR|S_IRUGO,
++ },
++ .store = aac_store_reset_adapter,
++ .show = aac_show_reset_adapter,
++};
+
+ static struct class_device_attribute *aac_attrs[] = {
+ &aac_model,
+@@ -826,6 +904,7 @@
+ &aac_serial_number,
+ &aac_max_channel,
+ &aac_max_id,
++ &aac_reset,
+ NULL
+ };
+
+@@ -852,6 +931,7 @@
+ .bios_param = aac_biosparm,
+ .shost_attrs = aac_attrs,
+ .slave_configure = aac_slave_configure,
++ .change_queue_depth = aac_change_queue_depth,
+ .eh_abort_handler = aac_eh_abort,
+ .eh_host_reset_handler = aac_eh_reset,
+ .can_queue = AAC_NUM_IO_FIB,
+@@ -1090,7 +1170,7 @@
+ {
+ int error;
+
+- printk(KERN_INFO "Adaptec %s driver (%s)\n",
++ printk(KERN_INFO "Adaptec %s driver %s\n",
+ AAC_DRIVERNAME, aac_driver_version);
+
+ error = pci_register_driver(&aac_pci_driver);
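The new reset_host entry above uses the pre-2.6.26 class_device sysfs interface for SCSI hosts: a struct class_device_attribute bundles the file name, mode, and show/store handlers, and is exported through the host template's .shost_attrs array. A sketch with hypothetical "demo" names:

/*
 * A minimal sketch, not from the patch; all "demo" names are
 * hypothetical.
 */
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stat.h>

static ssize_t demo_show(struct class_device *class_dev, char *buf)
{
        return snprintf(buf, PAGE_SIZE, "0\n");
}

static ssize_t demo_store(struct class_device *class_dev,
                          const char *buf, size_t count)
{
        if (!capable(CAP_SYS_ADMIN))    /* privileged, like reset_host */
                return -EACCES;
        /* ... act on the written value ... */
        return count;
}

static struct class_device_attribute demo_attr = {
        .attr = {
                .name = "demo",
                .mode = S_IWUSR | S_IRUGO,
        },
        .show  = demo_show,
        .store = demo_store,
};

/* hung off the host template's .shost_attrs, as aac_attrs[] does above */
static struct class_device_attribute *demo_attrs[] = {
        &demo_attr,
        NULL
};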
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/rx.c linux-2.6.22-591/drivers/scsi/aacraid/rx.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/rx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aacraid/rx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -464,6 +464,8 @@
+ {
+ u32 var;
+
++ if (!(dev->supplement_adapter_info.SupportedOptions2 &
++ le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) {
+ if (bled)
+ printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
+ dev->name, dev->id, bled);
+@@ -479,6 +481,7 @@
+
+ if (bled && (bled != -ETIMEDOUT))
+ return -EINVAL;
++ }
+ if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */
+ rx_writel(dev, MUnit.reserved2, 3);
+ msleep(5000); /* Delay 5 seconds */
+@@ -596,7 +599,7 @@
+ }
+ msleep(1);
+ }
+- if (restart)
++ if (restart && aac_commit < 0)
+ aac_commit = 1;
+ /*
+ * Fill in the common function dispatch table.
+diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.c linux-2.6.22-591/drivers/scsi/advansys.c
+--- linux-2.6.22-570/drivers/scsi/advansys.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/advansys.c 2007-12-21 15:36:12.000000000 -0500
+@@ -798,7 +798,6 @@
+ #include <scsi/scsi_tcq.h>
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_host.h>
+-#include "advansys.h"
+ #ifdef CONFIG_PCI
+ #include <linux/pci.h>
+ #endif /* CONFIG_PCI */
+@@ -2014,7 +2013,7 @@
+ STATIC void AscEnableIsaDma(uchar);
+ #endif /* CONFIG_ISA */
+ STATIC ASC_DCNT AscGetMaxDmaCount(ushort);
+-
++static const char *advansys_info(struct Scsi_Host *shp);
+
+ /*
+ * --- Adv Library Constants and Macros
+@@ -3970,10 +3969,6 @@
+ ASC_IS_PCI,
+ };
+
+-/*
+- * Used with the LILO 'advansys' option to eliminate or
+- * limit I/O port probing at boot time, cf. advansys_setup().
+- */
+ STATIC int asc_iopflag = ASC_FALSE;
+ STATIC int asc_ioport[ASC_NUM_IOPORT_PROBE] = { 0, 0, 0, 0 };
+
+@@ -4055,10 +4050,6 @@
+ #endif /* ADVANSYS_DEBUG */
+
+
+-/*
+- * --- Linux 'struct scsi_host_template' and advansys_setup() Functions
+- */
+-
+ #ifdef CONFIG_PROC_FS
+ /*
+ * advansys_proc_info() - /proc/scsi/advansys/[0-(ASC_NUM_BOARD_SUPPORTED-1)]
+@@ -4080,7 +4071,7 @@
+ * if 'prtbuf' is too small it will not be overwritten. Instead the
+ * user just won't get all the available statistics.
+ */
+-int
++static int
+ advansys_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
+ off_t offset, int length, int inout)
+ {
+@@ -4296,7 +4287,7 @@
+ * it must not call SCSI mid-level functions including scsi_malloc()
+ * and scsi_free().
+ */
+-int __init
++static int __init
+ advansys_detect(struct scsi_host_template *tpnt)
+ {
+ static int detect_called = ASC_FALSE;
+@@ -5428,7 +5419,7 @@
+ *
+ * Release resources allocated for a single AdvanSys adapter.
+ */
+-int
++static int
+ advansys_release(struct Scsi_Host *shp)
+ {
+ asc_board_t *boardp;
+@@ -5475,7 +5466,7 @@
+ * Note: The information line should not exceed ASC_INFO_SIZE bytes,
+ * otherwise the static 'info' array will be overrun.
+ */
+-const char *
++static const char *
+ advansys_info(struct Scsi_Host *shp)
+ {
+ static char info[ASC_INFO_SIZE];
+@@ -5568,7 +5559,7 @@
+ * This function always returns 0. Command return status is saved
+ * in the 'scp' result field.
+ */
+-int
++static int
+ advansys_queuecommand(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
+ {
+ struct Scsi_Host *shp;
+@@ -5656,7 +5647,7 @@
+ * sleeping is allowed and no locking other than for host structures is
+ * required. Returns SUCCESS or FAILED.
+ */
+-int
++static int
+ advansys_reset(struct scsi_cmnd *scp)
+ {
+ struct Scsi_Host *shp;
+@@ -5841,7 +5832,7 @@
+ * ip[1]: sectors
+ * ip[2]: cylinders
+ */
+-int
++static int
+ advansys_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ sector_t capacity, int ip[])
+ {
+@@ -5875,82 +5866,6 @@
+ }
+
+ /*
+- * advansys_setup()
+- *
+- * This function is called from init/main.c at boot time.
+- * It it passed LILO parameters that can be set from the
+- * LILO command line or in /etc/lilo.conf.
+- *
+- * It is used by the AdvanSys driver to either disable I/O
+- * port scanning or to limit scanning to 1 - 4 I/O ports.
+- * Regardless of the option setting EISA and PCI boards
+- * will still be searched for and detected. This option
+- * only affects searching for ISA and VL boards.
+- *
+- * If ADVANSYS_DEBUG is defined the driver debug level may
+- * be set using the 5th (ASC_NUM_IOPORT_PROBE + 1) I/O Port.
+- *
+- * Examples:
+- * 1. Eliminate I/O port scanning:
+- * boot: linux advansys=
+- * or
+- * boot: linux advansys=0x0
+- * 2. Limit I/O port scanning to one I/O port:
+- * boot: linux advansys=0x110
+- * 3. Limit I/O port scanning to four I/O ports:
+- * boot: linux advansys=0x110,0x210,0x230,0x330
+- * 4. If ADVANSYS_DEBUG, limit I/O port scanning to four I/O ports and
+- * set the driver debug level to 2.
+- * boot: linux advansys=0x110,0x210,0x230,0x330,0xdeb2
+- *
+- * ints[0] - number of arguments
+- * ints[1] - first argument
+- * ints[2] - second argument
+- * ...
+- */
+-void __init
+-advansys_setup(char *str, int *ints)
+-{
+- int i;
+-
+- if (asc_iopflag == ASC_TRUE) {
+- printk("AdvanSys SCSI: 'advansys' LILO option may appear only once\n");
+- return;
+- }
+-
+- asc_iopflag = ASC_TRUE;
+-
+- if (ints[0] > ASC_NUM_IOPORT_PROBE) {
+-#ifdef ADVANSYS_DEBUG
+- if ((ints[0] == ASC_NUM_IOPORT_PROBE + 1) &&
+- (ints[ASC_NUM_IOPORT_PROBE + 1] >> 4 == 0xdeb)) {
+- asc_dbglvl = ints[ASC_NUM_IOPORT_PROBE + 1] & 0xf;
+- } else {
+-#endif /* ADVANSYS_DEBUG */
+- printk("AdvanSys SCSI: only %d I/O ports accepted\n",
+- ASC_NUM_IOPORT_PROBE);
+-#ifdef ADVANSYS_DEBUG
+- }
+-#endif /* ADVANSYS_DEBUG */
+- }
+-
+-#ifdef ADVANSYS_DEBUG
+- ASC_DBG1(1, "advansys_setup: ints[0] %d\n", ints[0]);
+- for (i = 1; i < ints[0]; i++) {
+- ASC_DBG2(1, " ints[%d] 0x%x", i, ints[i]);
+- }
+- ASC_DBG(1, "\n");
+-#endif /* ADVANSYS_DEBUG */
+-
+- for (i = 1; i <= ints[0] && i <= ASC_NUM_IOPORT_PROBE; i++) {
+- asc_ioport[i-1] = ints[i];
+- ASC_DBG2(1, "advansys_setup: asc_ioport[%d] 0x%x\n",
+- i - 1, asc_ioport[i-1]);
+- }
+-}
+-
+-
+-/*
+ * --- Loadable Driver Support
+ */
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.h linux-2.6.22-591/drivers/scsi/advansys.h
+--- linux-2.6.22-570/drivers/scsi/advansys.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/advansys.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,36 +0,0 @@
+-/*
+- * advansys.h - Linux Host Driver for AdvanSys SCSI Adapters
+- *
+- * Copyright (c) 1995-2000 Advanced System Products, Inc.
+- * Copyright (c) 2000-2001 ConnectCom Solutions, Inc.
+- * All Rights Reserved.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that redistributions of source
+- * code retain the above copyright notice and this comment without
+- * modification.
+- *
+- * As of March 8, 2000 Advanced System Products, Inc. (AdvanSys)
+- * changed its name to ConnectCom Solutions, Inc.
+- *
+- */
+-
+-#ifndef _ADVANSYS_H
+-#define _ADVANSYS_H
+-
+-/*
+- * struct scsi_host_template function prototypes.
+- */
+-int advansys_detect(struct scsi_host_template *);
+-int advansys_release(struct Scsi_Host *);
+-const char *advansys_info(struct Scsi_Host *);
+-int advansys_queuecommand(struct scsi_cmnd *, void (* done)(struct scsi_cmnd *));
+-int advansys_reset(struct scsi_cmnd *);
+-int advansys_biosparam(struct scsi_device *, struct block_device *,
+- sector_t, int[]);
+-static int advansys_slave_configure(struct scsi_device *);
+-
+-/* init/main.c setup function */
+-void advansys_setup(char *, int *);
+-
+-#endif /* _ADVANSYS_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/aha152x.c linux-2.6.22-591/drivers/scsi/aha152x.c
+--- linux-2.6.22-570/drivers/scsi/aha152x.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aha152x.c 2007-12-21 15:36:12.000000000 -0500
+@@ -240,6 +240,7 @@
+ #include <linux/io.h>
+ #include <linux/blkdev.h>
+ #include <asm/system.h>
++#include <linux/completion.h>
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/wait.h>
+@@ -253,7 +254,6 @@
+ #include <linux/spinlock.h>
+ #include <linux/workqueue.h>
+ #include <linux/list.h>
+-#include <asm/semaphore.h>
+ #include <scsi/scsicam.h>
+
+ #include "scsi.h"
+@@ -551,7 +551,7 @@
+ */
+ struct aha152x_scdata {
+ Scsi_Cmnd *next; /* next sc in queue */
+- struct semaphore *sem; /* semaphore to block on */
++ struct completion *done;/* semaphore to block on */
+ unsigned char cmd_len;
+ unsigned char cmnd[MAX_COMMAND_SIZE];
+ unsigned short use_sg;
+@@ -608,7 +608,7 @@
+
+ #define SCDATA(SCpnt) ((struct aha152x_scdata *) (SCpnt)->host_scribble)
+ #define SCNEXT(SCpnt) SCDATA(SCpnt)->next
+-#define SCSEM(SCpnt) SCDATA(SCpnt)->sem
++#define SCSEM(SCpnt) SCDATA(SCpnt)->done
+
+ #define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset))
+
+@@ -969,7 +969,8 @@
+ /*
+ * Queue a command and setup interrupts for a free bus.
+ */
+-static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct semaphore *sem, int phase, void (*done)(Scsi_Cmnd *))
++static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct completion *complete,
++ int phase, void (*done)(Scsi_Cmnd *))
+ {
+ struct Scsi_Host *shpnt = SCpnt->device->host;
+ unsigned long flags;
+@@ -1013,7 +1014,7 @@
+ }
+
+ SCNEXT(SCpnt) = NULL;
+- SCSEM(SCpnt) = sem;
++ SCSEM(SCpnt) = complete;
+
+ /* setup scratch area
+ SCp.ptr : buffer pointer
+@@ -1084,9 +1085,9 @@
+ DPRINTK(debug_eh, INFO_LEAD "reset_done called\n", CMDINFO(SCpnt));
+ #endif
+ if(SCSEM(SCpnt)) {
+- up(SCSEM(SCpnt));
++ complete(SCSEM(SCpnt));
+ } else {
+- printk(KERN_ERR "aha152x: reset_done w/o semaphore\n");
++ printk(KERN_ERR "aha152x: reset_done w/o completion\n");
+ }
+ }
+
+@@ -1139,21 +1140,6 @@
+ return FAILED;
+ }
+
+-static void timer_expired(unsigned long p)
+-{
+- Scsi_Cmnd *SCp = (Scsi_Cmnd *)p;
+- struct semaphore *sem = SCSEM(SCp);
+- struct Scsi_Host *shpnt = SCp->device->host;
+- unsigned long flags;
+-
+- /* remove command from issue queue */
+- DO_LOCK(flags);
+- remove_SC(&ISSUE_SC, SCp);
+- DO_UNLOCK(flags);
+-
+- up(sem);
+-}
+-
+ /*
+ * Reset a device
+ *
+@@ -1161,14 +1147,14 @@
+ static int aha152x_device_reset(Scsi_Cmnd * SCpnt)
+ {
+ struct Scsi_Host *shpnt = SCpnt->device->host;
+- DECLARE_MUTEX_LOCKED(sem);
+- struct timer_list timer;
++ DECLARE_COMPLETION(done);
+ int ret, issued, disconnected;
+ unsigned char old_cmd_len = SCpnt->cmd_len;
+ unsigned short old_use_sg = SCpnt->use_sg;
+ void *old_buffer = SCpnt->request_buffer;
+ unsigned old_bufflen = SCpnt->request_bufflen;
+ unsigned long flags;
++ unsigned long timeleft;
+
+ #if defined(AHA152X_DEBUG)
+ if(HOSTDATA(shpnt)->debug & debug_eh) {
+@@ -1192,15 +1178,15 @@
+ SCpnt->request_buffer = NULL;
+ SCpnt->request_bufflen = 0;
+
+- init_timer(&timer);
+- timer.data = (unsigned long) SCpnt;
+- timer.expires = jiffies + 100*HZ; /* 10s */
+- timer.function = (void (*)(unsigned long)) timer_expired;
+-
+- aha152x_internal_queue(SCpnt, &sem, resetting, reset_done);
+- add_timer(&timer);
+- down(&sem);
+- del_timer(&timer);
++ aha152x_internal_queue(SCpnt, &done, resetting, reset_done);
++
++ timeleft = wait_for_completion_timeout(&done, 100*HZ);
++ if (!timeleft) {
++ /* remove command from issue queue */
++ DO_LOCK(flags);
++ remove_SC(&ISSUE_SC, SCpnt);
++ DO_UNLOCK(flags);
++ }
+
+ SCpnt->cmd_len = old_cmd_len;
+ SCpnt->use_sg = old_use_sg;
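The aha152x rework above is the canonical conversion from a locked semaphore plus a hand-rolled struct timer_list to a completion with a built-in timeout: wait_for_completion_timeout() returning 0 is the timeout case, at which point the still-queued command is withdrawn. A condensed sketch, with issue_request()/cancel_request() as hypothetical driver hooks:

/*
 * A minimal sketch, not from the patch; both hooks are hypothetical.
 */
#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

static void issue_request(struct completion *done);  /* calls complete(done) when finished */
static void cancel_request(void);                    /* withdraws a request that never ran */

static int wait_for_request(void)
{
        DECLARE_COMPLETION(done);
        unsigned long timeleft;

        issue_request(&done);

        /* block for at most 10 seconds; 0 means the wait timed out */
        timeleft = wait_for_completion_timeout(&done, 10 * HZ);
        if (!timeleft) {
                cancel_request();
                return -ETIMEDOUT;
        }
        return 0;
}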
+diff -Nurb linux-2.6.22-570/drivers/scsi/aha1740.c linux-2.6.22-591/drivers/scsi/aha1740.c
+--- linux-2.6.22-570/drivers/scsi/aha1740.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aha1740.c 2007-12-21 15:36:12.000000000 -0500
+@@ -271,19 +271,7 @@
+ continue;
+ }
+ sgptr = (struct aha1740_sg *) SCtmp->host_scribble;
+- if (SCtmp->use_sg) {
+- /* We used scatter-gather.
+- Do the unmapping dance. */
+- dma_unmap_sg (&edev->dev,
+- (struct scatterlist *) SCtmp->request_buffer,
+- SCtmp->use_sg,
+- SCtmp->sc_data_direction);
+- } else {
+- dma_unmap_single (&edev->dev,
+- sgptr->buf_dma_addr,
+- SCtmp->request_bufflen,
+- DMA_BIDIRECTIONAL);
+- }
++ scsi_dma_unmap(SCtmp);
+
+ /* Free the sg block */
+ dma_free_coherent (&edev->dev,
+@@ -349,11 +337,9 @@
+ unchar target = scmd_id(SCpnt);
+ struct aha1740_hostdata *host = HOSTDATA(SCpnt->device->host);
+ unsigned long flags;
+- void *buff = SCpnt->request_buffer;
+- int bufflen = SCpnt->request_bufflen;
+ dma_addr_t sg_dma;
+ struct aha1740_sg *sgptr;
+- int ecbno;
++ int ecbno, nseg;
+ DEB(int i);
+
+ if(*cmd == REQUEST_SENSE) {
+@@ -424,23 +410,22 @@
+ sgptr = (struct aha1740_sg *) SCpnt->host_scribble;
+ sgptr->sg_dma_addr = sg_dma;
+
+- if (SCpnt->use_sg) {
+- struct scatterlist * sgpnt;
++ nseg = scsi_dma_map(SCpnt);
++ BUG_ON(nseg < 0);
++ if (nseg) {
++ struct scatterlist *sg;
+ struct aha1740_chain * cptr;
+- int i, count;
++ int i;
+ DEB(unsigned char * ptr);
+
+ host->ecb[ecbno].sg = 1; /* SCSI Initiator Command
+ * w/scatter-gather*/
+- sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ cptr = sgptr->sg_chain;
+- count = dma_map_sg (&host->edev->dev, sgpnt, SCpnt->use_sg,
+- SCpnt->sc_data_direction);
+- for(i=0; i < count; i++) {
+- cptr[i].datalen = sg_dma_len (sgpnt + i);
+- cptr[i].dataptr = sg_dma_address (sgpnt + i);
++ scsi_for_each_sg(SCpnt, sg, nseg, i) {
++ cptr[i].datalen = sg_dma_len (sg);
++ cptr[i].dataptr = sg_dma_address (sg);
+ }
+- host->ecb[ecbno].datalen = count*sizeof(struct aha1740_chain);
++ host->ecb[ecbno].datalen = nseg * sizeof(struct aha1740_chain);
+ host->ecb[ecbno].dataptr = sg_dma;
+ #ifdef DEBUG
+ printk("cptr %x: ",cptr);
+@@ -448,11 +433,8 @@
+ for(i=0;i<24;i++) printk("%02x ", ptr[i]);
+ #endif
+ } else {
+- host->ecb[ecbno].datalen = bufflen;
+- sgptr->buf_dma_addr = dma_map_single (&host->edev->dev,
+- buff, bufflen,
+- DMA_BIDIRECTIONAL);
+- host->ecb[ecbno].dataptr = sgptr->buf_dma_addr;
++ host->ecb[ecbno].datalen = 0;
++ host->ecb[ecbno].dataptr = 0;
+ }
+ host->ecb[ecbno].lun = SCpnt->device->lun;
+ host->ecb[ecbno].ses = 1; /* Suppress underrun errors */
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -376,21 +376,10 @@
+ ahd_linux_unmap_scb(struct ahd_softc *ahd, struct scb *scb)
+ {
+ struct scsi_cmnd *cmd;
+- int direction;
+
+ cmd = scb->io_ctx;
+- direction = cmd->sc_data_direction;
+ ahd_sync_sglist(ahd, scb, BUS_DMASYNC_POSTWRITE);
+- if (cmd->use_sg != 0) {
+- struct scatterlist *sg;
+-
+- sg = (struct scatterlist *)cmd->request_buffer;
+- pci_unmap_sg(ahd->dev_softc, sg, cmd->use_sg, direction);
+- } else if (cmd->request_bufflen != 0) {
+- pci_unmap_single(ahd->dev_softc,
+- scb->platform_data->buf_busaddr,
+- cmd->request_bufflen, direction);
+- }
++ scsi_dma_unmap(cmd);
+ }
+
+ /******************************** Macros **************************************/
+@@ -1422,6 +1411,7 @@
+ u_int col_idx;
+ uint16_t mask;
+ unsigned long flags;
++ int nseg;
+
+ ahd_lock(ahd, &flags);
+
+@@ -1494,18 +1484,17 @@
+ ahd_set_residual(scb, 0);
+ ahd_set_sense_residual(scb, 0);
+ scb->sg_count = 0;
+- if (cmd->use_sg != 0) {
+- void *sg;
++
++ nseg = scsi_dma_map(cmd);
++ BUG_ON(nseg < 0);
++ if (nseg > 0) {
++ void *sg = scb->sg_list;
+ struct scatterlist *cur_seg;
+- u_int nseg;
+- int dir;
++ int i;
+
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- dir = cmd->sc_data_direction;
+- nseg = pci_map_sg(ahd->dev_softc, cur_seg,
+- cmd->use_sg, dir);
+ scb->platform_data->xfer_len = 0;
+- for (sg = scb->sg_list; nseg > 0; nseg--, cur_seg++) {
++
++ scsi_for_each_sg(cmd, cur_seg, nseg, i) {
+ dma_addr_t addr;
+ bus_size_t len;
+
+@@ -1513,22 +1502,8 @@
+ len = sg_dma_len(cur_seg);
+ scb->platform_data->xfer_len += len;
+ sg = ahd_sg_setup(ahd, scb, sg, addr, len,
+- /*last*/nseg == 1);
++ i == (nseg - 1));
+ }
+- } else if (cmd->request_bufflen != 0) {
+- void *sg;
+- dma_addr_t addr;
+- int dir;
+-
+- sg = scb->sg_list;
+- dir = cmd->sc_data_direction;
+- addr = pci_map_single(ahd->dev_softc,
+- cmd->request_buffer,
+- cmd->request_bufflen, dir);
+- scb->platform_data->xfer_len = cmd->request_bufflen;
+- scb->platform_data->buf_busaddr = addr;
+- sg = ahd_sg_setup(ahd, scb, sg, addr,
+- cmd->request_bufflen, /*last*/TRUE);
+ }
+
+ LIST_INSERT_HEAD(&ahd->pending_scbs, scb, pending_links);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-12-21 15:36:12.000000000 -0500
+@@ -781,7 +781,7 @@
+ static __inline
+ void ahd_set_residual(struct scb *scb, u_long resid)
+ {
+- scb->io_ctx->resid = resid;
++ scsi_set_resid(scb->io_ctx, resid);
+ }
+
+ static __inline
+@@ -793,7 +793,7 @@
+ static __inline
+ u_long ahd_get_residual(struct scb *scb)
+ {
+- return (scb->io_ctx->resid);
++ return scsi_get_resid(scb->io_ctx);
+ }
+
+ static __inline
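Both aic79xx_osm.h here and aic7xxx_osm.h below make the same substitution: direct reads and writes of cmd->resid become scsi_get_resid()/scsi_set_resid() calls. As a small hedged example of the write side in a completion path (note_underrun() is hypothetical):

/* A minimal sketch, not from the patch. */
#include <scsi/scsi_cmnd.h>

static void note_underrun(struct scsi_cmnd *cmd, unsigned int xferred)
{
        /* record how many requested bytes the hardware did not transfer */
        scsi_set_resid(cmd, scsi_bufflen(cmd) - xferred);
}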
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -402,18 +402,8 @@
+
+ cmd = scb->io_ctx;
+ ahc_sync_sglist(ahc, scb, BUS_DMASYNC_POSTWRITE);
+- if (cmd->use_sg != 0) {
+- struct scatterlist *sg;
+
+- sg = (struct scatterlist *)cmd->request_buffer;
+- pci_unmap_sg(ahc->dev_softc, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- } else if (cmd->request_bufflen != 0) {
+- pci_unmap_single(ahc->dev_softc,
+- scb->platform_data->buf_busaddr,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+ }
+
+ static __inline int
+@@ -1381,6 +1371,7 @@
+ struct ahc_tmode_tstate *tstate;
+ uint16_t mask;
+ struct scb_tailq *untagged_q = NULL;
++ int nseg;
+
+ /*
+ * Schedule us to run later. The only reason we are not
+@@ -1472,23 +1463,21 @@
+ ahc_set_residual(scb, 0);
+ ahc_set_sense_residual(scb, 0);
+ scb->sg_count = 0;
+- if (cmd->use_sg != 0) {
++
++ nseg = scsi_dma_map(cmd);
++ BUG_ON(nseg < 0);
++ if (nseg > 0) {
+ struct ahc_dma_seg *sg;
+ struct scatterlist *cur_seg;
+- struct scatterlist *end_seg;
+- int nseg;
++ int i;
+
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- nseg = pci_map_sg(ahc->dev_softc, cur_seg, cmd->use_sg,
+- cmd->sc_data_direction);
+- end_seg = cur_seg + nseg;
+ /* Copy the segments into the SG list. */
+ sg = scb->sg_list;
+ /*
+ * The sg_count may be larger than nseg if
+ * a transfer crosses a 32bit page.
+ */
+- while (cur_seg < end_seg) {
++ scsi_for_each_sg(cmd, cur_seg, nseg, i) {
+ dma_addr_t addr;
+ bus_size_t len;
+ int consumed;
+@@ -1499,7 +1488,6 @@
+ sg, addr, len);
+ sg += consumed;
+ scb->sg_count += consumed;
+- cur_seg++;
+ }
+ sg--;
+ sg->len |= ahc_htole32(AHC_DMA_LAST_SEG);
+@@ -1516,33 +1504,6 @@
+ */
+ scb->hscb->dataptr = scb->sg_list->addr;
+ scb->hscb->datacnt = scb->sg_list->len;
+- } else if (cmd->request_bufflen != 0) {
+- struct ahc_dma_seg *sg;
+- dma_addr_t addr;
+-
+- sg = scb->sg_list;
+- addr = pci_map_single(ahc->dev_softc,
+- cmd->request_buffer,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- scb->platform_data->buf_busaddr = addr;
+- scb->sg_count = ahc_linux_map_seg(ahc, scb,
+- sg, addr,
+- cmd->request_bufflen);
+- sg->len |= ahc_htole32(AHC_DMA_LAST_SEG);
+-
+- /*
+- * Reset the sg list pointer.
+- */
+- scb->hscb->sgptr =
+- ahc_htole32(scb->sg_list_phys | SG_FULL_RESID);
+-
+- /*
+- * Copy the first SG into the "current"
+- * data pointer area.
+- */
+- scb->hscb->dataptr = sg->addr;
+- scb->hscb->datacnt = sg->len;
+ } else {
+ scb->hscb->sgptr = ahc_htole32(SG_LIST_NULL);
+ scb->hscb->dataptr = 0;
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-12-21 15:36:12.000000000 -0500
+@@ -751,7 +751,7 @@
+ static __inline
+ void ahc_set_residual(struct scb *scb, u_long resid)
+ {
+- scb->io_ctx->resid = resid;
++ scsi_set_resid(scb->io_ctx, resid);
+ }
+
+ static __inline
+@@ -763,7 +763,7 @@
+ static __inline
+ u_long ahc_get_residual(struct scb *scb)
+ {
+- return (scb->io_ctx->resid);
++ return scsi_get_resid(scb->io_ctx);
+ }
+
+ static __inline
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx_old.c linux-2.6.22-591/drivers/scsi/aic7xxx_old.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx_old.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aic7xxx_old.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2690,17 +2690,8 @@
+ struct aic7xxx_scb *scbp;
+ unsigned char queue_depth;
+
+- if (cmd->use_sg > 1)
+- {
+- struct scatterlist *sg;
++ scsi_dma_unmap(cmd);
+
+- sg = (struct scatterlist *)cmd->request_buffer;
+- pci_unmap_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
+- }
+- else if (cmd->request_bufflen)
+- pci_unmap_single(p->pdev, aic7xxx_mapping(cmd),
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+ if (scb->flags & SCB_SENSE)
+ {
+ pci_unmap_single(p->pdev,
+@@ -3869,7 +3860,7 @@
+ * the mid layer didn't check residual data counts to see if the
+ * command needs retried.
+ */
+- cmd->resid = scb->sg_length - actual;
++ scsi_set_resid(cmd, scb->sg_length - actual);
+ aic7xxx_status(cmd) = hscb->target_status;
+ }
+ }
+@@ -10137,6 +10128,7 @@
+ struct scsi_device *sdptr = cmd->device;
+ unsigned char tindex = TARGET_INDEX(cmd);
+ struct request *req = cmd->request;
++ int use_sg;
+
+ mask = (0x01 << tindex);
+ hscb = scb->hscb;
+@@ -10209,8 +10201,10 @@
+ memcpy(scb->cmnd, cmd->cmnd, cmd->cmd_len);
+ hscb->SCSI_cmd_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, scb->cmnd));
+
+- if (cmd->use_sg)
+- {
++ use_sg = scsi_dma_map(cmd);
++ BUG_ON(use_sg < 0);
++
++ if (use_sg) {
+ struct scatterlist *sg; /* Must be mid-level SCSI code scatterlist */
+
+ /*
+@@ -10219,11 +10213,11 @@
+ * differences and the kernel SG list uses virtual addresses where
+ * we need physical addresses.
+ */
+- int i, use_sg;
++ int i;
+
+- sg = (struct scatterlist *)cmd->request_buffer;
+ scb->sg_length = 0;
+- use_sg = pci_map_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
++
+ /*
+ * Copy the segments into the SG array. NOTE!!! - We used to
+ * have the first entry both in the data_pointer area and the first
+@@ -10231,10 +10225,9 @@
+ * entry in both places, but now we download the address of
+ * scb->sg_list[1] instead of 0 to the sg pointer in the hscb.
+ */
+- for (i = 0; i < use_sg; i++)
+- {
+- unsigned int len = sg_dma_len(sg+i);
+- scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg+i));
++ scsi_for_each_sg(cmd, sg, use_sg, i) {
++ unsigned int len = sg_dma_len(sg);
++ scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg));
+ scb->sg_list[i].length = cpu_to_le32(len);
+ scb->sg_length += len;
+ }
+@@ -10244,26 +10237,7 @@
+ scb->sg_count = i;
+ hscb->SG_segment_count = i;
+ hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[1]));
+- }
+- else
+- {
+- if (cmd->request_bufflen)
+- {
+- unsigned int address = pci_map_single(p->pdev, cmd->request_buffer,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- aic7xxx_mapping(cmd) = address;
+- scb->sg_list[0].address = cpu_to_le32(address);
+- scb->sg_list[0].length = cpu_to_le32(cmd->request_bufflen);
+- scb->sg_count = 1;
+- scb->sg_length = cmd->request_bufflen;
+- hscb->SG_segment_count = 1;
+- hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[0]));
+- hscb->data_count = scb->sg_list[0].length;
+- hscb->data_pointer = scb->sg_list[0].address;
+- }
+- else
+- {
++ } else {
+ scb->sg_count = 0;
+ scb->sg_length = 0;
+ hscb->SG_segment_count = 0;
+@@ -10271,7 +10245,6 @@
+ hscb->data_count = 0;
+ hscb->data_pointer = 0;
+ }
+- }
+ }
+
+ /*+F*************************************************************************
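All three aic conversions above lean on the same scsi_dma_map() contract: a negative return means the DMA mapping failed, zero means the command moves no data, and a positive value is the mapped segment count, to be released later with scsi_dma_unmap(). These drivers assert the failure case away with BUG_ON(nseg < 0); a driver that would rather back-pressure the midlayer could do something like this sketch instead:

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

static int queue_data_phase(struct scsi_cmnd *cmd)
{
	int nseg = scsi_dma_map(cmd);

	if (nseg < 0)			/* out of mapping resources */
		return SCSI_MLQUEUE_HOST_BUSY;	/* retry later */
	if (nseg == 0)
		return 0;		/* nothing to transfer */
	/* ... program the controller from the mapped list, and call
	 * scsi_dma_unmap(cmd) when the command completes ...
	 */
	return 0;
}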
+diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.c linux-2.6.22-591/drivers/scsi/amiga7xx.c
+--- linux-2.6.22-570/drivers/scsi/amiga7xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/amiga7xx.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,138 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
+- * Amiga MacroSystemUS WarpEngine SCSI controller.
+- * Amiga Technologies A4000T SCSI controller.
+- * Amiga Technologies/DKB A4091 SCSI controller.
+- *
+- * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
+- * plus modifications of the 53c7xx.c driver to support the Amiga.
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-#include <linux/zorro.h>
+-#include <linux/stat.h>
+-
+-#include <asm/setup.h>
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/amigaints.h>
+-#include <asm/amigahw.h>
+-#include <asm/dma.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "amiga7xx.h"
+-
+-
+-static int amiga7xx_register_one(struct scsi_host_template *tpnt,
+- unsigned long address)
+-{
+- long long options;
+- int clock;
+-
+- if (!request_mem_region(address, 0x1000, "ncr53c710"))
+- return 0;
+-
+- address = (unsigned long)z_ioremap(address, 0x1000);
+- options = OPTION_MEMORY_MAPPED | OPTION_DEBUG_TEST1 | OPTION_INTFLY |
+- OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS |
+- OPTION_DISCONNECT;
+- clock = 50000000; /* 50 MHz SCSI Clock */
+- ncr53c7xx_init(tpnt, 0, 710, address, 0, IRQ_AMIGA_PORTS, DMA_NONE,
+- options, clock);
+- return 1;
+-}
+-
+-
+-#ifdef CONFIG_ZORRO
+-
+-static struct {
+- zorro_id id;
+- unsigned long offset;
+- int absolute; /* offset is absolute address */
+-} amiga7xx_table[] = {
+- { .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, .offset = 0xf40000,
+- .absolute = 1 },
+- { .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, .offset = 0x40000 },
+- { .id = ZORRO_PROD_CBM_A4091_1, .offset = 0x800000 },
+- { .id = ZORRO_PROD_CBM_A4091_2, .offset = 0x800000 },
+- { .id = ZORRO_PROD_GVP_GFORCE_040_060, .offset = 0x40000 },
+- { 0 }
+-};
+-
+-static int __init amiga7xx_zorro_detect(struct scsi_host_template *tpnt)
+-{
+- int num = 0, i;
+- struct zorro_dev *z = NULL;
+- unsigned long address;
+-
+- while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
+- for (i = 0; amiga7xx_table[i].id; i++)
+- if (z->id == amiga7xx_table[i].id)
+- break;
+- if (!amiga7xx_table[i].id)
+- continue;
+- if (amiga7xx_table[i].absolute)
+- address = amiga7xx_table[i].offset;
+- else
+- address = z->resource.start + amiga7xx_table[i].offset;
+- num += amiga7xx_register_one(tpnt, address);
+- }
+- return num;
+-}
+-
+-#endif /* CONFIG_ZORRO */
+-
+-
+-int __init amiga7xx_detect(struct scsi_host_template *tpnt)
+-{
+- static unsigned char called = 0;
+- int num = 0;
+-
+- if (called || !MACH_IS_AMIGA)
+- return 0;
+-
+- tpnt->proc_name = "Amiga7xx";
+-
+- if (AMIGAHW_PRESENT(A4000_SCSI))
+- num += amiga7xx_register_one(tpnt, 0xdd0040);
+-
+-#ifdef CONFIG_ZORRO
+- num += amiga7xx_zorro_detect(tpnt);
+-#endif
+-
+- called = 1;
+- return num;
+-}
+-
+-static int amiga7xx_release(struct Scsi_Host *shost)
+-{
+- if (shost->irq)
+- free_irq(shost->irq, NULL);
+- if (shost->dma_channel != 0xff)
+- free_dma(shost->dma_channel);
+- if (shost->io_port && shost->n_io_port)
+- release_region(shost->io_port, shost->n_io_port);
+- scsi_unregister(shost);
+- return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+- .name = "Amiga NCR53c710 SCSI",
+- .detect = amiga7xx_detect,
+- .release = amiga7xx_release,
+- .queuecommand = NCR53c7xx_queue_command,
+- .abort = NCR53c7xx_abort,
+- .reset = NCR53c7xx_reset,
+- .can_queue = 24,
+- .this_id = 7,
+- .sg_tablesize = 63,
+- .cmd_per_lun = 3,
+- .use_clustering = DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.h linux-2.6.22-591/drivers/scsi/amiga7xx.h
+--- linux-2.6.22-570/drivers/scsi/amiga7xx.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/amiga7xx.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,23 +0,0 @@
+-#ifndef AMIGA7XX_H
+-
+-#include <linux/types.h>
+-
+-int amiga7xx_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* AMIGA7XX_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h 2007-12-21 15:36:12.000000000 -0500
+@@ -48,9 +48,10 @@
+
+ #define ARCMSR_MAX_OUTSTANDING_CMD 256
+ #define ARCMSR_MAX_FREECCB_NUM 288
+-#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.13"
++#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.14"
+ #define ARCMSR_SCSI_INITIATOR_ID 255
+ #define ARCMSR_MAX_XFER_SECTORS 512
++#define ARCMSR_MAX_XFER_SECTORS_B 4096
+ #define ARCMSR_MAX_TARGETID 17
+ #define ARCMSR_MAX_TARGETLUN 8
+ #define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD
+@@ -469,4 +470,3 @@
+ extern struct class_device_attribute *arcmsr_host_attrs[];
+ extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb);
+ void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb);
+-
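ARCMSR_MAX_XFER_SECTORS_B raises the per-command limit to 4096 sectors, but only adapters with new enough IOP firmware get it; the probe code below compares the reported version string against "V1.42". A sketch of that gate as a hypothetical helper, assuming the arcmsr.h definitions above (note this is a plain string comparison, which only works while the firmware keeps the fixed-width "Vx.yy" form):

#include <linux/string.h>
#include <scsi/scsi_host.h>
#include "arcmsr.h"

static void set_xfer_limit(struct Scsi_Host *host,
			   struct AdapterControlBlock *acb)
{
	if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
		host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;	/* 4096 */
	else
		host->max_sectors = ARCMSR_MAX_XFER_SECTORS;	/* 512 */
}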
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -59,8 +59,9 @@
+ struct class_device_attribute *arcmsr_host_attrs[];
+
+ static ssize_t
+-arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++arcmsr_sysfs_iop_message_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *host = class_to_shost(cdev);
+@@ -105,8 +106,9 @@
+ }
+
+ static ssize_t
+-arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++arcmsr_sysfs_iop_message_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *host = class_to_shost(cdev);
+@@ -152,8 +154,9 @@
+ }
+
+ static ssize_t
+-arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++arcmsr_sysfs_iop_message_clear(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *host = class_to_shost(cdev);
+@@ -188,7 +191,6 @@
+ .attr = {
+ .name = "mu_read",
+ .mode = S_IRUSR ,
+- .owner = THIS_MODULE,
+ },
+ .size = 1032,
+ .read = arcmsr_sysfs_iop_message_read,
+@@ -198,7 +200,6 @@
+ .attr = {
+ .name = "mu_write",
+ .mode = S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 1032,
+ .write = arcmsr_sysfs_iop_message_write,
+@@ -208,7 +209,6 @@
+ .attr = {
+ .name = "mu_clear",
+ .mode = S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 1,
+ .write = arcmsr_sysfs_iop_message_clear,
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c 2007-12-21 15:36:12.000000000 -0500
+@@ -57,6 +57,7 @@
+ #include <linux/dma-mapping.h>
+ #include <linux/timer.h>
+ #include <linux/pci.h>
++#include <linux/aer.h>
+ #include <asm/dma.h>
+ #include <asm/io.h>
+ #include <asm/system.h>
+@@ -71,7 +72,7 @@
+ #include "arcmsr.h"
+
+ MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
+-MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
++MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter");
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_VERSION(ARCMSR_DRIVER_VERSION);
+
+@@ -93,7 +94,9 @@
+ static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb);
+ static const char *arcmsr_info(struct Scsi_Host *);
+ static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
+-
++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
++ pci_channel_state_t state);
++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev);
+ static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth)
+ {
+ if (queue_depth > ARCMSR_MAX_CMD_PERLUN)
+@@ -104,7 +107,8 @@
+
+ static struct scsi_host_template arcmsr_scsi_host_template = {
+ .module = THIS_MODULE,
+- .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION,
++	.name = "ARCMSR ARECA SATA/SAS RAID HOST Adapter "
++ ARCMSR_DRIVER_VERSION,
+ .info = arcmsr_info,
+ .queuecommand = arcmsr_queue_command,
+ .eh_abort_handler = arcmsr_abort,
+@@ -119,6 +123,10 @@
+ .use_clustering = ENABLE_CLUSTERING,
+ .shost_attrs = arcmsr_host_attrs,
+ };
++static struct pci_error_handlers arcmsr_pci_error_handlers = {
++ .error_detected = arcmsr_pci_error_detected,
++ .slot_reset = arcmsr_pci_slot_reset,
++};
+
+ static struct pci_device_id arcmsr_device_id_table[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
+@@ -144,7 +152,8 @@
+ .id_table = arcmsr_device_id_table,
+ .probe = arcmsr_probe,
+ .remove = arcmsr_remove,
+- .shutdown = arcmsr_shutdown
++ .shutdown = arcmsr_shutdown,
++ .err_handler = &arcmsr_pci_error_handlers,
+ };
+
+ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
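The AER support being added here hangs off struct pci_driver: the driver publishes its recovery callbacks through .err_handler, and the probe hunk below additionally turns on PCIe error reporting for the device. The registration half, reduced to a sketch (demo names are placeholders):

#include <linux/pci.h>
#include <linux/aer.h>

static pci_ers_result_t demo_error_detected(struct pci_dev *pdev,
					    pci_channel_state_t state);
static pci_ers_result_t demo_slot_reset(struct pci_dev *pdev);

static struct pci_error_handlers demo_err_handler = {
	.error_detected	= demo_error_detected,
	.slot_reset	= demo_slot_reset,
};

static struct pci_driver demo_driver = {
	.name		= "demo",
	.err_handler	= &demo_err_handler,
	/* probe() also calls pci_enable_pcie_error_reporting(pdev) */
};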
+@@ -328,6 +337,8 @@
+
+ arcmsr_iop_init(acb);
+ pci_set_drvdata(pdev, host);
++ if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
++		host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;
+
+ error = scsi_add_host(host, &pdev->dev);
+ if (error)
+@@ -338,6 +349,7 @@
+ goto out_free_sysfs;
+
+ scsi_scan_host(host);
++ pci_enable_pcie_error_reporting(pdev);
+ return 0;
+ out_free_sysfs:
+ out_free_irq:
+@@ -369,19 +381,9 @@
+
+ static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb)
+ {
+- struct AdapterControlBlock *acb = ccb->acb;
+ struct scsi_cmnd *pcmd = ccb->pcmd;
+
+- if (pcmd->use_sg != 0) {
+- struct scatterlist *sl;
+-
+- sl = (struct scatterlist *)pcmd->request_buffer;
+- pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction);
+- }
+- else if (pcmd->request_bufflen != 0)
+- pci_unmap_single(acb->pdev,
+- pcmd->SCp.dma_handle,
+- pcmd->request_bufflen, pcmd->sc_data_direction);
++ scsi_dma_unmap(pcmd);
+ }
+
+ static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag)
+@@ -498,7 +500,7 @@
+
+ static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb)
+ {
+- struct MessageUnit __iomem *reg=acb->pmu;
++ struct MessageUnit __iomem *reg = acb->pmu;
+
+ 	writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, &reg->inbound_msgaddr0);
+ if (arcmsr_wait_msgint_ready(acb))
+@@ -551,6 +553,7 @@
+ int8_t *psge = (int8_t *)&arcmsr_cdb->u;
+ uint32_t address_lo, address_hi;
+ int arccdbsize = 0x30;
++ int nseg;
+
+ ccb->pcmd = pcmd;
+ memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB));
+@@ -561,20 +564,20 @@
+ arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len;
+ arcmsr_cdb->Context = (unsigned long)arcmsr_cdb;
+ memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len);
+- if (pcmd->use_sg) {
+- int length, sgcount, i, cdb_sgcount = 0;
+- struct scatterlist *sl;
+-
+- /* Get Scatter Gather List from scsiport. */
+- sl = (struct scatterlist *) pcmd->request_buffer;
+- sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg,
+- pcmd->sc_data_direction);
++
++ nseg = scsi_dma_map(pcmd);
++ BUG_ON(nseg < 0);
++
++ if (nseg) {
++ int length, i, cdb_sgcount = 0;
++ struct scatterlist *sg;
++
+ /* map stor port SG list to our iop SG List. */
+- for (i = 0; i < sgcount; i++) {
++ scsi_for_each_sg(pcmd, sg, nseg, i) {
+ /* Get the physical address of the current data pointer */
+- length = cpu_to_le32(sg_dma_len(sl));
+- address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl)));
+- address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl)));
++ length = cpu_to_le32(sg_dma_len(sg));
++ address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sg)));
++ address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sg)));
+ if (address_hi == 0) {
+ struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+
+@@ -591,32 +594,12 @@
+ psge += sizeof (struct SG64ENTRY);
+ arccdbsize += sizeof (struct SG64ENTRY);
+ }
+- sl++;
+ cdb_sgcount++;
+ }
+ arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount;
+- arcmsr_cdb->DataLength = pcmd->request_bufflen;
++ arcmsr_cdb->DataLength = scsi_bufflen(pcmd);
+ if ( arccdbsize > 256)
+ arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE;
+- } else if (pcmd->request_bufflen) {
+- dma_addr_t dma_addr;
+- dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer,
+- pcmd->request_bufflen, pcmd->sc_data_direction);
+- pcmd->SCp.dma_handle = dma_addr;
+- address_lo = cpu_to_le32(dma_addr_lo32(dma_addr));
+- address_hi = cpu_to_le32(dma_addr_hi32(dma_addr));
+- if (address_hi == 0) {
+- struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+- pdma_sg->address = address_lo;
+- pdma_sg->length = pcmd->request_bufflen;
+- } else {
+- struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+- pdma_sg->addresshigh = address_hi;
+- pdma_sg->address = address_lo;
+- pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR;
+- }
+- arcmsr_cdb->sgcount = 1;
+- arcmsr_cdb->DataLength = pcmd->request_bufflen;
+ }
+ if (pcmd->sc_data_direction == DMA_TO_DEVICE ) {
+ arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE;
+@@ -758,20 +741,20 @@
+ (flag_ccb << 5));
+ if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) {
+ if (ccb->startdone == ARCMSR_CCB_ABORTED) {
+- struct scsi_cmnd *abortcmd=ccb->pcmd;
++ struct scsi_cmnd *abortcmd = ccb->pcmd;
+ if (abortcmd) {
+ abortcmd->result |= DID_ABORT >> 16;
+ arcmsr_ccb_complete(ccb, 1);
+ printk(KERN_NOTICE
+- "arcmsr%d: ccb='0x%p' isr got aborted command \n"
++				"arcmsr%d: ccb = '0x%p' isr got aborted command \n"
+ , acb->host->host_no, ccb);
+ }
+ continue;
+ }
+ printk(KERN_NOTICE
+- "arcmsr%d: isr get an illegal ccb command done acb='0x%p'"
+- "ccb='0x%p' ccbacb='0x%p' startdone = 0x%x"
+- " ccboutstandingcount=%d \n"
++ "arcmsr%d: isr get an illegal ccb command done acb = '0x%p'"
++			" ccb = '0x%p' ccbacb = '0x%p' startdone = 0x%x"
++ " ccboutstandingcount = %d \n"
+ , acb->host->host_no
+ , acb
+ , ccb
+@@ -791,7 +774,7 @@
+ switch(ccb->arcmsr_cdb.DeviceStatus) {
+ case ARCMSR_DEV_SELECT_TIMEOUT: {
+ acb->devstate[id][lun] = ARECA_RAID_GONE;
+- ccb->pcmd->result = DID_TIME_OUT << 16;
++ ccb->pcmd->result = DID_NO_CONNECT << 16;
+ arcmsr_ccb_complete(ccb, 1);
+ }
+ break;
+@@ -810,8 +793,8 @@
+ break;
+ default:
+ printk(KERN_NOTICE
+- "arcmsr%d: scsi id=%d lun=%d"
+- " isr get command error done,"
++ "arcmsr%d: scsi id = %d lun = %d"
++ " isr get command error done, "
+ "but got unknown DeviceStatus = 0x%x \n"
+ , acb->host->host_no
+ , id
+@@ -848,24 +831,21 @@
+ struct CMD_MESSAGE_FIELD *pcmdmessagefld;
+ int retvalue = 0, transfer_len = 0;
+ char *buffer;
++ struct scatterlist *sg;
+ uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 |
+ (uint32_t ) cmd->cmnd[6] << 16 |
+ (uint32_t ) cmd->cmnd[7] << 8 |
+ (uint32_t ) cmd->cmnd[8];
+ /* 4 bytes: Areca io control code */
+- if (cmd->use_sg) {
+- struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+
++ sg = scsi_sglist(cmd);
+ buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+- if (cmd->use_sg > 1) {
++ if (scsi_sg_count(cmd) > 1) {
+ retvalue = ARCMSR_MESSAGE_FAIL;
+ goto message_out;
+ }
+ transfer_len += sg->length;
+- } else {
+- buffer = cmd->request_buffer;
+- transfer_len = cmd->request_bufflen;
+- }
++
+ if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) {
+ retvalue = ARCMSR_MESSAGE_FAIL;
+ goto message_out;
+@@ -1057,12 +1037,9 @@
+ retvalue = ARCMSR_MESSAGE_FAIL;
+ }
+ message_out:
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
+-
+- sg = (struct scatterlist *) cmd->request_buffer;
++ sg = scsi_sglist(cmd);
+ kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+- }
++
+ return retvalue;
+ }
+
+@@ -1085,6 +1062,7 @@
+ case INQUIRY: {
+ unsigned char inqdata[36];
+ char *buffer;
++ struct scatterlist *sg;
+
+ if (cmd->device->lun) {
+ cmd->result = (DID_TIME_OUT << 16);
+@@ -1096,7 +1074,7 @@
+ inqdata[1] = 0;
+ /* rem media bit & Dev Type Modifier */
+ inqdata[2] = 0;
+- /* ISO,ECMA,& ANSI versions */
++ /* ISO, ECMA, & ANSI versions */
+ inqdata[4] = 31;
+ /* length of additional data */
+ strncpy(&inqdata[8], "Areca ", 8);
+@@ -1104,21 +1082,14 @@
+ strncpy(&inqdata[16], "RAID controller ", 16);
+ /* Product Identification */
+ strncpy(&inqdata[32], "R001", 4); /* Product Revision */
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
+
+- sg = (struct scatterlist *) cmd->request_buffer;
++ sg = scsi_sglist(cmd);
+ buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+- } else {
+- buffer = cmd->request_buffer;
+- }
+- memcpy(buffer, inqdata, sizeof(inqdata));
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
+
+- sg = (struct scatterlist *) cmd->request_buffer;
++ memcpy(buffer, inqdata, sizeof(inqdata));
++ sg = scsi_sglist(cmd);
+ kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+- }
++
+ cmd->scsi_done(cmd);
+ }
+ break;
+@@ -1153,7 +1124,7 @@
+ , acb->host->host_no);
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
+- if(target == 16) {
++ if (target == 16) {
+ /* virtual device for iop message transfer */
+ arcmsr_handle_virtual_command(acb, cmd);
+ return 0;
+@@ -1166,7 +1137,7 @@
+ printk(KERN_NOTICE
+ "arcmsr%d: block 'read/write'"
+ "command with gone raid volume"
+- " Cmd=%2x, TargetId=%d, Lun=%d \n"
++ " Cmd = %2x, TargetId = %d, Lun = %d \n"
+ , acb->host->host_no
+ , cmd->cmnd[0]
+ , target, lun);
+@@ -1257,7 +1228,7 @@
+ if ((ccb->startdone == ARCMSR_CCB_ABORTED) ||
+ (ccb == poll_ccb)) {
+ printk(KERN_NOTICE
+- "arcmsr%d: scsi id=%d lun=%d ccb='0x%p'"
++ "arcmsr%d: scsi id = %d lun = %d ccb = '0x%p'"
+ " poll command abort successfully \n"
+ , acb->host->host_no
+ , ccb->pcmd->device->id
+@@ -1270,8 +1241,8 @@
+ }
+ printk(KERN_NOTICE
+ "arcmsr%d: polling get an illegal ccb"
+- " command done ccb='0x%p'"
+- "ccboutstandingcount=%d \n"
++			" command done ccb = '0x%p'"
++			" ccboutstandingcount = %d \n"
+ , acb->host->host_no
+ , ccb
+ , atomic_read(&acb->ccboutstandingcount));
+@@ -1288,7 +1259,7 @@
+ switch(ccb->arcmsr_cdb.DeviceStatus) {
+ case ARCMSR_DEV_SELECT_TIMEOUT: {
+ acb->devstate[id][lun] = ARECA_RAID_GONE;
+- ccb->pcmd->result = DID_TIME_OUT << 16;
++ ccb->pcmd->result = DID_NO_CONNECT << 16;
+ arcmsr_ccb_complete(ccb, 1);
+ }
+ break;
+@@ -1307,7 +1278,7 @@
+ break;
+ default:
+ printk(KERN_NOTICE
+- "arcmsr%d: scsi id=%d lun=%d"
++ "arcmsr%d: scsi id = %d lun = %d"
+ " polling and getting command error done"
+ "but got unknown DeviceStatus = 0x%x \n"
+ , acb->host->host_no
+@@ -1322,6 +1293,94 @@
+ }
+ }
+ }
++static void arcmsr_done4_abort_postqueue(struct AdapterControlBlock *acb)
++{
++ int i = 0, found = 0;
++ int id, lun;
++ uint32_t flag_ccb, outbound_intstatus;
++ struct MessageUnit __iomem *reg = acb->pmu;
++ struct CommandControlBlock *ccb;
++ /*clear and abort all outbound posted Q*/
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (ccb) {
++			if ((ccb->acb != acb) ||
++			    (ccb->startdone != ARCMSR_CCB_START)) {
++				printk(KERN_NOTICE
++					"arcmsr%d: polling get an illegal ccb"
++					" command done ccb = '0x%p'"
++					" ccboutstandingcount = %d \n",
++					acb->host->host_no, ccb,
++					atomic_read(&acb->ccboutstandingcount));
++				continue;
++			}
++
++ id = ccb->pcmd->device->id;
++ lun = ccb->pcmd->device->lun;
++ if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)){
++ if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++ acb->devstate[id][lun] = ARECA_RAID_GOOD;
++ ccb->pcmd->result = DID_OK << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ else {
++ switch(ccb->arcmsr_cdb.DeviceStatus) {
++ case ARCMSR_DEV_SELECT_TIMEOUT: {
++ acb->devstate[id][lun] = ARECA_RAID_GONE;
++ ccb->pcmd->result = DID_NO_CONNECT << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_ABORTED:
++
++ case ARCMSR_DEV_INIT_FAIL: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_CHECK_CONDITION: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GOOD;
++ arcmsr_report_sense_info(ccb);
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done,"
++					" but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ break;
++ }
++ }
++ found = 1;
++ }
++ }
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++ /*clear interrupt*/
++ }
++ return;
++}
++
+
+ static void arcmsr_iop_init(struct AdapterControlBlock *acb)
+ {
+@@ -1355,7 +1414,6 @@
+
+ static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+ {
+- struct MessageUnit __iomem *reg = acb->pmu;
+ struct CommandControlBlock *ccb;
+ uint32_t intmask_org;
+ int i = 0;
+@@ -1368,21 +1426,17 @@
+ /* disable all outbound interrupt */
+ intmask_org = arcmsr_disable_outbound_ints(acb);
+ /* clear all outbound posted Q */
+- for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+-		readl(&reg->outbound_queueport);
++ arcmsr_done4_abort_postqueue(acb);
+ for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+ ccb = acb->pccb_pool[i];
+- if ((ccb->startdone == ARCMSR_CCB_START) ||
+- (ccb->startdone == ARCMSR_CCB_ABORTED)) {
++ if (ccb->startdone == ARCMSR_CCB_START) {
+ ccb->startdone = ARCMSR_CCB_ABORTED;
+- ccb->pcmd->result = DID_ABORT << 16;
+- arcmsr_ccb_complete(ccb, 1);
+ }
+ }
+ /* enable all outbound interrupt */
+ arcmsr_enable_outbound_ints(acb, intmask_org);
+ }
+- atomic_set(&acb->ccboutstandingcount, 0);
++
+ }
+
+ static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
+@@ -1428,10 +1482,9 @@
+ int i = 0;
+
+ printk(KERN_NOTICE
+- "arcmsr%d: abort device command of scsi id=%d lun=%d \n",
++ "arcmsr%d: abort device command of scsi id = %d lun = %d \n",
+ acb->host->host_no, cmd->device->id, cmd->device->lun);
+ acb->num_aborts++;
+-
+ /*
+ ************************************************
+ ** the all interrupt service routine is locked
+@@ -1492,4 +1545,300 @@
+ return buf;
+ }
+
++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev)
++{
++ struct Scsi_Host *host;
++ struct AdapterControlBlock *acb;
++ uint8_t bus, dev_fun;
++ int error;
++
++ error = pci_enable_device(pdev);
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
++ pci_set_master(pdev);
++
++	host = scsi_host_alloc(&arcmsr_scsi_host_template,
++		sizeof(struct AdapterControlBlock));
++ if (!host)
++ return PCI_ERS_RESULT_DISCONNECT;
++ acb = (struct AdapterControlBlock *)host->hostdata;
++ memset(acb, 0, sizeof (struct AdapterControlBlock));
++
++ error = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
++ if (error) {
++ error = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
++ if (error) {
++ printk(KERN_WARNING
++ "scsi%d: No suitable DMA mask available\n",
++ host->host_no);
++ return PCI_ERS_RESULT_DISCONNECT;
++ }
++ }
++ bus = pdev->bus->number;
++ dev_fun = pdev->devfn;
++ acb->pdev = pdev;
++ acb->host = host;
++ host->max_sectors = ARCMSR_MAX_XFER_SECTORS;
++ host->max_lun = ARCMSR_MAX_TARGETLUN;
++ host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/
++ host->max_cmd_len = 16; /*this is issue of 64bit LBA, over 2T byte*/
++ host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES;
++ host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */
++ host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN;
++ host->this_id = ARCMSR_SCSI_INITIATOR_ID;
++ host->unique_id = (bus << 8) | dev_fun;
++ host->irq = pdev->irq;
++ error = pci_request_regions(pdev, "arcmsr");
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
+
++ acb->pmu = ioremap(pci_resource_start(pdev, 0),
++ pci_resource_len(pdev, 0));
++ if (!acb->pmu) {
++ printk(KERN_NOTICE "arcmsr%d: memory"
++ " mapping region fail \n", acb->host->host_no);
++ return PCI_ERS_RESULT_DISCONNECT;
++ }
++ acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
++ ACB_F_MESSAGE_RQBUFFER_CLEARED |
++ ACB_F_MESSAGE_WQBUFFER_READED);
++ acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
++ INIT_LIST_HEAD(&acb->ccb_free_list);
++
++ error = arcmsr_alloc_ccb_pool(acb);
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
++
++ error = request_irq(pdev->irq, arcmsr_do_interrupt,
++ IRQF_DISABLED | IRQF_SHARED, "arcmsr", acb);
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
++
++ arcmsr_iop_init(acb);
++ if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
++ host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;
++
++ pci_set_drvdata(pdev, host);
++
++ error = scsi_add_host(host, &pdev->dev);
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
++
++ error = arcmsr_alloc_sysfs_attr(acb);
++ if (error)
++ return PCI_ERS_RESULT_DISCONNECT;
++
++ scsi_scan_host(host);
++ return PCI_ERS_RESULT_RECOVERED;
++}
++
++static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev)
++{
++ struct Scsi_Host *host = pci_get_drvdata(pdev);
++ struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
++ struct MessageUnit __iomem *reg = acb->pmu;
++ struct CommandControlBlock *ccb;
++ /*clear and abort all outbound posted Q*/
++ int i = 0, found = 0;
++ int id, lun;
++ uint32_t flag_ccb, outbound_intstatus;
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (ccb) {
++			if ((ccb->acb != acb) ||
++			    (ccb->startdone != ARCMSR_CCB_START)) {
++				printk(KERN_NOTICE
++					"arcmsr%d: polling get an illegal ccb"
++					" command done ccb = '0x%p'"
++					" ccboutstandingcount = %d \n",
++					acb->host->host_no, ccb,
++					atomic_read(&acb->ccboutstandingcount));
++ continue;
++ }
++
++ id = ccb->pcmd->device->id;
++ lun = ccb->pcmd->device->lun;
++ if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++ if (acb->devstate[id][lun] ==
++ ARECA_RAID_GONE)
++ acb->devstate[id][lun] =
++ ARECA_RAID_GOOD;
++ ccb->pcmd->result = DID_OK << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ else {
++ switch(ccb->arcmsr_cdb.DeviceStatus) {
++ case ARCMSR_DEV_SELECT_TIMEOUT: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_NO_CONNECT << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_ABORTED:
++
++ case ARCMSR_DEV_INIT_FAIL: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_CHECK_CONDITION: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GOOD;
++ arcmsr_report_sense_info(ccb);
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done,"
++					" but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ break;
++ }
++ }
++ found = 1;
++ }
++ }
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++ /*clear interrupt*/
++ }
++ return;
++}
++
++
++static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev)
++{
++ struct Scsi_Host *host = pci_get_drvdata(pdev);
++ struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
++ struct MessageUnit __iomem *reg = acb->pmu;
++ struct CommandControlBlock *ccb;
++ /*clear and abort all outbound posted Q*/
++ int i = 0, found = 0;
++ int id, lun;
++ uint32_t flag_ccb, outbound_intstatus;
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (ccb) {
++			if ((ccb->acb != acb) ||
++			    (ccb->startdone != ARCMSR_CCB_START)) {
++				printk(KERN_NOTICE
++					"arcmsr%d: polling get an illegal ccb"
++					" command done ccb = '0x%p'"
++					" ccboutstandingcount = %d \n",
++ acb->host->host_no, ccb,
++ atomic_read(&acb->ccboutstandingcount));
++ continue;
++ }
++
++ id = ccb->pcmd->device->id;
++ lun = ccb->pcmd->device->lun;
++ if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++ if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++ acb->devstate[id][lun] = ARECA_RAID_GOOD;
++ ccb->pcmd->result = DID_OK << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ else {
++ switch(ccb->arcmsr_cdb.DeviceStatus) {
++ case ARCMSR_DEV_SELECT_TIMEOUT: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_NO_CONNECT << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_ABORTED:
++
++ case ARCMSR_DEV_INIT_FAIL: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ case ARCMSR_DEV_CHECK_CONDITION: {
++ acb->devstate[id][lun] =
++ ARECA_RAID_GOOD;
++ arcmsr_report_sense_info(ccb);
++ arcmsr_ccb_complete(ccb, 1);
++ }
++ break;
++
++ default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done,"
++					" but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++ acb->devstate[id][lun] =
++ ARECA_RAID_GONE;
++ ccb->pcmd->result =
++ DID_BAD_TARGET << 16;
++ arcmsr_ccb_complete(ccb, 1);
++ break;
++ }
++ }
++ found = 1;
++ }
++ }
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++ /*clear interrupt*/
++ }
++ return;
++}
++
++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
++ pci_channel_state_t state)
++{
++ switch (state) {
++ case pci_channel_io_frozen:
++ arcmsr_pci_ers_need_reset_forepart(pdev);
++ return PCI_ERS_RESULT_NEED_RESET;
++ case pci_channel_io_perm_failure:
++ arcmsr_pci_ers_disconnect_forepart(pdev);
++ return PCI_ERS_RESULT_DISCONNECT;
++ default:
++ return PCI_ERS_RESULT_NEED_RESET;
++ }
++}
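error_detected() turns the channel state reported by the PCI core into a recovery verdict: a frozen channel asks for a slot reset, a permanently failed one disconnects. The switch above implements exactly that mapping; in outline (the demo name is a placeholder):

#include <linux/pci.h>

static pci_ers_result_t demo_error_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	switch (state) {
	case pci_channel_io_frozen:
		/* quiesce in-flight I/O, then let the core reset the slot */
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* the device is gone; fail everything outstanding */
		return PCI_ERS_RESULT_DISCONNECT;
	default:
		return PCI_ERS_RESULT_NEED_RESET;
	}
}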
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.c linux-2.6.22-591/drivers/scsi/bvme6000.c
+--- linux-2.6.22-570/drivers/scsi/bvme6000.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/bvme6000.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,76 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux.
+- *
+- * Based on work by Alan Hourihane
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-#include <linux/zorro.h>
+-
+-#include <asm/setup.h>
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/bvme6000hw.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "bvme6000.h"
+-
+-#include<linux/stat.h>
+-
+-
+-int bvme6000_scsi_detect(struct scsi_host_template *tpnt)
+-{
+- static unsigned char called = 0;
+- int clock;
+- long long options;
+-
+- if (called)
+- return 0;
+- if (!MACH_IS_BVME6000)
+- return 0;
+-
+- tpnt->proc_name = "BVME6000";
+-
+- options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT;
+-
+- clock = 40000000; /* 66MHz SCSI Clock */
+-
+- ncr53c7xx_init(tpnt, 0, 710, (unsigned long)BVME_NCR53C710_BASE,
+- 0, BVME_IRQ_SCSI, DMA_NONE,
+- options, clock);
+- called = 1;
+- return 1;
+-}
+-
+-static int bvme6000_scsi_release(struct Scsi_Host *shost)
+-{
+- if (shost->irq)
+- free_irq(shost->irq, NULL);
+- if (shost->dma_channel != 0xff)
+- free_dma(shost->dma_channel);
+- if (shost->io_port && shost->n_io_port)
+- release_region(shost->io_port, shost->n_io_port);
+- scsi_unregister(shost);
+- return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+- .name = "BVME6000 NCR53c710 SCSI",
+- .detect = bvme6000_scsi_detect,
+- .release = bvme6000_scsi_release,
+- .queuecommand = NCR53c7xx_queue_command,
+- .abort = NCR53c7xx_abort,
+- .reset = NCR53c7xx_reset,
+- .can_queue = 24,
+- .this_id = 7,
+- .sg_tablesize = 63,
+- .cmd_per_lun = 3,
+- .use_clustering = DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.h linux-2.6.22-591/drivers/scsi/bvme6000.h
+--- linux-2.6.22-570/drivers/scsi/bvme6000.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/bvme6000.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,24 +0,0 @@
+-#ifndef BVME6000_SCSI_H
+-#define BVME6000_SCSI_H
+-
+-#include <linux/types.h>
+-
+-int bvme6000_scsi_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* BVME6000_SCSI_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c
+--- linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,135 @@
++/*
++ * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux.
++ *
++ * Based on work by Alan Hourihane and Kars de Jong
++ *
++ * Rewritten to use 53c700.c by Richard Hirst <richard@sleepie.demon.co.uk>
++ */
++
++#include <linux/module.h>
++#include <linux/blkdev.h>
++#include <linux/device.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/bvme6000hw.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Richard Hirst <richard@sleepie.demon.co.uk>");
++MODULE_DESCRIPTION("BVME6000 NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++static struct scsi_host_template bvme6000_scsi_driver_template = {
++ .name = "BVME6000 NCR53c710 SCSI",
++ .proc_name = "BVME6000",
++ .this_id = 7,
++ .module = THIS_MODULE,
++};
++
++static struct platform_device *bvme6000_scsi_device;
++
++static __devinit int
++bvme6000_probe(struct device *dev)
++{
++ struct Scsi_Host * host = NULL;
++ struct NCR_700_Host_Parameters *hostdata;
++
++ if (!MACH_IS_BVME6000)
++ goto out;
++
++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++ if (hostdata == NULL) {
++ printk(KERN_ERR "bvme6000-scsi: "
++ "Failed to allocate host data\n");
++ goto out;
++ }
++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++ /* Fill in the required pieces of hostdata */
++ hostdata->base = (void __iomem *)BVME_NCR53C710_BASE;
++ hostdata->clock = 40; /* XXX - depends on the CPU clock! */
++ hostdata->chip710 = 1;
++ hostdata->dmode_extra = DMODE_FC2;
++ hostdata->dcntl_extra = EA_710;
++ hostdata->ctest7_extra = CTEST7_TT1;
++
++ /* and register the chip */
++ host = NCR_700_detect(&bvme6000_scsi_driver_template, hostdata, dev);
++ if (!host) {
++ printk(KERN_ERR "bvme6000-scsi: No host detected; "
++ "board configuration problem?\n");
++ goto out_free;
++ }
++ host->base = BVME_NCR53C710_BASE;
++ host->this_id = 7;
++ host->irq = BVME_IRQ_SCSI;
++ if (request_irq(BVME_IRQ_SCSI, NCR_700_intr, 0, "bvme6000-scsi",
++ host)) {
++ printk(KERN_ERR "bvme6000-scsi: request_irq failed\n");
++ goto out_put_host;
++ }
++
++ scsi_scan_host(host);
++
++ return 0;
++
++ out_put_host:
++ scsi_host_put(host);
++ out_free:
++ kfree(hostdata);
++ out:
++ return -ENODEV;
++}
++
++static __devexit int
++bvme6000_device_remove(struct device *dev)
++{
++ struct Scsi_Host *host = dev_to_shost(dev);
++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++ scsi_remove_host(host);
++ NCR_700_release(host);
++ kfree(hostdata);
++ free_irq(host->irq, host);
++
++ return 0;
++}
++
++static struct device_driver bvme6000_scsi_driver = {
++ .name = "bvme6000-scsi",
++ .bus = &platform_bus_type,
++ .probe = bvme6000_probe,
++ .remove = __devexit_p(bvme6000_device_remove),
++};
++
++static int __init bvme6000_scsi_init(void)
++{
++ int err;
++
++ err = driver_register(&bvme6000_scsi_driver);
++ if (err)
++ return err;
++
++ bvme6000_scsi_device = platform_device_register_simple("bvme6000-scsi",
++ -1, NULL, 0);
++ if (IS_ERR(bvme6000_scsi_device)) {
++ driver_unregister(&bvme6000_scsi_driver);
++ return PTR_ERR(bvme6000_scsi_device);
++ }
++
++ return 0;
++}
++
++static void __exit bvme6000_scsi_exit(void)
++{
++ platform_device_unregister(bvme6000_scsi_device);
++ driver_unregister(&bvme6000_scsi_driver);
++}
++
++module_init(bvme6000_scsi_init);
++module_exit(bvme6000_scsi_exit);
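Nothing enumerates a BVME6000 board, so the module registers both halves itself: a driver on the platform bus and a matching platform device. Teardown runs in the opposite order, unregistering the device first so its remove() callback still finds a bound driver. The same shape, reduced to its essentials (demo names are placeholders):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/platform_device.h>

static struct device_driver demo_driver = {
	.name	= "demo-scsi",
	.bus	= &platform_bus_type,
};
static struct platform_device *demo_device;

static int __init demo_init(void)
{
	int err = driver_register(&demo_driver);

	if (err)
		return err;
	demo_device = platform_device_register_simple("demo-scsi", -1,
						      NULL, 0);
	if (IS_ERR(demo_device)) {
		driver_unregister(&demo_driver);
		return PTR_ERR(demo_device);
	}
	return 0;
}

static void __exit demo_exit(void)
{
	platform_device_unregister(demo_device);	/* runs remove() */
	driver_unregister(&demo_driver);
}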
+diff -Nurb linux-2.6.22-570/drivers/scsi/dpt_i2o.c linux-2.6.22-591/drivers/scsi/dpt_i2o.c
+--- linux-2.6.22-570/drivers/scsi/dpt_i2o.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/dpt_i2o.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2078,12 +2078,13 @@
+ u32 *lenptr;
+ int direction;
+ int scsidir;
++ int nseg;
+ u32 len;
+ u32 reqlen;
+ s32 rcode;
+
+ memset(msg, 0 , sizeof(msg));
+- len = cmd->request_bufflen;
++ len = scsi_bufflen(cmd);
+ direction = 0x00000000;
+
+ scsidir = 0x00000000; // DATA NO XFER
+@@ -2140,21 +2141,21 @@
+ lenptr=mptr++; /* Remember me - fill in when we know */
+ reqlen = 14; // SINGLE SGE
+ /* Now fill in the SGList and command */
+- if(cmd->use_sg) {
+- struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+- int sg_count = pci_map_sg(pHba->pDev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+
++ nseg = scsi_dma_map(cmd);
++ BUG_ON(nseg < 0);
++ if (nseg) {
++ struct scatterlist *sg;
+
+ len = 0;
+- for(i = 0 ; i < sg_count; i++) {
++ scsi_for_each_sg(cmd, sg, nseg, i) {
+ *mptr++ = direction|0x10000000|sg_dma_len(sg);
+ len+=sg_dma_len(sg);
+ *mptr++ = sg_dma_address(sg);
+- sg++;
+- }
+ /* Make this an end of list */
+- mptr[-2] = direction|0xD0000000|sg_dma_len(sg-1);
++ if (i == nseg - 1)
++ mptr[-2] = direction|0xD0000000|sg_dma_len(sg);
++ }
+ reqlen = mptr - msg;
+ *lenptr = len;
+
+@@ -2163,16 +2164,8 @@
+ len, cmd->underflow);
+ }
+ } else {
+- *lenptr = len = cmd->request_bufflen;
+- if(len == 0) {
++ *lenptr = len = 0;
+ reqlen = 12;
+- } else {
+- *mptr++ = 0xD0000000|direction|cmd->request_bufflen;
+- *mptr++ = pci_map_single(pHba->pDev,
+- cmd->request_buffer,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- }
+ }
+
+ /* Stick the headers on */
+@@ -2232,7 +2225,7 @@
+ hba_status = detailed_status >> 8;
+
+ // calculate resid for sg
+- cmd->resid = cmd->request_bufflen - readl(reply+5);
++ scsi_set_resid(cmd, scsi_bufflen(cmd) - readl(reply+5));
+
+ pHba = (adpt_hba*) cmd->device->host->hostdata[0];
+
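The I2O scatter-gather message built above terminates its list with flag bits rather than a count: every element carries 0x10000000 in the flag word and the final one is rewritten to 0xD0000000, which is why the new loop back-patches mptr[-2] when i == nseg - 1. An equivalent sketch that sets the end-of-list bits up front (build_i2o_sgl() is a hypothetical helper; 32-bit DMA addresses assumed, as in the message layout above):

#include <linux/types.h>
#include <scsi/scsi_cmnd.h>

static u32 *build_i2o_sgl(struct scsi_cmnd *cmd, u32 *mptr,
			  u32 direction, int nseg)
{
	struct scatterlist *sg;
	int i;

	scsi_for_each_sg(cmd, sg, nseg, i) {
		u32 flags = (i == nseg - 1) ? 0xD0000000 : 0x10000000;

		*mptr++ = direction | flags | sg_dma_len(sg);
		*mptr++ = sg_dma_address(sg);
	}
	return mptr;
}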
+diff -Nurb linux-2.6.22-570/drivers/scsi/eata.c linux-2.6.22-591/drivers/scsi/eata.c
+--- linux-2.6.22-570/drivers/scsi/eata.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/eata.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1609,8 +1609,9 @@
+
+ static void map_dma(unsigned int i, struct hostdata *ha)
+ {
+- unsigned int k, count, pci_dir;
+- struct scatterlist *sgpnt;
++ unsigned int k, pci_dir;
++ int count;
++ struct scatterlist *sg;
+ struct mscp *cpp;
+ struct scsi_cmnd *SCpnt;
+
+@@ -1625,38 +1626,19 @@
+
+ cpp->sense_len = sizeof SCpnt->sense_buffer;
+
+- if (!SCpnt->use_sg) {
+-
+- /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */
+- if (!SCpnt->request_bufflen)
+- pci_dir = PCI_DMA_BIDIRECTIONAL;
+-
+- if (SCpnt->request_buffer)
+- cpp->data_address = H2DEV(pci_map_single(ha->pdev,
+- SCpnt->
+- request_buffer,
+- SCpnt->
+- request_bufflen,
+- pci_dir));
+-
+- cpp->data_len = H2DEV(SCpnt->request_bufflen);
+- return;
+- }
+-
+- sgpnt = (struct scatterlist *)SCpnt->request_buffer;
+- count = pci_map_sg(ha->pdev, sgpnt, SCpnt->use_sg, pci_dir);
+-
+- for (k = 0; k < count; k++) {
+- cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k]));
+- cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k]));
++ count = scsi_dma_map(SCpnt);
++ BUG_ON(count < 0);
++ scsi_for_each_sg(SCpnt, sg, count, k) {
++ cpp->sglist[k].address = H2DEV(sg_dma_address(sg));
++ cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg));
+ }
+
+ cpp->sg = 1;
+ cpp->data_address = H2DEV(pci_map_single(ha->pdev, cpp->sglist,
+- SCpnt->use_sg *
++ scsi_sg_count(SCpnt) *
+ sizeof(struct sg_list),
+ pci_dir));
+- cpp->data_len = H2DEV((SCpnt->use_sg * sizeof(struct sg_list)));
++ cpp->data_len = H2DEV((scsi_sg_count(SCpnt) * sizeof(struct sg_list)));
+ }
+
+ static void unmap_dma(unsigned int i, struct hostdata *ha)
+@@ -1673,9 +1655,7 @@
+ pci_unmap_single(ha->pdev, DEV2H(cpp->sense_addr),
+ DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+
+- if (SCpnt->use_sg)
+- pci_unmap_sg(ha->pdev, SCpnt->request_buffer, SCpnt->use_sg,
+- pci_dir);
++ scsi_dma_unmap(SCpnt);
+
+ if (!DEV2H(cpp->data_len))
+ pci_dir = PCI_DMA_BIDIRECTIONAL;
+@@ -1700,9 +1680,9 @@
+ DEV2H(cpp->sense_len),
+ PCI_DMA_FROMDEVICE);
+
+- if (SCpnt->use_sg)
+- pci_dma_sync_sg_for_cpu(ha->pdev, SCpnt->request_buffer,
+- SCpnt->use_sg, pci_dir);
++ if (scsi_sg_count(SCpnt))
++ pci_dma_sync_sg_for_cpu(ha->pdev, scsi_sglist(SCpnt),
++ scsi_sg_count(SCpnt), pci_dir);
+
+ if (!DEV2H(cpp->data_len))
+ pci_dir = PCI_DMA_BIDIRECTIONAL;
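When a driver only needs to hand ownership of the data buffer back to the CPU rather than tear the mapping down, the same accessors feed the sync call, and a zero segment count means there is nothing to do. As a sketch (sync_for_cpu() is a hypothetical helper):

#include <linux/pci.h>
#include <scsi/scsi_cmnd.h>

static void sync_for_cpu(struct pci_dev *pdev, struct scsi_cmnd *cmd,
			 int pci_dir)
{
	if (scsi_sg_count(cmd))
		pci_dma_sync_sg_for_cpu(pdev, scsi_sglist(cmd),
					scsi_sg_count(cmd), pci_dir);
}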
+diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.c linux-2.6.22-591/drivers/scsi/esp_scsi.c
+--- linux-2.6.22-570/drivers/scsi/esp_scsi.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/esp_scsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -324,17 +324,14 @@
+ static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd)
+ {
+ struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+- struct scatterlist *sg = cmd->request_buffer;
++ struct scatterlist *sg = scsi_sglist(cmd);
+ int dir = cmd->sc_data_direction;
+ int total, i;
+
+ if (dir == DMA_NONE)
+ return;
+
+- BUG_ON(cmd->use_sg == 0);
+-
+- spriv->u.num_sg = esp->ops->map_sg(esp, sg,
+- cmd->use_sg, dir);
++ spriv->u.num_sg = esp->ops->map_sg(esp, sg, scsi_sg_count(cmd), dir);
+ spriv->cur_residue = sg_dma_len(sg);
+ spriv->cur_sg = sg;
+
+@@ -407,8 +404,7 @@
+ if (dir == DMA_NONE)
+ return;
+
+- esp->ops->unmap_sg(esp, cmd->request_buffer,
+- spriv->u.num_sg, dir);
++ esp->ops->unmap_sg(esp, scsi_sglist(cmd), spriv->u.num_sg, dir);
+ }
+
+ static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent)
+@@ -921,7 +917,7 @@
+ static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+ {
+ struct scsi_device *dev = cmd->device;
+- struct esp *esp = host_to_esp(dev->host);
++ struct esp *esp = shost_priv(dev->host);
+ struct esp_cmd_priv *spriv;
+ struct esp_cmd_entry *ent;
+
+@@ -2358,7 +2354,7 @@
+
+ static int esp_slave_alloc(struct scsi_device *dev)
+ {
+- struct esp *esp = host_to_esp(dev->host);
++ struct esp *esp = shost_priv(dev->host);
+ struct esp_target_data *tp = &esp->target[dev->id];
+ struct esp_lun_data *lp;
+
+@@ -2382,7 +2378,7 @@
+
+ static int esp_slave_configure(struct scsi_device *dev)
+ {
+- struct esp *esp = host_to_esp(dev->host);
++ struct esp *esp = shost_priv(dev->host);
+ struct esp_target_data *tp = &esp->target[dev->id];
+ int goal_tags, queue_depth;
+
+@@ -2424,7 +2420,7 @@
+
+ static int esp_eh_abort_handler(struct scsi_cmnd *cmd)
+ {
+- struct esp *esp = host_to_esp(cmd->device->host);
++ struct esp *esp = shost_priv(cmd->device->host);
+ struct esp_cmd_entry *ent, *tmp;
+ struct completion eh_done;
+ unsigned long flags;
+@@ -2540,7 +2536,7 @@
+
+ static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd)
+ {
+- struct esp *esp = host_to_esp(cmd->device->host);
++ struct esp *esp = shost_priv(cmd->device->host);
+ struct completion eh_reset;
+ unsigned long flags;
+
+@@ -2576,7 +2572,7 @@
+ /* All bets are off, reset the entire device. */
+ static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd)
+ {
+- struct esp *esp = host_to_esp(cmd->device->host);
++ struct esp *esp = shost_priv(cmd->device->host);
+ unsigned long flags;
+
+ spin_lock_irqsave(esp->host->host_lock, flags);
+@@ -2616,7 +2612,7 @@
+
+ static void esp_get_signalling(struct Scsi_Host *host)
+ {
+- struct esp *esp = host_to_esp(host);
++ struct esp *esp = shost_priv(host);
+ enum spi_signal_type type;
+
+ if (esp->flags & ESP_FLAG_DIFFERENTIAL)
+@@ -2630,7 +2626,7 @@
+ static void esp_set_offset(struct scsi_target *target, int offset)
+ {
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+- struct esp *esp = host_to_esp(host);
++ struct esp *esp = shost_priv(host);
+ struct esp_target_data *tp = &esp->target[target->id];
+
+ tp->nego_goal_offset = offset;
+@@ -2640,7 +2636,7 @@
+ static void esp_set_period(struct scsi_target *target, int period)
+ {
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+- struct esp *esp = host_to_esp(host);
++ struct esp *esp = shost_priv(host);
+ struct esp_target_data *tp = &esp->target[target->id];
+
+ tp->nego_goal_period = period;
+@@ -2650,7 +2646,7 @@
+ static void esp_set_width(struct scsi_target *target, int width)
+ {
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+- struct esp *esp = host_to_esp(host);
++ struct esp *esp = shost_priv(host);
+ struct esp_target_data *tp = &esp->target[target->id];
+
+ tp->nego_goal_width = (width ? 1 : 0);
+diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.h linux-2.6.22-591/drivers/scsi/esp_scsi.h
+--- linux-2.6.22-570/drivers/scsi/esp_scsi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/esp_scsi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -517,8 +517,6 @@
+ struct sbus_dma *dma;
+ };
+
+-#define host_to_esp(host) ((struct esp *)(host)->hostdata)
+-
+ /* A front-end driver for the ESP chip should do the following in
+ * it's device probe routine:
+ * 1) Allocate the host and private area using scsi_host_alloc()
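shost_priv() is the generic accessor that replaces per-driver casts like the deleted host_to_esp(): it returns the private area scsi_host_alloc() reserves behind struct Scsi_Host. A minimal sketch (struct demo_priv is a placeholder for the driver's state):

#include <scsi/scsi_host.h>

struct demo_priv {
	int flags;
};

static struct Scsi_Host *demo_alloc(struct scsi_host_template *tpl)
{
	struct Scsi_Host *host =
		scsi_host_alloc(tpl, sizeof(struct demo_priv));

	if (host) {
		struct demo_priv *priv = shost_priv(host);

		priv->flags = 0;	/* same storage the old macro cast */
	}
	return host;
}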
+diff -Nurb linux-2.6.22-570/drivers/scsi/fdomain.c linux-2.6.22-591/drivers/scsi/fdomain.c
+--- linux-2.6.22-570/drivers/scsi/fdomain.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/fdomain.c 2007-12-21 15:36:12.000000000 -0500
+@@ -410,6 +410,8 @@
+ static char * fdomain = NULL;
+ module_param(fdomain, charp, 0);
+
++#ifndef PCMCIA
++
+ static unsigned long addresses[] = {
+ 0xc8000,
+ 0xca000,
+@@ -426,6 +428,8 @@
+
+ static unsigned short ints[] = { 3, 5, 10, 11, 12, 14, 15, 0 };
+
++#endif /* !PCMCIA */
++
+ /*
+
+ READ THIS BEFORE YOU ADD A SIGNATURE!
+@@ -458,6 +462,8 @@
+
+ */
+
++#ifndef PCMCIA
++
+ static struct signature {
+ const char *signature;
+ int sig_offset;
+@@ -503,6 +509,8 @@
+
+ #define SIGNATURE_COUNT ARRAY_SIZE(signatures)
+
++#endif /* !PCMCIA */
++
+ static void print_banner( struct Scsi_Host *shpnt )
+ {
+ if (!shpnt) return; /* This won't ever happen */
+@@ -633,6 +641,8 @@
+ return 0;
+ }
+
++#ifndef PCMCIA
++
+ /* fdomain_get_irq assumes that we have a valid MCA ID for a
+ TMC-1660/TMC-1680 Future Domain board. Now, check to be sure the
+ bios_base matches these ports. If someone was unlucky enough to have
+@@ -667,7 +677,6 @@
+
+ static int fdomain_isa_detect( int *irq, int *iobase )
+ {
+-#ifndef PCMCIA
+ int i, j;
+ int base = 0xdeadbeef;
+ int flag = 0;
+@@ -786,11 +795,22 @@
+ *iobase = base;
+
+ return 1; /* success */
+-#else
++}
++
++#else /* PCMCIA */
++
++static int fdomain_isa_detect( int *irq, int *iobase )
++{
++ if (irq)
++ *irq = 0;
++ if (iobase)
++ *iobase = 0;
+ return 0;
+-#endif
+ }
+
++#endif /* !PCMCIA */
++
++
+ /* PCI detection function: int fdomain_pci_bios_detect(int* irq, int*
+ iobase) This function gets the Interrupt Level and I/O base address from
+ the PCI configuration registers. */
+@@ -1345,16 +1365,15 @@
+
+ #if ERRORS_ONLY
+ if (current_SC->cmnd[0] == REQUEST_SENSE && !current_SC->SCp.Status) {
+- if ((unsigned char)(*((char *)current_SC->request_buffer+2)) & 0x0f) {
++ char *buf = scsi_sglist(current_SC);
++ if ((unsigned char)(*(buf + 2)) & 0x0f) {
+ unsigned char key;
+ unsigned char code;
+ unsigned char qualifier;
+
+- key = (unsigned char)(*((char *)current_SC->request_buffer + 2))
+- & 0x0f;
+- code = (unsigned char)(*((char *)current_SC->request_buffer + 12));
+- qualifier = (unsigned char)(*((char *)current_SC->request_buffer
+- + 13));
++ key = (unsigned char)(*(buf + 2)) & 0x0f;
++ code = (unsigned char)(*(buf + 12));
++ qualifier = (unsigned char)(*(buf + 13));
+
+ if (key != UNIT_ATTENTION
+ && !(key == NOT_READY
+@@ -1405,8 +1424,8 @@
+ printk( "queue: target = %d cmnd = 0x%02x pieces = %d size = %u\n",
+ SCpnt->target,
+ *(unsigned char *)SCpnt->cmnd,
+- SCpnt->use_sg,
+- SCpnt->request_bufflen );
++ scsi_sg_count(SCpnt),
++ scsi_bufflen(SCpnt));
+ #endif
+
+ fdomain_make_bus_idle();
+@@ -1416,20 +1435,19 @@
+
+ /* Initialize static data */
+
+- if (current_SC->use_sg) {
+- current_SC->SCp.buffer =
+- (struct scatterlist *)current_SC->request_buffer;
+- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
++ if (scsi_sg_count(current_SC)) {
++ current_SC->SCp.buffer = scsi_sglist(current_SC);
++ current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page)
++ + current_SC->SCp.buffer->offset;
+ current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
+- current_SC->SCp.buffers_residual = current_SC->use_sg - 1;
++ current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
+ } else {
+- current_SC->SCp.ptr = (char *)current_SC->request_buffer;
+- current_SC->SCp.this_residual = current_SC->request_bufflen;
++ current_SC->SCp.ptr = NULL;
++ current_SC->SCp.this_residual = 0;
+ current_SC->SCp.buffer = NULL;
+ current_SC->SCp.buffers_residual = 0;
+ }
+
+-
+ current_SC->SCp.Status = 0;
+ current_SC->SCp.Message = 0;
+ current_SC->SCp.have_data_in = 0;
+@@ -1472,8 +1490,8 @@
+ SCpnt->SCp.phase,
+ SCpnt->device->id,
+ *(unsigned char *)SCpnt->cmnd,
+- SCpnt->use_sg,
+- SCpnt->request_bufflen );
++ scsi_sg_count(SCpnt),
++ scsi_bufflen(SCpnt));
+ printk( "sent_command = %d, have_data_in = %d, timeout = %d\n",
+ SCpnt->SCp.sent_command,
+ SCpnt->SCp.have_data_in,
+diff -Nurb linux-2.6.22-570/drivers/scsi/gdth.c linux-2.6.22-591/drivers/scsi/gdth.c
+--- linux-2.6.22-570/drivers/scsi/gdth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/gdth.c 2007-12-21 15:36:12.000000000 -0500
+@@ -876,7 +876,7 @@
+ /* Vortex only makes RAID controllers.
+ * We do not really want to specify all 550 ids here, so wildcard match.
+ */
+-static struct pci_device_id gdthtable[] __attribute_used__ = {
++static struct pci_device_id gdthtable[] __maybe_unused = {
+ {PCI_VENDOR_ID_VORTEX,PCI_ANY_ID,PCI_ANY_ID, PCI_ANY_ID},
+ {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC,PCI_ANY_ID,PCI_ANY_ID},
+ {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC_XSCALE,PCI_ANY_ID,PCI_ANY_ID},
+@@ -1955,7 +1955,7 @@
+ for (j = 0; j < 12; ++j)
+ rtc[j] = CMOS_READ(j);
+ } while (rtc[0] != CMOS_READ(0));
+- spin_lock_irqrestore(&rtc_lock, flags);
++ spin_unlock_irqrestore(&rtc_lock, flags);
+ TRACE2(("gdth_search_drives(): RTC: %x/%x/%x\n",*(ulong32 *)&rtc[0],
+ *(ulong32 *)&rtc[4], *(ulong32 *)&rtc[8]));
+ /* 3. send to controller firmware */
+diff -Nurb linux-2.6.22-570/drivers/scsi/hptiop.c linux-2.6.22-591/drivers/scsi/hptiop.c
+--- linux-2.6.22-570/drivers/scsi/hptiop.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/hptiop.c 2007-12-21 15:36:12.000000000 -0500
+@@ -339,20 +339,8 @@
+
+ scp = hba->reqs[tag].scp;
+
+- if (HPT_SCP(scp)->mapped) {
+- if (scp->use_sg)
+- pci_unmap_sg(hba->pcidev,
+- (struct scatterlist *)scp->request_buffer,
+- scp->use_sg,
+- scp->sc_data_direction
+- );
+- else
+- pci_unmap_single(hba->pcidev,
+- HPT_SCP(scp)->dma_handle,
+- scp->request_bufflen,
+- scp->sc_data_direction
+- );
+- }
++ if (HPT_SCP(scp)->mapped)
++ scsi_dma_unmap(scp);
+
+ switch (le32_to_cpu(req->header.result)) {
+ case IOP_RESULT_SUCCESS:
+@@ -449,43 +437,26 @@
+ {
+ struct Scsi_Host *host = scp->device->host;
+ struct hptiop_hba *hba = (struct hptiop_hba *)host->hostdata;
+- struct scatterlist *sglist = (struct scatterlist *)scp->request_buffer;
++ struct scatterlist *sg;
++ int idx, nseg;
+
+- /*
+- * though we'll not get non-use_sg fields anymore,
+- * keep use_sg checking anyway
+- */
+- if (scp->use_sg) {
+- int idx;
+-
+- HPT_SCP(scp)->sgcnt = pci_map_sg(hba->pcidev,
+- sglist, scp->use_sg,
+- scp->sc_data_direction);
++ nseg = scsi_dma_map(scp);
++ BUG_ON(nseg < 0);
++ if (!nseg)
++ return 0;
++
++ HPT_SCP(scp)->sgcnt = nseg;
+ HPT_SCP(scp)->mapped = 1;
++
+ BUG_ON(HPT_SCP(scp)->sgcnt > hba->max_sg_descriptors);
+
+- for (idx = 0; idx < HPT_SCP(scp)->sgcnt; idx++) {
+- psg[idx].pci_address =
+- cpu_to_le64(sg_dma_address(&sglist[idx]));
+- psg[idx].size = cpu_to_le32(sg_dma_len(&sglist[idx]));
++ scsi_for_each_sg(scp, sg, HPT_SCP(scp)->sgcnt, idx) {
++ psg[idx].pci_address = cpu_to_le64(sg_dma_address(sg));
++ psg[idx].size = cpu_to_le32(sg_dma_len(sg));
+ psg[idx].eot = (idx == HPT_SCP(scp)->sgcnt - 1) ?
+ cpu_to_le32(1) : 0;
+ }
+-
+ return HPT_SCP(scp)->sgcnt;
+- } else {
+- HPT_SCP(scp)->dma_handle = pci_map_single(
+- hba->pcidev,
+- scp->request_buffer,
+- scp->request_bufflen,
+- scp->sc_data_direction
+- );
+- HPT_SCP(scp)->mapped = 1;
+- psg->pci_address = cpu_to_le64(HPT_SCP(scp)->dma_handle);
+- psg->size = cpu_to_le32(scp->request_bufflen);
+- psg->eot = cpu_to_le32(1);
+- return 1;
+- }
+ }
+
+ static int hptiop_queuecommand(struct scsi_cmnd *scp,
+@@ -530,9 +501,8 @@
+ req = (struct hpt_iop_request_scsi_command *)_req->req_virt;
+
+ /* build S/G table */
+- if (scp->request_bufflen)
+ sg_count = hptiop_buildsgl(scp, req->sg_list);
+- else
++ if (!sg_count)
+ HPT_SCP(scp)->mapped = 0;
+
+ req->header.flags = cpu_to_le32(IOP_REQUEST_FLAG_OUTPUT_CONTEXT);
+@@ -541,7 +511,7 @@
+ req->header.context = cpu_to_le32(IOPMU_QUEUE_ADDR_HOST_BIT |
+ (u32)_req->index);
+ req->header.context_hi32 = 0;
+- req->dataxfer_length = cpu_to_le32(scp->request_bufflen);
++ req->dataxfer_length = cpu_to_le32(scsi_bufflen(scp));
+ req->channel = scp->device->channel;
+ req->target = scp->device->id;
+ req->lun = scp->device->lun;
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.c linux-2.6.22-591/drivers/scsi/ibmmca.c
+--- linux-2.6.22-570/drivers/scsi/ibmmca.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ibmmca.c 2007-12-21 15:36:12.000000000 -0500
+@@ -31,14 +31,21 @@
+ #include <linux/mca.h>
+ #include <linux/spinlock.h>
+ #include <linux/init.h>
+-#include <linux/mca-legacy.h>
+
+ #include <asm/system.h>
+ #include <asm/io.h>
+
+ #include "scsi.h"
+ #include <scsi/scsi_host.h>
+-#include "ibmmca.h"
++
++/* Common forward declarations for all Linux-versions: */
++static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *));
++static int ibmmca_abort (Scsi_Cmnd *);
++static int ibmmca_host_reset (Scsi_Cmnd *);
++static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *);
++static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout);
++
++
+
+ /* current version of this driver-source: */
+ #define IBMMCA_SCSI_DRIVER_VERSION "4.0b-ac"
+@@ -65,11 +72,11 @@
+ #define IM_DEBUG_CMD_DEVICE TYPE_TAPE
+
+ /* relative addresses of hardware registers on a subsystem */
+-#define IM_CMD_REG(hi) (hosts[(hi)]->io_port) /*Command Interface, (4 bytes long) */
+-#define IM_ATTN_REG(hi) (hosts[(hi)]->io_port+4) /*Attention (1 byte) */
+-#define IM_CTR_REG(hi) (hosts[(hi)]->io_port+5) /*Basic Control (1 byte) */
+-#define IM_INTR_REG(hi) (hosts[(hi)]->io_port+6) /*Interrupt Status (1 byte, r/o) */
+-#define IM_STAT_REG(hi) (hosts[(hi)]->io_port+7) /*Basic Status (1 byte, read only) */
++#define IM_CMD_REG(h) ((h)->io_port) /*Command Interface, (4 bytes long) */
++#define IM_ATTN_REG(h) ((h)->io_port+4) /*Attention (1 byte) */
++#define IM_CTR_REG(h) ((h)->io_port+5) /*Basic Control (1 byte) */
++#define IM_INTR_REG(h) ((h)->io_port+6) /*Interrupt Status (1 byte, r/o) */
++#define IM_STAT_REG(h) ((h)->io_port+7) /*Basic Status (1 byte, read only) */
+
+ /* basic I/O-port of first adapter */
+ #define IM_IO_PORT 0x3540
+@@ -266,30 +273,36 @@
+ if ((display_mode & LED_ACTIVITY)||(!display_mode)) \
+ outb(inb(PS2_SYS_CTR) & 0x3f, PS2_SYS_CTR); }
+
+-/*list of supported subsystems */
+-struct subsys_list_struct {
+- unsigned short mca_id;
+- char *description;
+-};
+-
+ /* types of different supported hardware that goes to hostdata special */
+ #define IBM_SCSI2_FW 0
+ #define IBM_7568_WCACHE 1
+ #define IBM_EXP_UNIT 2
+ #define IBM_SCSI_WCACHE 3
+ #define IBM_SCSI 4
++#define IBM_INTEGSCSI 5
+
+ /* other special flags for hostdata structure */
+ #define FORCED_DETECTION 100
+ #define INTEGRATED_SCSI 101
+
+ /* List of possible IBM-SCSI-adapters */
+-static struct subsys_list_struct subsys_list[] = {
+- {0x8efc, "IBM SCSI-2 F/W Adapter"}, /* special = 0 */
+- {0x8efd, "IBM 7568 Industrial Computer SCSI Adapter w/Cache"}, /* special = 1 */
+- {0x8ef8, "IBM Expansion Unit SCSI Controller"}, /* special = 2 */
+- {0x8eff, "IBM SCSI Adapter w/Cache"}, /* special = 3 */
+- {0x8efe, "IBM SCSI Adapter"}, /* special = 4 */
++static short ibmmca_id_table[] = {
++ 0x8efc,
++ 0x8efd,
++ 0x8ef8,
++ 0x8eff,
++ 0x8efe,
++ /* No entry for integrated SCSI, that's part of the register */
++ 0
++};
++
++static const char *ibmmca_description[] = {
++ "IBM SCSI-2 F/W Adapter", /* special = 0 */
++ "IBM 7568 Industrial Computer SCSI Adapter w/Cache", /* special = 1 */
++ "IBM Expansion Unit SCSI Controller", /* special = 2 */
++ "IBM SCSI Adapter w/Cache", /* special = 3 */
++ "IBM SCSI Adapter", /* special = 4 */
++ "IBM Integrated SCSI Controller", /* special = 5 */
+ };
+
+ /* Max number of logical devices (can be up from 0 to 14). 15 is the address
+@@ -375,30 +388,30 @@
+ };
+
+ /* macros to access host data structure */
+-#define subsystem_pun(hi) (hosts[(hi)]->this_id)
+-#define subsystem_maxid(hi) (hosts[(hi)]->max_id)
+-#define ld(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_ld)
+-#define get_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_ldn)
+-#define get_scsi(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_scsi)
+-#define local_checking_phase_flag(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_local_checking_phase_flag)
+-#define got_interrupt(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_got_interrupt)
+-#define stat_result(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_stat_result)
+-#define reset_status(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_reset_status)
+-#define last_scsi_command(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_command)
+-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type)
+-#define last_scsi_blockcount(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_blockcount)
+-#define last_scsi_logical_block(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_logical_block)
+-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type)
+-#define next_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_next_ldn)
+-#define IBM_DS(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_IBM_DS)
+-#define special(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_special)
+-#define subsystem_connector_size(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_connector_size)
+-#define adapter_speed(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_adapter_speed)
+-#define pos2(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[2])
+-#define pos3(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[3])
+-#define pos4(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[4])
+-#define pos5(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[5])
+-#define pos6(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[6])
++#define subsystem_pun(h) ((h)->this_id)
++#define subsystem_maxid(h) ((h)->max_id)
++#define ld(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_ld)
++#define get_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_ldn)
++#define get_scsi(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_scsi)
++#define local_checking_phase_flag(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_local_checking_phase_flag)
++#define got_interrupt(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_got_interrupt)
++#define stat_result(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_stat_result)
++#define reset_status(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_reset_status)
++#define last_scsi_command(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_command)
++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type)
++#define last_scsi_blockcount(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_blockcount)
++#define last_scsi_logical_block(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_logical_block)
++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type)
++#define next_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_next_ldn)
++#define IBM_DS(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_IBM_DS)
++#define special(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_special)
++#define subsystem_connector_size(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_connector_size)
++#define adapter_speed(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_adapter_speed)
++#define pos2(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[2])
++#define pos3(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[3])
++#define pos4(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[4])
++#define pos5(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[5])
++#define pos6(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[6])
+
+ /* Define a arbitrary number as subsystem-marker-type. This number is, as
+ described in the ANSI-SCSI-standard, not occupied by other device-types. */
+@@ -459,11 +472,6 @@
+ /*counter of concurrent disk read/writes, to turn on/off disk led */
+ static int disk_rw_in_progress = 0;
+
+-/* host information */
+-static int found = 0;
+-static struct Scsi_Host *hosts[IM_MAX_HOSTS + 1] = {
+- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+-};
+ static unsigned int pos[8]; /* whole pos register-line for diagnosis */
+ /* Taking into account the additions, made by ZP Gu.
+ * This selects now the preset value from the configfile and
+@@ -474,70 +482,68 @@
+ static char ibm_ansi_order = 0;
+ #endif
+
+-static void issue_cmd(int, unsigned long, unsigned char);
++static void issue_cmd(struct Scsi_Host *, unsigned long, unsigned char);
+ static void internal_done(Scsi_Cmnd * cmd);
+-static void check_devices(int, int);
+-static int immediate_assign(int, unsigned int, unsigned int, unsigned int, unsigned int);
+-static int immediate_feature(int, unsigned int, unsigned int);
++static void check_devices(struct Scsi_Host *, int);
++static int immediate_assign(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, unsigned int);
++static int immediate_feature(struct Scsi_Host *, unsigned int, unsigned int);
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+-static int immediate_reset(int, unsigned int);
++static int immediate_reset(struct Scsi_Host *, unsigned int);
+ #endif
+-static int device_inquiry(int, int);
+-static int read_capacity(int, int);
+-static int get_pos_info(int);
++static int device_inquiry(struct Scsi_Host *, int);
++static int read_capacity(struct Scsi_Host *, int);
++static int get_pos_info(struct Scsi_Host *);
+ static char *ti_p(int);
+ static char *ti_l(int);
+ static char *ibmrate(unsigned int, int);
+ static int probe_display(int);
+-static int probe_bus_mode(int);
+-static int device_exists(int, int, int *, int *);
+-static struct Scsi_Host *ibmmca_register(struct scsi_host_template *, int, int, int, char *);
++static int probe_bus_mode(struct Scsi_Host *);
++static int device_exists(struct Scsi_Host *, int, int *, int *);
+ static int option_setup(char *);
+ /* local functions needed for proc_info */
+-static int ldn_access_load(int, int);
+-static int ldn_access_total_read_write(int);
++static int ldn_access_load(struct Scsi_Host *, int);
++static int ldn_access_total_read_write(struct Scsi_Host *);
+
+ static irqreturn_t interrupt_handler(int irq, void *dev_id)
+ {
+- int host_index, ihost_index;
+ unsigned int intr_reg;
+ unsigned int cmd_result;
+ unsigned int ldn;
++ unsigned long flags;
+ Scsi_Cmnd *cmd;
+ int lastSCSI;
+- struct Scsi_Host *dev = dev_id;
++ struct device *dev = dev_id;
++ struct Scsi_Host *shpnt = dev_get_drvdata(dev);
++
++ spin_lock_irqsave(shpnt->host_lock, flags);
+
+- spin_lock(dev->host_lock);
+- /* search for one adapter-response on shared interrupt */
+- for (host_index = 0; hosts[host_index] && !(inb(IM_STAT_REG(host_index)) & IM_INTR_REQUEST); host_index++);
+- /* return if some other device on this IRQ caused the interrupt */
+- if (!hosts[host_index]) {
+- spin_unlock(dev->host_lock);
++ if (!(inb(IM_STAT_REG(shpnt)) & IM_INTR_REQUEST)) {
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_NONE;
+ }
+
+ /* the reset-function already did all the job, even ints got
+ renabled on the subsystem, so just return */
+- if ((reset_status(host_index) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(host_index) == IM_RESET_FINISHED_OK_NO_INT)) {
+- reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS;
+- spin_unlock(dev->host_lock);
++ if ((reset_status(shpnt) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(shpnt) == IM_RESET_FINISHED_OK_NO_INT)) {
++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS;
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ /*must wait for attention reg not busy, then send EOI to subsystem */
+ while (1) {
+- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ break;
+ cpu_relax();
+ }
+- ihost_index = host_index;
++
+ /*get command result and logical device */
+- intr_reg = (unsigned char) (inb(IM_INTR_REG(ihost_index)));
++ intr_reg = (unsigned char) (inb(IM_INTR_REG(shpnt)));
+ cmd_result = intr_reg & 0xf0;
+ ldn = intr_reg & 0x0f;
+ /* get the last_scsi_command here */
+- lastSCSI = last_scsi_command(ihost_index)[ldn];
+- outb(IM_EOI | ldn, IM_ATTN_REG(ihost_index));
++ lastSCSI = last_scsi_command(shpnt)[ldn];
++ outb(IM_EOI | ldn, IM_ATTN_REG(shpnt));
+
+ /*these should never happen (hw fails, or a local programming bug) */
+ if (!global_command_error_excuse) {
+@@ -547,38 +553,38 @@
+ case IM_SOFTWARE_SEQUENCING_ERROR:
+ case IM_CMD_ERROR:
+ printk(KERN_ERR "IBM MCA SCSI: Fatal Subsystem ERROR!\n");
+- printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(ihost_index)[ldn].scb.enable);
+- if (ld(ihost_index)[ldn].cmd)
+- printk("%ld/%ld,", (long) (ld(ihost_index)[ldn].cmd->request_bufflen), (long) (ld(ihost_index)[ldn].scb.sys_buf_length));
++ printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(shpnt)[ldn].scb.enable);
++ if (ld(shpnt)[ldn].cmd)
++ printk("%ld/%ld,", (long) (scsi_bufflen(ld(shpnt)[ldn].cmd)), (long) (ld(shpnt)[ldn].scb.sys_buf_length));
+ else
+ printk("none,");
+- if (ld(ihost_index)[ldn].cmd)
+- printk("Blocksize=%d", ld(ihost_index)[ldn].scb.u2.blk.length);
++ if (ld(shpnt)[ldn].cmd)
++ printk("Blocksize=%d", ld(shpnt)[ldn].scb.u2.blk.length);
+ else
+ printk("Blocksize=none");
+- printk(", host=0x%x, ldn=0x%x\n", ihost_index, ldn);
+- if (ld(ihost_index)[ldn].cmd) {
+- printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u2.blk.count);
+- printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u1.log_blk_adr);
++ printk(", host=%p, ldn=0x%x\n", shpnt, ldn);
++ if (ld(shpnt)[ldn].cmd) {
++ printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(shpnt)[ldn], ld(shpnt)[ldn].scb.u2.blk.count);
++ printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(shpnt)[ldn], ld(shpnt)[ldn].scb.u1.log_blk_adr);
+ }
+ printk(KERN_ERR "Reason given: %s\n", (cmd_result == IM_ADAPTER_HW_FAILURE) ? "HARDWARE FAILURE" : (cmd_result == IM_SOFTWARE_SEQUENCING_ERROR) ? "SOFTWARE SEQUENCING ERROR" : (cmd_result == IM_CMD_ERROR) ? "COMMAND ERROR" : "UNKNOWN");
+ /* if errors appear, enter this section to give detailed info */
+ printk(KERN_ERR "IBM MCA SCSI: Subsystem Error-Status follows:\n");
+- printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(ihost_index)[ldn]);
+- printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(ihost_index)));
+- printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(ihost_index)));
++ printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(shpnt)[ldn]);
++ printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(shpnt)));
++ printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(shpnt)));
+ printk(KERN_ERR " Interrupt Status Register...: %x\n", intr_reg);
+- printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(ihost_index)));
+- if ((last_scsi_type(ihost_index)[ldn] == IM_SCB) || (last_scsi_type(ihost_index)[ldn] == IM_LONG_SCB)) {
+- printk(KERN_ERR " SCB-Command.................: %x\n", ld(ihost_index)[ldn].scb.command);
+- printk(KERN_ERR " SCB-Enable..................: %x\n", ld(ihost_index)[ldn].scb.enable);
+- printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(ihost_index)[ldn].scb.u1.log_blk_adr);
+- printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_adr);
+- printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_length);
+- printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(ihost_index)[ldn].scb.tsb_adr);
+- printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(ihost_index)[ldn].scb.scb_chain_adr);
+- printk(KERN_ERR " SCB-block count.............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.count);
+- printk(KERN_ERR " SCB-block length............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.length);
++ printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(shpnt)));
++ if ((last_scsi_type(shpnt)[ldn] == IM_SCB) || (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB)) {
++ printk(KERN_ERR " SCB-Command.................: %x\n", ld(shpnt)[ldn].scb.command);
++ printk(KERN_ERR " SCB-Enable..................: %x\n", ld(shpnt)[ldn].scb.enable);
++ printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(shpnt)[ldn].scb.u1.log_blk_adr);
++ printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(shpnt)[ldn].scb.sys_buf_adr);
++ printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(shpnt)[ldn].scb.sys_buf_length);
++ printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(shpnt)[ldn].scb.tsb_adr);
++ printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(shpnt)[ldn].scb.scb_chain_adr);
++ printk(KERN_ERR " SCB-block count.............: %x\n", ld(shpnt)[ldn].scb.u2.blk.count);
++ printk(KERN_ERR " SCB-block length............: %x\n", ld(shpnt)[ldn].scb.u2.blk.length);
+ }
+ printk(KERN_ERR " Send this report to the maintainer.\n");
+ panic("IBM MCA SCSI: Fatal error message from the subsystem (0x%X,0x%X)!\n", lastSCSI, cmd_result);
+@@ -600,72 +606,73 @@
+ }
+ }
+ /* if no panic appeared, increase the interrupt-counter */
+- IBM_DS(ihost_index).total_interrupts++;
++ IBM_DS(shpnt).total_interrupts++;
+ /*only for local checking phase */
+- if (local_checking_phase_flag(ihost_index)) {
+- stat_result(ihost_index) = cmd_result;
+- got_interrupt(ihost_index) = 1;
+- reset_status(ihost_index) = IM_RESET_FINISHED_OK;
+- last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+- spin_unlock(dev->host_lock);
++ if (local_checking_phase_flag(shpnt)) {
++ stat_result(shpnt) = cmd_result;
++ got_interrupt(shpnt) = 1;
++ reset_status(shpnt) = IM_RESET_FINISHED_OK;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI;
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ }
+ /* handling of commands coming from upper level of scsi driver */
+- if (last_scsi_type(ihost_index)[ldn] == IM_IMM_CMD) {
++ if (last_scsi_type(shpnt)[ldn] == IM_IMM_CMD) {
+ /* verify ldn, and may handle rare reset immediate command */
+- if ((reset_status(ihost_index) == IM_RESET_IN_PROGRESS) && (last_scsi_command(ihost_index)[ldn] == IM_RESET_IMM_CMD)) {
++ if ((reset_status(shpnt) == IM_RESET_IN_PROGRESS) && (last_scsi_command(shpnt)[ldn] == IM_RESET_IMM_CMD)) {
+ if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) {
+ disk_rw_in_progress = 0;
+ PS2_DISK_LED_OFF();
+- reset_status(ihost_index) = IM_RESET_FINISHED_FAIL;
++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ } else {
+ /*reset disk led counter, turn off disk led */
+ disk_rw_in_progress = 0;
+ PS2_DISK_LED_OFF();
+- reset_status(ihost_index) = IM_RESET_FINISHED_OK;
++ reset_status(shpnt) = IM_RESET_FINISHED_OK;
+ }
+- stat_result(ihost_index) = cmd_result;
+- last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+- last_scsi_type(ihost_index)[ldn] = 0;
+- spin_unlock(dev->host_lock);
++ stat_result(shpnt) = cmd_result;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI;
++ last_scsi_type(shpnt)[ldn] = 0;
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+- } else if (last_scsi_command(ihost_index)[ldn] == IM_ABORT_IMM_CMD) {
++ } else if (last_scsi_command(shpnt)[ldn] == IM_ABORT_IMM_CMD) {
+ /* react on SCSI abort command */
+ #ifdef IM_DEBUG_PROBE
+ printk("IBM MCA SCSI: Interrupt from SCSI-abort.\n");
+ #endif
+ disk_rw_in_progress = 0;
+ PS2_DISK_LED_OFF();
+- cmd = ld(ihost_index)[ldn].cmd;
+- ld(ihost_index)[ldn].cmd = NULL;
++ cmd = ld(shpnt)[ldn].cmd;
++ ld(shpnt)[ldn].cmd = NULL;
+ if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE)
+ cmd->result = DID_NO_CONNECT << 16;
+ else
+ cmd->result = DID_ABORT << 16;
+- stat_result(ihost_index) = cmd_result;
+- last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+- last_scsi_type(ihost_index)[ldn] = 0;
++ stat_result(shpnt) = cmd_result;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI;
++ last_scsi_type(shpnt)[ldn] = 0;
+ if (cmd->scsi_done)
+ (cmd->scsi_done) (cmd); /* should be the internal_done */
+- spin_unlock(dev->host_lock);
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ } else {
+ disk_rw_in_progress = 0;
+ PS2_DISK_LED_OFF();
+- reset_status(ihost_index) = IM_RESET_FINISHED_OK;
+- stat_result(ihost_index) = cmd_result;
+- last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+- spin_unlock(dev->host_lock);
++ reset_status(shpnt) = IM_RESET_FINISHED_OK;
++ stat_result(shpnt) = cmd_result;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI;
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ }
+ }
+- last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+- last_scsi_type(ihost_index)[ldn] = 0;
+- cmd = ld(ihost_index)[ldn].cmd;
+- ld(ihost_index)[ldn].cmd = NULL;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI;
++ last_scsi_type(shpnt)[ldn] = 0;
++ cmd = ld(shpnt)[ldn].cmd;
++ ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_TIMEOUT
+ if (cmd) {
+ if ((cmd->target == TIMEOUT_PUN) && (cmd->device->lun == TIMEOUT_LUN)) {
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ printk("IBM MCA SCSI: Ignoring interrupt from pun=%x, lun=%x.\n", cmd->target, cmd->device->lun);
+ return IRQ_HANDLED;
+ }
+@@ -674,15 +681,15 @@
+ /*if no command structure, just return, else clear cmd */
+ if (!cmd)
+ {
+- spin_unlock(dev->host_lock);
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ #ifdef IM_DEBUG_INT
+- printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(ihost_index)[ldn].tsb.dev_status, ld(ihost_index)[ldn].tsb.cmd_status, ld(ihost_index)[ldn].tsb.dev_error, ld(ihost_index)[ldn].tsb.cmd_error);
++ printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(shpnt)[ldn].tsb.dev_status, ld(shpnt)[ldn].tsb.cmd_status, ld(shpnt)[ldn].tsb.dev_error, ld(shpnt)[ldn].tsb.cmd_error);
+ #endif
+ /*if this is end of media read/write, may turn off PS/2 disk led */
+- if ((ld(ihost_index)[ldn].device_type != TYPE_NO_LUN) && (ld(ihost_index)[ldn].device_type != TYPE_NO_DEVICE)) {
++ if ((ld(shpnt)[ldn].device_type != TYPE_NO_LUN) && (ld(shpnt)[ldn].device_type != TYPE_NO_DEVICE)) {
+ /* only access this, if there was a valid device addressed */
+ if (--disk_rw_in_progress == 0)
+ PS2_DISK_LED_OFF();
+@@ -693,8 +700,8 @@
+ * adapters do not support CMD_TERMINATED, TASK_SET_FULL and
+ * ACA_ACTIVE as returning statusbyte information. (ML) */
+ if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) {
+- cmd->result = (unsigned char) (ld(ihost_index)[ldn].tsb.dev_status & 0x1e);
+- IBM_DS(ihost_index).total_errors++;
++ cmd->result = (unsigned char) (ld(shpnt)[ldn].tsb.dev_status & 0x1e);
++ IBM_DS(shpnt).total_errors++;
+ } else
+ cmd->result = 0;
+ /* write device status into cmd->result, and call done function */
+@@ -705,24 +712,25 @@
+ cmd->result |= DID_OK << 16;
+ if (cmd->scsi_done)
+ (cmd->scsi_done) (cmd);
+- spin_unlock(dev->host_lock);
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+-static void issue_cmd(int host_index, unsigned long cmd_reg, unsigned char attn_reg)
++static void issue_cmd(struct Scsi_Host *shpnt, unsigned long cmd_reg,
++ unsigned char attn_reg)
+ {
+ unsigned long flags;
+ /* must wait for attention reg not busy */
+ while (1) {
+- spin_lock_irqsave(hosts[host_index]->host_lock, flags);
+- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++ spin_lock_irqsave(shpnt->host_lock, flags);
++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ break;
+- spin_unlock_irqrestore(hosts[host_index]->host_lock, flags);
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ }
+ /* write registers and enable system interrupts */
+- outl(cmd_reg, IM_CMD_REG(host_index));
+- outb(attn_reg, IM_ATTN_REG(host_index));
+- spin_unlock_irqrestore(hosts[host_index]->host_lock, flags);
++ outl(cmd_reg, IM_CMD_REG(shpnt));
++ outb(attn_reg, IM_ATTN_REG(shpnt));
++ spin_unlock_irqrestore(shpnt->host_lock, flags);
+ }
+
+ static void internal_done(Scsi_Cmnd * cmd)
+@@ -732,34 +740,34 @@
+ }
+
+ /* SCSI-SCB-command for device_inquiry */
+-static int device_inquiry(int host_index, int ldn)
++static int device_inquiry(struct Scsi_Host *shpnt, int ldn)
+ {
+ int retr;
+ struct im_scb *scb;
+ struct im_tsb *tsb;
+ unsigned char *buf;
+
+- scb = &(ld(host_index)[ldn].scb);
+- tsb = &(ld(host_index)[ldn].tsb);
+- buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
+- ld(host_index)[ldn].tsb.dev_status = 0; /* prepare statusblock */
++ scb = &(ld(shpnt)[ldn].scb);
++ tsb = &(ld(shpnt)[ldn].tsb);
++ buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
++ ld(shpnt)[ldn].tsb.dev_status = 0; /* prepare statusblock */
+ for (retr = 0; retr < 3; retr++) {
+ /* fill scb with inquiry command */
+ scb->command = IM_DEVICE_INQUIRY_CMD | IM_NO_DISCONNECT;
+ scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+- last_scsi_command(host_index)[ldn] = IM_DEVICE_INQUIRY_CMD;
+- last_scsi_type(host_index)[ldn] = IM_SCB;
++ last_scsi_command(shpnt)[ldn] = IM_DEVICE_INQUIRY_CMD;
++ last_scsi_type(shpnt)[ldn] = IM_SCB;
+ scb->sys_buf_adr = isa_virt_to_bus(buf);
+ scb->sys_buf_length = 255; /* maximum bufferlength gives max info */
+ scb->tsb_adr = isa_virt_to_bus(tsb);
+ /* issue scb to passed ldn, and busy wait for interrupt */
+- got_interrupt(host_index) = 0;
+- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+- while (!got_interrupt(host_index))
++ got_interrupt(shpnt) = 0;
++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++ while (!got_interrupt(shpnt))
+ barrier();
+
+ /*if command successful, break */
+- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ return 1;
+ }
+ /*if all three retries failed, return "no device at this ldn" */
+@@ -769,34 +777,34 @@
+ return 1;
+ }
+
+-static int read_capacity(int host_index, int ldn)
++static int read_capacity(struct Scsi_Host *shpnt, int ldn)
+ {
+ int retr;
+ struct im_scb *scb;
+ struct im_tsb *tsb;
+ unsigned char *buf;
+
+- scb = &(ld(host_index)[ldn].scb);
+- tsb = &(ld(host_index)[ldn].tsb);
+- buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
+- ld(host_index)[ldn].tsb.dev_status = 0;
++ scb = &(ld(shpnt)[ldn].scb);
++ tsb = &(ld(shpnt)[ldn].tsb);
++ buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
++ ld(shpnt)[ldn].tsb.dev_status = 0;
+ for (retr = 0; retr < 3; retr++) {
+ /*fill scb with read capacity command */
+ scb->command = IM_READ_CAPACITY_CMD;
+ scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+- last_scsi_command(host_index)[ldn] = IM_READ_CAPACITY_CMD;
+- last_scsi_type(host_index)[ldn] = IM_SCB;
++ last_scsi_command(shpnt)[ldn] = IM_READ_CAPACITY_CMD;
++ last_scsi_type(shpnt)[ldn] = IM_SCB;
+ scb->sys_buf_adr = isa_virt_to_bus(buf);
+ scb->sys_buf_length = 8;
+ scb->tsb_adr = isa_virt_to_bus(tsb);
+ /*issue scb to passed ldn, and busy wait for interrupt */
+- got_interrupt(host_index) = 0;
+- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+- while (!got_interrupt(host_index))
++ got_interrupt(shpnt) = 0;
++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++ while (!got_interrupt(shpnt))
+ barrier();
+
+ /*if got capacity, get block length and return one device found */
+- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ return 1;
+ }
+ /*if all three retries failed, return "no device at this ldn" */
+@@ -806,39 +814,39 @@
+ return 1;
+ }
+
+-static int get_pos_info(int host_index)
++static int get_pos_info(struct Scsi_Host *shpnt)
+ {
+ int retr;
+ struct im_scb *scb;
+ struct im_tsb *tsb;
+ unsigned char *buf;
+
+- scb = &(ld(host_index)[MAX_LOG_DEV].scb);
+- tsb = &(ld(host_index)[MAX_LOG_DEV].tsb);
+- buf = (unsigned char *) (&(ld(host_index)[MAX_LOG_DEV].buf));
+- ld(host_index)[MAX_LOG_DEV].tsb.dev_status = 0;
++ scb = &(ld(shpnt)[MAX_LOG_DEV].scb);
++ tsb = &(ld(shpnt)[MAX_LOG_DEV].tsb);
++ buf = (unsigned char *) (&(ld(shpnt)[MAX_LOG_DEV].buf));
++ ld(shpnt)[MAX_LOG_DEV].tsb.dev_status = 0;
+ for (retr = 0; retr < 3; retr++) {
+ /*fill scb with get_pos_info command */
+ scb->command = IM_GET_POS_INFO_CMD;
+ scb->enable = IM_READ_CONTROL | IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD;
+- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_SCB;
++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD;
++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_SCB;
+ scb->sys_buf_adr = isa_virt_to_bus(buf);
+- if (special(host_index) == IBM_SCSI2_FW)
++ if (special(shpnt) == IBM_SCSI2_FW)
+ scb->sys_buf_length = 256; /* get all info from F/W adapter */
+ else
+ scb->sys_buf_length = 18; /* get exactly 18 bytes for other SCSI */
+ scb->tsb_adr = isa_virt_to_bus(tsb);
+ /*issue scb to ldn=15, and busy wait for interrupt */
+- got_interrupt(host_index) = 0;
+- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV);
++ got_interrupt(shpnt) = 0;
++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV);
+
+ /* FIXME: timeout */
+- while (!got_interrupt(host_index))
++ while (!got_interrupt(shpnt))
+ barrier();
+
+ /*if got POS-stuff, get block length and return one device found */
+- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ return 1;
+ }
+ /* if all three retries failed, return "no device at this ldn" */
+@@ -851,14 +859,16 @@
+ /* SCSI-immediate-command for assign. This functions maps/unmaps specific
+ ldn-numbers on SCSI (PUN,LUN). It is needed for presetting of the
+ subsystem and for dynamical remapping od ldns. */
+-static int immediate_assign(int host_index, unsigned int pun, unsigned int lun, unsigned int ldn, unsigned int operation)
++static int immediate_assign(struct Scsi_Host *shpnt, unsigned int pun,
++ unsigned int lun, unsigned int ldn,
++ unsigned int operation)
+ {
+ int retr;
+ unsigned long imm_cmd;
+
+ for (retr = 0; retr < 3; retr++) {
+ /* select mutation level of the SCSI-adapter */
+- switch (special(host_index)) {
++ switch (special(shpnt)) {
+ case IBM_SCSI2_FW:
+ imm_cmd = (unsigned long) (IM_ASSIGN_IMM_CMD);
+ imm_cmd |= (unsigned long) ((lun & 7) << 24);
+@@ -867,7 +877,7 @@
+ imm_cmd |= (unsigned long) ((ldn & 15) << 16);
+ break;
+ default:
+- imm_cmd = inl(IM_CMD_REG(host_index));
++ imm_cmd = inl(IM_CMD_REG(shpnt));
+ imm_cmd &= (unsigned long) (0xF8000000); /* keep reserved bits */
+ imm_cmd |= (unsigned long) (IM_ASSIGN_IMM_CMD);
+ imm_cmd |= (unsigned long) ((lun & 7) << 24);
+@@ -876,15 +886,15 @@
+ imm_cmd |= (unsigned long) ((ldn & 15) << 16);
+ break;
+ }
+- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD;
+- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD;
+- got_interrupt(host_index) = 0;
+- issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
+- while (!got_interrupt(host_index))
++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD;
++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD;
++ got_interrupt(shpnt) = 0;
++ issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
++ while (!got_interrupt(shpnt))
+ barrier();
+
+ /*if command successful, break */
+- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ return 1;
+ }
+ if (retr >= 3)
+@@ -893,7 +903,7 @@
+ return 1;
+ }
+
+-static int immediate_feature(int host_index, unsigned int speed, unsigned int timeout)
++static int immediate_feature(struct Scsi_Host *shpnt, unsigned int speed, unsigned int timeout)
+ {
+ int retr;
+ unsigned long imm_cmd;
+@@ -903,16 +913,16 @@
+ imm_cmd = IM_FEATURE_CTR_IMM_CMD;
+ imm_cmd |= (unsigned long) ((speed & 0x7) << 29);
+ imm_cmd |= (unsigned long) ((timeout & 0x1fff) << 16);
+- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD;
+- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD;
+- got_interrupt(host_index) = 0;
++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD;
++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD;
++ got_interrupt(shpnt) = 0;
+ /* we need to run into command errors in order to probe for the
+ * right speed! */
+ global_command_error_excuse = 1;
+- issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
++ issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
+
+ /* FIXME: timeout */
+- while (!got_interrupt(host_index))
++ while (!got_interrupt(shpnt))
+ barrier();
+ if (global_command_error_excuse == CMD_FAIL) {
+ global_command_error_excuse = 0;
+@@ -920,7 +930,7 @@
+ } else
+ global_command_error_excuse = 0;
+ /*if command successful, break */
+- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ return 1;
+ }
+ if (retr >= 3)
+@@ -930,35 +940,35 @@
+ }
+
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+-static int immediate_reset(int host_index, unsigned int ldn)
++static int immediate_reset(struct Scsi_Host *shpnt, unsigned int ldn)
+ {
+ int retries;
+ int ticks;
+ unsigned long imm_command;
+
+ for (retries = 0; retries < 3; retries++) {
+- imm_command = inl(IM_CMD_REG(host_index));
++ imm_command = inl(IM_CMD_REG(shpnt));
+ imm_command &= (unsigned long) (0xFFFF0000); /* keep reserved bits */
+ imm_command |= (unsigned long) (IM_RESET_IMM_CMD);
+- last_scsi_command(host_index)[ldn] = IM_RESET_IMM_CMD;
+- last_scsi_type(host_index)[ldn] = IM_IMM_CMD;
+- got_interrupt(host_index) = 0;
+- reset_status(host_index) = IM_RESET_IN_PROGRESS;
+- issue_cmd(host_index, (unsigned long) (imm_command), IM_IMM_CMD | ldn);
++ last_scsi_command(shpnt)[ldn] = IM_RESET_IMM_CMD;
++ last_scsi_type(shpnt)[ldn] = IM_IMM_CMD;
++ got_interrupt(shpnt) = 0;
++ reset_status(shpnt) = IM_RESET_IN_PROGRESS;
++ issue_cmd(shpnt, (unsigned long) (imm_command), IM_IMM_CMD | ldn);
+ ticks = IM_RESET_DELAY * HZ;
+- while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks) {
++ while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks) {
+ udelay((1 + 999 / HZ) * 1000);
+ barrier();
+ }
+ /* if reset did not complete, just complain */
+ if (!ticks) {
+ printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY);
+- reset_status(host_index) = IM_RESET_FINISHED_OK;
++ reset_status(shpnt) = IM_RESET_FINISHED_OK;
+ /* did not work, finish */
+ return 1;
+ }
+ /*if command successful, break */
+- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ return 1;
+ }
+ if (retries >= 3)
+@@ -1060,35 +1070,35 @@
+ return 0;
+ }
+
+-static int probe_bus_mode(int host_index)
++static int probe_bus_mode(struct Scsi_Host *shpnt)
+ {
+ struct im_pos_info *info;
+ int num_bus = 0;
+ int ldn;
+
+- info = (struct im_pos_info *) (&(ld(host_index)[MAX_LOG_DEV].buf));
+- if (get_pos_info(host_index)) {
++ info = (struct im_pos_info *) (&(ld(shpnt)[MAX_LOG_DEV].buf));
++ if (get_pos_info(shpnt)) {
+ if (info->connector_size & 0xf000)
+- subsystem_connector_size(host_index) = 16;
++ subsystem_connector_size(shpnt) = 16;
+ else
+- subsystem_connector_size(host_index) = 32;
++ subsystem_connector_size(shpnt) = 32;
+ num_bus |= (info->pos_4b & 8) >> 3;
+ for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+- if ((special(host_index) == IBM_SCSI_WCACHE) || (special(host_index) == IBM_7568_WCACHE)) {
++ if ((special(shpnt) == IBM_SCSI_WCACHE) || (special(shpnt) == IBM_7568_WCACHE)) {
+ if (!((info->cache_stat >> ldn) & 1))
+- ld(host_index)[ldn].cache_flag = 0;
++ ld(shpnt)[ldn].cache_flag = 0;
+ }
+ if (!((info->retry_stat >> ldn) & 1))
+- ld(host_index)[ldn].retry_flag = 0;
++ ld(shpnt)[ldn].retry_flag = 0;
+ }
+ #ifdef IM_DEBUG_PROBE
+ printk("IBM MCA SCSI: SCSI-Cache bits: ");
+ for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+- printk("%d", ld(host_index)[ldn].cache_flag);
++ printk("%d", ld(shpnt)[ldn].cache_flag);
+ }
+ printk("\nIBM MCA SCSI: SCSI-Retry bits: ");
+ for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+- printk("%d", ld(host_index)[ldn].retry_flag);
++ printk("%d", ld(shpnt)[ldn].retry_flag);
+ }
+ printk("\n");
+ #endif
+@@ -1097,7 +1107,7 @@
+ }
+
+ /* probing scsi devices */
+-static void check_devices(int host_index, int adaptertype)
++static void check_devices(struct Scsi_Host *shpnt, int adaptertype)
+ {
+ int id, lun, ldn, ticks;
+ int count_devices; /* local counter for connected device */
+@@ -1108,24 +1118,24 @@
+ /* assign default values to certain variables */
+ ticks = 0;
+ count_devices = 0;
+- IBM_DS(host_index).dyn_flag = 0; /* normally no need for dynamical ldn management */
+- IBM_DS(host_index).total_errors = 0; /* set errorcounter to 0 */
+- next_ldn(host_index) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */
++ IBM_DS(shpnt).dyn_flag = 0; /* normally no need for dynamical ldn management */
++ IBM_DS(shpnt).total_errors = 0; /* set errorcounter to 0 */
++ next_ldn(shpnt) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */
+
+ /* initialize the very important driver-informational arrays/structs */
+- memset(ld(host_index), 0, sizeof(ld(host_index)));
++ memset(ld(shpnt), 0, sizeof(ld(shpnt)));
+ for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+- last_scsi_command(host_index)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */
+- last_scsi_type(host_index)[ldn] = 0;
+- ld(host_index)[ldn].cache_flag = 1;
+- ld(host_index)[ldn].retry_flag = 1;
++ last_scsi_command(shpnt)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */
++ last_scsi_type(shpnt)[ldn] = 0;
++ ld(shpnt)[ldn].cache_flag = 1;
++ ld(shpnt)[ldn].retry_flag = 1;
+ }
+- memset(get_ldn(host_index), TYPE_NO_DEVICE, sizeof(get_ldn(host_index))); /* this is essential ! */
+- memset(get_scsi(host_index), TYPE_NO_DEVICE, sizeof(get_scsi(host_index))); /* this is essential ! */
++ memset(get_ldn(shpnt), TYPE_NO_DEVICE, sizeof(get_ldn(shpnt))); /* this is essential ! */
++ memset(get_scsi(shpnt), TYPE_NO_DEVICE, sizeof(get_scsi(shpnt))); /* this is essential ! */
+ for (lun = 0; lun < 8; lun++) {
+ /* mark the adapter at its pun on all luns */
+- get_scsi(host_index)[subsystem_pun(host_index)][lun] = TYPE_IBM_SCSI_ADAPTER;
+- get_ldn(host_index)[subsystem_pun(host_index)][lun] = MAX_LOG_DEV; /* make sure, the subsystem
++ get_scsi(shpnt)[subsystem_pun(shpnt)][lun] = TYPE_IBM_SCSI_ADAPTER;
++ get_ldn(shpnt)[subsystem_pun(shpnt)][lun] = MAX_LOG_DEV; /* make sure, the subsystem
+ ldn is active for all
+ luns. */
+ }
+@@ -1134,9 +1144,9 @@
+ /* monitor connected on model XX95. */
+
+ /* STEP 1: */
+- adapter_speed(host_index) = global_adapter_speed;
+- speedrun = adapter_speed(host_index);
+- while (immediate_feature(host_index, speedrun, adapter_timeout) == 2) {
++ adapter_speed(shpnt) = global_adapter_speed;
++ speedrun = adapter_speed(shpnt);
++ while (immediate_feature(shpnt, speedrun, adapter_timeout) == 2) {
+ probe_display(1);
+ if (speedrun == 7)
+ panic("IBM MCA SCSI: Cannot set Synchronous-Transfer-Rate!\n");
+@@ -1144,30 +1154,30 @@
+ if (speedrun > 7)
+ speedrun = 7;
+ }
+- adapter_speed(host_index) = speedrun;
++ adapter_speed(shpnt) = speedrun;
+ /* Get detailed information about the current adapter, necessary for
+ * device operations: */
+- num_bus = probe_bus_mode(host_index);
++ num_bus = probe_bus_mode(shpnt);
+
+ /* num_bus contains only valid data for the F/W adapter! */
+ if (adaptertype == IBM_SCSI2_FW) { /* F/W SCSI adapter: */
+ /* F/W adapter PUN-space extension evaluation: */
+ if (num_bus) {
+ printk(KERN_INFO "IBM MCA SCSI: Separate bus mode (wide-addressing enabled)\n");
+- subsystem_maxid(host_index) = 16;
++ subsystem_maxid(shpnt) = 16;
+ } else {
+ printk(KERN_INFO "IBM MCA SCSI: Combined bus mode (wide-addressing disabled)\n");
+- subsystem_maxid(host_index) = 8;
++ subsystem_maxid(shpnt) = 8;
+ }
+ printk(KERN_INFO "IBM MCA SCSI: Sync.-Rate (F/W: 20, Int.: 10, Ext.: %s) MBytes/s\n", ibmrate(speedrun, adaptertype));
+ } else /* all other IBM SCSI adapters: */
+ printk(KERN_INFO "IBM MCA SCSI: Synchronous-SCSI-Transfer-Rate: %s MBytes/s\n", ibmrate(speedrun, adaptertype));
+
+ /* assign correct PUN device space */
+- max_pun = subsystem_maxid(host_index);
++ max_pun = subsystem_maxid(shpnt);
+
+ #ifdef IM_DEBUG_PROBE
+- printk("IBM MCA SCSI: Current SCSI-host index: %d\n", host_index);
++ printk("IBM MCA SCSI: Current SCSI-host index: %d\n", shpnt);
+ printk("IBM MCA SCSI: Removing default logical SCSI-device mapping.");
+ #else
+ printk(KERN_INFO "IBM MCA SCSI: Dev. Order: %s, Mapping (takes <2min): ", (ibm_ansi_order) ? "ANSI" : "New");
+@@ -1177,7 +1187,7 @@
+ #ifdef IM_DEBUG_PROBE
+ printk(".");
+ #endif
+- immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */
++ immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */
+ }
+ lun = 0; /* default lun is 0 */
+ #ifndef IM_DEBUG_PROBE
+@@ -1196,18 +1206,18 @@
+ #ifdef IM_DEBUG_PROBE
+ printk(".");
+ #endif
+- if (id != subsystem_pun(host_index)) {
++ if (id != subsystem_pun(shpnt)) {
+ /* if pun is not the adapter: */
+ /* set ldn=0 to pun,lun */
+- immediate_assign(host_index, id, lun, PROBE_LDN, SET_LDN);
+- if (device_inquiry(host_index, PROBE_LDN)) { /* probe device */
+- get_scsi(host_index)[id][lun] = (unsigned char) (ld(host_index)[PROBE_LDN].buf[0]);
++ immediate_assign(shpnt, id, lun, PROBE_LDN, SET_LDN);
++ if (device_inquiry(shpnt, PROBE_LDN)) { /* probe device */
++ get_scsi(shpnt)[id][lun] = (unsigned char) (ld(shpnt)[PROBE_LDN].buf[0]);
+ /* entry, even for NO_LUN */
+- if (ld(host_index)[PROBE_LDN].buf[0] != TYPE_NO_LUN)
++ if (ld(shpnt)[PROBE_LDN].buf[0] != TYPE_NO_LUN)
+ count_devices++; /* a existing device is found */
+ }
+ /* remove ldn */
+- immediate_assign(host_index, id, lun, PROBE_LDN, REMOVE_LDN);
++ immediate_assign(shpnt, id, lun, PROBE_LDN, REMOVE_LDN);
+ }
+ }
+ #ifndef IM_DEBUG_PROBE
+@@ -1227,16 +1237,16 @@
+ #ifdef IM_DEBUG_PROBE
+ printk(".");
+ #endif
+- if (id != subsystem_pun(host_index)) {
+- if (get_scsi(host_index)[id][lun] != TYPE_NO_LUN && get_scsi(host_index)[id][lun] != TYPE_NO_DEVICE) {
++ if (id != subsystem_pun(shpnt)) {
++ if (get_scsi(shpnt)[id][lun] != TYPE_NO_LUN && get_scsi(shpnt)[id][lun] != TYPE_NO_DEVICE) {
+ /* Only map if accepted type. Always enter for
+ lun == 0 to get no gaps into ldn-mapping for ldn<7. */
+- immediate_assign(host_index, id, lun, ldn, SET_LDN);
+- get_ldn(host_index)[id][lun] = ldn; /* map ldn */
+- if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) {
++ immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++ get_ldn(shpnt)[id][lun] = ldn; /* map ldn */
++ if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) {
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+ printk("resetting device at ldn=%x ... ", ldn);
+- immediate_reset(host_index, ldn);
++ immediate_reset(shpnt, ldn);
+ #endif
+ ldn++;
+ } else {
+@@ -1244,15 +1254,15 @@
+ * handle it or because it has problems */
+ if (lun > 0) {
+ /* remove mapping */
+- get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE;
+- immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN);
++ get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE;
++ immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN);
+ } else
+ ldn++;
+ }
+ } else if (lun == 0) {
+ /* map lun == 0, even if no device exists */
+- immediate_assign(host_index, id, lun, ldn, SET_LDN);
+- get_ldn(host_index)[id][lun] = ldn; /* map ldn */
++ immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++ get_ldn(shpnt)[id][lun] = ldn; /* map ldn */
+ ldn++;
+ }
+ }
+@@ -1262,14 +1272,14 @@
+ /* map remaining ldns to non-existing devices */
+ for (lun = 1; lun < 8 && ldn < MAX_LOG_DEV; lun++)
+ for (id = 0; id < max_pun && ldn < MAX_LOG_DEV; id++) {
+- if (get_scsi(host_index)[id][lun] == TYPE_NO_LUN || get_scsi(host_index)[id][lun] == TYPE_NO_DEVICE) {
++ if (get_scsi(shpnt)[id][lun] == TYPE_NO_LUN || get_scsi(shpnt)[id][lun] == TYPE_NO_DEVICE) {
+ probe_display(1);
+ /* Map remaining ldns only to NON-existing pun,lun
+ combinations to make sure an inquiry will fail.
+ For MULTI_LUN, it is needed to avoid adapter autonome
+ SCSI-remapping. */
+- immediate_assign(host_index, id, lun, ldn, SET_LDN);
+- get_ldn(host_index)[id][lun] = ldn;
++ immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++ get_ldn(shpnt)[id][lun] = ldn;
+ ldn++;
+ }
+ }
+@@ -1292,51 +1302,51 @@
+ for (id = 0; id < max_pun; id++) {
+ printk("%2d ", id);
+ for (lun = 0; lun < 8; lun++)
+- printk("%2s ", ti_p(get_scsi(host_index)[id][lun]));
++ printk("%2s ", ti_p(get_scsi(shpnt)[id][lun]));
+ printk(" %2d ", id);
+ for (lun = 0; lun < 8; lun++)
+- printk("%2s ", ti_l(get_ldn(host_index)[id][lun]));
++ printk("%2s ", ti_l(get_ldn(shpnt)[id][lun]));
+ printk("\n");
+ }
+ #endif
+
+ /* assign total number of found SCSI-devices to the statistics struct */
+- IBM_DS(host_index).total_scsi_devices = count_devices;
++ IBM_DS(shpnt).total_scsi_devices = count_devices;
+
+ /* decide for output in /proc-filesystem, if the configuration of
+ SCSI-devices makes dynamical reassignment of devices necessary */
+ if (count_devices >= MAX_LOG_DEV)
+- IBM_DS(host_index).dyn_flag = 1; /* dynamical assignment is necessary */
++ IBM_DS(shpnt).dyn_flag = 1; /* dynamical assignment is necessary */
+ else
+- IBM_DS(host_index).dyn_flag = 0; /* dynamical assignment is not necessary */
++ IBM_DS(shpnt).dyn_flag = 0; /* dynamical assignment is not necessary */
+
+ /* If no SCSI-devices are assigned, return 1 in order to cause message. */
+ if (ldn == 0)
+ printk("IBM MCA SCSI: Warning: No SCSI-devices found/assigned!\n");
+
+ /* reset the counters for statistics on the current adapter */
+- IBM_DS(host_index).scbs = 0;
+- IBM_DS(host_index).long_scbs = 0;
+- IBM_DS(host_index).total_accesses = 0;
+- IBM_DS(host_index).total_interrupts = 0;
+- IBM_DS(host_index).dynamical_assignments = 0;
+- memset(IBM_DS(host_index).ldn_access, 0x0, sizeof(IBM_DS(host_index).ldn_access));
+- memset(IBM_DS(host_index).ldn_read_access, 0x0, sizeof(IBM_DS(host_index).ldn_read_access));
+- memset(IBM_DS(host_index).ldn_write_access, 0x0, sizeof(IBM_DS(host_index).ldn_write_access));
+- memset(IBM_DS(host_index).ldn_inquiry_access, 0x0, sizeof(IBM_DS(host_index).ldn_inquiry_access));
+- memset(IBM_DS(host_index).ldn_modeselect_access, 0x0, sizeof(IBM_DS(host_index).ldn_modeselect_access));
+- memset(IBM_DS(host_index).ldn_assignments, 0x0, sizeof(IBM_DS(host_index).ldn_assignments));
++ IBM_DS(shpnt).scbs = 0;
++ IBM_DS(shpnt).long_scbs = 0;
++ IBM_DS(shpnt).total_accesses = 0;
++ IBM_DS(shpnt).total_interrupts = 0;
++ IBM_DS(shpnt).dynamical_assignments = 0;
++ memset(IBM_DS(shpnt).ldn_access, 0x0, sizeof(IBM_DS(shpnt).ldn_access));
++ memset(IBM_DS(shpnt).ldn_read_access, 0x0, sizeof(IBM_DS(shpnt).ldn_read_access));
++ memset(IBM_DS(shpnt).ldn_write_access, 0x0, sizeof(IBM_DS(shpnt).ldn_write_access));
++ memset(IBM_DS(shpnt).ldn_inquiry_access, 0x0, sizeof(IBM_DS(shpnt).ldn_inquiry_access));
++ memset(IBM_DS(shpnt).ldn_modeselect_access, 0x0, sizeof(IBM_DS(shpnt).ldn_modeselect_access));
++ memset(IBM_DS(shpnt).ldn_assignments, 0x0, sizeof(IBM_DS(shpnt).ldn_assignments));
+ probe_display(0);
+ return;
+ }
+
+-static int device_exists(int host_index, int ldn, int *block_length, int *device_type)
++static int device_exists(struct Scsi_Host *shpnt, int ldn, int *block_length, int *device_type)
+ {
+ unsigned char *buf;
+ /* if no valid device found, return immediately with 0 */
+- if (!(device_inquiry(host_index, ldn)))
++ if (!(device_inquiry(shpnt, ldn)))
+ return 0;
+- buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
++ buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
+ if (*buf == TYPE_ROM) {
+ *device_type = TYPE_ROM;
+ *block_length = 2048; /* (standard blocksize for yellow-/red-book) */
+@@ -1349,7 +1359,7 @@
+ }
+ if (*buf == TYPE_DISK) {
+ *device_type = TYPE_DISK;
+- if (read_capacity(host_index, ldn)) {
++ if (read_capacity(shpnt, ldn)) {
+ *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24);
+ return 1;
+ } else
+@@ -1357,7 +1367,7 @@
+ }
+ if (*buf == TYPE_MOD) {
+ *device_type = TYPE_MOD;
+- if (read_capacity(host_index, ldn)) {
++ if (read_capacity(shpnt, ldn)) {
+ *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24);
+ return 1;
+ } else
+@@ -1430,6 +1440,9 @@
+ return;
+ }
+
++#if 0
++ FIXME NEED TO MOVE TO SYSFS
++
+ static int ibmmca_getinfo(char *buf, int slot, void *dev_id)
+ {
+ struct Scsi_Host *shpnt;
+@@ -1480,58 +1493,34 @@
+
+ return len;
+ }
++#endif
+
+-int ibmmca_detect(struct scsi_host_template * scsi_template)
++static struct scsi_host_template ibmmca_driver_template = {
++ .proc_name = "ibmmca",
++ .proc_info = ibmmca_proc_info,
++ .name = "IBM SCSI-Subsystem",
++ .queuecommand = ibmmca_queuecommand,
++ .eh_abort_handler = ibmmca_abort,
++ .eh_host_reset_handler = ibmmca_host_reset,
++ .bios_param = ibmmca_biosparam,
++ .can_queue = 16,
++ .this_id = 7,
++ .sg_tablesize = 16,
++ .cmd_per_lun = 1,
++ .use_clustering = ENABLE_CLUSTERING,
++};
++
++static int ibmmca_probe(struct device *dev)
+ {
+ struct Scsi_Host *shpnt;
+- int port, id, i, j, k, slot;
+- int devices_on_irq_11 = 0;
+- int devices_on_irq_14 = 0;
+- int IRQ14_registered = 0;
+- int IRQ11_registered = 0;
+-
+- found = 0; /* make absolutely sure, that found is set to 0 */
++ int port, id, i, j, k, irq, enabled, ret = -EINVAL;
++ struct mca_device *mca_dev = to_mca_device(dev);
++ const char *description = ibmmca_description[mca_dev->index];
+
+ /* First of all, print the version number of the driver. This is
+ * important to allow better user bugreports in case of already
+ * having problems with the MCA_bus probing. */
+ printk(KERN_INFO "IBM MCA SCSI: Version %s\n", IBMMCA_SCSI_DRIVER_VERSION);
+- /* if this is not MCA machine, return "nothing found" */
+- if (!MCA_bus) {
+- printk(KERN_INFO "IBM MCA SCSI: No Microchannel-bus present --> Aborting.\n" " This machine does not have any IBM MCA-bus\n" " or the MCA-Kernel-support is not enabled!\n");
+- return 0;
+- }
+-
+-#ifdef MODULE
+- /* If the driver is run as module, read from conf.modules or cmd-line */
+- if (boot_options)
+- option_setup(boot_options);
+-#endif
+-
+- /* get interrupt request level */
+- if (request_irq(IM_IRQ, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) {
+- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ);
+- return 0;
+- } else
+- IRQ14_registered++;
+-
+- /* if ibmmcascsi setup option was passed to kernel, return "found" */
+- for (i = 0; i < IM_MAX_HOSTS; i++)
+- if (io_port[i] > 0 && scsi_id[i] >= 0 && scsi_id[i] < 8) {
+- printk("IBM MCA SCSI: forced detected SCSI Adapter, io=0x%x, scsi id=%d.\n", io_port[i], scsi_id[i]);
+- if ((shpnt = ibmmca_register(scsi_template, io_port[i], scsi_id[i], FORCED_DETECTION, "forced detected SCSI Adapter"))) {
+- for (k = 2; k < 7; k++)
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = 0;
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = FORCED_DETECTION;
+- mca_set_adapter_name(MCA_INTEGSCSI, "forced detected SCSI Adapter");
+- mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+- mca_mark_as_used(MCA_INTEGSCSI);
+- devices_on_irq_14++;
+- }
+- }
+- if (found)
+- return found;
+-
+ /* The POS2-register of all PS/2 model SCSI-subsystems has the following
+ * interpretation of bits:
+ * Bit 7 - 4 : Chip Revision ID (Release)
+@@ -1558,7 +1547,14 @@
+
+ /* first look for the IBM SCSI integrated subsystem on the motherboard */
+ for (j = 0; j < 8; j++) /* read the pos-information */
+- pos[j] = mca_read_stored_pos(MCA_INTEGSCSI, j);
++ pos[j] = mca_device_read_pos(mca_dev, j);
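++ /* POS(3) bits 7-5 hold the subsystem's SCSI id (PUN); POS(2) bit 0 is the enable bit */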
++ id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */
++ enabled = (pos[2] & 0x01);
++ if (!enabled) {
++ printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
++ printk(KERN_WARNING " SCSI-operations may not work.\n");
++ }
++
+ /* pos2 = pos3 = 0xff if there is no integrated SCSI-subsystem present, but
+ * if we ignore the settings of all surrounding pos registers, it is not
+ * completely sufficient to only check pos2 and pos3. */
+@@ -1566,232 +1562,137 @@
+ * make sure, we see a real integrated onboard SCSI-interface and no
+ * internal system information, which gets mapped to some pos registers
+ * on models 95xx. */
+- if ((!pos[0] && !pos[1] && pos[2] > 0 && pos[3] > 0 && !pos[4] && !pos[5] && !pos[6] && !pos[7]) || (pos[0] == 0xff && pos[1] == 0xff && pos[2] < 0xff && pos[3] < 0xff && pos[4] == 0xff && pos[5] == 0xff && pos[6] == 0xff && pos[7] == 0xff)) {
+- if ((pos[2] & 1) == 1) /* is the subsystem chip enabled ? */
++ if (mca_dev->slot == MCA_INTEGSCSI &&
++ ((!pos[0] && !pos[1] && pos[2] > 0 &&
++ pos[3] > 0 && !pos[4] && !pos[5] &&
++ !pos[6] && !pos[7]) ||
++ (pos[0] == 0xff && pos[1] == 0xff &&
++ pos[2] < 0xff && pos[3] < 0xff &&
++ pos[4] == 0xff && pos[5] == 0xff &&
++ pos[6] == 0xff && pos[7] == 0xff))) {
++ irq = IM_IRQ;
+ port = IM_IO_PORT;
+- else { /* if disabled, no IRQs will be generated, as the chip won't
+- * listen to the incoming commands and will do really nothing,
+- * except for listening to the pos-register settings. If this
+- * happens, I need to hugely think about it, as one has to
+- * write something to the MCA-Bus pos register in order to
+- * enable the chip. Normally, IBM-SCSI won't pass the POST,
+- * when the chip is disabled (see IBM tech. ref.). */
+- port = IM_IO_PORT; /* anyway, set the portnumber and warn */
+- printk("IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n" " SCSI-operations may not work.\n");
+- }
+- id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */
+- /* give detailed information on the subsystem. This helps me
+- * additionally during debugging and analyzing bug-reports. */
+- printk(KERN_INFO "IBM MCA SCSI: IBM Integrated SCSI Controller ffound, io=0x%x, scsi id=%d,\n", port, id);
+- printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled.");
+-
+- /* register the found integrated SCSI-subsystem */
+- if ((shpnt = ibmmca_register(scsi_template, port, id, INTEGRATED_SCSI, "IBM Integrated SCSI Controller")))
+- {
+- for (k = 2; k < 7; k++)
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI;
+- mca_set_adapter_name(MCA_INTEGSCSI, "IBM Integrated SCSI Controller");
+- mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+- mca_mark_as_used(MCA_INTEGSCSI);
+- devices_on_irq_14++;
+- }
+- }
+-
+- /* now look for other adapters in MCA slots, */
+- /* determine the number of known IBM-SCSI-subsystem types */
+- /* see the pos[2] dependence to get the adapter port-offset. */
+- for (i = 0; i < ARRAY_SIZE(subsys_list); i++) {
+- /* scan each slot for a fitting adapter id */
+- slot = 0; /* start at slot 0 */
+- while ((slot = mca_find_adapter(subsys_list[i].mca_id, slot))
+- != MCA_NOTFOUND) { /* scan through all slots */
+- for (j = 0; j < 8; j++) /* read the pos-information */
+- pos[j] = mca_read_stored_pos(slot, j);
+- if ((pos[2] & 1) == 1)
+- /* is the subsystem chip enabled ? */
+- /* (explanations see above) */
+- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+- else {
+- /* anyway, set the portnumber and warn */
+- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+- printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
+- printk(KERN_WARNING " SCSI-operations may not work.\n");
+- }
+- if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) {
++ } else {
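++ /* a slot adapter: its registers sit at an offset from IM_IO_PORT encoded in POS(2) bits 3-1 */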
++ irq = IM_IRQ;
++ port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
++ if ((mca_dev->index == IBM_SCSI2_FW) && (pos[6] != 0)) {
+ printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n");
+ printk(KERN_ERR " Impossible to determine adapter PUN!\n");
+ printk(KERN_ERR " Guessing adapter PUN = 7.\n");
+ id = 7;
+ } else {
+ id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */
+- if (i == IBM_SCSI2_FW) {
++ if (mca_dev->index == IBM_SCSI2_FW) {
+ id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit
+ * for F/W adapters */
+ }
+ }
+- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) {
++ if ((mca_dev->index == IBM_SCSI2_FW) &&
++ (pos[4] & 0x01) && (pos[6] == 0)) {
+ /* IRQ11 is used by SCSI-2 F/W Adapter/A */
+ printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n");
+- /* get interrupt request level */
+- if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) {
+- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW);
+- } else
+- IRQ11_registered++;
++ irq = IM_IRQ_FW;
+ }
+- printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id);
+- if ((pos[2] & 0xf0) == 0xf0)
+- printk(KERN_DEBUG" ROM Addr.=off,");
+- else
+- printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000);
+- printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+-
+- /* register the hostadapter */
+- if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) {
+- for (k = 2; k < 8; k++)
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i;
+- mca_set_adapter_name(slot, subsys_list[i].description);
+- mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+- mca_mark_as_used(slot);
+- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0))
+- devices_on_irq_11++;
+- else
+- devices_on_irq_14++;
+- }
+- slot++; /* advance to next slot */
+- } /* advance to next adapter id in the list of IBM-SCSI-subsystems */
+ }
+
+- /* now check for SCSI-adapters, mapped to the integrated SCSI
+- * area. E.g. a W/Cache in MCA-slot 9(!). Do the check correct here,
+- * as this is a known effect on some models 95xx. */
+- for (i = 0; i < ARRAY_SIZE(subsys_list); i++) {
+- /* scan each slot for a fitting adapter id */
+- slot = mca_find_adapter(subsys_list[i].mca_id, MCA_INTEGSCSI);
+- if (slot != MCA_NOTFOUND) { /* scan through all slots */
+- for (j = 0; j < 8; j++) /* read the pos-information */
+- pos[j] = mca_read_stored_pos(slot, j);
+- if ((pos[2] & 1) == 1) { /* is the subsystem chip enabled ? */
+- /* (explanations see above) */
+- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+- } else { /* anyway, set the portnumber and warn */
+- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+- printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
+- printk(KERN_WARNING " SCSI-operations may not work.\n");
+- }
+- if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) {
+- printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n");
+- printk(KERN_ERR " Impossible to determine adapter PUN!\n");
+- printk(KERN_ERR " Guessing adapter PUN = 7.\n");
+- id = 7;
+- } else {
+- id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */
+- if (i == IBM_SCSI2_FW)
+- id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit
+- * for F/W adapters */
+- }
+- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) {
+- /* IRQ11 is used by SCSI-2 F/W Adapter/A */
+- printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n");
+- /* get interrupt request level */
+- if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts))
+- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW);
+- else
+- IRQ11_registered++;
+- }
+- printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id);
++
++ /* give detailed information on the subsystem. This helps me
++ * additionally during debugging and analyzing bug-reports. */
++ printk(KERN_INFO "IBM MCA SCSI: %s found, io=0x%x, scsi id=%d,\n",
++ description, port, id);
++ if (mca_dev->slot == MCA_INTEGSCSI)
++ printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled.");
++ else {
+ if ((pos[2] & 0xf0) == 0xf0)
+ printk(KERN_DEBUG " ROM Addr.=off,");
+ else
+ printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000);
+- printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+
+- /* register the hostadapter */
+- if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) {
+- for (k = 2; k < 7; k++)
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i;
+- mca_set_adapter_name(slot, subsys_list[i].description);
+- mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+- mca_mark_as_used(slot);
+- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0))
+- devices_on_irq_11++;
+- else
+- devices_on_irq_14++;
+- }
+- slot++; /* advance to next slot */
+- } /* advance to next adapter id in the list of IBM-SCSI-subsystems */
++ printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+ }
+- if (IRQ11_registered && !devices_on_irq_11)
+- free_irq(IM_IRQ_FW, hosts); /* no devices on IRQ 11 */
+- if (IRQ14_registered && !devices_on_irq_14)
+- free_irq(IM_IRQ, hosts); /* no devices on IRQ 14 */
+- if (!devices_on_irq_11 && !devices_on_irq_14)
+- printk(KERN_WARNING "IBM MCA SCSI: No IBM SCSI-subsystem adapter attached.\n");
+- return found; /* return the number of found SCSI hosts. Should be 1 or 0. */
+-}
+-
+-static struct Scsi_Host *ibmmca_register(struct scsi_host_template * scsi_template, int port, int id, int adaptertype, char *hostname)
+-{
+- struct Scsi_Host *shpnt;
+- int i, j;
+- unsigned int ctrl;
+
+ /* check I/O region */
+- if (!request_region(port, IM_N_IO_PORT, hostname)) {
++ if (!request_region(port, IM_N_IO_PORT, description)) {
+ printk(KERN_ERR "IBM MCA SCSI: Unable to get I/O region 0x%x-0x%x (%d ports).\n", port, port + IM_N_IO_PORT - 1, IM_N_IO_PORT);
+- return NULL;
++ goto out_fail;
+ }
+
+ /* register host */
+- shpnt = scsi_register(scsi_template, sizeof(struct ibmmca_hostdata));
++ shpnt = scsi_host_alloc(&ibmmca_driver_template,
++ sizeof(struct ibmmca_hostdata));
+ if (!shpnt) {
+ printk(KERN_ERR "IBM MCA SCSI: Unable to register host.\n");
+- release_region(port, IM_N_IO_PORT);
+- return NULL;
++ goto out_release;
++ }
++
++ dev_set_drvdata(dev, shpnt);
++ if (request_irq(irq, interrupt_handler, IRQF_SHARED, description, dev)) {
++ printk(KERN_ERR "IBM MCA SCSI: failed to request interrupt %d\n", irq);
++ goto out_free_host;
+ }
+
+ /* request I/O region */
+- hosts[found] = shpnt; /* add new found hostadapter to the list */
+- special(found) = adaptertype; /* important assignment or else crash! */
+- subsystem_connector_size(found) = 0; /* preset slot-size */
+- shpnt->irq = IM_IRQ; /* assign necessary stuff for the adapter */
++ special(shpnt) = mca_dev->index; /* important assignment or else crash! */
++ subsystem_connector_size(shpnt) = 0; /* preset slot-size */
++ shpnt->irq = irq; /* assign necessary stuff for the adapter */
+ shpnt->io_port = port;
+ shpnt->n_io_port = IM_N_IO_PORT;
+ shpnt->this_id = id;
+ shpnt->max_id = 8; /* 8 PUNs are default */
+ /* now, the SCSI-subsystem is connected to Linux */
+
+- ctrl = (unsigned int) (inb(IM_CTR_REG(found))); /* get control-register status */
+ #ifdef IM_DEBUG_PROBE
+- printk("IBM MCA SCSI: Control Register contents: %x, status: %x\n", ctrl, inb(IM_STAT_REG(found)));
++ printk("IBM MCA SCSI: Control Register contents: %x, status: %x\n", inb(IM_CTR_REG(shpnt)), inb(IM_STAT_REG(shpnt)));
+ printk("IBM MCA SCSI: This adapters' POS-registers: ");
+ for (i = 0; i < 8; i++)
+ printk("%x ", pos[i]);
+ printk("\n");
+ #endif
+- reset_status(found) = IM_RESET_NOT_IN_PROGRESS;
++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS;
+
+ for (i = 0; i < 16; i++) /* reset the tables */
+ for (j = 0; j < 8; j++)
+- get_ldn(found)[i][j] = MAX_LOG_DEV;
++ get_ldn(shpnt)[i][j] = MAX_LOG_DEV;
+
+ /* check which logical devices exist */
+ /* after this line, local interrupting is possible: */
+- local_checking_phase_flag(found) = 1;
+- check_devices(found, adaptertype); /* call by value, using the global variable hosts */
+- local_checking_phase_flag(found) = 0;
+- found++; /* now increase index to be prepared for next found subsystem */
++ local_checking_phase_flag(shpnt) = 1;
++ check_devices(shpnt, mca_dev->index); /* probe the logical devices attached to this adapter */
++ local_checking_phase_flag(shpnt) = 0;
++
+ /* an ibm mca subsystem has been detected */
+- return shpnt;
++
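++ /* stash the POS register contents in the host private data */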
++ for (k = 2; k < 7; k++)
++ ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
++ ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = mca_dev->index;
++ mca_device_set_name(mca_dev, description);
++ /* FIXME: NEED TO REPLUMB TO SYSFS
++ mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
++ */
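++ /* claim the MCA slot for this driver */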
++ mca_device_set_claim(mca_dev, 1);
++ if (scsi_add_host(shpnt, dev)) {
++ dev_printk(KERN_ERR, dev, "IBM MCA SCSI: scsi_add_host failed\n");
++ goto out_free_host;
++ }
++ scsi_scan_host(shpnt);
++
++ return 0;
++ out_free_host:
++ scsi_host_put(shpnt);
++ out_release:
++ release_region(port, IM_N_IO_PORT);
++ out_fail:
++ return ret;
+ }
+
+-static int ibmmca_release(struct Scsi_Host *shpnt)
++static int __devexit ibmmca_remove(struct device *dev)
+ {
++ struct Scsi_Host *shpnt = dev_get_drvdata(dev);
++ scsi_remove_host(shpnt);
+ release_region(shpnt->io_port, shpnt->n_io_port);
+- if (!(--found))
+- free_irq(shpnt->irq, hosts);
++ free_irq(shpnt->irq, dev);
+ return 0;
+ }
+
+@@ -1805,33 +1706,24 @@
+ int current_ldn;
+ int id, lun;
+ int target;
+- int host_index;
+ int max_pun;
+ int i;
+- struct scatterlist *sl;
++ struct scatterlist *sg;
+
+ shpnt = cmd->device->host;
+- /* search for the right hostadapter */
+- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+
+- if (!hosts[host_index]) { /* invalid hostadapter descriptor address */
+- cmd->result = DID_NO_CONNECT << 16;
+- if (done)
+- done(cmd);
+- return 0;
+- }
+- max_pun = subsystem_maxid(host_index);
++ max_pun = subsystem_maxid(shpnt);
+ if (ibm_ansi_order) {
+ target = max_pun - 1 - cmd->device->id;
+- if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index)))
++ if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt)))
+ target--;
+- else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index)))
++ else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt)))
+ target++;
+ } else
+ target = cmd->device->id;
+
+ /* if (target,lun) is NO LUN or not existing at all, return error */
+- if ((get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_DEVICE)) {
++ if ((get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_DEVICE)) {
+ cmd->result = DID_NO_CONNECT << 16;
+ if (done)
+ done(cmd);
+@@ -1839,16 +1731,16 @@
+ }
+
+ /*if (target,lun) unassigned, do further checks... */
+- ldn = get_ldn(host_index)[target][cmd->device->lun];
++ ldn = get_ldn(shpnt)[target][cmd->device->lun];
+ if (ldn >= MAX_LOG_DEV) { /* on invalid ldn do special stuff */
+ if (ldn > MAX_LOG_DEV) { /* dynamical remapping if ldn unassigned */
+- current_ldn = next_ldn(host_index); /* stop-value for one circle */
+- while (ld(host_index)[next_ldn(host_index)].cmd) { /* search for a occupied, but not in */
++ current_ldn = next_ldn(shpnt); /* stop-value for one circle */
++ while (ld(shpnt)[next_ldn(shpnt)].cmd) { /* search for an occupied, but not in */
+ /* command-processing ldn. */
+- next_ldn(host_index)++;
+- if (next_ldn(host_index) >= MAX_LOG_DEV)
+- next_ldn(host_index) = 7;
+- if (current_ldn == next_ldn(host_index)) { /* One circle done ? */
++ next_ldn(shpnt)++;
++ if (next_ldn(shpnt) >= MAX_LOG_DEV)
++ next_ldn(shpnt) = 7;
++ if (current_ldn == next_ldn(shpnt)) { /* One circle done ? */
+ /* no non-processing ldn found */
+ scmd_printk(KERN_WARNING, cmd,
+ "IBM MCA SCSI: Cannot assign SCSI-device dynamically!\n"
+@@ -1864,56 +1756,56 @@
+ /* unmap non-processing ldn */
+ for (id = 0; id < max_pun; id++)
+ for (lun = 0; lun < 8; lun++) {
+- if (get_ldn(host_index)[id][lun] == next_ldn(host_index)) {
+- get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE;
+- get_scsi(host_index)[id][lun] = TYPE_NO_DEVICE;
++ if (get_ldn(shpnt)[id][lun] == next_ldn(shpnt)) {
++ get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE;
++ get_scsi(shpnt)[id][lun] = TYPE_NO_DEVICE;
+ /* unmap entry */
+ }
+ }
+ /* set reduced interrupt_handler-mode for checking */
+- local_checking_phase_flag(host_index) = 1;
++ local_checking_phase_flag(shpnt) = 1;
+ /* map found ldn to pun,lun */
+- get_ldn(host_index)[target][cmd->device->lun] = next_ldn(host_index);
++ get_ldn(shpnt)[target][cmd->device->lun] = next_ldn(shpnt);
+ /* change ldn to the right value, that is now next_ldn */
+- ldn = next_ldn(host_index);
++ ldn = next_ldn(shpnt);
+ /* unassign all ldns (pun,lun,ldn does not matter for remove) */
+- immediate_assign(host_index, 0, 0, 0, REMOVE_LDN);
++ immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN);
+ /* set only LDN for remapped device */
+- immediate_assign(host_index, target, cmd->device->lun, ldn, SET_LDN);
++ immediate_assign(shpnt, target, cmd->device->lun, ldn, SET_LDN);
+ /* get device information for ld[ldn] */
+- if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) {
+- ld(host_index)[ldn].cmd = NULL; /* To prevent panic set 0, because
++ if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) {
++ ld(shpnt)[ldn].cmd = NULL; /* To prevent panic set 0, because
+ devices that were not assigned,
+ should have nothing in progress. */
+- get_scsi(host_index)[target][cmd->device->lun] = ld(host_index)[ldn].device_type;
++ get_scsi(shpnt)[target][cmd->device->lun] = ld(shpnt)[ldn].device_type;
+ /* increase assignment counters for statistics in /proc */
+- IBM_DS(host_index).dynamical_assignments++;
+- IBM_DS(host_index).ldn_assignments[ldn]++;
++ IBM_DS(shpnt).dynamical_assignments++;
++ IBM_DS(shpnt).ldn_assignments[ldn]++;
+ } else
+ /* panic here, because a device, found at boottime has
+ vanished */
+ panic("IBM MCA SCSI: ldn=0x%x, SCSI-device on (%d,%d) vanished!\n", ldn, target, cmd->device->lun);
+ /* unassign again all ldns (pun,lun,ldn does not matter for remove) */
+- immediate_assign(host_index, 0, 0, 0, REMOVE_LDN);
++ immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN);
+ /* remap all ldns, as written in the pun/lun table */
+ lun = 0;
+ #ifdef CONFIG_SCSI_MULTI_LUN
+ for (lun = 0; lun < 8; lun++)
+ #endif
+ for (id = 0; id < max_pun; id++) {
+- if (get_ldn(host_index)[id][lun] <= MAX_LOG_DEV)
+- immediate_assign(host_index, id, lun, get_ldn(host_index)[id][lun], SET_LDN);
++ if (get_ldn(shpnt)[id][lun] <= MAX_LOG_DEV)
++ immediate_assign(shpnt, id, lun, get_ldn(shpnt)[id][lun], SET_LDN);
+ }
+ /* set back to normal interrupt_handling */
+- local_checking_phase_flag(host_index) = 0;
++ local_checking_phase_flag(shpnt) = 0;
+ #ifdef IM_DEBUG_PROBE
+ /* Information on syslog terminal */
+ printk("IBM MCA SCSI: ldn=0x%x dynamically reassigned to (%d,%d).\n", ldn, target, cmd->device->lun);
+ #endif
+ /* increase next_ldn for next dynamical assignment */
+- next_ldn(host_index)++;
+- if (next_ldn(host_index) >= MAX_LOG_DEV)
+- next_ldn(host_index) = 7;
++ next_ldn(shpnt)++;
++ if (next_ldn(shpnt) >= MAX_LOG_DEV)
++ next_ldn(shpnt) = 7;
+ } else { /* wall against Linux accesses to the subsystem adapter */
+ cmd->result = DID_BAD_TARGET << 16;
+ if (done)
+@@ -1923,34 +1815,32 @@
+ }
+
+ /*verify there is no command already in progress for this log dev */
+- if (ld(host_index)[ldn].cmd)
++ if (ld(shpnt)[ldn].cmd)
+ panic("IBM MCA SCSI: cmd already in progress for this ldn.\n");
+
+ /*save done in cmd, and save cmd for the interrupt handler */
+ cmd->scsi_done = done;
+- ld(host_index)[ldn].cmd = cmd;
++ ld(shpnt)[ldn].cmd = cmd;
+
+ /*fill scb information independent of the scsi command */
+- scb = &(ld(host_index)[ldn].scb);
+- ld(host_index)[ldn].tsb.dev_status = 0;
++ scb = &(ld(shpnt)[ldn].scb);
++ ld(shpnt)[ldn].tsb.dev_status = 0;
+ scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE;
+- scb->tsb_adr = isa_virt_to_bus(&(ld(host_index)[ldn].tsb));
++ scb->tsb_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].tsb));
+ scsi_cmd = cmd->cmnd[0];
+
+- if (cmd->use_sg) {
+- i = cmd->use_sg;
+- sl = (struct scatterlist *) (cmd->request_buffer);
+- if (i > 16)
+- panic("IBM MCA SCSI: scatter-gather list too long.\n");
+- while (--i >= 0) {
+- ld(host_index)[ldn].sge[i].address = (void *) (isa_page_to_bus(sl[i].page) + sl[i].offset);
+- ld(host_index)[ldn].sge[i].byte_length = sl[i].length;
++ if (scsi_sg_count(cmd)) {
++ BUG_ON(scsi_sg_count(cmd) > 16);
++
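++ /* translate each midlayer scatterlist entry into an adapter sge (ISA bus address and byte length) */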
++ scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
++ ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset);
++ ld(shpnt)[ldn].sge[i].byte_length = sg->length;
+ }
+ scb->enable |= IM_POINTER_TO_LIST;
+- scb->sys_buf_adr = isa_virt_to_bus(&(ld(host_index)[ldn].sge[0]));
+- scb->sys_buf_length = cmd->use_sg * sizeof(struct im_sge);
++ scb->sys_buf_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].sge[0]));
++ scb->sys_buf_length = scsi_sg_count(cmd) * sizeof(struct im_sge);
+ } else {
+- scb->sys_buf_adr = isa_virt_to_bus(cmd->request_buffer);
++ scb->sys_buf_adr = isa_virt_to_bus(scsi_sglist(cmd));
+ /* recent Linux midlevel SCSI places 1024 byte for inquiry
+ * command. Far too much for old PS/2 hardware. */
+ switch (scsi_cmd) {
+@@ -1961,16 +1851,16 @@
+ case REQUEST_SENSE:
+ case MODE_SENSE:
+ case MODE_SELECT:
+- if (cmd->request_bufflen > 255)
++ if (scsi_bufflen(cmd) > 255)
+ scb->sys_buf_length = 255;
+ else
+- scb->sys_buf_length = cmd->request_bufflen;
++ scb->sys_buf_length = scsi_bufflen(cmd);
+ break;
+ case TEST_UNIT_READY:
+ scb->sys_buf_length = 0;
+ break;
+ default:
+- scb->sys_buf_length = cmd->request_bufflen;
++ scb->sys_buf_length = scsi_bufflen(cmd);
+ break;
+ }
+ }
+@@ -1982,16 +1872,16 @@
+
+ /* for specific device-type debugging: */
+ #ifdef IM_DEBUG_CMD_SPEC_DEV
+- if (ld(host_index)[ldn].device_type == IM_DEBUG_CMD_DEVICE)
+- printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(host_index)[ldn].device_type, scsi_cmd, ldn);
++ if (ld(shpnt)[ldn].device_type == IM_DEBUG_CMD_DEVICE)
++ printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(shpnt)[ldn].device_type, scsi_cmd, ldn);
+ #endif
+
+ /* for possible panics store current command */
+- last_scsi_command(host_index)[ldn] = scsi_cmd;
+- last_scsi_type(host_index)[ldn] = IM_SCB;
++ last_scsi_command(shpnt)[ldn] = scsi_cmd;
++ last_scsi_type(shpnt)[ldn] = IM_SCB;
+ /* update statistical info */
+- IBM_DS(host_index).total_accesses++;
+- IBM_DS(host_index).ldn_access[ldn]++;
++ IBM_DS(shpnt).total_accesses++;
++ IBM_DS(shpnt).ldn_access[ldn]++;
+
+ switch (scsi_cmd) {
+ case READ_6:
+@@ -2003,17 +1893,17 @@
+ /* Distinguish between disk and other devices. Only disks (that are the
+ most frequently accessed devices) should be supported by the
+ IBM-SCSI-Subsystem commands. */
+- switch (ld(host_index)[ldn].device_type) {
++ switch (ld(shpnt)[ldn].device_type) {
+ case TYPE_DISK: /* for harddisks enter here ... */
+ case TYPE_MOD: /* ... try it also for MO-drives (send flames as */
+ /* you like, if this won't work.) */
+ if (scsi_cmd == READ_6 || scsi_cmd == READ_10 || scsi_cmd == READ_12) {
+ /* read command preparations */
+ scb->enable |= IM_READ_CONTROL;
+- IBM_DS(host_index).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */
++ IBM_DS(shpnt).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */
+ scb->command = IM_READ_DATA_CMD | IM_NO_DISCONNECT;
+ } else { /* write command preparations */
+- IBM_DS(host_index).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */
++ IBM_DS(shpnt).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */
+ scb->command = IM_WRITE_DATA_CMD | IM_NO_DISCONNECT;
+ }
+ if (scsi_cmd == READ_6 || scsi_cmd == WRITE_6) {
+@@ -2023,9 +1913,9 @@
+ scb->u1.log_blk_adr = (((unsigned) cmd->cmnd[5]) << 0) | (((unsigned) cmd->cmnd[4]) << 8) | (((unsigned) cmd->cmnd[3]) << 16) | (((unsigned) cmd->cmnd[2]) << 24);
+ scb->u2.blk.count = (((unsigned) cmd->cmnd[8]) << 0) | (((unsigned) cmd->cmnd[7]) << 8);
+ }
+- last_scsi_logical_block(host_index)[ldn] = scb->u1.log_blk_adr;
+- last_scsi_blockcount(host_index)[ldn] = scb->u2.blk.count;
+- scb->u2.blk.length = ld(host_index)[ldn].block_length;
++ last_scsi_logical_block(shpnt)[ldn] = scb->u1.log_blk_adr;
++ last_scsi_blockcount(shpnt)[ldn] = scb->u2.blk.count;
++ scb->u2.blk.length = ld(shpnt)[ldn].block_length;
+ break;
+ /* for other devices, enter here. Other types are not known by
+ Linux! TYPE_NO_LUN is forbidden as valid device. */
+@@ -2046,14 +1936,14 @@
+ scb->enable |= IM_BYPASS_BUFFER;
+ scb->u1.scsi_cmd_length = cmd->cmd_len;
+ memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+- last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ /* Read/write on this non-disk devices is also displayworthy,
+ so flash-up the LED/display. */
+ break;
+ }
+ break;
+ case INQUIRY:
+- IBM_DS(host_index).ldn_inquiry_access[ldn]++;
++ IBM_DS(shpnt).ldn_inquiry_access[ldn]++;
+ scb->command = IM_DEVICE_INQUIRY_CMD;
+ scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER;
+ scb->u1.log_blk_adr = 0;
+@@ -2064,7 +1954,7 @@
+ scb->u1.log_blk_adr = 0;
+ scb->u1.scsi_cmd_length = 6;
+ memcpy(scb->u2.scsi_command, cmd->cmnd, 6);
+- last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ break;
+ case READ_CAPACITY:
+ /* the length of system memory buffer must be exactly 8 bytes */
+@@ -2081,12 +1971,12 @@
+ /* Commands that need write-only-mode (system -> device): */
+ case MODE_SELECT:
+ case MODE_SELECT_10:
+- IBM_DS(host_index).ldn_modeselect_access[ldn]++;
++ IBM_DS(shpnt).ldn_modeselect_access[ldn]++;
+ scb->command = IM_OTHER_SCSI_CMD_CMD;
+ scb->enable |= IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; /*Select needs WRITE-enabled */
+ scb->u1.scsi_cmd_length = cmd->cmd_len;
+ memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+- last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ break;
+ /* For other commands, read-only is useful. Most other commands are
+ running without an input-data-block. */
+@@ -2095,19 +1985,19 @@
+ scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER;
+ scb->u1.scsi_cmd_length = cmd->cmd_len;
+ memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+- last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ break;
+ }
+ /*issue scb command, and return */
+ if (++disk_rw_in_progress == 1)
+ PS2_DISK_LED_ON(shpnt->host_no, target);
+
+- if (last_scsi_type(host_index)[ldn] == IM_LONG_SCB) {
+- issue_cmd(host_index, isa_virt_to_bus(scb), IM_LONG_SCB | ldn);
+- IBM_DS(host_index).long_scbs++;
++ if (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB) {
++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_LONG_SCB | ldn);
++ IBM_DS(shpnt).long_scbs++;
+ } else {
+- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+- IBM_DS(host_index).scbs++;
++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++ IBM_DS(shpnt).scbs++;
+ }
+ return 0;
+ }
+@@ -2122,7 +2012,6 @@
+ unsigned int ldn;
+ void (*saved_done) (Scsi_Cmnd *);
+ int target;
+- int host_index;
+ int max_pun;
+ unsigned long imm_command;
+
+@@ -2131,35 +2020,23 @@
+ #endif
+
+ shpnt = cmd->device->host;
+- /* search for the right hostadapter */
+- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+
+- if (!hosts[host_index]) { /* invalid hostadapter descriptor address */
+- cmd->result = DID_NO_CONNECT << 16;
+- if (cmd->scsi_done)
+- (cmd->scsi_done) (cmd);
+- shpnt = cmd->device->host;
+-#ifdef IM_DEBUG_PROBE
+- printk(KERN_DEBUG "IBM MCA SCSI: Abort adapter selection failed!\n");
+-#endif
+- return SUCCESS;
+- }
+- max_pun = subsystem_maxid(host_index);
++ max_pun = subsystem_maxid(shpnt);
+ if (ibm_ansi_order) {
+ target = max_pun - 1 - cmd->device->id;
+- if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index)))
++ if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt)))
+ target--;
+- else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index)))
++ else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt)))
+ target++;
+ } else
+ target = cmd->device->id;
+
+ /* get logical device number, and disable system interrupts */
+ printk(KERN_WARNING "IBM MCA SCSI: Sending abort to device pun=%d, lun=%d.\n", target, cmd->device->lun);
+- ldn = get_ldn(host_index)[target][cmd->device->lun];
++ ldn = get_ldn(shpnt)[target][cmd->device->lun];
+
+ /*if cmd for this ldn has already finished, no need to abort */
+- if (!ld(host_index)[ldn].cmd) {
++ if (!ld(shpnt)[ldn].cmd) {
+ return SUCCESS;
+ }
+
+@@ -2170,20 +2047,20 @@
+ saved_done = cmd->scsi_done;
+ cmd->scsi_done = internal_done;
+ cmd->SCp.Status = 0;
+- last_scsi_command(host_index)[ldn] = IM_ABORT_IMM_CMD;
+- last_scsi_type(host_index)[ldn] = IM_IMM_CMD;
+- imm_command = inl(IM_CMD_REG(host_index));
++ last_scsi_command(shpnt)[ldn] = IM_ABORT_IMM_CMD;
++ last_scsi_type(shpnt)[ldn] = IM_IMM_CMD;
++ imm_command = inl(IM_CMD_REG(shpnt));
+ imm_command &= (unsigned long) (0xffff0000); /* mask reserved stuff */
+ imm_command |= (unsigned long) (IM_ABORT_IMM_CMD);
+ /* must wait for attention reg not busy */
+ /* FIXME - timeout, politeness */
+ while (1) {
+- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ break;
+ }
+ /* write registers and enable system interrupts */
+- outl(imm_command, IM_CMD_REG(host_index));
+- outb(IM_IMM_CMD | ldn, IM_ATTN_REG(host_index));
++ outl(imm_command, IM_CMD_REG(shpnt));
++ outb(IM_IMM_CMD | ldn, IM_ATTN_REG(shpnt));
+ #ifdef IM_DEBUG_PROBE
+ printk("IBM MCA SCSI: Abort queued to adapter...\n");
+ #endif
+@@ -2202,7 +2079,7 @@
+ cmd->result |= DID_ABORT << 16;
+ if (cmd->scsi_done)
+ (cmd->scsi_done) (cmd);
+- ld(host_index)[ldn].cmd = NULL;
++ ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_PROBE
+ printk("IBM MCA SCSI: Abort finished with success.\n");
+ #endif
+@@ -2211,7 +2088,7 @@
+ cmd->result |= DID_NO_CONNECT << 16;
+ if (cmd->scsi_done)
+ (cmd->scsi_done) (cmd);
+- ld(host_index)[ldn].cmd = NULL;
++ ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_PROBE
+ printk("IBM MCA SCSI: Abort failed.\n");
+ #endif
+@@ -2236,71 +2113,65 @@
+ struct Scsi_Host *shpnt;
+ Scsi_Cmnd *cmd_aid;
+ int ticks, i;
+- int host_index;
+ unsigned long imm_command;
+
+ BUG_ON(cmd == NULL);
+
+ ticks = IM_RESET_DELAY * HZ;
+ shpnt = cmd->device->host;
+- /* search for the right hostadapter */
+- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+
+- if (!hosts[host_index]) /* invalid hostadapter descriptor address */
+- return FAILED;
+-
+- if (local_checking_phase_flag(host_index)) {
++ if (local_checking_phase_flag(shpnt)) {
+ printk(KERN_WARNING "IBM MCA SCSI: unable to reset while checking devices.\n");
+ return FAILED;
+ }
+
+ /* issue reset immediate command to subsystem, and wait for interrupt */
+ printk("IBM MCA SCSI: resetting all devices.\n");
+- reset_status(host_index) = IM_RESET_IN_PROGRESS;
+- last_scsi_command(host_index)[0xf] = IM_RESET_IMM_CMD;
+- last_scsi_type(host_index)[0xf] = IM_IMM_CMD;
+- imm_command = inl(IM_CMD_REG(host_index));
++ reset_status(shpnt) = IM_RESET_IN_PROGRESS;
++ last_scsi_command(shpnt)[0xf] = IM_RESET_IMM_CMD;
++ last_scsi_type(shpnt)[0xf] = IM_IMM_CMD;
++ imm_command = inl(IM_CMD_REG(shpnt));
+ imm_command &= (unsigned long) (0xffff0000); /* mask reserved stuff */
+ imm_command |= (unsigned long) (IM_RESET_IMM_CMD);
+ /* must wait for attention reg not busy */
+ while (1) {
+- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ break;
+ spin_unlock_irq(shpnt->host_lock);
+ yield();
+ spin_lock_irq(shpnt->host_lock);
+ }
+ /*write registers and enable system interrupts */
+- outl(imm_command, IM_CMD_REG(host_index));
+- outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(host_index));
++ outl(imm_command, IM_CMD_REG(shpnt));
++ outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(shpnt));
+ /* wait for interrupt finished or intr_stat register to be set, as the
+ * interrupt will not be executed, while we are in here! */
+
+ /* FIXME: This is really really icky we so want a sleeping version of this ! */
+- while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(host_index)) & 0x8f) != 0x8f)) {
++ while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(shpnt)) & 0x8f) != 0x8f)) {
+ udelay((1 + 999 / HZ) * 1000);
+ barrier();
+ }
+ /* if reset did not complete, just return an error */
+ if (!ticks) {
+ printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY);
+- reset_status(host_index) = IM_RESET_FINISHED_FAIL;
++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ return FAILED;
+ }
+
+- if ((inb(IM_INTR_REG(host_index)) & 0x8f) == 0x8f) {
++ if ((inb(IM_INTR_REG(shpnt)) & 0x8f) == 0x8f) {
+ /* analysis done by this routine and not by the intr-routine */
+- if (inb(IM_INTR_REG(host_index)) == 0xaf)
+- reset_status(host_index) = IM_RESET_FINISHED_OK_NO_INT;
+- else if (inb(IM_INTR_REG(host_index)) == 0xcf)
+- reset_status(host_index) = IM_RESET_FINISHED_FAIL;
++ if (inb(IM_INTR_REG(shpnt)) == 0xaf)
++ reset_status(shpnt) = IM_RESET_FINISHED_OK_NO_INT;
++ else if (inb(IM_INTR_REG(shpnt)) == 0xcf)
++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ else /* failed, 4get it */
+- reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS_NO_INT;
+- outb(IM_EOI | 0xf, IM_ATTN_REG(host_index));
++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS_NO_INT;
++ outb(IM_EOI | 0xf, IM_ATTN_REG(shpnt));
+ }
+
+ /* if reset failed, just return an error */
+- if (reset_status(host_index) == IM_RESET_FINISHED_FAIL) {
++ if (reset_status(shpnt) == IM_RESET_FINISHED_FAIL) {
+ printk(KERN_ERR "IBM MCA SCSI: reset failed.\n");
+ return FAILED;
+ }
+@@ -2308,9 +2179,9 @@
+ /* so reset finished ok - call outstanding done's, and return success */
+ printk(KERN_INFO "IBM MCA SCSI: Reset successfully completed.\n");
+ for (i = 0; i < MAX_LOG_DEV; i++) {
+- cmd_aid = ld(host_index)[i].cmd;
++ cmd_aid = ld(shpnt)[i].cmd;
+ if (cmd_aid && cmd_aid->scsi_done) {
+- ld(host_index)[i].cmd = NULL;
++ ld(shpnt)[i].cmd = NULL;
+ cmd_aid->result = DID_RESET << 16;
+ }
+ }
+@@ -2351,46 +2222,46 @@
+ }
+
+ /* calculate percentage of total accesses on a ldn */
+-static int ldn_access_load(int host_index, int ldn)
++static int ldn_access_load(struct Scsi_Host *shpnt, int ldn)
+ {
+- if (IBM_DS(host_index).total_accesses == 0)
++ if (IBM_DS(shpnt).total_accesses == 0)
+ return (0);
+- if (IBM_DS(host_index).ldn_access[ldn] == 0)
++ if (IBM_DS(shpnt).ldn_access[ldn] == 0)
+ return (0);
+- return (IBM_DS(host_index).ldn_access[ldn] * 100) / IBM_DS(host_index).total_accesses;
++ return (IBM_DS(shpnt).ldn_access[ldn] * 100) / IBM_DS(shpnt).total_accesses;
+ }
+
+ /* calculate total amount of r/w-accesses */
+-static int ldn_access_total_read_write(int host_index)
++static int ldn_access_total_read_write(struct Scsi_Host *shpnt)
+ {
+ int a;
+ int i;
+
+ a = 0;
+ for (i = 0; i <= MAX_LOG_DEV; i++)
+- a += IBM_DS(host_index).ldn_read_access[i] + IBM_DS(host_index).ldn_write_access[i];
++ a += IBM_DS(shpnt).ldn_read_access[i] + IBM_DS(shpnt).ldn_write_access[i];
+ return (a);
+ }
+
+-static int ldn_access_total_inquiry(int host_index)
++static int ldn_access_total_inquiry(struct Scsi_Host *shpnt)
+ {
+ int a;
+ int i;
+
+ a = 0;
+ for (i = 0; i <= MAX_LOG_DEV; i++)
+- a += IBM_DS(host_index).ldn_inquiry_access[i];
++ a += IBM_DS(shpnt).ldn_inquiry_access[i];
+ return (a);
+ }
+
+-static int ldn_access_total_modeselect(int host_index)
++static int ldn_access_total_modeselect(struct Scsi_Host *shpnt)
+ {
+ int a;
+ int i;
+
+ a = 0;
+ for (i = 0; i <= MAX_LOG_DEV; i++)
+- a += IBM_DS(host_index).ldn_modeselect_access[i];
++ a += IBM_DS(shpnt).ldn_modeselect_access[i];
+ return (a);
+ }
+
+@@ -2398,19 +2269,14 @@
+ static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout)
+ {
+ int len = 0;
+- int i, id, lun, host_index;
++ int i, id, lun;
+ unsigned long flags;
+ int max_pun;
+
+- for (i = 0; hosts[i] && hosts[i] != shpnt; i++);
+
+- spin_lock_irqsave(hosts[i]->host_lock, flags); /* Check it */
+- host_index = i;
+- if (!shpnt) {
+- len += sprintf(buffer + len, "\nIBM MCA SCSI: Can't find adapter");
+- return len;
+- }
+- max_pun = subsystem_maxid(host_index);
++ spin_lock_irqsave(shpnt->host_lock, flags);
++
++ max_pun = subsystem_maxid(shpnt);
+
+ len += sprintf(buffer + len, "\n IBM-SCSI-Subsystem-Linux-Driver, Version %s\n\n\n", IBMMCA_SCSI_DRIVER_VERSION);
+ len += sprintf(buffer + len, " SCSI Access-Statistics:\n");
+@@ -2421,40 +2287,40 @@
+ len += sprintf(buffer + len, " Multiple LUN probing.....: No\n");
+ #endif
+ len += sprintf(buffer + len, " This Hostnumber..........: %d\n", shpnt->host_no);
+- len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(host_index)));
++ len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(shpnt)));
+ len += sprintf(buffer + len, " (Shared) IRQ.............: %d\n", IM_IRQ);
+- len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(host_index).total_interrupts);
+- len += sprintf(buffer + len, " Total SCSI Accesses......: %d\n", IBM_DS(host_index).total_accesses);
+- len += sprintf(buffer + len, " Total short SCBs.........: %d\n", IBM_DS(host_index).scbs);
+- len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(host_index).long_scbs);
+- len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(host_index));
+- len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(host_index));
+- len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(host_index));
+- len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(host_index).total_accesses - ldn_access_total_read_write(host_index)
+- - ldn_access_total_modeselect(host_index)
+- - ldn_access_total_inquiry(host_index));
+- len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(host_index).total_errors);
++ len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(shpnt).total_interrupts);
++ len += sprintf(buffer + len, " Total SCSI Accesses......: %d\n", IBM_DS(shpnt).total_accesses);
++ len += sprintf(buffer + len, " Total short SCBs.........: %d\n", IBM_DS(shpnt).scbs);
++ len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(shpnt).long_scbs);
++ len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(shpnt));
++ len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(shpnt));
++ len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(shpnt));
++ len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(shpnt).total_accesses - ldn_access_total_read_write(shpnt)
++ - ldn_access_total_modeselect(shpnt)
++ - ldn_access_total_inquiry(shpnt));
++ len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(shpnt).total_errors);
+ len += sprintf(buffer + len, " Logical-Device-Number (LDN) Access-Statistics:\n");
+ len += sprintf(buffer + len, " LDN | Accesses [%%] | READ | WRITE | ASSIGNMENTS\n");
+ len += sprintf(buffer + len, " -----|--------------|-----------|-----------|--------------\n");
+ for (i = 0; i <= MAX_LOG_DEV; i++)
+- len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(host_index, i), IBM_DS(host_index).ldn_read_access[i], IBM_DS(host_index).ldn_write_access[i], IBM_DS(host_index).ldn_assignments[i]);
++ len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(shpnt, i), IBM_DS(shpnt).ldn_read_access[i], IBM_DS(shpnt).ldn_write_access[i], IBM_DS(shpnt).ldn_assignments[i]);
+ len += sprintf(buffer + len, " -----------------------------------------------------------\n\n");
+ len += sprintf(buffer + len, " Dynamical-LDN-Assignment-Statistics:\n");
+- len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(host_index).total_scsi_devices);
+- len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(host_index).dyn_flag ? "Yes" : "No ");
+- len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(host_index));
+- len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(host_index).dynamical_assignments);
++ len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(shpnt).total_scsi_devices);
++ len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(shpnt).dyn_flag ? "Yes" : "No ");
++ len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(shpnt));
++ len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(shpnt).dynamical_assignments);
+ len += sprintf(buffer + len, "\n Current SCSI-Device-Mapping:\n");
+ len += sprintf(buffer + len, " Physical SCSI-Device Map Logical SCSI-Device Map\n");
+ len += sprintf(buffer + len, " ID\\LUN 0 1 2 3 4 5 6 7 ID\\LUN 0 1 2 3 4 5 6 7\n");
+ for (id = 0; id < max_pun; id++) {
+ len += sprintf(buffer + len, " %2d ", id);
+ for (lun = 0; lun < 8; lun++)
+- len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(host_index)[id][lun]));
++ len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(shpnt)[id][lun]));
+ len += sprintf(buffer + len, " %2d ", id);
+ for (lun = 0; lun < 8; lun++)
+- len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(host_index)[id][lun]));
++ len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(shpnt)[id][lun]));
+ len += sprintf(buffer + len, "\n");
+ }
+
+@@ -2488,20 +2354,31 @@
+
+ __setup("ibmmcascsi=", option_setup);
+
+-static struct scsi_host_template driver_template = {
+- .proc_name = "ibmmca",
+- .proc_info = ibmmca_proc_info,
+- .name = "IBM SCSI-Subsystem",
+- .detect = ibmmca_detect,
+- .release = ibmmca_release,
+- .queuecommand = ibmmca_queuecommand,
+- .eh_abort_handler = ibmmca_abort,
+- .eh_host_reset_handler = ibmmca_host_reset,
+- .bios_param = ibmmca_biosparam,
+- .can_queue = 16,
+- .this_id = 7,
+- .sg_tablesize = 16,
+- .cmd_per_lun = 1,
+- .use_clustering = ENABLE_CLUSTERING,
++static struct mca_driver ibmmca_driver = {
++ .id_table = ibmmca_id_table,
++ .driver = {
++ .name = "ibmmca",
++ .bus = &mca_bus_type,
++ .probe = ibmmca_probe,
++ .remove = __devexit_p(ibmmca_remove),
++ },
+ };
+-#include "scsi_module.c"
++
++static int __init ibmmca_init(void)
++{
++#ifdef MODULE
++ /* If the driver is run as module, read from conf.modules or cmd-line */
++ if (boot_options)
++ option_setup(boot_options);
++#endif
++
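++ /* MCA_INTEGSCSI lets the MCA core probe the slotless integrated subsystem as well as slot adapters */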
++ return mca_register_driver_integrated(&ibmmca_driver, MCA_INTEGSCSI);
++}
++
++static void __exit ibmmca_exit(void)
++{
++ mca_unregister_driver(&ibmmca_driver);
++}
++
++module_init(ibmmca_init);
++module_exit(ibmmca_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.h linux-2.6.22-591/drivers/scsi/ibmmca.h
+--- linux-2.6.22-570/drivers/scsi/ibmmca.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ibmmca.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,21 +0,0 @@
+-/*
+- * Low Level Driver for the IBM Microchannel SCSI Subsystem
+- * (Headerfile, see Documentation/scsi/ibmmca.txt for description of the
+- * IBM MCA SCSI-driver.
+- * For use under the GNU General Public License within the Linux-kernel project.
+- * This include file works only correctly with kernel 2.4.0 or higher!!! */
+-
+-#ifndef _IBMMCA_H
+-#define _IBMMCA_H
+-
+-/* Common forward declarations for all Linux-versions: */
+-
+-/* Interfaces to the midlevel Linux SCSI driver */
+-static int ibmmca_detect (struct scsi_host_template *);
+-static int ibmmca_release (struct Scsi_Host *);
+-static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *));
+-static int ibmmca_abort (Scsi_Cmnd *);
+-static int ibmmca_host_reset (Scsi_Cmnd *);
+-static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *);
+-
+-#endif /* _IBMMCA_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -173,8 +173,7 @@
+ }
+ }
+ if (in_use)
+- printk(KERN_WARNING
+- "ibmvscsi: releasing event pool with %d "
++ dev_warn(hostdata->dev, "releasing event pool with %d "
+ "events still in use?\n", in_use);
+ kfree(pool->events);
+ dma_free_coherent(hostdata->dev,
+@@ -210,14 +209,12 @@
+ struct srp_event_struct *evt)
+ {
+ if (!valid_event_struct(pool, evt)) {
+- printk(KERN_ERR
+- "ibmvscsi: Freeing invalid event_struct %p "
++ dev_err(evt->hostdata->dev, "Freeing invalid event_struct %p "
+ "(not in pool %p)\n", evt, pool->events);
+ return;
+ }
+ if (atomic_inc_return(&evt->free) != 1) {
+- printk(KERN_ERR
+- "ibmvscsi: Freeing event_struct %p "
++ dev_err(evt->hostdata->dev, "Freeing event_struct %p "
+ "which is not in use!\n", evt);
+ return;
+ }
+@@ -408,13 +405,6 @@
+ return 1;
+ }
+
+- if (sg_mapped > SG_ALL) {
+- printk(KERN_ERR
+- "ibmvscsi: More than %d mapped sg entries, got %d\n",
+- SG_ALL, sg_mapped);
+- return 0;
+- }
+-
+ indirect->table_desc.va = 0;
+ indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf);
+ indirect->table_desc.key = 0;
+@@ -433,10 +423,9 @@
+ SG_ALL * sizeof(struct srp_direct_buf),
+ &evt_struct->ext_list_token, 0);
+ if (!evt_struct->ext_list) {
+- printk(KERN_ERR
+- "ibmvscsi: Can't allocate memory for indirect table\n");
++ sdev_printk(KERN_ERR, cmd->device,
++ "Can't allocate memory for indirect table\n");
+ return 0;
+-
+ }
+ }
+
+@@ -471,8 +460,8 @@
+ cmd->request_bufflen,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(data->va)) {
+- printk(KERN_ERR
+- "ibmvscsi: Unable to map request_buffer for command!\n");
++ sdev_printk(KERN_ERR, cmd->device,
++ "Unable to map request_buffer for command!\n");
+ return 0;
+ }
+ data->len = cmd->request_bufflen;
+@@ -503,12 +492,12 @@
+ case DMA_NONE:
+ return 1;
+ case DMA_BIDIRECTIONAL:
+- printk(KERN_ERR
+- "ibmvscsi: Can't map DMA_BIDIRECTIONAL to read/write\n");
++ sdev_printk(KERN_ERR, cmd->device,
++ "Can't map DMA_BIDIRECTIONAL to read/write\n");
+ return 0;
+ default:
+- printk(KERN_ERR
+- "ibmvscsi: Unknown data direction 0x%02x; can't map!\n",
++ sdev_printk(KERN_ERR, cmd->device,
++ "Unknown data direction 0x%02x; can't map!\n",
+ cmd->sc_data_direction);
+ return 0;
+ }
+@@ -520,6 +509,70 @@
+ return map_single_data(cmd, srp_cmd, dev);
+ }
+
++/**
++ * purge_requests: Our virtual adapter just shut down. Purge any sent requests.
++ * @hostdata: the adapter
++ * @error_code: error code with which to fail the purged commands
++ */
++static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
++{
++ struct srp_event_struct *tmp_evt, *pos;
++ unsigned long flags;
++
++ spin_lock_irqsave(hostdata->host->host_lock, flags);
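++ /* walk the sent list under the host lock; the _safe iterator permits deletion while walking */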
++ list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) {
++ list_del(&tmp_evt->list);
++ del_timer(&tmp_evt->timer);
++ if (tmp_evt->cmnd) {
++ tmp_evt->cmnd->result = (error_code << 16);
++ unmap_cmd_data(&tmp_evt->iu.srp.cmd,
++ tmp_evt,
++ tmp_evt->hostdata->dev);
++ if (tmp_evt->cmnd_done)
++ tmp_evt->cmnd_done(tmp_evt->cmnd);
++ } else if (tmp_evt->done)
++ tmp_evt->done(tmp_evt);
++ free_event_struct(&tmp_evt->hostdata->pool, tmp_evt);
++ }
++ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
++}
++
++/**
++ * ibmvscsi_reset_host - Reset the connection to the server
++ * @hostdata: struct ibmvscsi_host_data to reset
++ */
++static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
++{
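++ /* quiesce the midlayer, fail everything outstanding, then bring the CRQ connection back up */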
++ scsi_block_requests(hostdata->host);
++ atomic_set(&hostdata->request_limit, 0);
++
++ purge_requests(hostdata, DID_ERROR);
++ if ((ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata)) ||
++ (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0)) ||
++ (vio_enable_interrupts(to_vio_dev(hostdata->dev)))) {
++ atomic_set(&hostdata->request_limit, -1);
++ dev_err(hostdata->dev, "error after reset\n");
++ }
++
++ scsi_unblock_requests(hostdata->host);
++}
++
++/**
++ * ibmvscsi_timeout - Internal command timeout handler
++ * @evt_struct: struct srp_event_struct that timed out
++ *
++ * Called when an internally generated command times out
++ */
++static void ibmvscsi_timeout(struct srp_event_struct *evt_struct)
++{
++ struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
++
++ dev_err(hostdata->dev, "Command timed out (%x). Resetting connection\n",
++ evt_struct->iu.srp.cmd.opcode);
++
++ ibmvscsi_reset_host(hostdata);
++}
++
++
+ /* ------------------------------------------------------------
+ * Routines for sending and receiving SRPs
+ */
+@@ -527,12 +580,14 @@
+ * ibmvscsi_send_srp_event: - Transforms event to u64 array and calls send_crq()
+ * @evt_struct: evt_struct to be sent
+ * @hostdata: ibmvscsi_host_data of host
++ * @timeout: timeout in seconds - 0 means do not time command
+ *
+ * Returns the value returned from ibmvscsi_send_crq(). (Zero for success)
+ * Note that this routine assumes that host_lock is held for synchronization
+ */
+ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
+- struct ibmvscsi_host_data *hostdata)
++ struct ibmvscsi_host_data *hostdata,
++ unsigned long timeout)
+ {
+ u64 *crq_as_u64 = (u64 *) &evt_struct->crq;
+ int request_status;
+@@ -588,12 +643,20 @@
+ */
+ list_add_tail(&evt_struct->list, &hostdata->sent);
+
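++ /* arm a per-event timer: if the command does not complete within 'timeout' seconds, ibmvscsi_timeout() resets the connection */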
++ init_timer(&evt_struct->timer);
++ if (timeout) {
++ evt_struct->timer.data = (unsigned long) evt_struct;
++ evt_struct->timer.expires = jiffies + (timeout * HZ);
++ evt_struct->timer.function = (void (*)(unsigned long))ibmvscsi_timeout;
++ add_timer(&evt_struct->timer);
++ }
++
+ if ((rc =
+ ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) {
+ list_del(&evt_struct->list);
++ del_timer(&evt_struct->timer);
+
+- printk(KERN_ERR "ibmvscsi: send error %d\n",
+- rc);
++ dev_err(hostdata->dev, "send error %d\n", rc);
+ atomic_inc(&hostdata->request_limit);
+ goto send_error;
+ }
+@@ -634,9 +697,8 @@
+
+ if (unlikely(rsp->opcode != SRP_RSP)) {
+ if (printk_ratelimit())
+- printk(KERN_WARNING
+- "ibmvscsi: bad SRP RSP type %d\n",
+- rsp->opcode);
++ dev_warn(evt_struct->hostdata->dev,
++ "bad SRP RSP type %d\n", rsp->opcode);
+ }
+
+ if (cmnd) {
+@@ -697,7 +759,7 @@
+ srp_cmd->lun = ((u64) lun) << 48;
+
+ if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
+- printk(KERN_ERR "ibmvscsi: couldn't convert cmd to srp_cmd\n");
++ sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+ free_event_struct(&hostdata->pool, evt_struct);
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
+@@ -722,7 +784,7 @@
+ offsetof(struct srp_indirect_buf, desc_list);
+ }
+
+- return ibmvscsi_send_srp_event(evt_struct, hostdata);
++ return ibmvscsi_send_srp_event(evt_struct, hostdata, 0);
+ }
+
+ /* ------------------------------------------------------------
+@@ -744,10 +806,10 @@
+ DMA_BIDIRECTIONAL);
+
+ if (evt_struct->xfer_iu->mad.adapter_info.common.status) {
+- printk("ibmvscsi: error %d getting adapter info\n",
++ dev_err(hostdata->dev, "error %d getting adapter info\n",
+ evt_struct->xfer_iu->mad.adapter_info.common.status);
+ } else {
+- printk("ibmvscsi: host srp version: %s, "
++ dev_info(hostdata->dev, "host srp version: %s, "
+ "host partition %s (%d), OS %d, max io %u\n",
+ hostdata->madapter_info.srp_version,
+ hostdata->madapter_info.partition_name,
+@@ -761,10 +823,9 @@
+
+ if (hostdata->madapter_info.os_type == 3 &&
+ strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
+- printk("ibmvscsi: host (Ver. %s) doesn't support large"
+- "transfers\n",
++ dev_err(hostdata->dev, "host (Ver. %s) doesn't support large transfers\n",
+ hostdata->madapter_info.srp_version);
+- printk("ibmvscsi: limiting scatterlists to %d\n",
++ dev_err(hostdata->dev, "limiting scatterlists to %d\n",
+ MAX_INDIRECT_BUFS);
+ hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
+ }
+@@ -784,12 +845,13 @@
+ {
+ struct viosrp_adapter_info *req;
+ struct srp_event_struct *evt_struct;
++ unsigned long flags;
+ dma_addr_t addr;
+
+ evt_struct = get_event_struct(&hostdata->pool);
+ if (!evt_struct) {
+- printk(KERN_ERR "ibmvscsi: couldn't allocate an event "
+- "for ADAPTER_INFO_REQ!\n");
++ dev_err(hostdata->dev,
++ "couldn't allocate an event for ADAPTER_INFO_REQ!\n");
+ return;
+ }
+
+@@ -809,20 +871,20 @@
+ DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(req->buffer)) {
+- printk(KERN_ERR
+- "ibmvscsi: Unable to map request_buffer "
+- "for adapter_info!\n");
++ dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+ free_event_struct(&hostdata->pool, evt_struct);
+ return;
+ }
+
+- if (ibmvscsi_send_srp_event(evt_struct, hostdata)) {
+- printk(KERN_ERR "ibmvscsi: couldn't send ADAPTER_INFO_REQ!\n");
++ spin_lock_irqsave(hostdata->host->host_lock, flags);
++ if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) {
++ dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
+ dma_unmap_single(hostdata->dev,
+ addr,
+ sizeof(hostdata->madapter_info),
+ DMA_BIDIRECTIONAL);
+ }
++ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ };
+
+ /**
+@@ -839,24 +901,23 @@
+ case SRP_LOGIN_RSP: /* it worked! */
+ break;
+ case SRP_LOGIN_REJ: /* refused! */
+- printk(KERN_INFO "ibmvscsi: SRP_LOGIN_REJ reason %u\n",
++ dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
+ evt_struct->xfer_iu->srp.login_rej.reason);
+ /* Login failed. */
+ atomic_set(&hostdata->request_limit, -1);
+ return;
+ default:
+- printk(KERN_ERR
+- "ibmvscsi: Invalid login response typecode 0x%02x!\n",
++ dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
+ evt_struct->xfer_iu->srp.login_rsp.opcode);
+ /* Login failed. */
+ atomic_set(&hostdata->request_limit, -1);
+ return;
+ }
+
+- printk(KERN_INFO "ibmvscsi: SRP_LOGIN succeeded\n");
++ dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
+
+ if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0)
+- printk(KERN_ERR "ibmvscsi: Invalid request_limit.\n");
++ dev_err(hostdata->dev, "Invalid request_limit.\n");
+
+ /* Now we know what the real request-limit is.
+ * This value is set rather than added to request_limit because
+@@ -885,8 +946,7 @@
+ struct srp_login_req *login;
+ struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool);
+ if (!evt_struct) {
+- printk(KERN_ERR
+- "ibmvscsi: couldn't allocate an event for login req!\n");
++ dev_err(hostdata->dev, "couldn't allocate an event for login req!\n");
+ return FAILED;
+ }
+
+@@ -907,9 +967,9 @@
+ */
+ atomic_set(&hostdata->request_limit, 1);
+
+- rc = ibmvscsi_send_srp_event(evt_struct, hostdata);
++ rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+- printk("ibmvscsic: sent SRP login\n");
++ dev_info(hostdata->dev, "sent SRP login\n");
+ return rc;
+ };
+
+@@ -958,13 +1018,13 @@
+
+ if (!found_evt) {
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+- return FAILED;
++ return SUCCESS;
+ }
+
+ evt = get_event_struct(&hostdata->pool);
+ if (evt == NULL) {
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+- printk(KERN_ERR "ibmvscsi: failed to allocate abort event\n");
++ sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n");
+ return FAILED;
+ }
+
+@@ -982,15 +1042,16 @@
+ tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
+ tsk_mgmt->task_tag = (u64) found_evt;
+
+- printk(KERN_INFO "ibmvscsi: aborting command. lun 0x%lx, tag 0x%lx\n",
++ sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n",
+ tsk_mgmt->lun, tsk_mgmt->task_tag);
+
+ evt->sync_srp = &srp_rsp;
+ init_completion(&evt->comp);
+- rsp_rc = ibmvscsi_send_srp_event(evt, hostdata);
++ rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ if (rsp_rc != 0) {
+- printk(KERN_ERR "ibmvscsi: failed to send abort() event\n");
++ sdev_printk(KERN_ERR, cmd->device,
++ "failed to send abort() event. rc=%d\n", rsp_rc);
+ return FAILED;
+ }
+
+@@ -999,8 +1060,7 @@
+ /* make sure we got a good response */
+ if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) {
+ if (printk_ratelimit())
+- printk(KERN_WARNING
+- "ibmvscsi: abort bad SRP RSP type %d\n",
++ sdev_printk(KERN_WARNING, cmd->device, "abort bad SRP RSP type %d\n",
+ srp_rsp.srp.rsp.opcode);
+ return FAILED;
+ }
+@@ -1012,10 +1072,9 @@
+
+ if (rsp_rc) {
+ if (printk_ratelimit())
+- printk(KERN_WARNING
+- "ibmvscsi: abort code %d for task tag 0x%lx\n",
+- rsp_rc,
+- tsk_mgmt->task_tag);
++ sdev_printk(KERN_WARNING, cmd->device,
++ "abort code %d for task tag 0x%lx\n",
++ rsp_rc, tsk_mgmt->task_tag);
+ return FAILED;
+ }
+
+@@ -1034,14 +1093,12 @@
+
+ if (found_evt == NULL) {
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+- printk(KERN_INFO
+- "ibmvscsi: aborted task tag 0x%lx completed\n",
++ sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%lx completed\n",
+ tsk_mgmt->task_tag);
+ return SUCCESS;
+ }
+
+- printk(KERN_INFO
+- "ibmvscsi: successfully aborted task tag 0x%lx\n",
++ sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%lx\n",
+ tsk_mgmt->task_tag);
+
+ cmd->result = (DID_ABORT << 16);
+@@ -1076,7 +1133,7 @@
+ evt = get_event_struct(&hostdata->pool);
+ if (evt == NULL) {
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+- printk(KERN_ERR "ibmvscsi: failed to allocate reset event\n");
++ sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n");
+ return FAILED;
+ }
+
+@@ -1093,15 +1150,16 @@
+ tsk_mgmt->lun = ((u64) lun) << 48;
+ tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
+
+- printk(KERN_INFO "ibmvscsi: resetting device. lun 0x%lx\n",
++ sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
+ tsk_mgmt->lun);
+
+ evt->sync_srp = &srp_rsp;
+ init_completion(&evt->comp);
+- rsp_rc = ibmvscsi_send_srp_event(evt, hostdata);
++ rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ if (rsp_rc != 0) {
+- printk(KERN_ERR "ibmvscsi: failed to send reset event\n");
++ sdev_printk(KERN_ERR, cmd->device,
++ "failed to send reset event. rc=%d\n", rsp_rc);
+ return FAILED;
+ }
+
+@@ -1110,8 +1168,7 @@
+ /* make sure we got a good response */
+ if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) {
+ if (printk_ratelimit())
+- printk(KERN_WARNING
+- "ibmvscsi: reset bad SRP RSP type %d\n",
++ sdev_printk(KERN_WARNING, cmd->device, "reset bad SRP RSP type %d\n",
+ srp_rsp.srp.rsp.opcode);
+ return FAILED;
+ }
+@@ -1123,8 +1180,8 @@
+
+ if (rsp_rc) {
+ if (printk_ratelimit())
+- printk(KERN_WARNING
+- "ibmvscsi: reset code %d for task tag 0x%lx\n",
++ sdev_printk(KERN_WARNING, cmd->device,
++ "reset code %d for task tag 0x%lx\n",
+ rsp_rc, tsk_mgmt->task_tag);
+ return FAILED;
+ }
+@@ -1154,32 +1211,30 @@
+ }
+
+ /**
+- * purge_requests: Our virtual adapter just shut down. purge any sent requests
+- * @hostdata: the adapter
+- */
+-static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
++ * ibmvscsi_eh_host_reset_handler - Reset the connection to the server
++ * @cmd: struct scsi_cmnd having problems
++ */
++static int ibmvscsi_eh_host_reset_handler(struct scsi_cmnd *cmd)
+ {
+- struct srp_event_struct *tmp_evt, *pos;
+- unsigned long flags;
++ unsigned long wait_switch = 0;
++ struct ibmvscsi_host_data *hostdata =
++ (struct ibmvscsi_host_data *)cmd->device->host->hostdata;
+
+- spin_lock_irqsave(hostdata->host->host_lock, flags);
+- list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) {
+- list_del(&tmp_evt->list);
+- if (tmp_evt->cmnd) {
+- tmp_evt->cmnd->result = (error_code << 16);
+- unmap_cmd_data(&tmp_evt->iu.srp.cmd,
+- tmp_evt,
+- tmp_evt->hostdata->dev);
+- if (tmp_evt->cmnd_done)
+- tmp_evt->cmnd_done(tmp_evt->cmnd);
+- } else {
+- if (tmp_evt->done) {
+- tmp_evt->done(tmp_evt);
+- }
+- }
+- free_event_struct(&tmp_evt->hostdata->pool, tmp_evt);
++ dev_err(hostdata->dev, "Resetting connection due to error recovery\n");
++
++ ibmvscsi_reset_host(hostdata);
++
++ for (wait_switch = jiffies + (init_timeout * HZ);
++ time_before(jiffies, wait_switch) &&
++ atomic_read(&hostdata->request_limit) < 2;) {
++
++ msleep(10);
+ }
+- spin_unlock_irqrestore(hostdata->host->host_lock, flags);
++
++ if (atomic_read(&hostdata->request_limit) <= 0)
++ return FAILED;
++
++ return SUCCESS;
+ }
+
+ /**
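The new ibmvscsi_eh_host_reset_handler above resets the connection and then polls request_limit against a jiffies deadline instead of blocking indefinitely. A minimal sketch of that bounded-poll idiom, with a hypothetical ready() predicate standing in for the request_limit check:

	#include <linux/jiffies.h>
	#include <linux/delay.h>
	#include <linux/errno.h>

	static int wait_until_ready(int (*ready)(void *), void *arg,
				    unsigned int timeout_secs)
	{
		unsigned long deadline = jiffies + timeout_secs * HZ;

		while (time_before(jiffies, deadline)) {
			if (ready(arg))
				return 0;	/* condition met in time */
			msleep(10);		/* sleep, don't spin */
		}
		return -ETIMEDOUT;
	}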
+@@ -1191,6 +1246,7 @@
+ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
+ struct ibmvscsi_host_data *hostdata)
+ {
++ long rc;
+ unsigned long flags;
+ struct srp_event_struct *evt_struct =
+ (struct srp_event_struct *)crq->IU_data_ptr;
+@@ -1198,27 +1254,25 @@
+ case 0xC0: /* initialization */
+ switch (crq->format) {
+ case 0x01: /* Initialization message */
+- printk(KERN_INFO "ibmvscsi: partner initialized\n");
++ dev_info(hostdata->dev, "partner initialized\n");
+ /* Send back a response */
+- if (ibmvscsi_send_crq(hostdata,
+- 0xC002000000000000LL, 0) == 0) {
++ if ((rc = ibmvscsi_send_crq(hostdata,
++ 0xC002000000000000LL, 0)) == 0) {
+ /* Now login */
+ send_srp_login(hostdata);
+ } else {
+- printk(KERN_ERR
+- "ibmvscsi: Unable to send init rsp\n");
++ dev_err(hostdata->dev, "Unable to send init rsp. rc=%ld\n", rc);
+ }
+
+ break;
+ case 0x02: /* Initialization response */
+- printk(KERN_INFO
+- "ibmvscsi: partner initialization complete\n");
++ dev_info(hostdata->dev, "partner initialization complete\n");
+
+ /* Now login */
+ send_srp_login(hostdata);
+ break;
+ default:
+- printk(KERN_ERR "ibmvscsi: unknown crq message type\n");
++ dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format);
+ }
+ return;
+ case 0xFF: /* Hypervisor telling us the connection is closed */
+@@ -1226,8 +1280,7 @@
+ atomic_set(&hostdata->request_limit, 0);
+ if (crq->format == 0x06) {
+ /* We need to re-setup the interpartition connection */
+- printk(KERN_INFO
+- "ibmvscsi: Re-enabling adapter!\n");
++ dev_info(hostdata->dev, "Re-enabling adapter!\n");
+ purge_requests(hostdata, DID_REQUEUE);
+ if ((ibmvscsi_reenable_crq_queue(&hostdata->queue,
+ hostdata)) ||
+@@ -1235,13 +1288,10 @@
+ 0xC001000000000000LL, 0))) {
+ atomic_set(&hostdata->request_limit,
+ -1);
+- printk(KERN_ERR
+- "ibmvscsi: error after"
+- " enable\n");
++ dev_err(hostdata->dev, "error after enable\n");
+ }
+ } else {
+- printk(KERN_INFO
+- "ibmvscsi: Virtual adapter failed rc %d!\n",
++ dev_err(hostdata->dev, "Virtual adapter failed rc %d!\n",
+ crq->format);
+
+ purge_requests(hostdata, DID_ERROR);
+@@ -1251,8 +1301,7 @@
+ 0xC001000000000000LL, 0))) {
+ atomic_set(&hostdata->request_limit,
+ -1);
+- printk(KERN_ERR
+- "ibmvscsi: error after reset\n");
++ dev_err(hostdata->dev, "error after reset\n");
+ }
+ }
+ scsi_unblock_requests(hostdata->host);
+@@ -1260,8 +1309,7 @@
+ case 0x80: /* real payload */
+ break;
+ default:
+- printk(KERN_ERR
+- "ibmvscsi: got an invalid message type 0x%02x\n",
++ dev_err(hostdata->dev, "got an invalid message type 0x%02x\n",
+ crq->valid);
+ return;
+ }
+@@ -1271,15 +1319,13 @@
+ * actually sent
+ */
+ if (!valid_event_struct(&hostdata->pool, evt_struct)) {
+- printk(KERN_ERR
+- "ibmvscsi: returned correlation_token 0x%p is invalid!\n",
++ dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n",
+ (void *)crq->IU_data_ptr);
+ return;
+ }
+
+ if (atomic_read(&evt_struct->free)) {
+- printk(KERN_ERR
+- "ibmvscsi: received duplicate correlation_token 0x%p!\n",
++ dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n",
+ (void *)crq->IU_data_ptr);
+ return;
+ }
+@@ -1288,11 +1334,12 @@
+ atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta,
+ &hostdata->request_limit);
+
++ del_timer(&evt_struct->timer);
++
+ if (evt_struct->done)
+ evt_struct->done(evt_struct);
+ else
+- printk(KERN_ERR
+- "ibmvscsi: returned done() is NULL; not running it!\n");
++ dev_err(hostdata->dev, "returned done() is NULL; not running it!\n");
+
+ /*
+ * Lock the host_lock before messing with these structures, since we
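The del_timer() call added above pairs with the new struct timer_list member in srp_event_struct (see the ibmvscsi.h hunk below): a per-request timer is armed when the event is sent and cancelled when the response arrives. A sketch of that pairing using the 2.6-era timer API; the function names are illustrative, not the driver's:

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static void request_timed_out(unsigned long data)
	{
		/* hypothetical: fail the request identified by 'data' */
	}

	static void arm_request_timer(struct timer_list *t, unsigned long data,
				      unsigned int timeout_secs)
	{
		init_timer(t);
		t->function = request_timed_out;
		t->data = data;
		t->expires = jiffies + timeout_secs * HZ;
		add_timer(t);
	}

On the completion path the driver deletes the timer before running the done() callback, which is exactly what the hunk above adds.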
+@@ -1313,13 +1360,13 @@
+ {
+ struct viosrp_host_config *host_config;
+ struct srp_event_struct *evt_struct;
++ unsigned long flags;
+ dma_addr_t addr;
+ int rc;
+
+ evt_struct = get_event_struct(&hostdata->pool);
+ if (!evt_struct) {
+- printk(KERN_ERR
+- "ibmvscsi: could't allocate event for HOST_CONFIG!\n");
++ dev_err(hostdata->dev, "couldn't allocate event for HOST_CONFIG!\n");
+ return -1;
+ }
+
+@@ -1339,14 +1386,15 @@
+ DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(host_config->buffer)) {
+- printk(KERN_ERR
+- "ibmvscsi: dma_mapping error " "getting host config\n");
++ dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+ free_event_struct(&hostdata->pool, evt_struct);
+ return -1;
+ }
+
+ init_completion(&evt_struct->comp);
+- rc = ibmvscsi_send_srp_event(evt_struct, hostdata);
++ spin_lock_irqsave(hostdata->host->host_lock, flags);
++ rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
++ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ if (rc == 0)
+ wait_for_completion(&evt_struct->comp);
+ dma_unmap_single(hostdata->dev, addr, length, DMA_BIDIRECTIONAL);
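As in the adapter-info hunk earlier, the send call is now wrapped in the host lock so it cannot race the interrupt-driven completion path. The locking shape, reduced to a sketch with a hypothetical send callback:

	#include <linux/spinlock.h>

	static int send_under_lock(spinlock_t *lock, int (*send)(void *), void *arg)
	{
		unsigned long flags;
		int rc;

		spin_lock_irqsave(lock, flags);		/* serialize with the IRQ path */
		rc = send(arg);
		spin_unlock_irqrestore(lock, flags);
		return rc;
	}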
+@@ -1375,6 +1423,23 @@
+ return 0;
+ }
+
++/**
++ * ibmvscsi_change_queue_depth - Change the device's queue depth
++ * @sdev: scsi device struct
++ * @qdepth: depth to set
++ *
++ * Return value:
++ * actual depth set
++ **/
++static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
++{
++ if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN)
++ qdepth = IBMVSCSI_MAX_CMDS_PER_LUN;
++
++ scsi_adjust_queue_depth(sdev, 0, qdepth);
++ return sdev->queue_depth;
++}
++
+ /* ------------------------------------------------------------
+ * sysfs attributes
+ */
+@@ -1520,7 +1585,9 @@
+ .queuecommand = ibmvscsi_queuecommand,
+ .eh_abort_handler = ibmvscsi_eh_abort_handler,
+ .eh_device_reset_handler = ibmvscsi_eh_device_reset_handler,
++ .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
+ .slave_configure = ibmvscsi_slave_configure,
++ .change_queue_depth = ibmvscsi_change_queue_depth,
+ .cmd_per_lun = 16,
+ .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
+ .this_id = -1,
+@@ -1545,7 +1612,7 @@
+ driver_template.can_queue = max_requests;
+ host = scsi_host_alloc(&driver_template, sizeof(*hostdata));
+ if (!host) {
+- printk(KERN_ERR "ibmvscsi: couldn't allocate host data\n");
++ dev_err(&vdev->dev, "couldn't allocate host data\n");
+ goto scsi_host_alloc_failed;
+ }
+
+@@ -1559,11 +1626,11 @@
+
+ rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_requests);
+ if (rc != 0 && rc != H_RESOURCE) {
+- printk(KERN_ERR "ibmvscsi: couldn't initialize crq\n");
++ dev_err(&vdev->dev, "couldn't initialize crq. rc=%d\n", rc);
+ goto init_crq_failed;
+ }
+ if (initialize_event_pool(&hostdata->pool, max_requests, hostdata) != 0) {
+- printk(KERN_ERR "ibmvscsi: couldn't initialize event pool\n");
++ dev_err(&vdev->dev, "couldn't initialize event pool\n");
+ goto init_pool_failed;
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -45,6 +45,7 @@
+ #define MAX_INDIRECT_BUFS 10
+
+ #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
++#define IBMVSCSI_MAX_CMDS_PER_LUN 64
+
+ /* ------------------------------------------------------------
+ * Data Structures
+@@ -69,6 +70,7 @@
+ union viosrp_iu iu;
+ void (*cmnd_done) (struct scsi_cmnd *);
+ struct completion comp;
++ struct timer_list timer;
+ union viosrp_iu *sync_srp;
+ struct srp_direct_buf *ext_list;
+ dma_addr_t ext_list_token;
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -177,7 +177,7 @@
+ memset(&hostdata->madapter_info, 0x00,
+ sizeof(hostdata->madapter_info));
+
+- printk(KERN_INFO "rpa_vscsi: SPR_VERSION: %s\n", SRP_VERSION);
++ dev_info(hostdata->dev, "SRP_VERSION: %s\n", SRP_VERSION);
+ strcpy(hostdata->madapter_info.srp_version, SRP_VERSION);
+
+ strncpy(hostdata->madapter_info.partition_name, partition_name,
+@@ -232,25 +232,24 @@
+
+ if (rc == 2) {
+ /* Adapter is good, but other end is not ready */
+- printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n");
++ dev_warn(hostdata->dev, "Partner adapter not ready\n");
+ retrc = 0;
+ } else if (rc != 0) {
+- printk(KERN_WARNING "ibmvscsi: Error %d opening adapter\n", rc);
++ dev_warn(hostdata->dev, "Error %d opening adapter\n", rc);
+ goto reg_crq_failed;
+ }
+
+ if (request_irq(vdev->irq,
+ ibmvscsi_handle_event,
+ 0, "ibmvscsi", (void *)hostdata) != 0) {
+- printk(KERN_ERR "ibmvscsi: couldn't register irq 0x%x\n",
++ dev_err(hostdata->dev, "couldn't register irq 0x%x\n",
+ vdev->irq);
+ goto req_irq_failed;
+ }
+
+ rc = vio_enable_interrupts(vdev);
+ if (rc != 0) {
+- printk(KERN_ERR "ibmvscsi: Error %d enabling interrupts!!!\n",
+- rc);
++ dev_err(hostdata->dev, "Error %d enabling interrupts!!!\n", rc);
+ goto req_irq_failed;
+ }
+
+@@ -294,7 +293,7 @@
+ } while ((rc == H_IN_PROGRESS) || (rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+
+ if (rc)
+- printk(KERN_ERR "ibmvscsi: Error %d enabling adapter\n", rc);
++ dev_err(hostdata->dev, "Error %d enabling adapter\n", rc);
+ return rc;
+ }
+
+@@ -327,10 +326,9 @@
+ queue->msg_token, PAGE_SIZE);
+ if (rc == 2) {
+ /* Adapter is good, but other end is not ready */
+- printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n");
++ dev_warn(hostdata->dev, "Partner adapter not ready\n");
+ } else if (rc != 0) {
+- printk(KERN_WARNING
+- "ibmvscsi: couldn't register crq--rc 0x%x\n", rc);
++ dev_warn(hostdata->dev, "couldn't register crq--rc 0x%x\n", rc);
+ }
+ return rc;
+ }
+diff -Nurb linux-2.6.22-570/drivers/scsi/initio.c linux-2.6.22-591/drivers/scsi/initio.c
+--- linux-2.6.22-570/drivers/scsi/initio.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/initio.c 2007-12-21 15:36:12.000000000 -0500
+@@ -3,7 +3,8 @@
+ *
+ * Copyright (c) 1994-1998 Initio Corporation
+ * Copyright (c) 1998 Bas Vermeulen <bvermeul@blackstar.xs4all.nl>
+- * All rights reserved.
++ * Copyright (c) 2004 Christoph Hellwig <hch@lst.de>
++ * Copyright (c) 2007 Red Hat <alan@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+@@ -19,38 +20,6 @@
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions, and the following disclaimer,
+- * without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- * derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+ *
+ *************************************************************************
+ *
+@@ -70,14 +39,14 @@
+ * - Fix memory allocation problem
+ * 03/04/98 hc - v1.01l
+ * - Fix tape rewind which will hang the system problem
+- * - Set can_queue to tul_num_scb
++ * - Set can_queue to initio_num_scb
+ * 06/25/98 hc - v1.01m
+ * - Get it work for kernel version >= 2.1.75
+- * - Dynamic assign SCSI bus reset holding time in init_tulip()
++ * - Dynamic assign SCSI bus reset holding time in initio_init()
+ * 07/02/98 hc - v1.01n
+ * - Support 0002134A
+ * 08/07/98 hc - v1.01o
+- * - Change the tul_abort_srb routine to use scsi_done. <01>
++ * - Change the initio_abort_srb routine to use scsi_done. <01>
+ * 09/07/98 hl - v1.02
+ * - Change the INI9100U define and proc_dir_entry to
+ * reflect the newer Kernel 2.1.118, but the v1.o1o
+@@ -150,23 +119,13 @@
+ static unsigned int i91u_debug = DEBUG_DEFAULT;
+ #endif
+
+-#define TUL_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-
+-typedef struct PCI_ID_Struc {
+- unsigned short vendor_id;
+- unsigned short device_id;
+-} PCI_ID;
+-
+-static int tul_num_ch = 4; /* Maximum 4 adapters */
+-static int tul_num_scb;
+-static int tul_tag_enable = 1;
+-static SCB *tul_scb;
++static int initio_tag_enable = 1;
+
+ #ifdef DEBUG_i91u
+ static int setup_debug = 0;
+ #endif
+
+-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb);
++static void i91uSCBPost(u8 * pHcb, u8 * pScb);
+
+ /* PCI Devices supported by this driver */
+ static struct pci_device_id i91u_pci_devices[] = {
+@@ -184,74 +143,66 @@
+ #define DEBUG_STATE 0
+ #define INT_DISC 0
+
+-/*--- external functions --*/
+-static void tul_se2_wait(void);
++/*--- forward references ---*/
++static struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun);
++static struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host);
++
++static int tulip_main(struct initio_host * host);
++
++static int initio_next_state(struct initio_host * host);
++static int initio_state_1(struct initio_host * host);
++static int initio_state_2(struct initio_host * host);
++static int initio_state_3(struct initio_host * host);
++static int initio_state_4(struct initio_host * host);
++static int initio_state_5(struct initio_host * host);
++static int initio_state_6(struct initio_host * host);
++static int initio_state_7(struct initio_host * host);
++static int initio_xfer_data_in(struct initio_host * host);
++static int initio_xfer_data_out(struct initio_host * host);
++static int initio_xpad_in(struct initio_host * host);
++static int initio_xpad_out(struct initio_host * host);
++static int initio_status_msg(struct initio_host * host);
++
++static int initio_msgin(struct initio_host * host);
++static int initio_msgin_sync(struct initio_host * host);
++static int initio_msgin_accept(struct initio_host * host);
++static int initio_msgout_reject(struct initio_host * host);
++static int initio_msgin_extend(struct initio_host * host);
++
++static int initio_msgout_ide(struct initio_host * host);
++static int initio_msgout_abort_targ(struct initio_host * host);
++static int initio_msgout_abort_tag(struct initio_host * host);
++
++static int initio_bus_device_reset(struct initio_host * host);
++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static int int_initio_busfree(struct initio_host * host);
++static int int_initio_scsi_rst(struct initio_host * host);
++static int int_initio_bad_seq(struct initio_host * host);
++static int int_initio_resel(struct initio_host * host);
++static int initio_sync_done(struct initio_host * host);
++static int wdtr_done(struct initio_host * host);
++static int wait_tulip(struct initio_host * host);
++static int initio_wait_done_disc(struct initio_host * host);
++static int initio_wait_disc(struct initio_host * host);
++static void tulip_scsi(struct initio_host * host);
++static int initio_post_scsi_rst(struct initio_host * host);
++
++static void initio_se2_ew_en(unsigned long base);
++static void initio_se2_ew_ds(unsigned long base);
++static int initio_se2_rd_all(unsigned long base);
++static void initio_se2_update_all(unsigned long base); /* setup default pattern */
++static void initio_read_eeprom(unsigned long base);
+
+-/*--- forward refrence ---*/
+-static SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun);
+-static SCB *tul_find_done_scb(HCS * pCurHcb);
+-
+-static int tulip_main(HCS * pCurHcb);
+-
+-static int tul_next_state(HCS * pCurHcb);
+-static int tul_state_1(HCS * pCurHcb);
+-static int tul_state_2(HCS * pCurHcb);
+-static int tul_state_3(HCS * pCurHcb);
+-static int tul_state_4(HCS * pCurHcb);
+-static int tul_state_5(HCS * pCurHcb);
+-static int tul_state_6(HCS * pCurHcb);
+-static int tul_state_7(HCS * pCurHcb);
+-static int tul_xfer_data_in(HCS * pCurHcb);
+-static int tul_xfer_data_out(HCS * pCurHcb);
+-static int tul_xpad_in(HCS * pCurHcb);
+-static int tul_xpad_out(HCS * pCurHcb);
+-static int tul_status_msg(HCS * pCurHcb);
+-
+-static int tul_msgin(HCS * pCurHcb);
+-static int tul_msgin_sync(HCS * pCurHcb);
+-static int tul_msgin_accept(HCS * pCurHcb);
+-static int tul_msgout_reject(HCS * pCurHcb);
+-static int tul_msgin_extend(HCS * pCurHcb);
+-
+-static int tul_msgout_ide(HCS * pCurHcb);
+-static int tul_msgout_abort_targ(HCS * pCurHcb);
+-static int tul_msgout_abort_tag(HCS * pCurHcb);
+-
+-static int tul_bus_device_reset(HCS * pCurHcb);
+-static void tul_select_atn(HCS * pCurHcb, SCB * pCurScb);
+-static void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb);
+-static void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb);
+-static int int_tul_busfree(HCS * pCurHcb);
+-static int int_tul_scsi_rst(HCS * pCurHcb);
+-static int int_tul_bad_seq(HCS * pCurHcb);
+-static int int_tul_resel(HCS * pCurHcb);
+-static int tul_sync_done(HCS * pCurHcb);
+-static int wdtr_done(HCS * pCurHcb);
+-static int wait_tulip(HCS * pCurHcb);
+-static int tul_wait_done_disc(HCS * pCurHcb);
+-static int tul_wait_disc(HCS * pCurHcb);
+-static void tulip_scsi(HCS * pCurHcb);
+-static int tul_post_scsi_rst(HCS * pCurHcb);
+-
+-static void tul_se2_ew_en(WORD CurBase);
+-static void tul_se2_ew_ds(WORD CurBase);
+-static int tul_se2_rd_all(WORD CurBase);
+-static void tul_se2_update_all(WORD CurBase); /* setup default pattern */
+-static void tul_read_eeprom(WORD CurBase);
+-
+- /* ---- INTERNAL VARIABLES ---- */
+-static HCS tul_hcs[MAX_SUPPORTED_ADAPTERS];
+-static INI_ADPT_STRUCT i91u_adpt[MAX_SUPPORTED_ADAPTERS];
++/* ---- INTERNAL VARIABLES ---- */
+
+-/*NVRAM nvram, *nvramp = &nvram; */
+ static NVRAM i91unvram;
+ static NVRAM *i91unvramp;
+
+-
+-
+-static UCHAR i91udftNvRam[64] =
++static u8 i91udftNvRam[64] =
+ {
+-/*----------- header -----------*/
++ /*----------- header -----------*/
+ 0x25, 0xc9, /* Signature */
+ 0x40, /* Size */
+ 0x01, /* Revision */
+@@ -289,7 +240,7 @@
+ 0, 0}; /* - CheckSum - */
+
+
+-static UCHAR tul_rate_tbl[8] = /* fast 20 */
++static u8 initio_rate_tbl[8] = /* fast 20 */
+ {
+ /* nanosecond devide by 4 */
+ 12, /* 50ns, 20M */
+@@ -302,53 +253,17 @@
+ 62 /* 250ns, 4M */
+ };
+
+-static void tul_do_pause(unsigned amount)
+-{ /* Pause for amount jiffies */
++static void initio_do_pause(unsigned amount)
++{
++ /* Pause for amount jiffies */
+ unsigned long the_time = jiffies + amount;
+
+- while (time_before_eq(jiffies, the_time));
++ while (time_before_eq(jiffies, the_time))
++ cpu_relax();
+ }
+
+ /*-- forward reference --*/
+
+-/*******************************************************************
+- Use memeory refresh time ~ 15us * 2
+-********************************************************************/
+-void tul_se2_wait(void)
+-{
+-#if 1
+- udelay(30);
+-#else
+- UCHAR readByte;
+-
+- readByte = TUL_RD(0, 0x61);
+- if ((readByte & 0x10) == 0x10) {
+- for (;;) {
+- readByte = TUL_RD(0, 0x61);
+- if ((readByte & 0x10) == 0x10)
+- break;
+- }
+- for (;;) {
+- readByte = TUL_RD(0, 0x61);
+- if ((readByte & 0x10) != 0x10)
+- break;
+- }
+- } else {
+- for (;;) {
+- readByte = TUL_RD(0, 0x61);
+- if ((readByte & 0x10) == 0x10)
+- break;
+- }
+- for (;;) {
+- readByte = TUL_RD(0, 0x61);
+- if ((readByte & 0x10) != 0x10)
+- break;
+- }
+- }
+-#endif
+-}
+-
+-
+ /******************************************************************
+ Input: instruction for Serial E2PROM
+
+@@ -379,1174 +294,1019 @@
+
+
+ ******************************************************************/
+-static void tul_se2_instr(WORD CurBase, UCHAR instr)
++
++/**
++ * initio_se2_instr - bitbang an instruction
++ * @base: Base of InitIO controller
++ * @instr: Instruction for serial E2PROM
++ *
++ * Bitbang an instruction out to the serial E2Prom
++ */
++
++static void initio_se2_instr(unsigned long base, u8 instr)
+ {
+ int i;
+- UCHAR b;
++ u8 b;
+
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO); /* cs+start bit */
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK | SE2DO); /* +CLK */
+- tul_se2_wait();
++ outb(SE2CS | SE2DO, base + TUL_NVRAM); /* cs+start bit */
++ udelay(30);
++ outb(SE2CS | SE2CLK | SE2DO, base + TUL_NVRAM); /* +CLK */
++ udelay(30);
+
+ for (i = 0; i < 8; i++) {
+ if (instr & 0x80)
+ b = SE2CS | SE2DO; /* -CLK+dataBit */
+ else
+ b = SE2CS; /* -CLK */
+- TUL_WR(CurBase + TUL_NVRAM, b);
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, b | SE2CLK); /* +CLK */
+- tul_se2_wait();
++ outb(b, base + TUL_NVRAM);
++ udelay(30);
++ outb(b | SE2CLK, base + TUL_NVRAM); /* +CLK */
++ udelay(30);
+ instr <<= 1;
+ }
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */
+- tul_se2_wait();
+- return;
++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */
++ udelay(30);
+ }
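initio_se2_instr() above is a classic bit-bang transmitter: for each bit, drive the data line while the clock is low, then raise the clock to latch the bit, pausing about 30us around each edge. The idiom distilled into a sketch; the line definitions are illustrative stand-ins for the real SE2CS/SE2CLK/SE2DO values in the initio headers:

	#include <linux/io.h>
	#include <linux/delay.h>
	#include <linux/types.h>

	#define EX_CS	0x01	/* chip select (illustrative value) */
	#define EX_CLK	0x02	/* serial clock (illustrative value) */
	#define EX_DO	0x04	/* data out to the EEPROM (illustrative value) */

	static void bitbang_out(unsigned long port, u8 byte)
	{
		int i;

		for (i = 0; i < 8; i++) {
			u8 b = EX_CS | ((byte & 0x80) ? EX_DO : 0);

			outb(b, port);		/* clock low, data valid */
			udelay(30);
			outb(b | EX_CLK, port);	/* clock high latches the bit */
			udelay(30);
			byte <<= 1;
		}
		outb(EX_CS, port);		/* leave the clock low */
	}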
+
+
+-/******************************************************************
+- Function name : tul_se2_ew_en
+- Description : Enable erase/write state of serial EEPROM
+-******************************************************************/
+-void tul_se2_ew_en(WORD CurBase)
++/**
++ * initio_se2_ew_en - Enable erase/write
++ * @base: Base address of InitIO controller
++ *
++ * Enable erase/write state of serial EEPROM
++ */
++void initio_se2_ew_en(unsigned long base)
+ {
+- tul_se2_instr(CurBase, 0x30); /* EWEN */
+- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */
+- tul_se2_wait();
+- return;
++ initio_se2_instr(base, 0x30); /* EWEN */
++ outb(0, base + TUL_NVRAM); /* -CS */
++ udelay(30);
+ }
+
+
+-/************************************************************************
+- Disable erase/write state of serial EEPROM
+-*************************************************************************/
+-void tul_se2_ew_ds(WORD CurBase)
+-{
+- tul_se2_instr(CurBase, 0); /* EWDS */
+- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */
+- tul_se2_wait();
+- return;
++/**
++ * initio_se2_ew_ds - Disable erase/write
++ * @base: Base address of InitIO controller
++ *
++ * Disable erase/write state of serial EEPROM
++ */
++void initio_se2_ew_ds(unsigned long base)
++{
++ initio_se2_instr(base, 0); /* EWDS */
++ outb(0, base + TUL_NVRAM); /* -CS */
++ udelay(30);
+ }
+
+
+-/******************************************************************
+- Input :address of Serial E2PROM
+- Output :value stored in Serial E2PROM
+-*******************************************************************/
+-static USHORT tul_se2_rd(WORD CurBase, ULONG adr)
++/**
++ * initio_se2_rd - read E2PROM word
++ * @base: Base of InitIO controller
++ * @addr: Address of word in E2PROM
++ *
++ * Read a word from the NV E2PROM device
++ */
++static u16 initio_se2_rd(unsigned long base, u8 addr)
+ {
+- UCHAR instr, readByte;
+- USHORT readWord;
++ u8 instr, rb;
++ u16 val = 0;
+ int i;
+
+- instr = (UCHAR) (adr | 0x80);
+- tul_se2_instr(CurBase, instr); /* READ INSTR */
+- readWord = 0;
++ instr = (u8) (addr | 0x80);
++ initio_se2_instr(base, instr); /* READ INSTR */
+
+ for (i = 15; i >= 0; i--) {
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */
++ outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */
++ udelay(30);
++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */
+
+ /* sample data after the following edge of clock */
+- readByte = TUL_RD(CurBase, TUL_NVRAM);
+- readByte &= SE2DI;
+- readWord += (readByte << i);
+- tul_se2_wait(); /* 6/20/95 */
++ rb = inb(base + TUL_NVRAM);
++ rb &= SE2DI;
++ val += (rb << i);
++ udelay(30); /* 6/20/95 */
+ }
+
+- TUL_WR(CurBase + TUL_NVRAM, 0); /* no chip select */
+- tul_se2_wait();
+- return readWord;
++ outb(0, base + TUL_NVRAM); /* no chip select */
++ udelay(30);
++ return val;
+ }
+
+-
+-/******************************************************************
+- Input: new value in Serial E2PROM, address of Serial E2PROM
+-*******************************************************************/
+-static void tul_se2_wr(WORD CurBase, UCHAR adr, USHORT writeWord)
++/**
++ * initio_se2_wr - write E2PROM word
++ * @base: Base of InitIO controller
++ * @addr: Address of word in E2PROM
++ * @val: Value to write
++ *
++ * Write a word to the NV E2PROM device. Used when recovering from
++ * a problem with the NV.
++ */
++static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
+ {
+- UCHAR readByte;
+- UCHAR instr;
++ u8 rb;
++ u8 instr;
+ int i;
+
+- instr = (UCHAR) (adr | 0x40);
+- tul_se2_instr(CurBase, instr); /* WRITE INSTR */
++ instr = (u8) (addr | 0x40);
++ initio_se2_instr(base, instr); /* WRITE INSTR */
+ for (i = 15; i >= 0; i--) {
+- if (writeWord & 0x8000)
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO); /* -CLK+dataBit 1 */
++ if (val & 0x8000)
++ outb(SE2CS | SE2DO, base + TUL_NVRAM); /* -CLK+dataBit 1 */
+ else
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK+dataBit 0 */
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */
+- tul_se2_wait();
+- writeWord <<= 1;
+- }
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */
+- tul_se2_wait();
++ outb(SE2CS, base + TUL_NVRAM); /* -CLK+dataBit 0 */
++ udelay(30);
++ outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */
++ udelay(30);
++ val <<= 1;
++ }
++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */
++ udelay(30);
++ outb(0, base + TUL_NVRAM); /* -CS */
++ udelay(30);
+
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* +CS */
+- tul_se2_wait();
++ outb(SE2CS, base + TUL_NVRAM); /* +CS */
++ udelay(30);
+
+ for (;;) {
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */
+- tul_se2_wait();
+- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */
+- tul_se2_wait();
+- if ((readByte = TUL_RD(CurBase, TUL_NVRAM)) & SE2DI)
++ outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */
++ udelay(30);
++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */
++ udelay(30);
++ if ((rb = inb(base + TUL_NVRAM)) & SE2DI)
+ break; /* write complete */
+ }
+- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */
+- return;
++ outb(0, base + TUL_NVRAM); /* -CS */
+ }
+
++/**
++ * initio_se2_rd_all - read host adapter NV configuration
++ * @base: Base address of InitIO controller
++ *
++ * Reads the E2PROM data into main memory. Ensures that the checksum
++ * and header marker are valid. Returns 1 on success, -1 on error.
++ */
+
+-/***********************************************************************
+- Read SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-int tul_se2_rd_all(WORD CurBase)
++static int initio_se2_rd_all(unsigned long base)
+ {
+ int i;
+- ULONG chksum = 0;
+- USHORT *np;
++ u16 chksum = 0;
++ u16 *np;
+
+ i91unvramp = &i91unvram;
+- np = (USHORT *) i91unvramp;
+- for (i = 0; i < 32; i++) {
+- *np++ = tul_se2_rd(CurBase, i);
+- }
++ np = (u16 *) i91unvramp;
++ for (i = 0; i < 32; i++)
++ *np++ = initio_se2_rd(base, i);
+
+-/*--------------------Is signature "ini" ok ? ----------------*/
++ /* Is signature "ini" ok ? */
+ if (i91unvramp->NVM_Signature != INI_SIGNATURE)
+ return -1;
+-/*---------------------- Is ckecksum ok ? ----------------------*/
+- np = (USHORT *) i91unvramp;
++ /* Is checksum ok ? */
++ np = (u16 *) i91unvramp;
+ for (i = 0; i < 31; i++)
+ chksum += *np++;
+- if (i91unvramp->NVM_CheckSum != (USHORT) chksum)
++ if (i91unvramp->NVM_CheckSum != chksum)
+ return -1;
+ return 1;
+ }
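initio_se2_rd_all() validates the 32-word NVRAM image by summing the first 31 words and comparing the result against the checksum stored in the last word. The check in isolation, as a sketch over a plain word array:

	#include <linux/types.h>

	static int nvram_checksum_ok(const u16 *words)	/* 32 words total */
	{
		u16 sum = 0;
		int i;

		for (i = 0; i < 31; i++)	/* sum the payload words */
			sum += words[i];
		return words[31] == sum;	/* last word holds the checksum */
	}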
+
+-
+-/***********************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-void tul_se2_update_all(WORD CurBase)
++/**
++ * initio_se2_update_all - Update E2PROM
++ * @base: Base of InitIO controller
++ *
++ * Update the E2PROM by writing any changes into the E2PROM
++ * chip, rewriting the checksum.
++ */
++static void initio_se2_update_all(unsigned long base)
+ { /* setup default pattern */
+ int i;
+- ULONG chksum = 0;
+- USHORT *np, *np1;
++ u16 chksum = 0;
++ u16 *np, *np1;
+
+ i91unvramp = &i91unvram;
+ /* Calculate checksum first */
+- np = (USHORT *) i91udftNvRam;
++ np = (u16 *) i91udftNvRam;
+ for (i = 0; i < 31; i++)
+ chksum += *np++;
+- *np = (USHORT) chksum;
+- tul_se2_ew_en(CurBase); /* Enable write */
++ *np = chksum;
++ initio_se2_ew_en(base); /* Enable write */
+
+- np = (USHORT *) i91udftNvRam;
+- np1 = (USHORT *) i91unvramp;
++ np = (u16 *) i91udftNvRam;
++ np1 = (u16 *) i91unvramp;
+ for (i = 0; i < 32; i++, np++, np1++) {
+- if (*np != *np1) {
+- tul_se2_wr(CurBase, i, *np);
+- }
++ if (*np != *np1)
++ initio_se2_wr(base, i, *np);
+ }
+-
+- tul_se2_ew_ds(CurBase); /* Disable write */
+- return;
++ initio_se2_ew_ds(base); /* Disable write */
+ }
+
+-/*************************************************************************
+- Function name : read_eeprom
+-**************************************************************************/
+-void tul_read_eeprom(WORD CurBase)
+-{
+- UCHAR gctrl;
+-
+- i91unvramp = &i91unvram;
+-/*------Enable EEProm programming ---*/
+- gctrl = TUL_RD(CurBase, TUL_GCTRL);
+- TUL_WR(CurBase + TUL_GCTRL, gctrl | TUL_GCTRL_EEPROM_BIT);
+- if (tul_se2_rd_all(CurBase) != 1) {
+- tul_se2_update_all(CurBase); /* setup default pattern */
+- tul_se2_rd_all(CurBase); /* load again */
+- }
+-/*------ Disable EEProm programming ---*/
+- gctrl = TUL_RD(CurBase, TUL_GCTRL);
+- TUL_WR(CurBase + TUL_GCTRL, gctrl & ~TUL_GCTRL_EEPROM_BIT);
+-} /* read_eeprom */
++/**
++ * initio_read_eeprom - Retrieve configuration
++ * @base: Base of InitIO Host Adapter
++ *
++ * Retrieve the host adapter configuration data from E2Prom. If the
++ * data is invalid then the defaults are used and are also restored
++ * into the E2PROM. This forms the access point for the SCSI driver
++ * into the E2PROM layer, the other functions for the E2PROM are all
++ * internal use.
++ *
++ * Must be called single threaded, uses a shared global area.
++ */
+
+-static int Addi91u_into_Adapter_table(WORD wBIOS, WORD wBASE, BYTE bInterrupt,
+- BYTE bBus, BYTE bDevice)
++static void initio_read_eeprom(unsigned long base)
+ {
+- int i, j;
++ u8 gctrl;
+
+- for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) {
+- if (i91u_adpt[i].ADPT_BIOS < wBIOS)
+- continue;
+- if (i91u_adpt[i].ADPT_BIOS == wBIOS) {
+- if (i91u_adpt[i].ADPT_BASE == wBASE) {
+- if (i91u_adpt[i].ADPT_Bus != 0xFF)
+- return 1;
+- } else if (i91u_adpt[i].ADPT_BASE < wBASE)
+- continue;
+- }
+- for (j = MAX_SUPPORTED_ADAPTERS - 1; j > i; j--) {
+- i91u_adpt[j].ADPT_BASE = i91u_adpt[j - 1].ADPT_BASE;
+- i91u_adpt[j].ADPT_INTR = i91u_adpt[j - 1].ADPT_INTR;
+- i91u_adpt[j].ADPT_BIOS = i91u_adpt[j - 1].ADPT_BIOS;
+- i91u_adpt[j].ADPT_Bus = i91u_adpt[j - 1].ADPT_Bus;
+- i91u_adpt[j].ADPT_Device = i91u_adpt[j - 1].ADPT_Device;
+- }
+- i91u_adpt[i].ADPT_BASE = wBASE;
+- i91u_adpt[i].ADPT_INTR = bInterrupt;
+- i91u_adpt[i].ADPT_BIOS = wBIOS;
+- i91u_adpt[i].ADPT_Bus = bBus;
+- i91u_adpt[i].ADPT_Device = bDevice;
+- return 0;
+- }
+- return 1;
++ i91unvramp = &i91unvram;
++ /* Enable EEProm programming */
++ gctrl = inb(base + TUL_GCTRL);
++ outb(gctrl | TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
++ if (initio_se2_rd_all(base) != 1) {
++ initio_se2_update_all(base); /* setup default pattern */
++ initio_se2_rd_all(base); /* load again */
++ }
++ /* Disable EEProm programming */
++ gctrl = inb(base + TUL_GCTRL);
++ outb(gctrl & ~TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
+ }
+
+-static void init_i91uAdapter_table(void)
+-{
+- int i;
+-
+- for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) { /* Initialize adapter structure */
+- i91u_adpt[i].ADPT_BIOS = 0xffff;
+- i91u_adpt[i].ADPT_BASE = 0xffff;
+- i91u_adpt[i].ADPT_INTR = 0xff;
+- i91u_adpt[i].ADPT_Bus = 0xff;
+- i91u_adpt[i].ADPT_Device = 0xff;
+- }
+- return;
+-}
++/**
++ * initio_stop_bm - stop bus master
++ * @host: InitIO we are stopping
++ *
++ * Stop any pending DMA operation, aborting the DMA if necessary
++ */
+
+-static void tul_stop_bm(HCS * pCurHcb)
++static void initio_stop_bm(struct initio_host * host)
+ {
+
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO);
++ if (inb(host->addr + TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */
++ outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd);
+ /* wait Abort DMA xfer done */
+- while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0);
++ while ((inb(host->addr + TUL_Int) & XABT) == 0)
++ cpu_relax();
+ }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ }
+
+-/***************************************************************************/
+-static void get_tulipPCIConfig(HCS * pCurHcb, int ch_idx)
+-{
+- pCurHcb->HCS_Base = i91u_adpt[ch_idx].ADPT_BASE; /* Supply base address */
+- pCurHcb->HCS_BIOS = i91u_adpt[ch_idx].ADPT_BIOS; /* Supply BIOS address */
+- pCurHcb->HCS_Intr = i91u_adpt[ch_idx].ADPT_INTR; /* Supply interrupt line */
+- return;
+-}
++/**
++ * initio_reset_scsi - Reset SCSI host controller
++ * @host: InitIO host to reset
++ * @seconds: Recovery time
++ *
++ * Perform a full reset of the SCSI subsystem.
++ */
+
+-/***************************************************************************/
+-static int tul_reset_scsi(HCS * pCurHcb, int seconds)
++static int initio_reset_scsi(struct initio_host * host, int seconds)
+ {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_BUS);
++ outb(TSC_RST_BUS, host->addr + TUL_SCtrl0);
+
+- while (!((pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt)) & TSS_SCSIRST_INT));
+- /* reset tulip chip */
++ while (!((host->jsint = inb(host->addr + TUL_SInt)) & TSS_SCSIRST_INT))
++ cpu_relax();
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, 0);
++ /* reset tulip chip */
++ outb(0, host->addr + TUL_SSignal);
+
+ /* Stall for a while, wait for target's firmware ready,make it 2 sec ! */
+ /* SONY 5200 tape drive won't work if only stall for 1 sec */
+- tul_do_pause(seconds * HZ);
+-
+- TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++ /* FIXME: this is a very long busy wait right now */
++ initio_do_pause(seconds * HZ);
+
+- return (SCSI_RESET_SUCCESS);
++ inb(host->addr + TUL_SInt);
++ return SCSI_RESET_SUCCESS;
+ }
+
+-/***************************************************************************/
+-static int init_tulip(HCS * pCurHcb, SCB * scbp, int tul_num_scb,
+- BYTE * pbBiosAdr, int seconds)
++/**
++ * initio_init - set up an InitIO host adapter
++ * @host: InitIO host adapter
++ * @num_scbs: Number of SCBS
++ * @bios_addr: BIOS address
++ *
++ * Set up the host adapter and devices according to the configuration
++ * retrieved from the E2PROM.
++ *
++ * Locking: Calls E2PROM layer code which is not re-entrant so must
++ * run single threaded for now.
++ */
++
++static void initio_init(struct initio_host * host, u8 *bios_addr)
+ {
+ int i;
+- BYTE *pwFlags;
+- BYTE *pbHeads;
+- SCB *pTmpScb, *pPrevScb = NULL;
+-
+- pCurHcb->HCS_NumScbs = tul_num_scb;
+- pCurHcb->HCS_Semaph = 1;
+- spin_lock_init(&pCurHcb->HCS_SemaphLock);
+- pCurHcb->HCS_JSStatus0 = 0;
+- pCurHcb->HCS_Scb = scbp;
+- pCurHcb->HCS_NxtPend = scbp;
+- pCurHcb->HCS_NxtAvail = scbp;
+- for (i = 0, pTmpScb = scbp; i < tul_num_scb; i++, pTmpScb++) {
+- pTmpScb->SCB_TagId = i;
+- if (i != 0)
+- pPrevScb->SCB_NxtScb = pTmpScb;
+- pPrevScb = pTmpScb;
+- }
+- pPrevScb->SCB_NxtScb = NULL;
+- pCurHcb->HCS_ScbEnd = pTmpScb;
+- pCurHcb->HCS_FirstAvail = scbp;
+- pCurHcb->HCS_LastAvail = pPrevScb;
+- spin_lock_init(&pCurHcb->HCS_AvailLock);
+- pCurHcb->HCS_FirstPend = NULL;
+- pCurHcb->HCS_LastPend = NULL;
+- pCurHcb->HCS_FirstBusy = NULL;
+- pCurHcb->HCS_LastBusy = NULL;
+- pCurHcb->HCS_FirstDone = NULL;
+- pCurHcb->HCS_LastDone = NULL;
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
++ u8 *flags;
++ u8 *heads;
+
+- tul_read_eeprom(pCurHcb->HCS_Base);
+-/*---------- get H/A configuration -------------*/
++ /* Get E2Prom configuration */
++ initio_read_eeprom(host->addr);
+ if (i91unvramp->NVM_SCSIInfo[0].NVM_NumOfTarg == 8)
+- pCurHcb->HCS_MaxTar = 8;
++ host->max_tar = 8;
+ else
+- pCurHcb->HCS_MaxTar = 16;
++ host->max_tar = 16;
+
+- pCurHcb->HCS_Config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
++ host->config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
+
+- pCurHcb->HCS_SCSI_ID = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
+- pCurHcb->HCS_IdMask = ~(1 << pCurHcb->HCS_SCSI_ID);
++ host->scsi_id = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
++ host->idmask = ~(1 << host->scsi_id);
+
+ #ifdef CHK_PARITY
+ /* Enable parity error response */
+- TUL_WR(pCurHcb->HCS_Base + TUL_PCMD, TUL_RD(pCurHcb->HCS_Base, TUL_PCMD) | 0x40);
++ outb(inb(host->addr + TUL_PCMD) | 0x40, host->addr + TUL_PCMD);
+ #endif
+
+ /* Mask all the interrupt */
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++ outb(0x1F, host->addr + TUL_Mask);
+
+- tul_stop_bm(pCurHcb);
++ initio_stop_bm(host);
+ /* --- Initialize the tulip --- */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_CHIP);
++ outb(TSC_RST_CHIP, host->addr + TUL_SCtrl0);
+
+ /* program HBA's SCSI ID */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, pCurHcb->HCS_SCSI_ID << 4);
++ outb(host->scsi_id << 4, host->addr + TUL_SScsiId);
+
+ /* Enable Initiator Mode ,phase latch,alternate sync period mode,
+ disable SCSI reset */
+- if (pCurHcb->HCS_Config & HCC_EN_PAR)
+- pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR);
++ if (host->config & HCC_EN_PAR)
++ host->sconf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR);
+ else
+- pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_SConf1);
++ host->sconf1 = (TSC_INITDEFAULT);
++ outb(host->sconf1, host->addr + TUL_SConfig);
+
+ /* Enable HW reselect */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, 0);
++ outb(0, host->addr + TUL_SPeriod);
+
+ /* selection time out = 250 ms */
+- TUL_WR(pCurHcb->HCS_Base + TUL_STimeOut, 153);
++ outb(153, host->addr + TUL_STimeOut);
+
+-/*--------- Enable SCSI terminator -----*/
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, (pCurHcb->HCS_Config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)));
+- TUL_WR(pCurHcb->HCS_Base + TUL_GCTRL1,
+- ((pCurHcb->HCS_Config & HCC_AUTO_TERM) >> 4) | (TUL_RD(pCurHcb->HCS_Base, TUL_GCTRL1) & 0xFE));
++ /* Enable SCSI terminator */
++ outb((host->config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)),
++ host->addr + TUL_XCtrl);
++ outb(((host->config & HCC_AUTO_TERM) >> 4) |
++ (inb(host->addr + TUL_GCTRL1) & 0xFE),
++ host->addr + TUL_GCTRL1);
+
+ for (i = 0,
+- pwFlags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config),
+- pbHeads = pbBiosAdr + 0x180;
+- i < pCurHcb->HCS_MaxTar;
+- i++, pwFlags++) {
+- pCurHcb->HCS_Tcs[i].TCS_Flags = *pwFlags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+- if (pCurHcb->HCS_Tcs[i].TCS_Flags & TCF_EN_255)
+- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63;
++ flags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config),
++ heads = bios_addr + 0x180;
++ i < host->max_tar;
++ i++, flags++) {
++ host->targets[i].flags = *flags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++ if (host->targets[i].flags & TCF_EN_255)
++ host->targets[i].drv_flags = TCF_DRV_255_63;
+ else
+- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0;
+- pCurHcb->HCS_Tcs[i].TCS_JS_Period = 0;
+- pCurHcb->HCS_Tcs[i].TCS_SConfig0 = pCurHcb->HCS_SConf1;
+- pCurHcb->HCS_Tcs[i].TCS_DrvHead = *pbHeads++;
+- if (pCurHcb->HCS_Tcs[i].TCS_DrvHead == 255)
+- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63;
++ host->targets[i].drv_flags = 0;
++ host->targets[i].js_period = 0;
++ host->targets[i].sconfig0 = host->sconf1;
++ host->targets[i].heads = *heads++;
++ if (host->targets[i].heads == 255)
++ host->targets[i].drv_flags = TCF_DRV_255_63;
+ else
+- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0;
+- pCurHcb->HCS_Tcs[i].TCS_DrvSector = *pbHeads++;
+- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY;
+- pCurHcb->HCS_ActTags[i] = 0;
+- pCurHcb->HCS_MaxTags[i] = 0xFF;
++ host->targets[i].drv_flags = 0;
++ host->targets[i].sectors = *heads++;
++ host->targets[i].flags &= ~TCF_BUSY;
++ host->act_tags[i] = 0;
++ host->max_tags[i] = 0xFF;
+ } /* for */
+ printk("i91u: PCI Base=0x%04X, IRQ=%d, BIOS=0x%04X0, SCSI ID=%d\n",
+- pCurHcb->HCS_Base, pCurHcb->HCS_Intr,
+- pCurHcb->HCS_BIOS, pCurHcb->HCS_SCSI_ID);
+-/*------------------- reset SCSI Bus ---------------------------*/
+- if (pCurHcb->HCS_Config & HCC_SCSI_RESET) {
+- printk("i91u: Reset SCSI Bus ... \n");
+- tul_reset_scsi(pCurHcb, seconds);
+- }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCFG1, 0x17);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SIntEnable, 0xE9);
+- return (0);
++ host->addr, host->irq,
++ host->bios_addr, host->scsi_id);
++ /* Reset SCSI Bus */
++ if (host->config & HCC_SCSI_RESET) {
++ printk(KERN_INFO "i91u: Reset SCSI Bus ... \n");
++ initio_reset_scsi(host, 10);
++ }
++ outb(0x17, host->addr + TUL_SCFG1);
++ outb(0xE9, host->addr + TUL_SIntEnable);
+ }
+
+-/***************************************************************************/
+-static SCB *tul_alloc_scb(HCS * hcsp)
++/**
++ * initio_alloc_scb - Allocate an SCB
++ * @host: InitIO host we are allocating for
++ *
++ * Walk the SCB list for the controller and allocate a free SCB if
++ * one exists.
++ */
++static struct scsi_ctrl_blk *initio_alloc_scb(struct initio_host *host)
+ {
+- SCB *pTmpScb;
+- ULONG flags;
+- spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags);
+- if ((pTmpScb = hcsp->HCS_FirstAvail) != NULL) {
++ struct scsi_ctrl_blk *scb;
++ unsigned long flags;
++
++ spin_lock_irqsave(&host->avail_lock, flags);
++ if ((scb = host->first_avail) != NULL) {
+ #if DEBUG_QUEUE
+- printk("find scb at %08lx\n", (ULONG) pTmpScb);
++ printk("find scb at %p\n", scb);
+ #endif
+- if ((hcsp->HCS_FirstAvail = pTmpScb->SCB_NxtScb) == NULL)
+- hcsp->HCS_LastAvail = NULL;
+- pTmpScb->SCB_NxtScb = NULL;
+- pTmpScb->SCB_Status = SCB_RENT;
++ if ((host->first_avail = scb->next) == NULL)
++ host->last_avail = NULL;
++ scb->next = NULL;
++ scb->status = SCB_RENT;
+ }
+- spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags);
+- return (pTmpScb);
++ spin_unlock_irqrestore(&host->avail_lock, flags);
++ return scb;
+ }
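initio_alloc_scb() and initio_release_scb() manage SCBs as a singly linked free list guarded by a spinlock: allocation pops from the head, release appends at the tail. A self-contained sketch of the pop side, with illustrative types:

	#include <linux/spinlock.h>

	struct blk {
		struct blk *next;
	};

	struct blk_pool {
		spinlock_t lock;
		struct blk *first, *last;
	};

	static struct blk *pool_pop(struct blk_pool *p)
	{
		struct blk *b;
		unsigned long flags;

		spin_lock_irqsave(&p->lock, flags);
		b = p->first;
		if (b) {
			p->first = b->next;
			if (!p->first)
				p->last = NULL;	/* popped the only element */
			b->next = NULL;
		}
		spin_unlock_irqrestore(&p->lock, flags);
		return b;
	}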
+
+-/***************************************************************************/
+-static void tul_release_scb(HCS * hcsp, SCB * scbp)
++/**
++ * initio_release_scb - Release an SCB
++ * @host: InitIO host that owns the SCB
++ * @cmnd: SCB command block being returned
++ *
++ * Return an allocated SCB to the host free list
++ */
++
++static void initio_release_scb(struct initio_host * host, struct scsi_ctrl_blk * cmnd)
+ {
+- ULONG flags;
++ unsigned long flags;
+
+ #if DEBUG_QUEUE
+- printk("Release SCB %lx; ", (ULONG) scbp);
++ printk("Release SCB %p; ", cmnd);
+ #endif
+- spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags);
+- scbp->SCB_Srb = NULL;
+- scbp->SCB_Status = 0;
+- scbp->SCB_NxtScb = NULL;
+- if (hcsp->HCS_LastAvail != NULL) {
+- hcsp->HCS_LastAvail->SCB_NxtScb = scbp;
+- hcsp->HCS_LastAvail = scbp;
++ spin_lock_irqsave(&(host->avail_lock), flags);
++ cmnd->srb = NULL;
++ cmnd->status = 0;
++ cmnd->next = NULL;
++ if (host->last_avail != NULL) {
++ host->last_avail->next = cmnd;
++ host->last_avail = cmnd;
+ } else {
+- hcsp->HCS_FirstAvail = scbp;
+- hcsp->HCS_LastAvail = scbp;
++ host->first_avail = cmnd;
++ host->last_avail = cmnd;
+ }
+- spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags);
++ spin_unlock_irqrestore(&(host->avail_lock), flags);
+ }
+
+ /***************************************************************************/
+-static void tul_append_pend_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_append_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+
+ #if DEBUG_QUEUE
+- printk("Append pend SCB %lx; ", (ULONG) scbp);
++ printk("Append pend SCB %p; ", scbp);
+ #endif
+- scbp->SCB_Status = SCB_PEND;
+- scbp->SCB_NxtScb = NULL;
+- if (pCurHcb->HCS_LastPend != NULL) {
+- pCurHcb->HCS_LastPend->SCB_NxtScb = scbp;
+- pCurHcb->HCS_LastPend = scbp;
++ scbp->status = SCB_PEND;
++ scbp->next = NULL;
++ if (host->last_pending != NULL) {
++ host->last_pending->next = scbp;
++ host->last_pending = scbp;
+ } else {
+- pCurHcb->HCS_FirstPend = scbp;
+- pCurHcb->HCS_LastPend = scbp;
++ host->first_pending = scbp;
++ host->last_pending = scbp;
+ }
+ }
+
+ /***************************************************************************/
+-static void tul_push_pend_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_push_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+
+ #if DEBUG_QUEUE
+- printk("Push pend SCB %lx; ", (ULONG) scbp);
++ printk("Push pend SCB %p; ", scbp);
+ #endif
+- scbp->SCB_Status = SCB_PEND;
+- if ((scbp->SCB_NxtScb = pCurHcb->HCS_FirstPend) != NULL) {
+- pCurHcb->HCS_FirstPend = scbp;
++ scbp->status = SCB_PEND;
++ if ((scbp->next = host->first_pending) != NULL) {
++ host->first_pending = scbp;
+ } else {
+- pCurHcb->HCS_FirstPend = scbp;
+- pCurHcb->HCS_LastPend = scbp;
++ host->first_pending = scbp;
++ host->last_pending = scbp;
+ }
+ }
+
+-/***************************************************************************/
+-static SCB *tul_find_first_pend_scb(HCS * pCurHcb)
++static struct scsi_ctrl_blk *initio_find_first_pend_scb(struct initio_host * host)
+ {
+- SCB *pFirstPend;
++ struct scsi_ctrl_blk *first;
+
+
+- pFirstPend = pCurHcb->HCS_FirstPend;
+- while (pFirstPend != NULL) {
+- if (pFirstPend->SCB_Opcode != ExecSCSI) {
+- return (pFirstPend);
+- }
+- if (pFirstPend->SCB_TagMsg == 0) {
+- if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] == 0) &&
+- !(pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) {
+- return (pFirstPend);
+- }
++ first = host->first_pending;
++ while (first != NULL) {
++ if (first->opcode != ExecSCSI)
++ return first;
++ if (first->tagmsg == 0) {
++ if ((host->act_tags[first->target] == 0) &&
++ !(host->targets[first->target].flags & TCF_BUSY))
++ return first;
+ } else {
+- if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] >=
+- pCurHcb->HCS_MaxTags[pFirstPend->SCB_Target]) |
+- (pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) {
+- pFirstPend = pFirstPend->SCB_NxtScb;
++ if ((host->act_tags[first->target] >=
++ host->max_tags[first->target]) |
++ (host->targets[first->target].flags & TCF_BUSY)) {
++ first = first->next;
+ continue;
+ }
+- return (pFirstPend);
++ return first;
+ }
+- pFirstPend = pFirstPend->SCB_NxtScb;
++ first = first->next;
+ }
+-
+-
+- return (pFirstPend);
++ return first;
+ }
+-/***************************************************************************/
+-static void tul_unlink_pend_scb(HCS * pCurHcb, SCB * pCurScb)
++
++static void initio_unlink_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+- SCB *pTmpScb, *pPrevScb;
++ struct scsi_ctrl_blk *tmp, *prev;
+
+ #if DEBUG_QUEUE
+- printk("unlink pend SCB %lx; ", (ULONG) pCurScb);
++ printk("unlink pend SCB %p; ", scb);
+ #endif
+
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend;
+- while (pTmpScb != NULL) {
+- if (pCurScb == pTmpScb) { /* Unlink this SCB */
+- if (pTmpScb == pCurHcb->HCS_FirstPend) {
+- if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastPend = NULL;
++ prev = tmp = host->first_pending;
++ while (tmp != NULL) {
++ if (scb == tmp) { /* Unlink this SCB */
++ if (tmp == host->first_pending) {
++ if ((host->first_pending = tmp->next) == NULL)
++ host->last_pending = NULL;
+ } else {
+- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+- if (pTmpScb == pCurHcb->HCS_LastPend)
+- pCurHcb->HCS_LastPend = pPrevScb;
++ prev->next = tmp->next;
++ if (tmp == host->last_pending)
++ host->last_pending = prev;
+ }
+- pTmpScb->SCB_NxtScb = NULL;
++ tmp->next = NULL;
+ break;
+ }
+- pPrevScb = pTmpScb;
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ prev = tmp;
++ tmp = tmp->next;
+ }
+- return;
+ }
+-/***************************************************************************/
+-static void tul_append_busy_scb(HCS * pCurHcb, SCB * scbp)
++
++static void initio_append_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+
+ #if DEBUG_QUEUE
+- printk("append busy SCB %lx; ", (ULONG) scbp);
++ printk("append busy SCB %o; ", scbp);
+ #endif
+- if (scbp->SCB_TagMsg)
+- pCurHcb->HCS_ActTags[scbp->SCB_Target]++;
++ if (scbp->tagmsg)
++ host->act_tags[scbp->target]++;
+ else
+- pCurHcb->HCS_Tcs[scbp->SCB_Target].TCS_Flags |= TCF_BUSY;
+- scbp->SCB_Status = SCB_BUSY;
+- scbp->SCB_NxtScb = NULL;
+- if (pCurHcb->HCS_LastBusy != NULL) {
+- pCurHcb->HCS_LastBusy->SCB_NxtScb = scbp;
+- pCurHcb->HCS_LastBusy = scbp;
++ host->targets[scbp->target].flags |= TCF_BUSY;
++ scbp->status = SCB_BUSY;
++ scbp->next = NULL;
++ if (host->last_busy != NULL) {
++ host->last_busy->next = scbp;
++ host->last_busy = scbp;
+ } else {
+- pCurHcb->HCS_FirstBusy = scbp;
+- pCurHcb->HCS_LastBusy = scbp;
++ host->first_busy = scbp;
++ host->last_busy = scbp;
+ }
+ }
+
+ /***************************************************************************/
+-static SCB *tul_pop_busy_scb(HCS * pCurHcb)
++static struct scsi_ctrl_blk *initio_pop_busy_scb(struct initio_host * host)
+ {
+- SCB *pTmpScb;
++ struct scsi_ctrl_blk *tmp;
+
+
+- if ((pTmpScb = pCurHcb->HCS_FirstBusy) != NULL) {
+- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastBusy = NULL;
+- pTmpScb->SCB_NxtScb = NULL;
+- if (pTmpScb->SCB_TagMsg)
+- pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++ if ((tmp = host->first_busy) != NULL) {
++ if ((host->first_busy = tmp->next) == NULL)
++ host->last_busy = NULL;
++ tmp->next = NULL;
++ if (tmp->tagmsg)
++ host->act_tags[tmp->target]--;
+ else
+- pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY;
++ host->targets[tmp->target].flags &= ~TCF_BUSY;
+ }
+ #if DEBUG_QUEUE
+- printk("Pop busy SCB %lx; ", (ULONG) pTmpScb);
++ printk("Pop busy SCB %p; ", tmp);
+ #endif
+- return (pTmpScb);
++ return tmp;
+ }
+
+ /***************************************************************************/
+-static void tul_unlink_busy_scb(HCS * pCurHcb, SCB * pCurScb)
++static void initio_unlink_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+- SCB *pTmpScb, *pPrevScb;
++ struct scsi_ctrl_blk *tmp, *prev;
+
+ #if DEBUG_QUEUE
+- printk("unlink busy SCB %lx; ", (ULONG) pCurScb);
++ printk("unlink busy SCB %p; ", scb);
+ #endif
+
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;
+- while (pTmpScb != NULL) {
+- if (pCurScb == pTmpScb) { /* Unlink this SCB */
+- if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastBusy = NULL;
+- } else {
+- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+- if (pTmpScb == pCurHcb->HCS_LastBusy)
+- pCurHcb->HCS_LastBusy = pPrevScb;
+- }
+- pTmpScb->SCB_NxtScb = NULL;
+- if (pTmpScb->SCB_TagMsg)
+- pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++ prev = tmp = host->first_busy;
++ while (tmp != NULL) {
++ if (scb == tmp) { /* Unlink this SCB */
++ if (tmp == host->first_busy) {
++ if ((host->first_busy = tmp->next) == NULL)
++ host->last_busy = NULL;
++ } else {
++ prev->next = tmp->next;
++ if (tmp == host->last_busy)
++ host->last_busy = prev;
++ }
++ tmp->next = NULL;
++ if (tmp->tagmsg)
++ host->act_tags[tmp->target]--;
+ else
+- pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY;
++ host->targets[tmp->target].flags &= ~TCF_BUSY;
+ break;
+ }
+- pPrevScb = pTmpScb;
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ prev = tmp;
++ tmp = tmp->next;
+ }
+ return;
+ }
+
+-/***************************************************************************/
+-SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun)
++struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun)
+ {
+- SCB *pTmpScb, *pPrevScb;
+- WORD scbp_tarlun;
++ struct scsi_ctrl_blk *tmp, *prev;
++ u16 scbp_tarlun;
+
+
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;
+- while (pTmpScb != NULL) {
+- scbp_tarlun = (pTmpScb->SCB_Lun << 8) | (pTmpScb->SCB_Target);
++ prev = tmp = host->first_busy;
++ while (tmp != NULL) {
++ scbp_tarlun = (tmp->lun << 8) | (tmp->target);
+ if (scbp_tarlun == tarlun) { /* Unlink this SCB */
+ break;
+ }
+- pPrevScb = pTmpScb;
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ prev = tmp;
++ tmp = tmp->next;
+ }
+ #if DEBUG_QUEUE
+- printk("find busy SCB %lx; ", (ULONG) pTmpScb);
++ printk("find busy SCB %p; ", tmp);
+ #endif
+- return (pTmpScb);
++ return tmp;
+ }
+
+-/***************************************************************************/
+-static void tul_append_done_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_append_done_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+-
+ #if DEBUG_QUEUE
+- printk("append done SCB %lx; ", (ULONG) scbp);
++ printk("append done SCB %p; ", scbp);
+ #endif
+
+- scbp->SCB_Status = SCB_DONE;
+- scbp->SCB_NxtScb = NULL;
+- if (pCurHcb->HCS_LastDone != NULL) {
+- pCurHcb->HCS_LastDone->SCB_NxtScb = scbp;
+- pCurHcb->HCS_LastDone = scbp;
++ scbp->status = SCB_DONE;
++ scbp->next = NULL;
++ if (host->last_done != NULL) {
++ host->last_done->next = scbp;
++ host->last_done = scbp;
+ } else {
+- pCurHcb->HCS_FirstDone = scbp;
+- pCurHcb->HCS_LastDone = scbp;
++ host->first_done = scbp;
++ host->last_done = scbp;
+ }
+ }
+
+-/***************************************************************************/
+-SCB *tul_find_done_scb(HCS * pCurHcb)
++struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host)
+ {
+- SCB *pTmpScb;
+-
++ struct scsi_ctrl_blk *tmp;
+
+- if ((pTmpScb = pCurHcb->HCS_FirstDone) != NULL) {
+- if ((pCurHcb->HCS_FirstDone = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastDone = NULL;
+- pTmpScb->SCB_NxtScb = NULL;
++ if ((tmp = host->first_done) != NULL) {
++ if ((host->first_done = tmp->next) == NULL)
++ host->last_done = NULL;
++ tmp->next = NULL;
+ }
+ #if DEBUG_QUEUE
+- printk("find done SCB %lx; ", (ULONG) pTmpScb);
++	printk("find done SCB %p; ", tmp);
+ #endif
+- return (pTmpScb);
++ return tmp;
+ }
+
+-/***************************************************************************/
+-static int tul_abort_srb(HCS * pCurHcb, struct scsi_cmnd *srbp)
++static int initio_abort_srb(struct initio_host * host, struct scsi_cmnd *srbp)
+ {
+- ULONG flags;
+- SCB *pTmpScb, *pPrevScb;
++ unsigned long flags;
++ struct scsi_ctrl_blk *tmp, *prev;
+
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++ spin_lock_irqsave(&host->semaph_lock, flags);
+
+- if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++ if ((host->semaph == 0) && (host->active == NULL)) {
+ /* disable Jasmin SCSI Int */
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tulip_main(pCurHcb);
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
++ outb(0x1F, host->addr + TUL_Mask);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
++ /* FIXME: synchronize_irq needed ? */
++ tulip_main(host);
++ spin_lock_irqsave(&host->semaph_lock, flags);
++ host->semaph = 1;
++ outb(0x0F, host->addr + TUL_Mask);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_SNOOZE;
+ }
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend; /* Check Pend queue */
+- while (pTmpScb != NULL) {
++ prev = tmp = host->first_pending; /* Check Pend queue */
++ while (tmp != NULL) {
+ /* 07/27/98 */
+- if (pTmpScb->SCB_Srb == srbp) {
+- if (pTmpScb == pCurHcb->HCS_ActScb) {
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ if (tmp->srb == srbp) {
++ if (tmp == host->active) {
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_BUSY;
+- } else if (pTmpScb == pCurHcb->HCS_FirstPend) {
+- if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastPend = NULL;
+- } else {
+- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+- if (pTmpScb == pCurHcb->HCS_LastPend)
+- pCurHcb->HCS_LastPend = pPrevScb;
+- }
+- pTmpScb->SCB_HaStat = HOST_ABORTED;
+- pTmpScb->SCB_Flags |= SCF_DONE;
+- if (pTmpScb->SCB_Flags & SCF_POST)
+- (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb);
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ } else if (tmp == host->first_pending) {
++ if ((host->first_pending = tmp->next) == NULL)
++ host->last_pending = NULL;
++ } else {
++ prev->next = tmp->next;
++ if (tmp == host->last_pending)
++ host->last_pending = prev;
++ }
++ tmp->hastat = HOST_ABORTED;
++ tmp->flags |= SCF_DONE;
++ if (tmp->flags & SCF_POST)
++ (*tmp->post) ((u8 *) host, (u8 *) tmp);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_SUCCESS;
+ }
+- pPrevScb = pTmpScb;
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ prev = tmp;
++ tmp = tmp->next;
+ }
+
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */
+- while (pTmpScb != NULL) {
+-
+- if (pTmpScb->SCB_Srb == srbp) {
+-
+- if (pTmpScb == pCurHcb->HCS_ActScb) {
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ prev = tmp = host->first_busy; /* Check Busy queue */
++ while (tmp != NULL) {
++ if (tmp->srb == srbp) {
++ if (tmp == host->active) {
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_BUSY;
+- } else if (pTmpScb->SCB_TagMsg == 0) {
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ } else if (tmp->tagmsg == 0) {
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_BUSY;
+ } else {
+- pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
+- if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastBusy = NULL;
+- } else {
+- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+- if (pTmpScb == pCurHcb->HCS_LastBusy)
+- pCurHcb->HCS_LastBusy = pPrevScb;
+- }
+- pTmpScb->SCB_NxtScb = NULL;
+-
+-
+- pTmpScb->SCB_HaStat = HOST_ABORTED;
+- pTmpScb->SCB_Flags |= SCF_DONE;
+- if (pTmpScb->SCB_Flags & SCF_POST)
+- (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb);
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ host->act_tags[tmp->target]--;
++ if (tmp == host->first_busy) {
++ if ((host->first_busy = tmp->next) == NULL)
++ host->last_busy = NULL;
++ } else {
++ prev->next = tmp->next;
++ if (tmp == host->last_busy)
++ host->last_busy = prev;
++ }
++ tmp->next = NULL;
++
++
++ tmp->hastat = HOST_ABORTED;
++ tmp->flags |= SCF_DONE;
++ if (tmp->flags & SCF_POST)
++ (*tmp->post) ((u8 *) host, (u8 *) tmp);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return SCSI_ABORT_SUCCESS;
+ }
+ }
+- pPrevScb = pTmpScb;
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ prev = tmp;
++ tmp = tmp->next;
+ }
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+- return (SCSI_ABORT_NOT_RUNNING);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
++ return SCSI_ABORT_NOT_RUNNING;
+ }
+
+ /***************************************************************************/
+-static int tul_bad_seq(HCS * pCurHcb)
+-{
+- SCB *pCurScb;
+-
+- printk("tul_bad_seg c=%d\n", pCurHcb->HCS_Index);
+-
+- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) {
+- tul_unlink_busy_scb(pCurHcb, pCurScb);
+- pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+- pCurScb->SCB_TaStat = 0;
+- tul_append_done_scb(pCurHcb, pCurScb);
+- }
+- tul_stop_bm(pCurHcb);
+-
+- tul_reset_scsi(pCurHcb, 8); /* 7/29/98 */
+-
+- return (tul_post_scsi_rst(pCurHcb));
+-}
+-
+-#if 0
+-
+-/************************************************************************/
+-static int tul_device_reset(HCS * pCurHcb, struct scsi_cmnd *pSrb,
+- unsigned int target, unsigned int ResetFlags)
++static int initio_bad_seq(struct initio_host * host)
+ {
+- ULONG flags;
+- SCB *pScb;
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- if (ResetFlags & SCSI_RESET_ASYNCHRONOUS) {
+-
+- if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+- /* disable Jasmin SCSI Int */
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tulip_main(pCurHcb);
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- return SCSI_RESET_SNOOZE;
+- }
+- pScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */
+- while (pScb != NULL) {
+- if (pScb->SCB_Srb == pSrb)
+- break;
+- pScb = pScb->SCB_NxtScb;
+- }
+- if (pScb == NULL) {
+- printk("Unable to Reset - No SCB Found\n");
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+- return SCSI_RESET_NOT_RUNNING;
+- }
+- }
+- if ((pScb = tul_alloc_scb(pCurHcb)) == NULL) {
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+- return SCSI_RESET_NOT_RUNNING;
+- }
+- pScb->SCB_Opcode = BusDevRst;
+- pScb->SCB_Flags = SCF_POST;
+- pScb->SCB_Target = target;
+- pScb->SCB_Mode = 0;
+-
+- pScb->SCB_Srb = NULL;
+- if (ResetFlags & SCSI_RESET_SYNCHRONOUS) {
+- pScb->SCB_Srb = pSrb;
+- }
+- tul_push_pend_scb(pCurHcb, pScb); /* push this SCB to Pending queue */
++ struct scsi_ctrl_blk *scb;
+
+- if (pCurHcb->HCS_Semaph == 1) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+- /* disable Jasmin SCSI Int */
+- pCurHcb->HCS_Semaph = 0;
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tulip_main(pCurHcb);
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++	printk("initio_bad_seq c=%d\n", host->index);
+
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
++ if ((scb = host->active) != NULL) {
++ initio_unlink_busy_scb(host, scb);
++ scb->hastat = HOST_BAD_PHAS;
++ scb->tastat = 0;
++ initio_append_done_scb(host, scb);
+ }
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+- return SCSI_RESET_PENDING;
+-}
+-
+-static int tul_reset_scsi_bus(HCS * pCurHcb)
+-{
+- ULONG flags;
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+- pCurHcb->HCS_Semaph = 0;
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tul_stop_bm(pCurHcb);
+-
+- tul_reset_scsi(pCurHcb, 2); /* 7/29/98 */
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+- tul_post_scsi_rst(pCurHcb);
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tulip_main(pCurHcb);
+-
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+- return (SCSI_RESET_SUCCESS | SCSI_RESET_HOST_RESET);
++ initio_stop_bm(host);
++ initio_reset_scsi(host, 8); /* 7/29/98 */
++ return initio_post_scsi_rst(host);
+ }
+
+-#endif /* 0 */
+
+ /************************************************************************/
+-static void tul_exec_scb(HCS * pCurHcb, SCB * pCurScb)
++static void initio_exec_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+- ULONG flags;
++ unsigned long flags;
+
+- pCurScb->SCB_Mode = 0;
++ scb->mode = 0;
+
+- pCurScb->SCB_SGIdx = 0;
+- pCurScb->SCB_SGMax = pCurScb->SCB_SGLen;
++ scb->sgidx = 0;
++ scb->sgmax = scb->sglen;
+
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++ spin_lock_irqsave(&host->semaph_lock, flags);
+
+- tul_append_pend_scb(pCurHcb, pCurScb); /* Append this SCB to Pending queue */
++ initio_append_pend_scb(host, scb); /* Append this SCB to Pending queue */
+
+ /* VVVVV 07/21/98 */
+- if (pCurHcb->HCS_Semaph == 1) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+- /* disable Jasmin SCSI Int */
+- pCurHcb->HCS_Semaph = 0;
+-
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+- tulip_main(pCurHcb);
++ if (host->semaph == 1) {
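++		/* semaph == 1 means no one else is running the state
++		   machine: claim it, run tulip_main() with the chip
++		   interrupt masked, then restore both */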
++ /* Disable Jasmin SCSI Int */
++ outb(0x1F, host->addr + TUL_Mask);
++ host->semaph = 0;
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+
+- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++ tulip_main(host);
+
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
++ spin_lock_irqsave(&host->semaph_lock, flags);
++ host->semaph = 1;
++ outb(0x0F, host->addr + TUL_Mask);
+ }
+- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++ spin_unlock_irqrestore(&host->semaph_lock, flags);
+ return;
+ }
+
+ /***************************************************************************/
+-static int tul_isr(HCS * pCurHcb)
++static int initio_isr(struct initio_host * host)
+ {
+- /* Enter critical section */
+-
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_Int) & TSS_INT_PENDING) {
+- if (pCurHcb->HCS_Semaph == 1) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++ if (inb(host->addr + TUL_Int) & TSS_INT_PENDING) {
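++		/* The interrupt is ours: take the semaphore and run the
++		   state machine with the SCSI interrupt masked */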
++ if (host->semaph == 1) {
++ outb(0x1F, host->addr + TUL_Mask);
+ /* Disable Tulip SCSI Int */
+- pCurHcb->HCS_Semaph = 0;
++ host->semaph = 0;
+
+- tulip_main(pCurHcb);
++ tulip_main(host);
+
+- pCurHcb->HCS_Semaph = 1;
+- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+- return (1);
++ host->semaph = 1;
++ outb(0x0F, host->addr + TUL_Mask);
++ return 1;
+ }
+ }
+- return (0);
++ return 0;
+ }
+
+-/***************************************************************************/
+-int tulip_main(HCS * pCurHcb)
++static int tulip_main(struct initio_host * host)
+ {
+- SCB *pCurScb;
++ struct scsi_ctrl_blk *scb;
+
+ for (;;) {
++ tulip_scsi(host); /* Call tulip_scsi */
+
+- tulip_scsi(pCurHcb); /* Call tulip_scsi */
+-
+- while ((pCurScb = tul_find_done_scb(pCurHcb)) != NULL) { /* find done entry */
+- if (pCurScb->SCB_TaStat == INI_QUEUE_FULL) {
+- pCurHcb->HCS_MaxTags[pCurScb->SCB_Target] =
+- pCurHcb->HCS_ActTags[pCurScb->SCB_Target] - 1;
+- pCurScb->SCB_TaStat = 0;
+- tul_append_pend_scb(pCurHcb, pCurScb);
++ /* Walk the list of completed SCBs */
++ while ((scb = initio_find_done_scb(host)) != NULL) { /* find done entry */
++ if (scb->tastat == INI_QUEUE_FULL) {
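++				/* Target reported QUEUE FULL: lower the tag
++				   limit to one below what is outstanding and
++				   requeue this command */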
++ host->max_tags[scb->target] =
++ host->act_tags[scb->target] - 1;
++ scb->tastat = 0;
++ initio_append_pend_scb(host, scb);
+ continue;
+ }
+- if (!(pCurScb->SCB_Mode & SCM_RSENS)) { /* not in auto req. sense mode */
+- if (pCurScb->SCB_TaStat == 2) {
++ if (!(scb->mode & SCM_RSENS)) { /* not in auto req. sense mode */
++ if (scb->tastat == 2) {
+
+ /* clr sync. nego flag */
+
+- if (pCurScb->SCB_Flags & SCF_SENSE) {
+- BYTE len;
+- len = pCurScb->SCB_SenseLen;
++ if (scb->flags & SCF_SENSE) {
++ u8 len;
++ len = scb->senselen;
+ if (len == 0)
+ len = 1;
+- pCurScb->SCB_BufLen = pCurScb->SCB_SenseLen;
+- pCurScb->SCB_BufPtr = pCurScb->SCB_SensePtr;
+- pCurScb->SCB_Flags &= ~(SCF_SG | SCF_DIR); /* for xfer_data_in */
+-/* pCurScb->SCB_Flags |= SCF_NO_DCHK; */
+- /* so, we won't report worng direction in xfer_data_in,
++ scb->buflen = scb->senselen;
++ scb->bufptr = scb->senseptr;
++ scb->flags &= ~(SCF_SG | SCF_DIR); /* for xfer_data_in */
++ /* so, we won't report wrong direction in xfer_data_in,
+ and won't report HOST_DO_DU in state_6 */
+- pCurScb->SCB_Mode = SCM_RSENS;
+- pCurScb->SCB_Ident &= 0xBF; /* Disable Disconnect */
+- pCurScb->SCB_TagMsg = 0;
+- pCurScb->SCB_TaStat = 0;
+- pCurScb->SCB_CDBLen = 6;
+- pCurScb->SCB_CDB[0] = SCSICMD_RequestSense;
+- pCurScb->SCB_CDB[1] = 0;
+- pCurScb->SCB_CDB[2] = 0;
+- pCurScb->SCB_CDB[3] = 0;
+- pCurScb->SCB_CDB[4] = len;
+- pCurScb->SCB_CDB[5] = 0;
+- tul_push_pend_scb(pCurHcb, pCurScb);
++ scb->mode = SCM_RSENS;
++ scb->ident &= 0xBF; /* Disable Disconnect */
++ scb->tagmsg = 0;
++ scb->tastat = 0;
++ scb->cdblen = 6;
++ scb->cdb[0] = SCSICMD_RequestSense;
++ scb->cdb[1] = 0;
++ scb->cdb[2] = 0;
++ scb->cdb[3] = 0;
++ scb->cdb[4] = len;
++ scb->cdb[5] = 0;
++ initio_push_pend_scb(host, scb);
+ break;
+ }
+ }
+ } else { /* in request sense mode */
+
+- if (pCurScb->SCB_TaStat == 2) { /* check contition status again after sending
++			if (scb->tastat == 2) {	/* check condition status again after sending
+						  request sense cmd 0x3 */
+- pCurScb->SCB_HaStat = HOST_BAD_PHAS;
++ scb->hastat = HOST_BAD_PHAS;
+ }
+- pCurScb->SCB_TaStat = 2;
++ scb->tastat = 2;
+ }
+- pCurScb->SCB_Flags |= SCF_DONE;
+- if (pCurScb->SCB_Flags & SCF_POST) {
+- (*pCurScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pCurScb);
++ scb->flags |= SCF_DONE;
++ if (scb->flags & SCF_POST) {
++ /* FIXME: only one post method and lose casts */
++ (*scb->post) ((u8 *) host, (u8 *) scb);
+ }
+ } /* while */
+-
+ /* find_active: */
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0) & TSS_INT_PENDING)
++ if (inb(host->addr + TUL_SStatus0) & TSS_INT_PENDING)
+ continue;
+-
+- if (pCurHcb->HCS_ActScb) { /* return to OS and wait for xfer_done_ISR/Selected_ISR */
++ if (host->active) /* return to OS and wait for xfer_done_ISR/Selected_ISR */
+ return 1; /* return to OS, enable interrupt */
+- }
+ /* Check pending SCB */
+- if (tul_find_first_pend_scb(pCurHcb) == NULL) {
++ if (initio_find_first_pend_scb(host) == NULL)
+ return 1; /* return to OS, enable interrupt */
+- }
+ } /* End of for loop */
+ /* statement won't reach here */
+ }
+
+-
+-
+-
+-/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+-/***************************************************************************/
+-/***************************************************************************/
+-/***************************************************************************/
+-/***************************************************************************/
+-
+-/***************************************************************************/
+-void tulip_scsi(HCS * pCurHcb)
++static void tulip_scsi(struct initio_host * host)
+ {
+- SCB *pCurScb;
+- TCS *pCurTcb;
++ struct scsi_ctrl_blk *scb;
++ struct target_control *active_tc;
+
+ /* make sure to service interrupt asap */
+-
+- if ((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) & TSS_INT_PENDING) {
+-
+- pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK;
+- pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1);
+- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
+- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* SCSI bus reset detected */
+- int_tul_scsi_rst(pCurHcb);
++ if ((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING) {
++ host->phase = host->jsstatus0 & TSS_PH_MASK;
++ host->jsstatus1 = inb(host->addr + TUL_SStatus1);
++ host->jsint = inb(host->addr + TUL_SInt);
++ if (host->jsint & TSS_SCSIRST_INT) { /* SCSI bus reset detected */
++ int_initio_scsi_rst(host);
+ return;
+ }
+- if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if selected/reselected interrupt */
+- if (int_tul_resel(pCurHcb) == 0)
+- tul_next_state(pCurHcb);
++ if (host->jsint & TSS_RESEL_INT) { /* if selected/reselected interrupt */
++ if (int_initio_resel(host) == 0)
++ initio_next_state(host);
+ return;
+ }
+- if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) {
+- int_tul_busfree(pCurHcb);
++ if (host->jsint & TSS_SEL_TIMEOUT) {
++ int_initio_busfree(host);
+ return;
+ }
+- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */
+- int_tul_busfree(pCurHcb); /* unexpected bus free or sel timeout */
++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */
++ int_initio_busfree(host); /* unexpected bus free or sel timeout */
+ return;
+ }
+- if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */
+- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL)
+- tul_next_state(pCurHcb);
++ if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */
++ if ((scb = host->active) != NULL)
++ initio_next_state(host);
+ return;
+ }
+ }
+- if (pCurHcb->HCS_ActScb != NULL)
++ if (host->active != NULL)
+ return;
+
+- if ((pCurScb = tul_find_first_pend_scb(pCurHcb)) == NULL)
++ if ((scb = initio_find_first_pend_scb(host)) == NULL)
+ return;
+
+ /* program HBA's SCSI ID & target SCSI ID */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId,
+- (pCurHcb->HCS_SCSI_ID << 4) | (pCurScb->SCB_Target & 0x0F));
+- if (pCurScb->SCB_Opcode == ExecSCSI) {
+- pCurTcb = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
++ outb((host->scsi_id << 4) | (scb->target & 0x0F),
++ host->addr + TUL_SScsiId);
++ if (scb->opcode == ExecSCSI) {
++ active_tc = &host->targets[scb->target];
+
+- if (pCurScb->SCB_TagMsg)
+- pCurTcb->TCS_DrvFlags |= TCF_DRV_EN_TAG;
++ if (scb->tagmsg)
++ active_tc->drv_flags |= TCF_DRV_EN_TAG;
+ else
+- pCurTcb->TCS_DrvFlags &= ~TCF_DRV_EN_TAG;
++ active_tc->drv_flags &= ~TCF_DRV_EN_TAG;
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period);
+- if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */
+- tul_select_atn_stop(pCurHcb, pCurScb);
++ outb(active_tc->js_period, host->addr + TUL_SPeriod);
++ if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */
++ initio_select_atn_stop(host, scb);
+ } else {
+- if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */
+- tul_select_atn_stop(pCurHcb, pCurScb);
++ if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */
++ initio_select_atn_stop(host, scb);
+ } else {
+- if (pCurScb->SCB_TagMsg)
+- tul_select_atn3(pCurHcb, pCurScb);
++ if (scb->tagmsg)
++ initio_select_atn3(host, scb);
+ else
+- tul_select_atn(pCurHcb, pCurScb);
++ initio_select_atn(host, scb);
+ }
+ }
+- if (pCurScb->SCB_Flags & SCF_POLL) {
+- while (wait_tulip(pCurHcb) != -1) {
+- if (tul_next_state(pCurHcb) == -1)
++ if (scb->flags & SCF_POLL) {
++ while (wait_tulip(host) != -1) {
++ if (initio_next_state(host) == -1)
+ break;
+ }
+ }
+- } else if (pCurScb->SCB_Opcode == BusDevRst) {
+- tul_select_atn_stop(pCurHcb, pCurScb);
+- pCurScb->SCB_NxtStat = 8;
+- if (pCurScb->SCB_Flags & SCF_POLL) {
+- while (wait_tulip(pCurHcb) != -1) {
+- if (tul_next_state(pCurHcb) == -1)
++ } else if (scb->opcode == BusDevRst) {
++ initio_select_atn_stop(host, scb);
++ scb->next_state = 8;
++ if (scb->flags & SCF_POLL) {
++ while (wait_tulip(host) != -1) {
++ if (initio_next_state(host) == -1)
+ break;
+ }
+ }
+- } else if (pCurScb->SCB_Opcode == AbortCmd) {
+- if (tul_abort_srb(pCurHcb, pCurScb->SCB_Srb) != 0) {
+-
+-
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+-
+- tul_release_scb(pCurHcb, pCurScb);
++ } else if (scb->opcode == AbortCmd) {
++ if (initio_abort_srb(host, scb->srb) != 0) {
++ initio_unlink_pend_scb(host, scb);
++ initio_release_scb(host, scb);
+ } else {
+- pCurScb->SCB_Opcode = BusDevRst;
+- tul_select_atn_stop(pCurHcb, pCurScb);
+- pCurScb->SCB_NxtStat = 8;
++ scb->opcode = BusDevRst;
++ initio_select_atn_stop(host, scb);
++ scb->next_state = 8;
+ }
+-
+-/* 08/03/98 */
+ } else {
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+- pCurScb->SCB_HaStat = 0x16; /* bad command */
+- tul_append_done_scb(pCurHcb, pCurScb);
++ initio_unlink_pend_scb(host, scb);
++ scb->hastat = 0x16; /* bad command */
++ initio_append_done_scb(host, scb);
+ }
+ return;
+ }
+
++/**
++ * initio_next_state - Next SCSI state
++ * @host: InitIO host we are processing
++ *
++ * Progress the active command block along the state machine
++ * until we hit a state which we must wait for activity to occur.
++ *
++ * Returns zero or a negative code.
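++ *
++ * The state numbers correspond to the handlers below: 1 and 2 follow
++ * selection (with and without the stop), 3 runs until the CDB has been
++ * transferred, 4 and 5 manage the data phases and their completion,
++ * 6 handles the phases after data, 7 recovers from a phase error by
++ * flushing the FIFO, and 8 performs a bus device reset.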
++ */
+
+-/***************************************************************************/
+-int tul_next_state(HCS * pCurHcb)
++static int initio_next_state(struct initio_host * host)
+ {
+ int next;
+
+- next = pCurHcb->HCS_ActScb->SCB_NxtStat;
++ next = host->active->next_state;
+ for (;;) {
+ switch (next) {
+ case 1:
+- next = tul_state_1(pCurHcb);
++ next = initio_state_1(host);
+ break;
+ case 2:
+- next = tul_state_2(pCurHcb);
++ next = initio_state_2(host);
+ break;
+ case 3:
+- next = tul_state_3(pCurHcb);
++ next = initio_state_3(host);
+ break;
+ case 4:
+- next = tul_state_4(pCurHcb);
++ next = initio_state_4(host);
+ break;
+ case 5:
+- next = tul_state_5(pCurHcb);
++ next = initio_state_5(host);
+ break;
+ case 6:
+- next = tul_state_6(pCurHcb);
++ next = initio_state_6(host);
+ break;
+ case 7:
+- next = tul_state_7(pCurHcb);
++ next = initio_state_7(host);
+ break;
+ case 8:
+- return (tul_bus_device_reset(pCurHcb));
++ return initio_bus_device_reset(host);
+ default:
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+ if (next <= 0)
+ return next;
+@@ -1554,338 +1314,363 @@
+ }
+
+
+-/***************************************************************************/
+-/* sTate after selection with attention & stop */
+-int tul_state_1(HCS * pCurHcb)
++/**
++ * initio_state_1 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * Perform SCSI state processing for Select/Attention/Stop
++ */
++
++static int initio_state_1(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
+ #if DEBUG_STATE
+ printk("-s1-");
+ #endif
+
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+- tul_append_busy_scb(pCurHcb, pCurScb);
++ /* Move the SCB from pending to busy */
++ initio_unlink_pend_scb(host, scb);
++ initio_append_busy_scb(host, scb);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
++	outb(active_tc->sconfig0, host->addr + TUL_SConfig);
+ /* ATN on */
+- if (pCurHcb->HCS_Phase == MSG_OUT) {
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, (TSC_EN_BUS_IN | TSC_HW_RESELECT));
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+-
+- if (pCurScb->SCB_TagMsg) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId);
+- }
+- if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {
+-
+- pCurTcb->TCS_Flags |= TCF_WDTR_DONE;
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2); /* Extended msg length */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* Sync request */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* Start from 16 bits */
+- } else if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {
+-
+- pCurTcb->TCS_Flags |= TCF_SYNC_DONE;
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* extended msg length */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */
++ if (host->phase == MSG_OUT) {
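++		/* Queue the identify byte, any tag message and at most one
++		   pending WDTR/SDTR negotiation into the FIFO, then send */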
++ outb(TSC_EN_BUS_IN | TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
++ outb(scb->ident, host->addr + TUL_SFifo);
++
++ if (scb->tagmsg) {
++ outb(scb->tagmsg, host->addr + TUL_SFifo);
++ outb(scb->tagid, host->addr + TUL_SFifo);
++ }
++ if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {
++ active_tc->flags |= TCF_WDTR_DONE;
++ outb(MSG_EXTEND, host->addr + TUL_SFifo);
++ outb(2, host->addr + TUL_SFifo); /* Extended msg length */
++ outb(3, host->addr + TUL_SFifo); /* Sync request */
++ outb(1, host->addr + TUL_SFifo); /* Start from 16 bits */
++ } else if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {
++ active_tc->flags |= TCF_SYNC_DONE;
++ outb(MSG_EXTEND, host->addr + TUL_SFifo);
++ outb(3, host->addr + TUL_SFifo); /* extended msg length */
++ outb(1, host->addr + TUL_SFifo); /* sync request */
++ outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo);
++ outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */
+ }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+ }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+- return (3);
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal);
++ /* Into before CDB xfer */
++ return 3;
+ }
+
+
+-/***************************************************************************/
+-/* state after selection with attention */
+-/* state after selection with attention3 */
+-int tul_state_2(HCS * pCurHcb)
++/**
++ * initio_state_2 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * state after selection with attention
++ * state after selection with attention3
++ */
++
++static int initio_state_2(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
+ #if DEBUG_STATE
+ printk("-s2-");
+ #endif
+
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+- tul_append_busy_scb(pCurHcb, pCurScb);
++ initio_unlink_pend_scb(host, scb);
++ initio_append_busy_scb(host, scb);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
++ outb(active_tc->sconfig0, host->addr + TUL_SConfig);
+
+- if (pCurHcb->HCS_JSStatus1 & TSS_CMD_PH_CMP) {
+- return (4);
+- }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+- return (3);
++ if (host->jsstatus1 & TSS_CMD_PH_CMP)
++ return 4;
++
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal);
++ /* Into before CDB xfer */
++ return 3;
+ }
+
+-/***************************************************************************/
+-/* state before CDB xfer is done */
+-int tul_state_3(HCS * pCurHcb)
++/**
++ * initio_state_3 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * state before CDB xfer is done
++ */
++
++static int initio_state_3(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
+ int i;
+
+ #if DEBUG_STATE
+ printk("-s3-");
+ #endif
+ for (;;) {
+- switch (pCurHcb->HCS_Phase) {
++ switch (host->phase) {
+ case CMD_OUT: /* Command out phase */
+- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
+- if (pCurHcb->HCS_Phase == CMD_OUT) {
+- return (tul_bad_seq(pCurHcb));
+- }
+- return (4);
++ for (i = 0; i < (int) scb->cdblen; i++)
++ outb(scb->cdb[i], host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ if (host->phase == CMD_OUT)
++ return initio_bad_seq(host);
++ return 4;
+
+ case MSG_IN: /* Message in phase */
+- pCurScb->SCB_NxtStat = 3;
+- if (tul_msgin(pCurHcb) == -1)
+- return (-1);
++ scb->next_state = 3;
++ if (initio_msgin(host) == -1)
++ return -1;
+ break;
+
+ case STATUS_IN: /* Status phase */
+- if (tul_status_msg(pCurHcb) == -1)
+- return (-1);
++ if (initio_status_msg(host) == -1)
++ return -1;
+ break;
+
+ case MSG_OUT: /* Message out phase */
+- if (pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) {
++ if (active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) {
++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ } else {
++ active_tc->flags |= TCF_SYNC_DONE;
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
+-
+- } else {
+- pCurTcb->TCS_Flags |= TCF_SYNC_DONE;
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* ext. msg len */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7));
++ outb(MSG_EXTEND, host->addr + TUL_SFifo);
++ outb(3, host->addr + TUL_SFifo); /* ext. msg len */
++ outb(1, host->addr + TUL_SFifo); /* sync request */
++ outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo);
++ outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ outb(inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7), host->addr + TUL_SSignal);
+
+ }
+ break;
+-
+ default:
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+ }
+ }
+
+-
+-/***************************************************************************/
+-int tul_state_4(HCS * pCurHcb)
++/**
++ * initio_state_4 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * SCSI state machine. State 4
++ */
++
++static int initio_state_4(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+
+ #if DEBUG_STATE
+ printk("-s4-");
+ #endif
+- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_NO_XF) {
+- return (6); /* Go to state 6 */
++ if ((scb->flags & SCF_DIR) == SCF_NO_XF) {
++ return 6; /* Go to state 6 (After data) */
+ }
+ for (;;) {
+- if (pCurScb->SCB_BufLen == 0)
+- return (6); /* Go to state 6 */
++ if (scb->buflen == 0)
++ return 6;
+
+- switch (pCurHcb->HCS_Phase) {
++ switch (host->phase) {
+
+ case STATUS_IN: /* Status phase */
+- if ((pCurScb->SCB_Flags & SCF_DIR) != 0) { /* if direction bit set then report data underrun */
+- pCurScb->SCB_HaStat = HOST_DO_DU;
+- }
+- if ((tul_status_msg(pCurHcb)) == -1)
+- return (-1);
++ if ((scb->flags & SCF_DIR) != 0) /* if direction bit set then report data underrun */
++ scb->hastat = HOST_DO_DU;
++ if ((initio_status_msg(host)) == -1)
++ return -1;
+ break;
+
+ case MSG_IN: /* Message in phase */
+- pCurScb->SCB_NxtStat = 0x4;
+- if (tul_msgin(pCurHcb) == -1)
+- return (-1);
++ scb->next_state = 0x4;
++ if (initio_msgin(host) == -1)
++ return -1;
+ break;
+
+ case MSG_OUT: /* Message out phase */
+- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+- pCurScb->SCB_BufLen = 0;
+- pCurScb->SCB_HaStat = HOST_DO_DU;
+- if (tul_msgout_ide(pCurHcb) == -1)
+- return (-1);
+- return (6); /* Go to state 6 */
+- } else {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
++ if (host->jsstatus0 & TSS_PAR_ERROR) {
++ scb->buflen = 0;
++ scb->hastat = HOST_DO_DU;
++ if (initio_msgout_ide(host) == -1)
++ return -1;
++ return 6;
++ } else {
++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+ }
+ break;
+
+ case DATA_IN: /* Data in phase */
+- return (tul_xfer_data_in(pCurHcb));
++ return initio_xfer_data_in(host);
+
+ case DATA_OUT: /* Data out phase */
+- return (tul_xfer_data_out(pCurHcb));
++ return initio_xfer_data_out(host);
+
+ default:
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+ }
+ }
+
+
+-/***************************************************************************/
+-/* state after dma xfer done or phase change before xfer done */
+-int tul_state_5(HCS * pCurHcb)
++/**
++ * initio_state_5 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * State after dma xfer done or phase change before xfer done
++ */
++
++static int initio_state_5(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+ long cnt, xcnt; /* cannot use unsigned !! code: if (xcnt < 0) */
+
+ #if DEBUG_STATE
+ printk("-s5-");
+ #endif
+-/*------ get remaining count -------*/
++ /*------ get remaining count -------*/
++ cnt = inl(host->addr + TUL_SCnt0) & 0x0FFFFFF;
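++	/* only the low 24 bits of the count register are significant */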
+
+- cnt = TUL_RDLONG(pCurHcb->HCS_Base, TUL_SCnt0) & 0x0FFFFFF;
+-
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_XCmd) & 0x20) {
++ if (inb(host->addr + TUL_XCmd) & 0x20) {
+ /* ----------------------- DATA_IN ----------------------------- */
+ /* check scsi parity error */
+- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+- pCurScb->SCB_HaStat = HOST_DO_DU;
+- }
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */
++ if (host->jsstatus0 & TSS_PAR_ERROR)
++ scb->hastat = HOST_DO_DU;
++ if (inb(host->addr + TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */
+ /* tell Hardware scsi xfer has been terminated */
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, TUL_RD(pCurHcb->HCS_Base, TUL_XCtrl) | 0x80);
++ outb(inb(host->addr + TUL_XCtrl) | 0x80, host->addr + TUL_XCtrl);
+ /* wait until DMA xfer not pending */
+- while (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND);
++ while (inb(host->addr + TUL_XStatus) & XPEND)
++ cpu_relax();
+ }
+ } else {
+-/*-------- DATA OUT -----------*/
+- if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) {
+- if (pCurHcb->HCS_ActTcs->TCS_JS_Period & TSC_WIDE_SCSI)
+- cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F) << 1;
++ /*-------- DATA OUT -----------*/
++ if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) {
++ if (host->active_tc->js_period & TSC_WIDE_SCSI)
++ cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F) << 1;
+ else
+- cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F);
++ cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F);
+ }
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT);
++ if (inb(host->addr + TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */
++ outb(TAX_X_ABT, host->addr + TUL_XCmd);
+ /* wait Abort DMA xfer done */
+- while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0);
+- }
+- if ((cnt == 1) && (pCurHcb->HCS_Phase == DATA_OUT)) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1) {
+- return (-1);
++ while ((inb(host->addr + TUL_Int) & XABT) == 0)
++ cpu_relax();
+ }
++ if ((cnt == 1) && (host->phase == DATA_OUT)) {
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+ cnt = 0;
+ } else {
+- if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0)
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++ if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0)
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ }
+ }
+-
+ if (cnt == 0) {
+- pCurScb->SCB_BufLen = 0;
+- return (6); /* Go to state 6 */
++ scb->buflen = 0;
++ return 6; /* After Data */
+ }
+ /* Update active data pointer */
+- xcnt = (long) pCurScb->SCB_BufLen - cnt; /* xcnt== bytes already xferred */
+- pCurScb->SCB_BufLen = (U32) cnt; /* cnt == bytes left to be xferred */
+- if (pCurScb->SCB_Flags & SCF_SG) {
+- register SG *sgp;
+- ULONG i;
+-
+- sgp = &pCurScb->SCB_SGList[pCurScb->SCB_SGIdx];
+- for (i = pCurScb->SCB_SGIdx; i < pCurScb->SCB_SGMax; sgp++, i++) {
+- xcnt -= (long) sgp->SG_Len;
++ xcnt = (long) scb->buflen - cnt; /* xcnt== bytes already xferred */
++ scb->buflen = (u32) cnt; /* cnt == bytes left to be xferred */
++ if (scb->flags & SCF_SG) {
++ struct sg_entry *sgp;
++ unsigned long i;
++
++ sgp = &scb->sglist[scb->sgidx];
++ for (i = scb->sgidx; i < scb->sgmax; sgp++, i++) {
++ xcnt -= (long) sgp->len;
+ if (xcnt < 0) { /* this sgp xfer half done */
+- xcnt += (long) sgp->SG_Len; /* xcnt == bytes xferred in this sgp */
+- sgp->SG_Ptr += (U32) xcnt; /* new ptr to be xfer */
+- sgp->SG_Len -= (U32) xcnt; /* new len to be xfer */
+- pCurScb->SCB_BufPtr += ((U32) (i - pCurScb->SCB_SGIdx) << 3);
++ xcnt += (long) sgp->len; /* xcnt == bytes xferred in this sgp */
++ sgp->data += (u32) xcnt; /* new ptr to be xfer */
++ sgp->len -= (u32) xcnt; /* new len to be xfer */
++ scb->bufptr += ((u32) (i - scb->sgidx) << 3);
+ /* new SG table ptr */
+- pCurScb->SCB_SGLen = (BYTE) (pCurScb->SCB_SGMax - i);
++ scb->sglen = (u8) (scb->sgmax - i);
+ /* new SG table len */
+- pCurScb->SCB_SGIdx = (WORD) i;
++ scb->sgidx = (u16) i;
+ /* for next disc and come in this loop */
+- return (4); /* Go to state 4 */
++ return 4; /* Go to state 4 */
+ }
+ /* else (xcnt >= 0 , i.e. this sgp already xferred */
+ } /* for */
+- return (6); /* Go to state 6 */
++ return 6; /* Go to state 6 */
+ } else {
+- pCurScb->SCB_BufPtr += (U32) xcnt;
++ scb->bufptr += (u32) xcnt;
+ }
+- return (4); /* Go to state 4 */
++ return 4; /* Go to state 4 */
+ }
+
+-/***************************************************************************/
+-/* state after Data phase */
+-int tul_state_6(HCS * pCurHcb)
++/**
++ * initio_state_6 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
++ * State after Data phase
++ */
++
++static int initio_state_6(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+
+ #if DEBUG_STATE
+ printk("-s6-");
+ #endif
+ for (;;) {
+- switch (pCurHcb->HCS_Phase) {
++ switch (host->phase) {
+ case STATUS_IN: /* Status phase */
+- if ((tul_status_msg(pCurHcb)) == -1)
+- return (-1);
++ if ((initio_status_msg(host)) == -1)
++ return -1;
+ break;
+
+ case MSG_IN: /* Message in phase */
+- pCurScb->SCB_NxtStat = 6;
+- if ((tul_msgin(pCurHcb)) == -1)
+- return (-1);
++ scb->next_state = 6;
++ if ((initio_msgin(host)) == -1)
++ return -1;
+ break;
+
+ case MSG_OUT: /* Message out phase */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+ break;
+
+ case DATA_IN: /* Data in phase */
+- return (tul_xpad_in(pCurHcb));
++ return initio_xpad_in(host);
+
+ case DATA_OUT: /* Data out phase */
+- return (tul_xpad_out(pCurHcb));
++ return initio_xpad_out(host);
+
+ default:
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+ }
+ }
+
+-/***************************************************************************/
+-int tul_state_7(HCS * pCurHcb)
++/**
++ * initio_state_7 - SCSI state machine
++ * @host: InitIO host we are controlling
++ *
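++ * Flush any bytes left in the SCSI FIFO. Reaching this state from a
++ * data phase is a sequence error; otherwise continue with state 6.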
++ */
++
++int initio_state_7(struct initio_host * host)
+ {
+ int cnt, i;
+
+@@ -1893,1139 +1678,1029 @@
+ printk("-s7-");
+ #endif
+ /* flush SCSI FIFO */
+- cnt = TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F;
++ cnt = inb(host->addr + TUL_SFifoCnt) & 0x1F;
+ if (cnt) {
+ for (i = 0; i < cnt; i++)
+- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++ inb(host->addr + TUL_SFifo);
+ }
+- switch (pCurHcb->HCS_Phase) {
++ switch (host->phase) {
+ case DATA_IN: /* Data in phase */
+ case DATA_OUT: /* Data out phase */
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ default:
+- return (6); /* Go to state 6 */
++ return 6; /* Go to state 6 */
+ }
+ }
+
+-/***************************************************************************/
+-int tul_xfer_data_in(HCS * pCurHcb)
++/**
++ * initio_xfer_data_in - Commence data input
++ * @host: InitIO host in use
++ *
++ * Commence a block of data transfer. The transfer itself will
++ * be managed by the controller and we will get a completion (or
++ * failure) interrupt.
++ */
++static int initio_xfer_data_in(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+
+- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DOUT) {
+- return (6); /* wrong direction */
+- }
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen);
++ if ((scb->flags & SCF_DIR) == SCF_DOUT)
++ return 6; /* wrong direction */
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_IN); /* 7/25/95 */
++ outl(scb->buflen, host->addr + TUL_SCnt0);
++ outb(TSC_XF_DMA_IN, host->addr + TUL_SCmd); /* 7/25/95 */
+
+- if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3);
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_IN);
++ if (scb->flags & SCF_SG) { /* S/G xfer */
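++		/* Hand the DMA engine the SG table: entries are 8 bytes
++		   each (32-bit pointer plus 32-bit length), so sglen << 3 */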
++ outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH);
++ outl(scb->bufptr, host->addr + TUL_XAddH);
++ outb(TAX_SG_IN, host->addr + TUL_XCmd);
+ } else {
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen);
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_IN);
++ outl(scb->buflen, host->addr + TUL_XCntH);
++ outl(scb->bufptr, host->addr + TUL_XAddH);
++ outb(TAX_X_IN, host->addr + TUL_XCmd);
+ }
+- pCurScb->SCB_NxtStat = 0x5;
+- return (0); /* return to OS, wait xfer done , let jas_isr come in */
++ scb->next_state = 0x5;
++ return 0; /* return to OS, wait xfer done , let jas_isr come in */
+ }
+
++/**
++ * initio_xfer_data_out - Commence data output
++ * @host: InitIO host in use
++ *
++ * Commence a block of data transfer. The transfer itself will
++ * be managed by the controller and we will get a completion (or
++ * failure) interrupt.
++ */
+
+-/***************************************************************************/
+-int tul_xfer_data_out(HCS * pCurHcb)
++static int initio_xfer_data_out(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+
+- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DIN) {
+- return (6); /* wrong direction */
+- }
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_OUT);
++ if ((scb->flags & SCF_DIR) == SCF_DIN)
++ return 6; /* wrong direction */
+
+- if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3);
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_OUT);
++ outl(scb->buflen, host->addr + TUL_SCnt0);
++ outb(TSC_XF_DMA_OUT, host->addr + TUL_SCmd);
++
++ if (scb->flags & SCF_SG) { /* S/G xfer */
++ outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH);
++ outl(scb->bufptr, host->addr + TUL_XAddH);
++ outb(TAX_SG_OUT, host->addr + TUL_XCmd);
+ } else {
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen);
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_OUT);
++ outl(scb->buflen, host->addr + TUL_XCntH);
++ outl(scb->bufptr, host->addr + TUL_XAddH);
++ outb(TAX_X_OUT, host->addr + TUL_XCmd);
+ }
+
+- pCurScb->SCB_NxtStat = 0x5;
+- return (0); /* return to OS, wait xfer done , let jas_isr come in */
++ scb->next_state = 0x5;
++ return 0; /* return to OS, wait xfer done , let jas_isr come in */
+ }
+
+-
+-/***************************************************************************/
+-int tul_xpad_in(HCS * pCurHcb)
++int initio_xpad_in(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
+
+- if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) {
+- pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */
+- }
++ if ((scb->flags & SCF_DIR) != SCF_NO_DCHK)
++ scb->hastat = HOST_DO_DU; /* over run */
+ for (;;) {
+- if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI)
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2);
++ if (active_tc->js_period & TSC_WIDE_SCSI)
++ outl(2, host->addr + TUL_SCnt0);
+ else
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
++ outl(1, host->addr + TUL_SCnt0);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if ((wait_tulip(pCurHcb)) == -1) {
+- return (-1);
+- }
+- if (pCurHcb->HCS_Phase != DATA_IN) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- return (6);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ if (host->phase != DATA_IN) {
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ return 6;
+ }
+- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++ inb(host->addr + TUL_SFifo);
+ }
+ }
+
+-int tul_xpad_out(HCS * pCurHcb)
++int initio_xpad_out(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
+
+- if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) {
+- pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */
+- }
++ if ((scb->flags & SCF_DIR) != SCF_NO_DCHK)
++ scb->hastat = HOST_DO_DU; /* over run */
+ for (;;) {
+- if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI)
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2);
++ if (active_tc->js_period & TSC_WIDE_SCSI)
++ outl(2, host->addr + TUL_SCnt0);
+ else
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
++ outl(1, host->addr + TUL_SCnt0);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- if ((wait_tulip(pCurHcb)) == -1) {
+- return (-1);
+- }
+- if (pCurHcb->HCS_Phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- return (6);
++ outb(0, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ if ((wait_tulip(host)) == -1)
++ return -1;
++ if (host->phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ return 6;
+ }
+ }
+ }
+
+-
+-/***************************************************************************/
+-int tul_status_msg(HCS * pCurHcb)
++int initio_status_msg(struct initio_host * host)
+ { /* status & MSG_IN */
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- BYTE msg;
++ struct scsi_ctrl_blk *scb = host->active;
++ u8 msg;
++
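++	/* Tell the chip to run the command-complete sequence; the status
++	   byte ends up in the FIFO and is read back below */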
++ outb(TSC_CMD_COMP, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_CMD_COMP);
+- if ((wait_tulip(pCurHcb)) == -1) {
+- return (-1);
+- }
+ /* get status */
+- pCurScb->SCB_TaStat = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++ scb->tastat = inb(host->addr + TUL_SFifo);
+
+- if (pCurHcb->HCS_Phase == MSG_OUT) {
+- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY);
+- } else {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP);
+- }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
+- }
+- if (pCurHcb->HCS_Phase == MSG_IN) {
+- msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { /* Parity error */
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
+- if (pCurHcb->HCS_Phase != MSG_OUT)
+- return (tul_bad_seq(pCurHcb));
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
++ if (host->phase == MSG_OUT) {
++ if (host->jsstatus0 & TSS_PAR_ERROR)
++ outb(MSG_PARITY, host->addr + TUL_SFifo);
++ else
++ outb(MSG_NOP, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
++ }
++ if (host->phase == MSG_IN) {
++ msg = inb(host->addr + TUL_SFifo);
++ if (host->jsstatus0 & TSS_PAR_ERROR) { /* Parity error */
++ if ((initio_msgin_accept(host)) == -1)
++ return -1;
++ if (host->phase != MSG_OUT)
++ return initio_bad_seq(host);
++ outb(MSG_PARITY, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+ if (msg == 0) { /* Command complete */
+
+- if ((pCurScb->SCB_TaStat & 0x18) == 0x10) { /* No link support */
+- return (tul_bad_seq(pCurHcb));
+- }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+- return tul_wait_done_disc(pCurHcb);
++ if ((scb->tastat & 0x18) == 0x10) /* No link support */
++ return initio_bad_seq(host);
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++ return initio_wait_done_disc(host);
+
+ }
+- if ((msg == MSG_LINK_COMP) || (msg == MSG_LINK_FLAG)) {
+- if ((pCurScb->SCB_TaStat & 0x18) == 0x10)
+- return (tul_msgin_accept(pCurHcb));
++ if (msg == MSG_LINK_COMP || msg == MSG_LINK_FLAG) {
++ if ((scb->tastat & 0x18) == 0x10)
++ return initio_msgin_accept(host);
+ }
+ }
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+
+
+-/***************************************************************************/
+ /* scsi bus free */
+-int int_tul_busfree(HCS * pCurHcb)
++int int_initio_busfree(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
++ struct scsi_ctrl_blk *scb = host->active;
+
+- if (pCurScb != NULL) {
+- if (pCurScb->SCB_Status & SCB_SELECT) { /* selection timeout */
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+- pCurScb->SCB_HaStat = HOST_SEL_TOUT;
+- tul_append_done_scb(pCurHcb, pCurScb);
++ if (scb != NULL) {
++ if (scb->status & SCB_SELECT) { /* selection timeout */
++ initio_unlink_pend_scb(host, scb);
++ scb->hastat = HOST_SEL_TOUT;
++ initio_append_done_scb(host, scb);
+ } else { /* Unexpected bus free */
+- tul_unlink_busy_scb(pCurHcb, pCurScb);
+- pCurScb->SCB_HaStat = HOST_BUS_FREE;
+- tul_append_done_scb(pCurHcb, pCurScb);
+- }
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
+- }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */
+- return (-1);
++ initio_unlink_busy_scb(host, scb);
++ scb->hastat = HOST_BUS_FREE;
++ initio_append_done_scb(host, scb);
++ }
++ host->active = NULL;
++ host->active_tc = NULL;
++ }
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */
++ return -1;
+ }
+
+
+-/***************************************************************************/
+-/* scsi bus reset */
+-static int int_tul_scsi_rst(HCS * pCurHcb)
++/**
++ * int_initio_scsi_rst - SCSI reset occurred
++ * @host: Host seeing the reset
++ *
++ * A SCSI bus reset has occurred. Clean up any pending transfer
++ * the hardware is doing by DMA and then abort all active and
++ * disconnected commands. The mid layer should sort the rest out
++ * for us.
++ */
++
++static int int_initio_scsi_rst(struct initio_host * host)
+ {
+- SCB *pCurScb;
++ struct scsi_ctrl_blk *scb;
+ int i;
+
+ /* if DMA xfer is pending, abort DMA xfer */
+- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & 0x01) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO);
++ if (inb(host->addr + TUL_XStatus) & 0x01) {
++ outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd);
+ /* wait Abort DMA xfer done */
+- while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & 0x04) == 0);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++ while ((inb(host->addr + TUL_Int) & 0x04) == 0)
++ cpu_relax();
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ }
+ /* Abort all active & disconnected scb */
+- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+- pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+- tul_append_done_scb(pCurHcb, pCurScb);
++ while ((scb = initio_pop_busy_scb(host)) != NULL) {
++ scb->hastat = HOST_BAD_PHAS;
++ initio_append_done_scb(host, scb);
+ }
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
++ host->active = NULL;
++ host->active_tc = NULL;
+
+ /* clr sync nego. done flag */
+- for (i = 0; i < pCurHcb->HCS_MaxTar; i++) {
+- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+- }
+- return (-1);
++ for (i = 0; i < host->max_tar; i++)
++ host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++ return -1;
+ }
+
++/**
++ * int_initio_resel - Reselection occurred
++ * @host: InitIO host adapter
++ *
++ * A SCSI reselection event has been signalled and the interrupt
++ * is now being processed. Work out which command block needs attention
++ * and continue processing that command.
++ */
+
+-/***************************************************************************/
+-/* scsi reselection */
+-int int_tul_resel(HCS * pCurHcb)
++int int_initio_resel(struct initio_host * host)
+ {
+- SCB *pCurScb;
+- TCS *pCurTcb;
+- BYTE tag, msg = 0;
+- BYTE tar, lun;
+-
+- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) {
+- if (pCurScb->SCB_Status & SCB_SELECT) { /* if waiting for selection complete */
+- pCurScb->SCB_Status &= ~SCB_SELECT;
+- }
+- pCurHcb->HCS_ActScb = NULL;
++ struct scsi_ctrl_blk *scb;
++ struct target_control *active_tc;
++ u8 tag, msg = 0;
++ u8 tar, lun;
++
++ if ((scb = host->active) != NULL) {
++ /* FIXME: Why check and not just clear? */
++ if (scb->status & SCB_SELECT) /* if waiting for selection complete */
++ scb->status &= ~SCB_SELECT;
++ host->active = NULL;
+ }
+ /* --------- get target id---------------------- */
+- tar = TUL_RD(pCurHcb->HCS_Base, TUL_SBusId);
++ tar = inb(host->addr + TUL_SBusId);
+ /* ------ get LUN from Identify message----------- */
+- lun = TUL_RD(pCurHcb->HCS_Base, TUL_SIdent) & 0x0F;
++ lun = inb(host->addr + TUL_SIdent) & 0x0F;
+ /* 07/22/98 from 0x1F -> 0x0F */
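++ /*
++ * Added note: the IDENTIFY message byte has bit 7 set with the
++ * logical unit number carried in its low bits, so masking with
++ * 0x0F is enough to recover the LUN here.
++ */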
+- pCurTcb = &pCurHcb->HCS_Tcs[tar];
+- pCurHcb->HCS_ActTcs = pCurTcb;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period);
+-
++ active_tc = &host->targets[tar];
++ host->active_tc = active_tc;
++ outb(active_tc->sconfig0, host->addr + TUL_SConfig);
++ outb(active_tc->js_period, host->addr + TUL_SPeriod);
+
+ /* ------------- tag queueing ? ------------------- */
+- if (pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG) {
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
+- if (pCurHcb->HCS_Phase != MSG_IN)
++ if (active_tc->drv_flags & TCF_DRV_EN_TAG) {
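++ /*
++ * Added commentary: with tagged queueing the target follows its
++ * IDENTIFY with a two byte queue tag message, so the code below
++ * pulls one byte at a time through the FIFO - first the message
++ * code, then the tag id used to index host->scb. MSG_STAG..MSG_OTAG
++ * is the contiguous range of the SIMPLE, HEAD OF QUEUE and ORDERED
++ * tag codes; anything else means no tag was sent and we fall back
++ * to looking the command up by target/lun.
++ */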
++ if ((initio_msgin_accept(host)) == -1)
++ return -1;
++ if (host->phase != MSG_IN)
+ goto no_tag;
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if ((wait_tulip(pCurHcb)) == -1)
+- return (-1);
+- msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag Message */
++ outl(1, host->addr + TUL_SCnt0);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ msg = inb(host->addr + TUL_SFifo); /* Read Tag Message */
+
+- if ((msg < MSG_STAG) || (msg > MSG_OTAG)) /* Is simple Tag */
++ if (msg < MSG_STAG || msg > MSG_OTAG) /* Not a tag message */
+ goto no_tag;
+
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
++ if (initio_msgin_accept(host) == -1)
++ return -1;
+
+- if (pCurHcb->HCS_Phase != MSG_IN)
++ if (host->phase != MSG_IN)
+ goto no_tag;
+
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if ((wait_tulip(pCurHcb)) == -1)
+- return (-1);
+- tag = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag ID */
+- pCurScb = pCurHcb->HCS_Scb + tag;
+- if ((pCurScb->SCB_Target != tar) || (pCurScb->SCB_Lun != lun)) {
+- return tul_msgout_abort_tag(pCurHcb);
+- }
+- if (pCurScb->SCB_Status != SCB_BUSY) { /* 03/24/95 */
+- return tul_msgout_abort_tag(pCurHcb);
+- }
+- pCurHcb->HCS_ActScb = pCurScb;
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
++ outl(1, host->addr + TUL_SCnt0);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ tag = inb(host->addr + TUL_SFifo); /* Read Tag ID */
++ scb = host->scb + tag;
++ if (scb->target != tar || scb->lun != lun) {
++ return initio_msgout_abort_tag(host);
++ }
++ if (scb->status != SCB_BUSY) { /* 03/24/95 */
++ return initio_msgout_abort_tag(host);
++ }
++ host->active = scb;
++ if ((initio_msgin_accept(host)) == -1)
++ return -1;
+ } else { /* No tag */
+ no_tag:
+- if ((pCurScb = tul_find_busy_scb(pCurHcb, tar | (lun << 8))) == NULL) {
+- return tul_msgout_abort_targ(pCurHcb);
++ if ((scb = initio_find_busy_scb(host, tar | (lun << 8))) == NULL) {
++ return initio_msgout_abort_targ(host);
+ }
+- pCurHcb->HCS_ActScb = pCurScb;
+- if (!(pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG)) {
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
++ host->active = scb;
++ if (!(active_tc->drv_flags & TCF_DRV_EN_TAG)) {
++ if ((initio_msgin_accept(host)) == -1)
++ return -1;
+ }
+ }
+ return 0;
+ }
+
++/**
++ * int_initio_bad_seq - out of phase
++ * @host: InitIO host flagging event
++ *
++ * We have ended up out of phase somehow. Reset the host controller
++ * and throw all our toys out of the pram. Let the midlayer clean up.
++ */
+
+-/***************************************************************************/
+-static int int_tul_bad_seq(HCS * pCurHcb)
++static int int_initio_bad_seq(struct initio_host * host)
+ { /* target wrong phase */
+- SCB *pCurScb;
++ struct scsi_ctrl_blk *scb;
+ int i;
+
+- tul_reset_scsi(pCurHcb, 10);
++ initio_reset_scsi(host, 10);
+
+- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+- pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+- tul_append_done_scb(pCurHcb, pCurScb);
++ while ((scb = initio_pop_busy_scb(host)) != NULL) {
++ scb->hastat = HOST_BAD_PHAS;
++ initio_append_done_scb(host, scb);
+ }
+- for (i = 0; i < pCurHcb->HCS_MaxTar; i++) {
+- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+- }
+- return (-1);
++ for (i = 0; i < host->max_tar; i++)
++ host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++ return -1;
+ }
+
+
+-/***************************************************************************/
+-int tul_msgout_abort_targ(HCS * pCurHcb)
++/**
++ * initio_msgout_abort_targ - abort a target
++ * @host: InitIO host
++ *
++ * Abort when the target/lun does not match or when our SCB is not
++ * busy. Used by untagged commands.
++ */
++
++static int initio_msgout_abort_targ(struct initio_host * host)
+ {
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+- if (tul_msgin_accept(pCurHcb) == -1)
+- return (-1);
+- if (pCurHcb->HCS_Phase != MSG_OUT)
+- return (tul_bad_seq(pCurHcb));
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++ if (initio_msgin_accept(host) == -1)
++ return -1;
++ if (host->phase != MSG_OUT)
++ return initio_bad_seq(host);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
++ outb(MSG_ABORT, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
+
+- return tul_wait_disc(pCurHcb);
++ return initio_wait_disc(host);
+ }
+
+-/***************************************************************************/
+-int tul_msgout_abort_tag(HCS * pCurHcb)
++/**
++ * initio_msgout_abort_tag - abort a tag
++ * @host: InitIO host
++ *
++ * Abort when the target/lun does not match or when our SCB is not
++ * busy. Used for tagged commands.
++ */
++
++static int initio_msgout_abort_tag(struct initio_host * host)
+ {
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+- if (tul_msgin_accept(pCurHcb) == -1)
+- return (-1);
+- if (pCurHcb->HCS_Phase != MSG_OUT)
+- return (tul_bad_seq(pCurHcb));
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++ if (initio_msgin_accept(host) == -1)
++ return -1;
++ if (host->phase != MSG_OUT)
++ return initio_bad_seq(host);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT_TAG);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
++ outb(MSG_ABORT_TAG, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
+
+- return tul_wait_disc(pCurHcb);
++ return initio_wait_disc(host);
+
+ }
+
+-/***************************************************************************/
+-int tul_msgin(HCS * pCurHcb)
++/**
++ * initio_msgin - Message in
++ * @host: InitIO Host
++ *
++ * Process incoming message
++ */
++static int initio_msgin(struct initio_host * host)
+ {
+- TCS *pCurTcb;
++ struct target_control *active_tc;
+
+ for (;;) {
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if ((wait_tulip(pCurHcb)) == -1)
+- return (-1);
++ outl(1, host->addr + TUL_SCnt0);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+
+- switch (TUL_RD(pCurHcb->HCS_Base, TUL_SFifo)) {
++ switch (inb(host->addr + TUL_SFifo)) {
+ case MSG_DISC: /* Disconnect msg */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+-
+- return tul_wait_disc(pCurHcb);
+-
++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++ return initio_wait_disc(host);
+ case MSG_SDP:
+ case MSG_RESTORE:
+ case MSG_NOP:
+- tul_msgin_accept(pCurHcb);
++ initio_msgin_accept(host);
+ break;
+-
+ case MSG_REJ: /* Clear ATN first */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal,
+- (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+- pCurTcb = pCurHcb->HCS_ActTcs;
+- if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync nego */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+- }
+- tul_msgin_accept(pCurHcb);
++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)),
++ host->addr + TUL_SSignal);
++ active_tc = host->active_tc;
++ if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) /* do sync nego */
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN),
++ host->addr + TUL_SSignal);
++ initio_msgin_accept(host);
+ break;
+-
+ case MSG_EXTEND: /* extended msg */
+- tul_msgin_extend(pCurHcb);
++ initio_msgin_extend(host);
+ break;
+-
+ case MSG_IGNOREWIDE:
+- tul_msgin_accept(pCurHcb);
+- break;
+-
+- /* get */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if (wait_tulip(pCurHcb) == -1)
+- return -1;
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0); /* put pad */
+- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get IGNORE field */
+- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get pad */
+-
+- tul_msgin_accept(pCurHcb);
++ initio_msgin_accept(host);
+ break;
+-
+ case MSG_COMP:
+- {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+- return tul_wait_done_disc(pCurHcb);
+- }
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++ return initio_wait_done_disc(host);
+ default:
+- tul_msgout_reject(pCurHcb);
++ initio_msgout_reject(host);
+ break;
+ }
+- if (pCurHcb->HCS_Phase != MSG_IN)
+- return (pCurHcb->HCS_Phase);
++ if (host->phase != MSG_IN)
++ return host->phase;
+ }
+ /* statement won't reach here */
+ }
+
+-
+-
+-
+-/***************************************************************************/
+-int tul_msgout_reject(HCS * pCurHcb)
++static int initio_msgout_reject(struct initio_host * host)
+ {
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-
+- if ((tul_msgin_accept(pCurHcb)) == -1)
+- return (-1);
++ if (initio_msgin_accept(host) == -1)
++ return -1;
+
+- if (pCurHcb->HCS_Phase == MSG_OUT) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_REJ); /* Msg reject */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
++ if (host->phase == MSG_OUT) {
++ outb(MSG_REJ, host->addr + TUL_SFifo); /* Msg reject */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+- return (pCurHcb->HCS_Phase);
++ return host->phase;
+ }
+
+-
+-
+-/***************************************************************************/
+-int tul_msgout_ide(HCS * pCurHcb)
++static int initio_msgout_ide(struct initio_host * host)
+ {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_IDE); /* Initiator Detected Error */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
++ outb(MSG_IDE, host->addr + TUL_SFifo); /* Initiator Detected Error */
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+
+-
+-/***************************************************************************/
+-int tul_msgin_extend(HCS * pCurHcb)
++static int initio_msgin_extend(struct initio_host * host)
+ {
+- BYTE len, idx;
++ u8 len, idx;
+
+- if (tul_msgin_accept(pCurHcb) != MSG_IN)
+- return (pCurHcb->HCS_Phase);
++ if (initio_msgin_accept(host) != MSG_IN)
++ return host->phase;
+
+ /* Get extended msg length */
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
++ outl(1, host->addr + TUL_SCnt0);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
+
+- len = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+- pCurHcb->HCS_Msg[0] = len;
++ len = inb(host->addr + TUL_SFifo);
++ host->msg[0] = len;
+ for (idx = 1; len != 0; len--) {
+
+- if ((tul_msgin_accept(pCurHcb)) != MSG_IN)
+- return (pCurHcb->HCS_Phase);
+- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+- if (wait_tulip(pCurHcb) == -1)
+- return (-1);
+- pCurHcb->HCS_Msg[idx++] = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+- }
+- if (pCurHcb->HCS_Msg[1] == 1) { /* if it's synchronous data transfer request */
+- if (pCurHcb->HCS_Msg[0] != 3) /* if length is not right */
+- return (tul_msgout_reject(pCurHcb));
+- if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */
+- pCurHcb->HCS_Msg[3] = 0;
+- } else {
+- if ((tul_msgin_sync(pCurHcb) == 0) &&
+- (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SYNC_DONE)) {
+- tul_sync_done(pCurHcb);
+- return (tul_msgin_accept(pCurHcb));
++ if ((initio_msgin_accept(host)) != MSG_IN)
++ return host->phase;
++ outl(1, host->addr + TUL_SCnt0);
++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++ if (wait_tulip(host) == -1)
++ return -1;
++ host->msg[idx++] = inb(host->addr + TUL_SFifo);
++ }
++ if (host->msg[1] == 1) { /* if it's synchronous data transfer request */
++ u8 r;
++ if (host->msg[0] != 3) /* if length is not right */
++ return initio_msgout_reject(host);
++ if (host->active_tc->flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */
++ host->msg[3] = 0;
++ } else {
++ if (initio_msgin_sync(host) == 0 &&
++ (host->active_tc->flags & TCF_SYNC_DONE)) {
++ initio_sync_done(host);
++ return initio_msgin_accept(host);
+ }
+ }
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+- if ((tul_msgin_accept(pCurHcb)) != MSG_OUT)
+- return (pCurHcb->HCS_Phase);
++ r = inb(host->addr + TUL_SSignal);
++ outb((r & (TSC_SET_ACK | 7)) | TSC_SET_ATN,
++ host->addr + TUL_SSignal);
++ if (initio_msgin_accept(host) != MSG_OUT)
++ return host->phase;
+ /* sync msg out */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-
+- tul_sync_done(pCurHcb);
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[3]);
++ initio_sync_done(host);
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
++ outb(MSG_EXTEND, host->addr + TUL_SFifo);
++ outb(3, host->addr + TUL_SFifo);
++ outb(1, host->addr + TUL_SFifo);
++ outb(host->msg[2], host->addr + TUL_SFifo);
++ outb(host->msg[3], host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+- if ((pCurHcb->HCS_Msg[0] != 2) || (pCurHcb->HCS_Msg[1] != 3))
+- return (tul_msgout_reject(pCurHcb));
++ if (host->msg[0] != 2 || host->msg[1] != 3)
++ return initio_msgout_reject(host);
+ /* if it's WIDE DATA XFER REQ */
+- if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) {
+- pCurHcb->HCS_Msg[2] = 0;
++ if (host->active_tc->flags & TCF_NO_WDTR) {
++ host->msg[2] = 0;
+ } else {
+- if (pCurHcb->HCS_Msg[2] > 2) /* > 32 bits */
+- return (tul_msgout_reject(pCurHcb));
+- if (pCurHcb->HCS_Msg[2] == 2) { /* == 32 */
+- pCurHcb->HCS_Msg[2] = 1;
++ if (host->msg[2] > 2) /* > 32 bits */
++ return initio_msgout_reject(host);
++ if (host->msg[2] == 2) { /* == 32 */
++ host->msg[2] = 1;
+ } else {
+- if ((pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) == 0) {
+- wdtr_done(pCurHcb);
+- if ((pCurHcb->HCS_ActTcs->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0)
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+- return (tul_msgin_accept(pCurHcb));
++ if ((host->active_tc->flags & TCF_NO_WDTR) == 0) {
++ wdtr_done(host);
++ if ((host->active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0)
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++ return initio_msgin_accept(host);
+ }
+ }
+ }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
+
+- if (tul_msgin_accept(pCurHcb) != MSG_OUT)
+- return (pCurHcb->HCS_Phase);
++ if (initio_msgin_accept(host) != MSG_OUT)
++ return host->phase;
+ /* WDTR msg out */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+- return (wait_tulip(pCurHcb));
++ outb(MSG_EXTEND, host->addr + TUL_SFifo);
++ outb(2, host->addr + TUL_SFifo);
++ outb(3, host->addr + TUL_SFifo);
++ outb(host->msg[2], host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+
+-/***************************************************************************/
+-int tul_msgin_sync(HCS * pCurHcb)
++static int initio_msgin_sync(struct initio_host * host)
+ {
+ char default_period;
+
+- default_period = tul_rate_tbl[pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SCSI_RATE];
+- if (pCurHcb->HCS_Msg[3] > MAX_OFFSET) {
+- pCurHcb->HCS_Msg[3] = MAX_OFFSET;
+- if (pCurHcb->HCS_Msg[2] < default_period) {
+- pCurHcb->HCS_Msg[2] = default_period;
++ default_period = initio_rate_tbl[host->active_tc->flags & TCF_SCSI_RATE];
++ if (host->msg[3] > MAX_OFFSET) {
++ host->msg[3] = MAX_OFFSET;
++ if (host->msg[2] < default_period) {
++ host->msg[2] = default_period;
+ return 1;
+ }
+- if (pCurHcb->HCS_Msg[2] >= 59) { /* Change to async */
+- pCurHcb->HCS_Msg[3] = 0;
+- }
++ if (host->msg[2] >= 59) /* Change to async */
++ host->msg[3] = 0;
+ return 1;
+ }
+ /* offset requests asynchronous transfers ? */
+- if (pCurHcb->HCS_Msg[3] == 0) {
++ if (host->msg[3] == 0) {
+ return 0;
+ }
+- if (pCurHcb->HCS_Msg[2] < default_period) {
+- pCurHcb->HCS_Msg[2] = default_period;
++ if (host->msg[2] < default_period) {
++ host->msg[2] = default_period;
+ return 1;
+ }
+- if (pCurHcb->HCS_Msg[2] >= 59) {
+- pCurHcb->HCS_Msg[3] = 0;
++ if (host->msg[2] >= 59) {
++ host->msg[3] = 0;
+ return 1;
+ }
+ return 0;
+ }
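++/*
++ * Added commentary on initio_msgin_sync() above: an offered offset
++ * larger than MAX_OFFSET is clipped to MAX_OFFSET, and a period faster
++ * than the host's table minimum is raised to that minimum; either
++ * adjustment returns 1 so the corrected values are negotiated back to
++ * the target. A period of 59 or more is instead answered with offset
++ * 0, i.e. a fall back to asynchronous transfers.
++ */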
+
+-
+-/***************************************************************************/
+-int wdtr_done(HCS * pCurHcb)
++static int wdtr_done(struct initio_host * host)
+ {
+- pCurHcb->HCS_ActTcs->TCS_Flags &= ~TCF_SYNC_DONE;
+- pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_WDTR_DONE;
++ host->active_tc->flags &= ~TCF_SYNC_DONE;
++ host->active_tc->flags |= TCF_WDTR_DONE;
+
+- pCurHcb->HCS_ActTcs->TCS_JS_Period = 0;
+- if (pCurHcb->HCS_Msg[2]) { /* if 16 bit */
+- pCurHcb->HCS_ActTcs->TCS_JS_Period |= TSC_WIDE_SCSI;
+- }
+- pCurHcb->HCS_ActTcs->TCS_SConfig0 &= ~TSC_ALT_PERIOD;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period);
++ host->active_tc->js_period = 0;
++ if (host->msg[2]) /* if 16 bit */
++ host->active_tc->js_period |= TSC_WIDE_SCSI;
++ host->active_tc->sconfig0 &= ~TSC_ALT_PERIOD;
++ outb(host->active_tc->sconfig0, host->addr + TUL_SConfig);
++ outb(host->active_tc->js_period, host->addr + TUL_SPeriod);
+
+ return 1;
+ }
+
+-/***************************************************************************/
+-int tul_sync_done(HCS * pCurHcb)
++static int initio_sync_done(struct initio_host * host)
+ {
+ int i;
+
+- pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_SYNC_DONE;
++ host->active_tc->flags |= TCF_SYNC_DONE;
+
+- if (pCurHcb->HCS_Msg[3]) {
+- pCurHcb->HCS_ActTcs->TCS_JS_Period |= pCurHcb->HCS_Msg[3];
++ if (host->msg[3]) {
++ host->active_tc->js_period |= host->msg[3];
+ for (i = 0; i < 8; i++) {
+- if (tul_rate_tbl[i] >= pCurHcb->HCS_Msg[2]) /* pick the big one */
++ if (initio_rate_tbl[i] >= host->msg[2]) /* pick the big one */
+ break;
+ }
+- pCurHcb->HCS_ActTcs->TCS_JS_Period |= (i << 4);
+- pCurHcb->HCS_ActTcs->TCS_SConfig0 |= TSC_ALT_PERIOD;
++ host->active_tc->js_period |= (i << 4);
++ host->active_tc->sconfig0 |= TSC_ALT_PERIOD;
+ }
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period);
++ outb(host->active_tc->sconfig0, host->addr + TUL_SConfig);
++ outb(host->active_tc->js_period, host->addr + TUL_SPeriod);
+
+- return (-1);
++ return -1;
+ }
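++/*
++ * Added note on the encoding used above: js_period appears to carry
++ * the negotiated offset in its low bits and the index of the matching
++ * initio_rate_tbl[] entry in bits 4-6, with TSC_WIDE_SCSI (set in
++ * wdtr_done()) flagging 16 bit transfers; the composite byte is what
++ * gets written to the chip's TUL_SPeriod register.
++ */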
+
+
+-int tul_post_scsi_rst(HCS * pCurHcb)
++static int initio_post_scsi_rst(struct initio_host * host)
+ {
+- SCB *pCurScb;
+- TCS *pCurTcb;
++ struct scsi_ctrl_blk *scb;
++ struct target_control *active_tc;
+ int i;
+
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
+- pCurHcb->HCS_Flags = 0;
+-
+- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+- pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+- tul_append_done_scb(pCurHcb, pCurScb);
++ host->active = NULL;
++ host->active_tc = NULL;
++ host->flags = 0;
++
++ while ((scb = initio_pop_busy_scb(host)) != NULL) {
++ scb->hastat = HOST_BAD_PHAS;
++ initio_append_done_scb(host, scb);
+ }
+ /* clear sync done flag */
+- pCurTcb = &pCurHcb->HCS_Tcs[0];
+- for (i = 0; i < pCurHcb->HCS_MaxTar; pCurTcb++, i++) {
+- pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++ active_tc = &host->targets[0];
++ for (i = 0; i < host->max_tar; active_tc++, i++) {
++ active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+ /* Initialize the sync. xfer register values to an asyn xfer */
+- pCurTcb->TCS_JS_Period = 0;
+- pCurTcb->TCS_SConfig0 = pCurHcb->HCS_SConf1;
+- pCurHcb->HCS_ActTags[0] = 0; /* 07/22/98 */
+- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY; /* 07/22/98 */
++ active_tc->js_period = 0;
++ active_tc->sconfig0 = host->sconf1;
++ host->act_tags[0] = 0; /* 07/22/98 */
++ host->targets[i].flags &= ~TCF_BUSY; /* 07/22/98 */
+ } /* for */
+
+- return (-1);
++ return -1;
+ }
+
+-/***************************************************************************/
+-void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb)
++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+- pCurScb->SCB_Status |= SCB_SELECT;
+- pCurScb->SCB_NxtStat = 0x1;
+- pCurHcb->HCS_ActScb = pCurScb;
+- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SELATNSTOP);
+- return;
++ scb->status |= SCB_SELECT;
++ scb->next_state = 0x1;
++ host->active = scb;
++ host->active_tc = &host->targets[scb->target];
++ outb(TSC_SELATNSTOP, host->addr + TUL_SCmd);
+ }
+
+
+-/***************************************************************************/
+-void tul_select_atn(HCS * pCurHcb, SCB * pCurScb)
++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+ int i;
+
+- pCurScb->SCB_Status |= SCB_SELECT;
+- pCurScb->SCB_NxtStat = 0x2;
++ scb->status |= SCB_SELECT;
++ scb->next_state = 0x2;
+
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+- pCurHcb->HCS_ActScb = pCurScb;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN);
+- return;
++ outb(scb->ident, host->addr + TUL_SFifo);
++ for (i = 0; i < (int) scb->cdblen; i++)
++ outb(scb->cdb[i], host->addr + TUL_SFifo);
++ host->active_tc = &host->targets[scb->target];
++ host->active = scb;
++ outb(TSC_SEL_ATN, host->addr + TUL_SCmd);
+ }
+
+-/***************************************************************************/
+-void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb)
++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+ int i;
+
+- pCurScb->SCB_Status |= SCB_SELECT;
+- pCurScb->SCB_NxtStat = 0x2;
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId);
+- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+- pCurHcb->HCS_ActScb = pCurScb;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN3);
+- return;
+-}
++ scb->status |= SCB_SELECT;
++ scb->next_state = 0x2;
+
+-/***************************************************************************/
+-/* SCSI Bus Device Reset */
+-int tul_bus_device_reset(HCS * pCurHcb)
++ outb(scb->ident, host->addr + TUL_SFifo);
++ outb(scb->tagmsg, host->addr + TUL_SFifo);
++ outb(scb->tagid, host->addr + TUL_SFifo);
++ for (i = 0; i < scb->cdblen; i++)
++ outb(scb->cdb[i], host->addr + TUL_SFifo);
++ host->active_tc = &host->targets[scb->target];
++ host->active = scb;
++ outb(TSC_SEL_ATN3, host->addr + TUL_SCmd);
++}
++
++/**
++ * initio_bus_device_reset - SCSI Bus Device Reset
++ * @host: InitIO host to reset
++ *
++ * Perform a device reset and abort all pending SCBs for the
++ * victim device
++ */
++int initio_bus_device_reset(struct initio_host * host)
+ {
+- SCB *pCurScb = pCurHcb->HCS_ActScb;
+- TCS *pCurTcb = pCurHcb->HCS_ActTcs;
+- SCB *pTmpScb, *pPrevScb;
+- BYTE tar;
++ struct scsi_ctrl_blk *scb = host->active;
++ struct target_control *active_tc = host->active_tc;
++ struct scsi_ctrl_blk *tmp, *prev;
++ u8 tar;
+
+- if (pCurHcb->HCS_Phase != MSG_OUT) {
+- return (int_tul_bad_seq(pCurHcb)); /* Unexpected phase */
+- }
+- tul_unlink_pend_scb(pCurHcb, pCurScb);
+- tul_release_scb(pCurHcb, pCurScb);
++ if (host->phase != MSG_OUT)
++ return int_initio_bad_seq(host); /* Unexpected phase */
+
++ initio_unlink_pend_scb(host, scb);
++ initio_release_scb(host, scb);
+
+- tar = pCurScb->SCB_Target; /* target */
+- pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY);
++
++ tar = scb->target; /* target */
++ active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY);
+ /* clr sync. nego & WDTR flags 07/22/98 */
+
+ /* abort all SCB with same target */
+- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */
+- while (pTmpScb != NULL) {
+-
+- if (pTmpScb->SCB_Target == tar) {
++ prev = tmp = host->first_busy; /* Check Busy queue */
++ while (tmp != NULL) {
++ if (tmp->target == tar) {
+ /* unlink it */
+- if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+- pCurHcb->HCS_LastBusy = NULL;
+- } else {
+- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+- if (pTmpScb == pCurHcb->HCS_LastBusy)
+- pCurHcb->HCS_LastBusy = pPrevScb;
++ if (tmp == host->first_busy) {
++ if ((host->first_busy = tmp->next) == NULL)
++ host->last_busy = NULL;
++ } else {
++ prev->next = tmp->next;
++ if (tmp == host->last_busy)
++ host->last_busy = prev;
+ }
+- pTmpScb->SCB_HaStat = HOST_ABORTED;
+- tul_append_done_scb(pCurHcb, pTmpScb);
++ tmp->hastat = HOST_ABORTED;
++ initio_append_done_scb(host, tmp);
+ }
+ /* Previous haven't change */
+ else {
+- pPrevScb = pTmpScb;
++ prev = tmp;
+ }
+- pTmpScb = pTmpScb->SCB_NxtScb;
++ tmp = tmp->next;
+ }
+-
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_DEVRST);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-
+- return tul_wait_disc(pCurHcb);
++ outb(MSG_DEVRST, host->addr + TUL_SFifo);
++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++ return initio_wait_disc(host);
+
+ }
+
+-/***************************************************************************/
+-int tul_msgin_accept(HCS * pCurHcb)
++static int initio_msgin_accept(struct initio_host * host)
+ {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+- return (wait_tulip(pCurHcb));
++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++ return wait_tulip(host);
+ }
+
+-/***************************************************************************/
+-int wait_tulip(HCS * pCurHcb)
++static int wait_tulip(struct initio_host * host)
+ {
+
+- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+- & TSS_INT_PENDING));
++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0))
++ & TSS_INT_PENDING))
++ cpu_relax();
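++ /*
++ * Added note: cpu_relax() keeps this busy-wait friendly to the other
++ * sibling on hyperthreaded CPUs (it is a pause hint on x86) while we
++ * spin waiting for the chip's interrupt pending bit.
++ */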
++
++ host->jsint = inb(host->addr + TUL_SInt);
++ host->phase = host->jsstatus0 & TSS_PH_MASK;
++ host->jsstatus1 = inb(host->addr + TUL_SStatus1);
+
+- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
+- pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK;
+- pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1);
+-
+- if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if SCSI bus reset detected */
+- return (int_tul_resel(pCurHcb));
+- }
+- if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) { /* if selected/reselected timeout interrupt */
+- return (int_tul_busfree(pCurHcb));
+- }
+- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */
+- return (int_tul_scsi_rst(pCurHcb));
+- }
+- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */
+- if (pCurHcb->HCS_Flags & HCF_EXPECT_DONE_DISC) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */
+- tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb);
+- pCurHcb->HCS_ActScb->SCB_HaStat = 0;
+- tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb);
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
+- pCurHcb->HCS_Flags &= ~HCF_EXPECT_DONE_DISC;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */
+- return (-1);
+- }
+- if (pCurHcb->HCS_Flags & HCF_EXPECT_DISC) {
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */
+- pCurHcb->HCS_ActScb = NULL;
+- pCurHcb->HCS_ActTcs = NULL;
+- pCurHcb->HCS_Flags &= ~HCF_EXPECT_DISC;
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */
+- return (-1);
++ if (host->jsint & TSS_RESEL_INT) /* if reselection detected */
++ return int_initio_resel(host);
++ if (host->jsint & TSS_SEL_TIMEOUT) /* if selected/reselected timeout interrupt */
++ return int_initio_busfree(host);
++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */
++ return int_initio_scsi_rst(host);
++
++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */
++ if (host->flags & HCF_EXPECT_DONE_DISC) {
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++ initio_unlink_busy_scb(host, host->active);
++ host->active->hastat = 0;
++ initio_append_done_scb(host, host->active);
++ host->active = NULL;
++ host->active_tc = NULL;
++ host->flags &= ~HCF_EXPECT_DONE_DISC;
++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */
++ return -1;
+ }
+- return (int_tul_busfree(pCurHcb));
++ if (host->flags & HCF_EXPECT_DISC) {
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++ host->active = NULL;
++ host->active_tc = NULL;
++ host->flags &= ~HCF_EXPECT_DISC;
++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */
++ return -1;
+ }
+- if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) {
+- return (pCurHcb->HCS_Phase);
++ return int_initio_busfree(host);
+ }
+- return (pCurHcb->HCS_Phase);
++ /* The old code really does the below. Can probably be removed */
++ if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV))
++ return host->phase;
++ return host->phase;
+ }
+-/***************************************************************************/
+-int tul_wait_disc(HCS * pCurHcb)
+-{
+-
+- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+- & TSS_INT_PENDING));
+
++static int initio_wait_disc(struct initio_host * host)
++{
++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING))
++ cpu_relax();
+
+- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++ host->jsint = inb(host->addr + TUL_SInt);
+
+- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */
+- return (int_tul_scsi_rst(pCurHcb));
+- }
+- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */
+- pCurHcb->HCS_ActScb = NULL;
+- return (-1);
++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */
++ return int_initio_scsi_rst(host);
++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */
++ host->active = NULL;
++ return -1;
+ }
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+
+-/***************************************************************************/
+-int tul_wait_done_disc(HCS * pCurHcb)
++static int initio_wait_done_disc(struct initio_host * host)
+ {
++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0))
++ & TSS_INT_PENDING))
++ cpu_relax();
+
++ host->jsint = inb(host->addr + TUL_SInt);
+
+- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+- & TSS_INT_PENDING));
+-
+- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */
++ return int_initio_scsi_rst(host);
++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */
++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */
++ initio_unlink_busy_scb(host, host->active);
+
+-
+- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */
+- return (int_tul_scsi_rst(pCurHcb));
+- }
+- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */
+- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */
+- tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb);
+-
+- tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb);
+- pCurHcb->HCS_ActScb = NULL;
+- return (-1);
++ initio_append_done_scb(host, host->active);
++ host->active = NULL;
++ return -1;
+ }
+- return (tul_bad_seq(pCurHcb));
++ return initio_bad_seq(host);
+ }
+
++/**
++ * i91u_intr - IRQ handler
++ * @irqno: IRQ number
++ * @dev_id: IRQ identifier
++ *
++ * Take the relevant locks and then invoke the actual ISR processing
++ * code under the lock.
++ */
++
+ static irqreturn_t i91u_intr(int irqno, void *dev_id)
+ {
+ struct Scsi_Host *dev = dev_id;
+ unsigned long flags;
++ int r;
+
+ spin_lock_irqsave(dev->host_lock, flags);
+- tul_isr((HCS *)dev->base);
++ r = initio_isr((struct initio_host *)dev->hostdata);
+ spin_unlock_irqrestore(dev->host_lock, flags);
++ if (r)
+ return IRQ_HANDLED;
++ else
++ return IRQ_NONE;
+ }
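++/*
++ * Added note: returning IRQ_NONE when initio_isr() claims no work is
++ * what lets the kernel's spurious interrupt detection cope with a
++ * shared or screaming line instead of wrongly crediting this adapter.
++ */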
+
+-static int tul_NewReturnNumberOfAdapters(void)
+-{
+- struct pci_dev *pDev = NULL; /* Start from none */
+- int iAdapters = 0;
+- long dRegValue;
+- WORD wBIOS;
+- int i = 0;
+-
+- init_i91uAdapter_table();
+-
+- for (i = 0; i < ARRAY_SIZE(i91u_pci_devices); i++)
+- {
+- while ((pDev = pci_find_device(i91u_pci_devices[i].vendor, i91u_pci_devices[i].device, pDev)) != NULL) {
+- if (pci_enable_device(pDev))
+- continue;
+- pci_read_config_dword(pDev, 0x44, (u32 *) & dRegValue);
+- wBIOS = (UWORD) (dRegValue & 0xFF);
+- if (((dRegValue & 0xFF00) >> 8) == 0xFF)
+- dRegValue = 0;
+- wBIOS = (wBIOS << 8) + ((UWORD) ((dRegValue & 0xFF00) >> 8));
+- if (pci_set_dma_mask(pDev, DMA_32BIT_MASK)) {
+- printk(KERN_WARNING
+- "i91u: Could not set 32 bit DMA mask\n");
+- continue;
+- }
+-
+- if (Addi91u_into_Adapter_table(wBIOS,
+- (pDev->resource[0].start),
+- pDev->irq,
+- pDev->bus->number,
+- (pDev->devfn >> 3)
+- ) == 0)
+- iAdapters++;
+- }
+- }
+-
+- return (iAdapters);
+-}
+-
+-static int i91u_detect(struct scsi_host_template * tpnt)
+-{
+- HCS *pHCB;
+- struct Scsi_Host *hreg;
+- unsigned long i; /* 01/14/98 */
+- int ok = 0, iAdapters;
+- ULONG dBiosAdr;
+- BYTE *pbBiosAdr;
+-
+- /* Get total number of adapters in the motherboard */
+- iAdapters = tul_NewReturnNumberOfAdapters();
+- if (iAdapters == 0) /* If no tulip founded, return */
+- return (0);
+-
+- tul_num_ch = (iAdapters > tul_num_ch) ? tul_num_ch : iAdapters;
+- /* Update actually channel number */
+- if (tul_tag_enable) { /* 1.01i */
+- tul_num_scb = MAX_TARGETS * i91u_MAXQUEUE;
+- } else {
+- tul_num_scb = MAX_TARGETS + 3; /* 1-tape, 1-CD_ROM, 1- extra */
+- } /* Update actually SCBs per adapter */
+-
+- /* Get total memory needed for HCS */
+- i = tul_num_ch * sizeof(HCS);
+- memset((unsigned char *) &tul_hcs[0], 0, i); /* Initialize tul_hcs 0 */
+- /* Get total memory needed for SCB */
+-
+- for (; tul_num_scb >= MAX_TARGETS + 3; tul_num_scb--) {
+- i = tul_num_ch * tul_num_scb * sizeof(SCB);
+- if ((tul_scb = kmalloc(i, GFP_ATOMIC | GFP_DMA)) != NULL)
+- break;
+- }
+- if (tul_scb == NULL) {
+- printk("i91u: SCB memory allocation error\n");
+- return (0);
+- }
+- memset((unsigned char *) tul_scb, 0, i);
+-
+- for (i = 0, pHCB = &tul_hcs[0]; /* Get pointer for control block */
+- i < tul_num_ch;
+- i++, pHCB++) {
+- get_tulipPCIConfig(pHCB, i);
+-
+- dBiosAdr = pHCB->HCS_BIOS;
+- dBiosAdr = (dBiosAdr << 4);
+
+- pbBiosAdr = phys_to_virt(dBiosAdr);
+-
+- init_tulip(pHCB, tul_scb + (i * tul_num_scb), tul_num_scb, pbBiosAdr, 10);
+- request_region(pHCB->HCS_Base, 256, "i91u"); /* Register */
+-
+- pHCB->HCS_Index = i; /* 7/29/98 */
+- hreg = scsi_register(tpnt, sizeof(HCS));
+- if(hreg == NULL) {
+- release_region(pHCB->HCS_Base, 256);
+- return 0;
+- }
+- hreg->io_port = pHCB->HCS_Base;
+- hreg->n_io_port = 0xff;
+- hreg->can_queue = tul_num_scb; /* 03/05/98 */
+- hreg->unique_id = pHCB->HCS_Base;
+- hreg->max_id = pHCB->HCS_MaxTar;
+- hreg->max_lun = 32; /* 10/21/97 */
+- hreg->irq = pHCB->HCS_Intr;
+- hreg->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */
+- hreg->base = (unsigned long)pHCB;
+- hreg->sg_tablesize = TOTAL_SG_ENTRY; /* Maximun support is 32 */
+-
+- /* Initial tulip chip */
+- ok = request_irq(pHCB->HCS_Intr, i91u_intr, IRQF_DISABLED | IRQF_SHARED, "i91u", hreg);
+- if (ok < 0) {
+- printk(KERN_WARNING "i91u: unable to request IRQ %d\n\n", pHCB->HCS_Intr);
+- return 0;
+- }
+- }
+-
+- tpnt->this_id = -1;
+- tpnt->can_queue = 1;
+-
+- return 1;
+-}
++/**
++ * initio_build_scb - Build the mappings and SCB
++ * @host: InitIO host taking the command
++ * @cblk: Firmware command block
++ * @cmnd: SCSI midlayer command block
++ *
++ * Translate the abstract SCSI command into a firmware command block
++ * suitable for feeding to the InitIO host controller. This also requires
++ * that we build the scatter-gather lists and ensure they are mapped properly.
++ */
+
+-static void i91uBuildSCB(HCS * pHCB, SCB * pSCB, struct scsi_cmnd * SCpnt)
++static void initio_build_scb(struct initio_host * host, struct scsi_ctrl_blk * cblk, struct scsi_cmnd * cmnd)
+ { /* Create corresponding SCB */
+- struct scatterlist *pSrbSG;
+- SG *pSG; /* Pointer to SG list */
+- int i;
+- long TotalLen;
++ struct scatterlist *sglist;
++ struct sg_entry *sg; /* Pointer to SG list */
++ int i, nseg;
++ long total_len;
+ dma_addr_t dma_addr;
+
+- pSCB->SCB_Post = i91uSCBPost; /* i91u's callback routine */
+- pSCB->SCB_Srb = SCpnt;
+- pSCB->SCB_Opcode = ExecSCSI;
+- pSCB->SCB_Flags = SCF_POST; /* After SCSI done, call post routine */
+- pSCB->SCB_Target = SCpnt->device->id;
+- pSCB->SCB_Lun = SCpnt->device->lun;
+- pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW;
++ /* Fill in the command headers */
++ cblk->post = i91uSCBPost; /* i91u's callback routine */
++ cblk->srb = cmnd;
++ cblk->opcode = ExecSCSI;
++ cblk->flags = SCF_POST; /* After SCSI done, call post routine */
++ cblk->target = cmnd->device->id;
++ cblk->lun = cmnd->device->lun;
++ cblk->ident = cmnd->device->lun | DISC_ALLOW;
+
+- pSCB->SCB_Flags |= SCF_SENSE; /* Turn on auto request sense */
+- dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->sense_buffer,
+- SENSE_SIZE, DMA_FROM_DEVICE);
+- pSCB->SCB_SensePtr = cpu_to_le32((u32)dma_addr);
+- pSCB->SCB_SenseLen = cpu_to_le32(SENSE_SIZE);
+- SCpnt->SCp.ptr = (char *)(unsigned long)dma_addr;
+-
+- pSCB->SCB_CDBLen = SCpnt->cmd_len;
+- pSCB->SCB_HaStat = 0;
+- pSCB->SCB_TaStat = 0;
+- memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, SCpnt->cmd_len);
++ cblk->flags |= SCF_SENSE; /* Turn on auto request sense */
+
+- if (SCpnt->device->tagged_supported) { /* Tag Support */
+- pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */
++ /* Map the sense buffer into bus memory */
++ dma_addr = dma_map_single(&host->pci_dev->dev, cmnd->sense_buffer,
++ SENSE_SIZE, DMA_FROM_DEVICE);
++ cblk->senseptr = cpu_to_le32((u32)dma_addr);
++ cblk->senselen = cpu_to_le32(SENSE_SIZE);
++ cmnd->SCp.ptr = (char *)(unsigned long)dma_addr;
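++ /*
++ * Added note: the scratch SCp.ptr field is borrowed to remember the
++ * sense buffer's DMA handle so that i91u_unmap_scb() can unmap it
++ * when the command completes.
++ */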
++ cblk->cdblen = cmnd->cmd_len;
++
++ /* Clear the returned status */
++ cblk->hastat = 0;
++ cblk->tastat = 0;
++ /* Copy the CDB */
++ memcpy(&cblk->cdb[0], &cmnd->cmnd, cmnd->cmd_len);
++
++ /* Set up tags */
++ if (cmnd->device->tagged_supported) { /* Tag Support */
++ cblk->tagmsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */
+ } else {
+- pSCB->SCB_TagMsg = 0; /* No tag support */
++ cblk->tagmsg = 0; /* No tag support */
+ }
++
+ /* todo handle map_sg error */
+- if (SCpnt->use_sg) {
+- dma_addr = dma_map_single(&pHCB->pci_dev->dev, &pSCB->SCB_SGList[0],
+- sizeof(struct SG_Struc) * TOTAL_SG_ENTRY,
++ nseg = scsi_dma_map(cmnd);
++ BUG_ON(nseg < 0);
++ if (nseg) {
++ dma_addr = dma_map_single(&host->pci_dev->dev, &cblk->sglist[0],
++ sizeof(struct sg_entry) * TOTAL_SG_ENTRY,
+ DMA_BIDIRECTIONAL);
+- pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr);
+- SCpnt->SCp.dma_handle = dma_addr;
++ cblk->bufptr = cpu_to_le32((u32)dma_addr);
++ cmnd->SCp.dma_handle = dma_addr;
+
+- pSrbSG = (struct scatterlist *) SCpnt->request_buffer;
+- pSCB->SCB_SGLen = dma_map_sg(&pHCB->pci_dev->dev, pSrbSG,
+- SCpnt->use_sg, SCpnt->sc_data_direction);
+-
+- pSCB->SCB_Flags |= SCF_SG; /* Turn on SG list flag */
+- for (i = 0, TotalLen = 0, pSG = &pSCB->SCB_SGList[0]; /* 1.01g */
+- i < pSCB->SCB_SGLen; i++, pSG++, pSrbSG++) {
+- pSG->SG_Ptr = cpu_to_le32((u32)sg_dma_address(pSrbSG));
+- TotalLen += pSG->SG_Len = cpu_to_le32((u32)sg_dma_len(pSrbSG));
+- }
+-
+- pSCB->SCB_BufLen = (SCpnt->request_bufflen > TotalLen) ?
+- TotalLen : SCpnt->request_bufflen;
+- } else if (SCpnt->request_bufflen) { /* Non SG */
+- dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->request_buffer,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- SCpnt->SCp.dma_handle = dma_addr;
+- pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr);
+- pSCB->SCB_BufLen = cpu_to_le32((u32)SCpnt->request_bufflen);
+- pSCB->SCB_SGLen = 0;
+- } else {
+- pSCB->SCB_BufLen = 0;
+- pSCB->SCB_SGLen = 0;
++
++ cblk->flags |= SCF_SG; /* Turn on SG list flag */
++ total_len = 0;
++ sg = &cblk->sglist[0];
++ scsi_for_each_sg(cmnd, sglist, cblk->sglen, i) {
++ sg->data = cpu_to_le32((u32)sg_dma_address(sglist));
++ total_len += sg->len = cpu_to_le32((u32)sg_dma_len(sglist));
++ }
++
++ cblk->buflen = (scsi_bufflen(cmnd) > total_len) ?
++ total_len : scsi_bufflen(cmnd);
++ } else { /* No data transfer required */
++ cblk->buflen = 0;
++ cblk->sglen = 0;
+ }
+ }
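++/*
++ * Added note: a scatter-gather command gets two DMA mappings above -
++ * one for the data pages via scsi_dma_map() and one for the
++ * cblk->sglist[] descriptor array itself, since the chip fetches the
++ * SG descriptors by DMA as well. Both are undone in i91u_unmap_scb().
++ */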
+
++/**
++ * i91u_queuecommand - Queue a new command if possible
++ * @cmd: SCSI command block from the mid layer
++ * @done: Completion handler
++ *
++ * Attempts to queue a new command with the host adapter. Will return
++ * zero if successful or indicate a host busy condition if not (which
++ * will cause the mid layer to call us again later with the command).
++ */
++
+ static int i91u_queuecommand(struct scsi_cmnd *cmd,
+ void (*done)(struct scsi_cmnd *))
+ {
+- HCS *pHCB = (HCS *) cmd->device->host->base;
+- register SCB *pSCB;
++ struct initio_host *host = (struct initio_host *) cmd->device->host->hostdata;
++ struct scsi_ctrl_blk *cmnd;
+
+ cmd->scsi_done = done;
+
+- pSCB = tul_alloc_scb(pHCB);
+- if (!pSCB)
++ cmnd = initio_alloc_scb(host);
++ if (!cmnd)
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+- i91uBuildSCB(pHCB, pSCB, cmd);
+- tul_exec_scb(pHCB, pSCB);
++ initio_build_scb(host, cmnd, cmd);
++ initio_exec_scb(host, cmnd);
+ return 0;
+ }
+
+-#if 0 /* no new EH yet */
+-/*
+- * Abort a queued command
+- * (commands that are on the bus can't be aborted easily)
+- */
+-static int i91u_abort(struct scsi_cmnd * SCpnt)
+-{
+- HCS *pHCB;
+-
+- pHCB = (HCS *) SCpnt->device->host->base;
+- return tul_abort_srb(pHCB, SCpnt);
+-}
+-
+-/*
+- * Reset registers, reset a hanging bus and
+- * kill active and disconnected commands for target w/o soft reset
++/**
++ * i91u_bus_reset - reset the SCSI bus
++ * @cmnd: Command block we want to trigger the reset for
++ *
++ * Initiate a SCSI bus reset sequence
+ */
+-static int i91u_reset(struct scsi_cmnd * SCpnt, unsigned int reset_flags)
+-{ /* I need Host Control Block Information */
+- HCS *pHCB;
+-
+- pHCB = (HCS *) SCpnt->device->host->base;
+-
+- if (reset_flags & (SCSI_RESET_SUGGEST_BUS_RESET | SCSI_RESET_SUGGEST_HOST_RESET))
+- return tul_reset_scsi_bus(pHCB);
+- else
+- return tul_device_reset(pHCB, SCpnt, SCpnt->device->id, reset_flags);
+-}
+-#endif
+
+-static int i91u_bus_reset(struct scsi_cmnd * SCpnt)
++static int i91u_bus_reset(struct scsi_cmnd * cmnd)
+ {
+- HCS *pHCB;
++ struct initio_host *host;
+
+- pHCB = (HCS *) SCpnt->device->host->base;
++ host = (struct initio_host *) cmnd->device->host->hostdata;
+
+- spin_lock_irq(SCpnt->device->host->host_lock);
+- tul_reset_scsi(pHCB, 0);
+- spin_unlock_irq(SCpnt->device->host->host_lock);
++ spin_lock_irq(cmnd->device->host->host_lock);
++ initio_reset_scsi(host, 0);
++ spin_unlock_irq(cmnd->device->host->host_lock);
+
+ return SUCCESS;
+ }
+
+-/*
+- * Return the "logical geometry"
++/**
++ * i91u_biosparam - return the "logical geometry"
++ * @sdev: SCSI device
++ * @dev: Matching block device
++ * @capacity: Size of the drive in sectors
++ * @info_array: Return space for BIOS geometry
++ *
++ * Map the device geometry in a manner compatible with the host
++ * controller BIOS behaviour.
++ *
++ * FIXME: limited to 2^32 sector devices.
+ */
++
+ static int i91u_biosparam(struct scsi_device *sdev, struct block_device *dev,
+ sector_t capacity, int *info_array)
+ {
+- HCS *pHcb; /* Point to Host adapter control block */
+- TCS *pTcb;
++ struct initio_host *host; /* Point to Host adapter control block */
++ struct target_control *tc;
+
+- pHcb = (HCS *) sdev->host->base;
+- pTcb = &pHcb->HCS_Tcs[sdev->id];
++ host = (struct initio_host *) sdev->host->hostdata;
++ tc = &host->targets[sdev->id];
+
+- if (pTcb->TCS_DrvHead) {
+- info_array[0] = pTcb->TCS_DrvHead;
+- info_array[1] = pTcb->TCS_DrvSector;
+- info_array[2] = (unsigned long)capacity / pTcb->TCS_DrvHead / pTcb->TCS_DrvSector;
++ if (tc->heads) {
++ info_array[0] = tc->heads;
++ info_array[1] = tc->sectors;
++ info_array[2] = (unsigned long)capacity / tc->heads / tc->sectors;
+ } else {
+- if (pTcb->TCS_DrvFlags & TCF_DRV_255_63) {
++ if (tc->drv_flags & TCF_DRV_255_63) {
+ info_array[0] = 255;
+ info_array[1] = 63;
+ info_array[2] = (unsigned long)capacity / 255 / 63;
+@@ -3047,7 +2722,16 @@
+ return 0;
+ }
+
+-static void i91u_unmap_cmnd(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
++/**
++ * i91u_unmap_scb - Unmap a command
++ * @pci_dev: PCI device the command is for
++ * @cmnd: The command itself
++ *
++ * Unmap any PCI mapping/IOMMU resources allocated when the command
++ * was mapped originally as part of initio_build_scb
++ */
++
++static void i91u_unmap_scb(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
+ {
+ /* auto sense buffer */
+ if (cmnd->SCp.ptr) {
+@@ -3058,65 +2742,63 @@
+ }
+
+ /* request buffer */
+- if (cmnd->use_sg) {
++ if (scsi_sg_count(cmnd)) {
+ dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+- sizeof(struct SG_Struc) * TOTAL_SG_ENTRY,
++ sizeof(struct sg_entry) * TOTAL_SG_ENTRY,
+ DMA_BIDIRECTIONAL);
+
+- dma_unmap_sg(&pci_dev->dev, cmnd->request_buffer,
+- cmnd->use_sg,
+- cmnd->sc_data_direction);
+- } else if (cmnd->request_bufflen) {
+- dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+- cmnd->request_bufflen,
+- cmnd->sc_data_direction);
++ scsi_dma_unmap(cmnd);
+ }
+ }
+
+-/*****************************************************************************
+- Function name : i91uSCBPost
+- Description : This is callback routine be called when tulip finish one
+- SCSI command.
+- Input : pHCB - Pointer to host adapter control block.
+- pSCB - Pointer to SCSI control block.
+- Output : None.
+- Return : None.
+-*****************************************************************************/
+-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb)
+-{
+- struct scsi_cmnd *pSRB; /* Pointer to SCSI request block */
+- HCS *pHCB;
+- SCB *pSCB;
+-
+- pHCB = (HCS *) pHcb;
+- pSCB = (SCB *) pScb;
+- if ((pSRB = pSCB->SCB_Srb) == 0) {
+- printk("i91uSCBPost: SRB pointer is empty\n");
++/**
++ * i91uSCBPost - SCSI callback
++ * @host_mem: Pointer to host adapter control block.
++ * @cblk_mem: Pointer to SCSI control block.
++ *
++ * This is the callback routine invoked when the tulip chip has
++ * finished one SCSI command.
++ */
++
++static void i91uSCBPost(u8 * host_mem, u8 * cblk_mem)
++{
++ struct scsi_cmnd *cmnd; /* Pointer to SCSI request block */
++ struct initio_host *host;
++ struct scsi_ctrl_blk *cblk;
+
+- tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */
++ host = (struct initio_host *) host_mem;
++ cblk = (struct scsi_ctrl_blk *) cblk_mem;
++ if ((cmnd = cblk->srb) == NULL) {
++ printk(KERN_ERR "i91uSCBPost: SRB pointer is empty\n");
++ WARN_ON(1);
++ initio_release_scb(host, cblk); /* Release SCB for current channel */
+ return;
+ }
+- switch (pSCB->SCB_HaStat) {
++
++ /*
++ * Remap the firmware error status into a mid layer one
++ */
++ switch (cblk->hastat) {
+ case 0x0:
+ case 0xa: /* Linked command complete without error and linked normally */
+ case 0xb: /* Linked command complete without error interrupt generated */
+- pSCB->SCB_HaStat = 0;
++ cblk->hastat = 0;
+ break;
+
+ case 0x11: /* Selection time out-The initiator selection or target
+ reselection was not complete within the SCSI Time out period */
+- pSCB->SCB_HaStat = DID_TIME_OUT;
++ cblk->hastat = DID_TIME_OUT;
+ break;
+
+ case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus
+ phase sequence was requested by the target. The host adapter
+ will generate a SCSI Reset Condition, notifying the host with
+ a SCRD interrupt */
+- pSCB->SCB_HaStat = DID_RESET;
++ cblk->hastat = DID_RESET;
+ break;
+
+ case 0x1a: /* SCB Aborted. 07/21/98 */
+- pSCB->SCB_HaStat = DID_ABORT;
++ cblk->hastat = DID_ABORT;
+ break;
+
+ case 0x12: /* Data overrun/underrun-The target attempted to transfer more data
+@@ -3126,49 +2808,196 @@
+ case 0x16: /* Invalid SCB Operation Code. */
+
+ default:
+- printk("ini9100u: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat);
+- pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */
++ printk("ini9100u: %x %x\n", cblk->hastat, cblk->tastat);
++ cblk->hastat = DID_ERROR; /* Couldn't find any better */
+ break;
+ }
+
+- pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16);
+-
+- if (pSRB == NULL) {
+- printk("pSRB is NULL\n");
+- }
+-
+- i91u_unmap_cmnd(pHCB->pci_dev, pSRB);
+- pSRB->scsi_done(pSRB); /* Notify system DONE */
+-
+- tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */
++ cmnd->result = cblk->tastat | (cblk->hastat << 16);
++ WARN_ON(cmnd == NULL);
++ i91u_unmap_scb(host->pci_dev, cmnd);
++ cmnd->scsi_done(cmnd); /* Notify system DONE */
++ initio_release_scb(host, cblk); /* Release SCB for current channel */
+ }
+
+-/*
+- * Release ressources
+- */
+-static int i91u_release(struct Scsi_Host *hreg)
+-{
+- free_irq(hreg->irq, hreg);
+- release_region(hreg->io_port, 256);
+- return 0;
+-}
+-MODULE_LICENSE("Dual BSD/GPL");
+-
+-static struct scsi_host_template driver_template = {
++static struct scsi_host_template initio_template = {
+ .proc_name = "INI9100U",
+- .name = i91u_REVID,
+- .detect = i91u_detect,
+- .release = i91u_release,
++ .name = "Initio INI-9X00U/UW SCSI device driver",
+ .queuecommand = i91u_queuecommand,
+-// .abort = i91u_abort,
+-// .reset = i91u_reset,
+ .eh_bus_reset_handler = i91u_bus_reset,
+ .bios_param = i91u_biosparam,
+- .can_queue = 1,
++ .can_queue = MAX_TARGETS * i91u_MAXQUEUE,
+ .this_id = 1,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 1,
+ .use_clustering = ENABLE_CLUSTERING,
+ };
+-#include "scsi_module.c"
+
++static int initio_probe_one(struct pci_dev *pdev,
++ const struct pci_device_id *id)
++{
++ struct Scsi_Host *shost;
++ struct initio_host *host;
++ u32 reg;
++ u16 bios_seg;
++ struct scsi_ctrl_blk *scb, *tmp, *prev = NULL /* silence gcc */;
++ int num_scb, i, error;
++
++ error = pci_enable_device(pdev);
++ if (error)
++ return error;
++
++ pci_read_config_dword(pdev, 0x44, (u32 *)&reg);
++ bios_seg = (u16) (reg & 0xFF);
++ if (((reg & 0xFF00) >> 8) == 0xFF)
++ reg = 0;
++ bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8));
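++ /*
++ * Added commentary: PCI config dword 0x44 carries the adapter BIOS
++ * segment in its two low bytes; the shuffle above swaps them into
++ * real mode order (e.g. bytes 0xC8, 0x00 give segment 0xC800), and
++ * bios_seg << 4 below converts that segment into the physical
++ * address passed to initio_init().
++ */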
++
++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
++ printk(KERN_WARNING "i91u: Could not set 32 bit DMA mask\n");
++ error = -ENODEV;
++ goto out_disable_device;
++ }
++ shost = scsi_host_alloc(&initio_template, sizeof(struct initio_host));
++ if (!shost) {
++ printk(KERN_WARNING "initio: Could not allocate host structure.\n");
++ error = -ENOMEM;
++ goto out_disable_device;
++ }
++ host = (struct initio_host *)shost->hostdata;
++ memset(host, 0, sizeof(struct initio_host));
++ host->addr = pci_resource_start(pdev, 0); /* I/O base; needed by request_region() below */
++
++ if (!request_region(host->addr, 256, "i91u")) {
++ printk(KERN_WARNING "initio: I/O port range 0x%x is busy.\n", host->addr);
++ error = -ENODEV;
++ goto out_host_put;
++ }
++
++ if (initio_tag_enable) /* 1.01i */
++ num_scb = MAX_TARGETS * i91u_MAXQUEUE;
++ else
++ num_scb = MAX_TARGETS + 3; /* 1-tape, 1-CD_ROM, 1- extra */
++
++ for (; num_scb >= MAX_TARGETS + 3; num_scb--) {
++ i = num_scb * sizeof(struct scsi_ctrl_blk);
++ if ((scb = kzalloc(i, GFP_DMA)) != NULL)
++ break;
++ }
++
++ if (!scb) {
++ printk(KERN_WARNING "initio: Cannot allocate SCB array.\n");
++ error = -ENOMEM;
++ goto out_release_region;
++ }
++
++ host->num_scbs = num_scb;
++ host->scb = scb;
++ host->next_pending = scb;
++ host->next_avail = scb;
++ for (i = 0, tmp = scb; i < num_scb; i++, tmp++) {
++ tmp->tagid = i;
++ if (i != 0)
++ prev->next = tmp;
++ prev = tmp;
++ }
++ prev->next = NULL;
++ host->scb_end = tmp;
++ host->first_avail = scb;
++ host->last_avail = prev;
++
++ initio_init(host, phys_to_virt(bios_seg << 4));
++
++ host->jsstatus0 = 0;
++
++ shost->io_port = host->addr;
++ shost->n_io_port = 0xff;
++ shost->can_queue = num_scb; /* 03/05/98 */
++ shost->unique_id = host->addr;
++ shost->max_id = host->max_tar;
++ shost->max_lun = 32; /* 10/21/97 */
++ shost->irq = pdev->irq;
++ shost->this_id = host->scsi_id; /* Assign HCS index */
++ shost->base = host->addr;
++ shost->sg_tablesize = TOTAL_SG_ENTRY;
++
++ error = request_irq(pdev->irq, i91u_intr, IRQF_DISABLED|IRQF_SHARED, "i91u", shost);
++ if (error < 0) {
++ printk(KERN_WARNING "initio: Unable to request IRQ %d\n", pdev->irq);
++ goto out_free_scbs;
++ }
++
++ pci_set_drvdata(pdev, shost);
++ host->pci_dev = pdev;
++
++ error = scsi_add_host(shost, &pdev->dev);
++ if (error)
++ goto out_free_irq;
++ scsi_scan_host(shost);
++ return 0;
++out_free_irq:
++ free_irq(pdev->irq, shost);
++out_free_scbs:
++ kfree(host->scb);
++out_release_region:
++ release_region(host->addr, 256);
++out_host_put:
++ scsi_host_put(shost);
++out_disable_device:
++ pci_disable_device(pdev);
++ return error;
++}
++
++/**
++ * initio_remove_one - controller shutdown
++ * @pdev: PCI device being released
++ *
++ * Release the resources assigned to this adapter after it has
++ * finished being used.
++ */
++
++static void initio_remove_one(struct pci_dev *pdev)
++{
++ struct Scsi_Host *host = pci_get_drvdata(pdev);
++ struct initio_host *s = (struct initio_host *)host->hostdata;
++ scsi_remove_host(host);
++ free_irq(pdev->irq, host);
++ release_region(s->addr, 256);
++ scsi_host_put(host);
++ pci_disable_device(pdev);
++}
++
++MODULE_LICENSE("GPL");
++
++static struct pci_device_id initio_pci_tbl[] = {
++ {PCI_VENDOR_ID_INIT, 0x9500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {PCI_VENDOR_ID_INIT, 0x9400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {PCI_VENDOR_ID_INIT, 0x9401, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {PCI_VENDOR_ID_INIT, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {PCI_VENDOR_ID_DOMEX, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++ {0,}
++};
++MODULE_DEVICE_TABLE(pci, initio_pci_tbl);
++
++static struct pci_driver initio_pci_driver = {
++ .name = "initio",
++ .id_table = initio_pci_tbl,
++ .probe = initio_probe_one,
++ .remove = __devexit_p(initio_remove_one),
++};
++
++static int __init initio_init_driver(void)
++{
++ return pci_register_driver(&initio_pci_driver);
++}
++
++static void __exit initio_exit_driver(void)
++{
++ pci_unregister_driver(&initio_pci_driver);
++}
++
++MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver");
++MODULE_AUTHOR("Initio Corporation");
++MODULE_LICENSE("GPL");
++
++module_init(initio_init_driver);
++module_exit(initio_exit_driver);
+diff -Nurb linux-2.6.22-570/drivers/scsi/initio.h linux-2.6.22-591/drivers/scsi/initio.h
+--- linux-2.6.22-570/drivers/scsi/initio.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/initio.h 2007-12-21 15:36:12.000000000 -0500
+@@ -4,6 +4,8 @@
+ * Copyright (c) 1994-1998 Initio Corporation
+ * All rights reserved.
+ *
++ * Cleanups (c) Copyright 2007 Red Hat <alan@redhat.com>
++ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+@@ -18,27 +20,6 @@
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- * notice, this list of conditions, and the following disclaimer,
+- * without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimer in the
+- * documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- * derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -56,17 +37,6 @@
+
+ #include <linux/types.h>
+
+-#define ULONG unsigned long
+-#define USHORT unsigned short
+-#define UCHAR unsigned char
+-#define BYTE unsigned char
+-#define WORD unsigned short
+-#define DWORD unsigned long
+-#define UBYTE unsigned char
+-#define UWORD unsigned short
+-#define UDWORD unsigned long
+-#define U32 u32
+-
+ #define TOTAL_SG_ENTRY 32
+ #define MAX_SUPPORTED_ADAPTERS 8
+ #define MAX_OFFSET 15
+@@ -368,55 +338,55 @@
+ /************************************************************************/
+ /* Scatter-Gather Element Structure */
+ /************************************************************************/
+-typedef struct SG_Struc {
+- U32 SG_Ptr; /* Data Pointer */
+- U32 SG_Len; /* Data Length */
+-} SG;
++struct sg_entry {
++ u32 data; /* Data Pointer */
++ u32 len; /* Data Length */
++};
+
+ /***********************************************************************
+ SCSI Control Block
+ ************************************************************************/
+-typedef struct Scsi_Ctrl_Blk {
+- struct Scsi_Ctrl_Blk *SCB_NxtScb;
+- UBYTE SCB_Status; /*4 */
+- UBYTE SCB_NxtStat; /*5 */
+- UBYTE SCB_Mode; /*6 */
+- UBYTE SCB_Msgin; /*7 SCB_Res0 */
+- UWORD SCB_SGIdx; /*8 */
+- UWORD SCB_SGMax; /*A */
++struct scsi_ctrl_blk {
++ struct scsi_ctrl_blk *next;
++ u8 status; /*4 */
++ u8 next_state; /*5 */
++ u8 mode; /*6 */
++ u8 msgin; /*7 SCB_Res0 */
++ u16 sgidx; /*8 */
++ u16 sgmax; /*A */
+ #ifdef ALPHA
+- U32 SCB_Reserved[2]; /*C */
++ u32 reserved[2]; /*C */
+ #else
+- U32 SCB_Reserved[3]; /*C */
++ u32 reserved[3]; /*C */
+ #endif
+
+- U32 SCB_XferLen; /*18 Current xfer len */
+- U32 SCB_TotXLen; /*1C Total xfer len */
+- U32 SCB_PAddr; /*20 SCB phy. Addr. */
+-
+- UBYTE SCB_Opcode; /*24 SCB command code */
+- UBYTE SCB_Flags; /*25 SCB Flags */
+- UBYTE SCB_Target; /*26 Target Id */
+- UBYTE SCB_Lun; /*27 Lun */
+- U32 SCB_BufPtr; /*28 Data Buffer Pointer */
+- U32 SCB_BufLen; /*2C Data Allocation Length */
+- UBYTE SCB_SGLen; /*30 SG list # */
+- UBYTE SCB_SenseLen; /*31 Sense Allocation Length */
+- UBYTE SCB_HaStat; /*32 */
+- UBYTE SCB_TaStat; /*33 */
+- UBYTE SCB_CDBLen; /*34 CDB Length */
+- UBYTE SCB_Ident; /*35 Identify */
+- UBYTE SCB_TagMsg; /*36 Tag Message */
+- UBYTE SCB_TagId; /*37 Queue Tag */
+- UBYTE SCB_CDB[12]; /*38 */
+- U32 SCB_SGPAddr; /*44 SG List/Sense Buf phy. Addr. */
+- U32 SCB_SensePtr; /*48 Sense data pointer */
+- void (*SCB_Post) (BYTE *, BYTE *); /*4C POST routine */
+- struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */
+- SG SCB_SGList[TOTAL_SG_ENTRY]; /*54 Start of SG list */
+-} SCB;
++ u32 xferlen; /*18 Current xfer len */
++ u32 totxlen; /*1C Total xfer len */
++ u32 paddr; /*20 SCB phy. Addr. */
++
++ u8 opcode; /*24 SCB command code */
++ u8 flags; /*25 SCB Flags */
++ u8 target; /*26 Target Id */
++ u8 lun; /*27 Lun */
++ u32 bufptr; /*28 Data Buffer Pointer */
++ u32 buflen; /*2C Data Allocation Length */
++ u8 sglen; /*30 SG list # */
++ u8 senselen; /*31 Sense Allocation Length */
++ u8 hastat; /*32 */
++ u8 tastat; /*33 */
++ u8 cdblen; /*34 CDB Length */
++ u8 ident; /*35 Identify */
++ u8 tagmsg; /*36 Tag Message */
++ u8 tagid; /*37 Queue Tag */
++ u8 cdb[12]; /*38 */
++ u32 sgpaddr; /*44 SG List/Sense Buf phy. Addr. */
++ u32 senseptr; /*48 Sense data pointer */
++ void (*post) (u8 *, u8 *); /*4C POST routine */
++ struct scsi_cmnd *srb; /*50 SRB Pointer */
++ struct sg_entry sglist[TOTAL_SG_ENTRY]; /*54 Start of SG list */
++};
+
+-/* Bit Definition for SCB_Status */
++/* Bit Definition for status */
+ #define SCB_RENT 0x01
+ #define SCB_PEND 0x02
+ #define SCB_CONTIG 0x04 /* Contingent Allegiance */
+@@ -425,17 +395,17 @@
+ #define SCB_DONE 0x20
+
+
+-/* Opcodes of SCB_Opcode */
++/* Opcodes for opcode */
+ #define ExecSCSI 0x1
+ #define BusDevRst 0x2
+ #define AbortCmd 0x3
+
+
+-/* Bit Definition for SCB_Mode */
++/* Bit Definition for mode */
+ #define SCM_RSENS 0x01 /* request sense mode */
+
+
+-/* Bit Definition for SCB_Flags */
++/* Bit Definition for flags */
+ #define SCF_DONE 0x01
+ #define SCF_POST 0x02
+ #define SCF_SENSE 0x04
+@@ -492,15 +462,14 @@
+ Target Device Control Structure
+ **********************************************************************/
+
+-typedef struct Tar_Ctrl_Struc {
+- UWORD TCS_Flags; /* 0 */
+- UBYTE TCS_JS_Period; /* 2 */
+- UBYTE TCS_SConfig0; /* 3 */
+-
+- UWORD TCS_DrvFlags; /* 4 */
+- UBYTE TCS_DrvHead; /* 6 */
+- UBYTE TCS_DrvSector; /* 7 */
+-} TCS;
++struct target_control {
++ u16 flags;
++ u8 js_period;
++ u8 sconfig0;
++ u16 drv_flags;
++ u8 heads;
++ u8 sectors;
++};
+
+ /***********************************************************************
+ Target Device Control Structure
+@@ -523,62 +492,53 @@
+ #define TCF_DRV_EN_TAG 0x0800
+ #define TCF_DRV_255_63 0x0400
+
+-typedef struct I91u_Adpt_Struc {
+- UWORD ADPT_BIOS; /* 0 */
+- UWORD ADPT_BASE; /* 1 */
+- UBYTE ADPT_Bus; /* 2 */
+- UBYTE ADPT_Device; /* 3 */
+- UBYTE ADPT_INTR; /* 4 */
+-} INI_ADPT_STRUCT;
+-
+-
+ /***********************************************************************
+ Host Adapter Control Structure
+ ************************************************************************/
+-typedef struct Ha_Ctrl_Struc {
+- UWORD HCS_Base; /* 00 */
+- UWORD HCS_BIOS; /* 02 */
+- UBYTE HCS_Intr; /* 04 */
+- UBYTE HCS_SCSI_ID; /* 05 */
+- UBYTE HCS_MaxTar; /* 06 */
+- UBYTE HCS_NumScbs; /* 07 */
+-
+- UBYTE HCS_Flags; /* 08 */
+- UBYTE HCS_Index; /* 09 */
+- UBYTE HCS_HaId; /* 0A */
+- UBYTE HCS_Config; /* 0B */
+- UWORD HCS_IdMask; /* 0C */
+- UBYTE HCS_Semaph; /* 0E */
+- UBYTE HCS_Phase; /* 0F */
+- UBYTE HCS_JSStatus0; /* 10 */
+- UBYTE HCS_JSInt; /* 11 */
+- UBYTE HCS_JSStatus1; /* 12 */
+- UBYTE HCS_SConf1; /* 13 */
+-
+- UBYTE HCS_Msg[8]; /* 14 */
+- SCB *HCS_NxtAvail; /* 1C */
+- SCB *HCS_Scb; /* 20 */
+- SCB *HCS_ScbEnd; /* 24 */
+- SCB *HCS_NxtPend; /* 28 */
+- SCB *HCS_NxtContig; /* 2C */
+- SCB *HCS_ActScb; /* 30 */
+- TCS *HCS_ActTcs; /* 34 */
+-
+- SCB *HCS_FirstAvail; /* 38 */
+- SCB *HCS_LastAvail; /* 3C */
+- SCB *HCS_FirstPend; /* 40 */
+- SCB *HCS_LastPend; /* 44 */
+- SCB *HCS_FirstBusy; /* 48 */
+- SCB *HCS_LastBusy; /* 4C */
+- SCB *HCS_FirstDone; /* 50 */
+- SCB *HCS_LastDone; /* 54 */
+- UBYTE HCS_MaxTags[16]; /* 58 */
+- UBYTE HCS_ActTags[16]; /* 68 */
+- TCS HCS_Tcs[MAX_TARGETS]; /* 78 */
+- spinlock_t HCS_AvailLock;
+- spinlock_t HCS_SemaphLock;
++struct initio_host {
++ u16 addr; /* 00 */
++ u16 bios_addr; /* 02 */
++ u8 irq; /* 04 */
++ u8 scsi_id; /* 05 */
++ u8 max_tar; /* 06 */
++ u8 num_scbs; /* 07 */
++
++ u8 flags; /* 08 */
++ u8 index; /* 09 */
++ u8 ha_id; /* 0A */
++ u8 config; /* 0B */
++ u16 idmask; /* 0C */
++ u8 semaph; /* 0E */
++ u8 phase; /* 0F */
++ u8 jsstatus0; /* 10 */
++ u8 jsint; /* 11 */
++ u8 jsstatus1; /* 12 */
++ u8 sconf1; /* 13 */
++
++ u8 msg[8]; /* 14 */
++ struct scsi_ctrl_blk *next_avail; /* 1C */
++ struct scsi_ctrl_blk *scb; /* 20 */
++ struct scsi_ctrl_blk *scb_end; /* 24 */ /*UNUSED*/
++ struct scsi_ctrl_blk *next_pending; /* 28 */
++ struct scsi_ctrl_blk *next_contig; /* 2C */ /*UNUSED*/
++ struct scsi_ctrl_blk *active; /* 30 */
++ struct target_control *active_tc; /* 34 */
++
++ struct scsi_ctrl_blk *first_avail; /* 38 */
++ struct scsi_ctrl_blk *last_avail; /* 3C */
++ struct scsi_ctrl_blk *first_pending; /* 40 */
++ struct scsi_ctrl_blk *last_pending; /* 44 */
++ struct scsi_ctrl_blk *first_busy; /* 48 */
++ struct scsi_ctrl_blk *last_busy; /* 4C */
++ struct scsi_ctrl_blk *first_done; /* 50 */
++ struct scsi_ctrl_blk *last_done; /* 54 */
++ u8 max_tags[16]; /* 58 */
++ u8 act_tags[16]; /* 68 */
++ struct target_control targets[MAX_TARGETS]; /* 78 */
++ spinlock_t avail_lock;
++ spinlock_t semaph_lock;
+ struct pci_dev *pci_dev;
+-} HCS;
++};
+
+ /* Bit Definition for HCB_Config */
+ #define HCC_SCSI_RESET 0x01
+@@ -599,47 +559,47 @@
+ *******************************************************************/
+
+ typedef struct _NVRAM_SCSI { /* SCSI channel configuration */
+- UCHAR NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */
+- UCHAR NVM_ChConfig1; /* 0Dh -> Channel config 1 */
+- UCHAR NVM_ChConfig2; /* 0Eh -> Channel config 2 */
+- UCHAR NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */
++ u8 NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */
++ u8 NVM_ChConfig1; /* 0Dh -> Channel config 1 */
++ u8 NVM_ChConfig2; /* 0Eh -> Channel config 2 */
++ u8 NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */
+ /* SCSI target configuration */
+- UCHAR NVM_Targ0Config; /* 10h -> Target 0 configuration */
+- UCHAR NVM_Targ1Config; /* 11h -> Target 1 configuration */
+- UCHAR NVM_Targ2Config; /* 12h -> Target 2 configuration */
+- UCHAR NVM_Targ3Config; /* 13h -> Target 3 configuration */
+- UCHAR NVM_Targ4Config; /* 14h -> Target 4 configuration */
+- UCHAR NVM_Targ5Config; /* 15h -> Target 5 configuration */
+- UCHAR NVM_Targ6Config; /* 16h -> Target 6 configuration */
+- UCHAR NVM_Targ7Config; /* 17h -> Target 7 configuration */
+- UCHAR NVM_Targ8Config; /* 18h -> Target 8 configuration */
+- UCHAR NVM_Targ9Config; /* 19h -> Target 9 configuration */
+- UCHAR NVM_TargAConfig; /* 1Ah -> Target A configuration */
+- UCHAR NVM_TargBConfig; /* 1Bh -> Target B configuration */
+- UCHAR NVM_TargCConfig; /* 1Ch -> Target C configuration */
+- UCHAR NVM_TargDConfig; /* 1Dh -> Target D configuration */
+- UCHAR NVM_TargEConfig; /* 1Eh -> Target E configuration */
+- UCHAR NVM_TargFConfig; /* 1Fh -> Target F configuration */
++ u8 NVM_Targ0Config; /* 10h -> Target 0 configuration */
++ u8 NVM_Targ1Config; /* 11h -> Target 1 configuration */
++ u8 NVM_Targ2Config; /* 12h -> Target 2 configuration */
++ u8 NVM_Targ3Config; /* 13h -> Target 3 configuration */
++ u8 NVM_Targ4Config; /* 14h -> Target 4 configuration */
++ u8 NVM_Targ5Config; /* 15h -> Target 5 configuration */
++ u8 NVM_Targ6Config; /* 16h -> Target 6 configuration */
++ u8 NVM_Targ7Config; /* 17h -> Target 7 configuration */
++ u8 NVM_Targ8Config; /* 18h -> Target 8 configuration */
++ u8 NVM_Targ9Config; /* 19h -> Target 9 configuration */
++ u8 NVM_TargAConfig; /* 1Ah -> Target A configuration */
++ u8 NVM_TargBConfig; /* 1Bh -> Target B configuration */
++ u8 NVM_TargCConfig; /* 1Ch -> Target C configuration */
++ u8 NVM_TargDConfig; /* 1Dh -> Target D configuration */
++ u8 NVM_TargEConfig; /* 1Eh -> Target E configuration */
++ u8 NVM_TargFConfig; /* 1Fh -> Target F configuration */
+ } NVRAM_SCSI;
+
+ typedef struct _NVRAM {
+ /*----------header ---------------*/
+- USHORT NVM_Signature; /* 0,1: Signature */
+- UCHAR NVM_Size; /* 2: Size of data structure */
+- UCHAR NVM_Revision; /* 3: Revision of data structure */
++ u16 NVM_Signature; /* 0,1: Signature */
++ u8 NVM_Size; /* 2: Size of data structure */
++ u8 NVM_Revision; /* 3: Revision of data structure */
+ /* ----Host Adapter Structure ---- */
+- UCHAR NVM_ModelByte0; /* 4: Model number (byte 0) */
+- UCHAR NVM_ModelByte1; /* 5: Model number (byte 1) */
+- UCHAR NVM_ModelInfo; /* 6: Model information */
+- UCHAR NVM_NumOfCh; /* 7: Number of SCSI channel */
+- UCHAR NVM_BIOSConfig1; /* 8: BIOS configuration 1 */
+- UCHAR NVM_BIOSConfig2; /* 9: BIOS configuration 2 */
+- UCHAR NVM_HAConfig1; /* A: Hoat adapter configuration 1 */
+- UCHAR NVM_HAConfig2; /* B: Hoat adapter configuration 2 */
++ u8 NVM_ModelByte0; /* 4: Model number (byte 0) */
++ u8 NVM_ModelByte1; /* 5: Model number (byte 1) */
++ u8 NVM_ModelInfo; /* 6: Model information */
++ u8 NVM_NumOfCh; /* 7: Number of SCSI channel */
++ u8 NVM_BIOSConfig1; /* 8: BIOS configuration 1 */
++ u8 NVM_BIOSConfig2; /* 9: BIOS configuration 2 */
++	u8	NVM_HAConfig1;	/* A: Host adapter configuration 1 */
++	u8	NVM_HAConfig2;	/* B: Host adapter configuration 2 */
+ NVRAM_SCSI NVM_SCSIInfo[2];
+- UCHAR NVM_reserved[10];
++ u8 NVM_reserved[10];
+ /* ---------- CheckSum ---------- */
+- USHORT NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */
++ u16 NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */
+ } NVRAM, *PNVRAM;
+
+ /* Bios Configuration for nvram->BIOSConfig1 */
+@@ -681,19 +641,6 @@
+ #define DISC_ALLOW 0xC0 /* Disconnect is allowed */
+ #define SCSICMD_RequestSense 0x03
+
+-typedef struct _HCSinfo {
+- ULONG base;
+- UCHAR vec;
+- UCHAR bios; /* High byte of BIOS address */
+- USHORT BaseAndBios; /* high byte: pHcsInfo->bios,low byte:pHcsInfo->base */
+-} HCSINFO;
+-
+-#define TUL_RD(x,y) (UCHAR)(inb( (int)((ULONG)(x+y)) ))
+-#define TUL_RDLONG(x,y) (ULONG)(inl((int)((ULONG)(x+y)) ))
+-#define TUL_WR( adr,data) outb( (UCHAR)(data), (int)(adr))
+-#define TUL_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr))
+-#define TUL_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr))
+-
+ #define SCSI_ABORT_SNOOZE 0
+ #define SCSI_ABORT_SUCCESS 1
+ #define SCSI_ABORT_PENDING 2
+diff -Nurb linux-2.6.22-570/drivers/scsi/ipr.c linux-2.6.22-591/drivers/scsi/ipr.c
+--- linux-2.6.22-570/drivers/scsi/ipr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ipr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -540,32 +540,6 @@
+ }
+
+ /**
+- * ipr_unmap_sglist - Unmap scatterlist if mapped
+- * @ioa_cfg: ioa config struct
+- * @ipr_cmd: ipr command struct
+- *
+- * Return value:
+- * nothing
+- **/
+-static void ipr_unmap_sglist(struct ipr_ioa_cfg *ioa_cfg,
+- struct ipr_cmnd *ipr_cmd)
+-{
+- struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+-
+- if (ipr_cmd->dma_use_sg) {
+- if (scsi_cmd->use_sg > 0) {
+- pci_unmap_sg(ioa_cfg->pdev, scsi_cmd->request_buffer,
+- scsi_cmd->use_sg,
+- scsi_cmd->sc_data_direction);
+- } else {
+- pci_unmap_single(ioa_cfg->pdev, ipr_cmd->dma_handle,
+- scsi_cmd->request_bufflen,
+- scsi_cmd->sc_data_direction);
+- }
+- }
+-}
+-
+-/**
+ * ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts
+ * @ioa_cfg: ioa config struct
+ * @clr_ints: interrupts to clear
+@@ -677,7 +651,7 @@
+
+ scsi_cmd->result |= (DID_ERROR << 16);
+
+- ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++ scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ scsi_cmd->scsi_done(scsi_cmd);
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ }
+@@ -2465,6 +2439,7 @@
+ /**
+ * ipr_read_trace - Dump the adapter trace
+ * @kobj: kobject struct
++ * @bin_attr: bin_attribute struct
+ * @buf: buffer
+ * @off: offset
+ * @count: buffer size
+@@ -2472,8 +2447,9 @@
+ * Return value:
+ * number of bytes printed to buffer
+ **/
+-static ssize_t ipr_read_trace(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t ipr_read_trace(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -3166,6 +3142,7 @@
+ /**
+ * ipr_read_dump - Dump the adapter
+ * @kobj: kobject struct
++ * @bin_attr: bin_attribute struct
+ * @buf: buffer
+ * @off: offset
+ * @count: buffer size
+@@ -3173,8 +3150,9 @@
+ * Return value:
+ * number of bytes printed to buffer
+ **/
+-static ssize_t ipr_read_dump(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t ipr_read_dump(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -3327,6 +3305,7 @@
+ /**
+ * ipr_write_dump - Setup dump state of adapter
+ * @kobj: kobject struct
++ * @bin_attr: bin_attribute struct
+ * @buf: buffer
+ * @off: offset
+ * @count: buffer size
+@@ -3334,8 +3313,9 @@
+ * Return value:
+ * number of bytes printed to buffer
+ **/
+-static ssize_t ipr_write_dump(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t ipr_write_dump(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -4292,24 +4272,25 @@
+ static int ipr_build_ioadl(struct ipr_ioa_cfg *ioa_cfg,
+ struct ipr_cmnd *ipr_cmd)
+ {
+- int i;
+- struct scatterlist *sglist;
++ int i, nseg;
++ struct scatterlist *sg;
+ u32 length;
+ u32 ioadl_flags = 0;
+ struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+ struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
+ struct ipr_ioadl_desc *ioadl = ipr_cmd->ioadl;
+
+- length = scsi_cmd->request_bufflen;
+-
+- if (length == 0)
++ length = scsi_bufflen(scsi_cmd);
++ if (!length)
+ return 0;
+
+- if (scsi_cmd->use_sg) {
+- ipr_cmd->dma_use_sg = pci_map_sg(ioa_cfg->pdev,
+- scsi_cmd->request_buffer,
+- scsi_cmd->use_sg,
+- scsi_cmd->sc_data_direction);
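++	/* scsi_dma_map() subsumes the old use_sg/request_buffer split:
++	 * it returns the mapped segment count, 0 when the command moves
++	 * no data, or a negative errno on mapping failure. */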
++ nseg = scsi_dma_map(scsi_cmd);
++ if (nseg < 0) {
++ dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n");
++ return -1;
++ }
++
++ ipr_cmd->dma_use_sg = nseg;
+
+ if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) {
+ ioadl_flags = IPR_IOADL_FLAGS_WRITE;
+@@ -4324,8 +4305,6 @@
+ cpu_to_be32(sizeof(struct ipr_ioadl_desc) * ipr_cmd->dma_use_sg);
+ }
+
+- sglist = scsi_cmd->request_buffer;
+-
+ if (ipr_cmd->dma_use_sg <= ARRAY_SIZE(ioarcb->add_data.u.ioadl)) {
+ ioadl = ioarcb->add_data.u.ioadl;
+ ioarcb->write_ioadl_addr =
+@@ -4334,51 +4313,14 @@
+ ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
+ }
+
+- for (i = 0; i < ipr_cmd->dma_use_sg; i++) {
++ scsi_for_each_sg(scsi_cmd, sg, ipr_cmd->dma_use_sg, i) {
+ ioadl[i].flags_and_data_len =
+- cpu_to_be32(ioadl_flags | sg_dma_len(&sglist[i]));
+- ioadl[i].address =
+- cpu_to_be32(sg_dma_address(&sglist[i]));
++ cpu_to_be32(ioadl_flags | sg_dma_len(sg));
++ ioadl[i].address = cpu_to_be32(sg_dma_address(sg));
+ }
+
+- if (likely(ipr_cmd->dma_use_sg)) {
+- ioadl[i-1].flags_and_data_len |=
+- cpu_to_be32(IPR_IOADL_FLAGS_LAST);
+- return 0;
+- } else
+- dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n");
+- } else {
+- if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) {
+- ioadl_flags = IPR_IOADL_FLAGS_WRITE;
+- ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_WRITE_NOT_READ;
+- ioarcb->write_data_transfer_length = cpu_to_be32(length);
+- ioarcb->write_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc));
+- } else if (scsi_cmd->sc_data_direction == DMA_FROM_DEVICE) {
+- ioadl_flags = IPR_IOADL_FLAGS_READ;
+- ioarcb->read_data_transfer_length = cpu_to_be32(length);
+- ioarcb->read_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc));
+- }
+-
+- ipr_cmd->dma_handle = pci_map_single(ioa_cfg->pdev,
+- scsi_cmd->request_buffer, length,
+- scsi_cmd->sc_data_direction);
+-
+- if (likely(!pci_dma_mapping_error(ipr_cmd->dma_handle))) {
+- ioadl = ioarcb->add_data.u.ioadl;
+- ioarcb->write_ioadl_addr =
+- cpu_to_be32(be32_to_cpu(ioarcb->ioarcb_host_pci_addr) +
+- offsetof(struct ipr_ioarcb, add_data));
+- ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
+- ipr_cmd->dma_use_sg = 1;
+- ioadl[0].flags_and_data_len =
+- cpu_to_be32(ioadl_flags | length | IPR_IOADL_FLAGS_LAST);
+- ioadl[0].address = cpu_to_be32(ipr_cmd->dma_handle);
++ ioadl[i-1].flags_and_data_len |= cpu_to_be32(IPR_IOADL_FLAGS_LAST);
+ return 0;
+- } else
+- dev_err(&ioa_cfg->pdev->dev, "pci_map_single failed!\n");
+- }
+-
+- return -1;
+ }
+
+ /**
+@@ -4441,7 +4383,7 @@
+ res->needs_sync_complete = 1;
+ res->in_erp = 0;
+ }
+- ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++ scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ scsi_cmd->scsi_done(scsi_cmd);
+ }
+@@ -4819,7 +4761,7 @@
+ break;
+ }
+
+- ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++ scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ scsi_cmd->scsi_done(scsi_cmd);
+ }
+@@ -4840,10 +4782,10 @@
+ struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+ u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+
+- scsi_cmd->resid = be32_to_cpu(ipr_cmd->ioasa.residual_data_len);
++ scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->ioasa.residual_data_len));
+
+ if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) {
+- ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++ scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ scsi_cmd->scsi_done(scsi_cmd);
+ } else
+diff -Nurb linux-2.6.22-570/drivers/scsi/ips.c linux-2.6.22-591/drivers/scsi/ips.c
+--- linux-2.6.22-570/drivers/scsi/ips.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ips.c 2007-12-21 15:36:12.000000000 -0500
+@@ -211,19 +211,6 @@
+ #warning "This driver has only been tested on the x86/ia64/x86_64 platforms"
+ #endif
+
+-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+-#include <linux/blk.h>
+-#include "sd.h"
+-#define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags)
+-#define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags)
+-#ifndef __devexit_p
+-#define __devexit_p(x) x
+-#endif
+-#else
+-#define IPS_LOCK_SAVE(lock,flags) do{spin_lock(lock);(void)flags;}while(0)
+-#define IPS_UNLOCK_RESTORE(lock,flags) do{spin_unlock(lock);(void)flags;}while(0)
+-#endif
+-
+ #define IPS_DMA_DIR(scb) ((!scb->scsi_cmd || ips_is_passthru(scb->scsi_cmd) || \
+ DMA_NONE == scb->scsi_cmd->sc_data_direction) ? \
+ PCI_DMA_BIDIRECTIONAL : \
+@@ -381,24 +368,13 @@
+ .eh_abort_handler = ips_eh_abort,
+ .eh_host_reset_handler = ips_eh_reset,
+ .proc_name = "ips",
+-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+ .proc_info = ips_proc_info,
+ .slave_configure = ips_slave_configure,
+-#else
+- .proc_info = ips_proc24_info,
+- .select_queue_depths = ips_select_queue_depth,
+-#endif
+ .bios_param = ips_biosparam,
+ .this_id = -1,
+ .sg_tablesize = IPS_MAX_SG,
+ .cmd_per_lun = 3,
+ .use_clustering = ENABLE_CLUSTERING,
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+- .use_new_eh_code = 1,
+-#endif
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+- .highmem_io = 1,
+-#endif
+ };
+
+
+@@ -731,7 +707,7 @@
+ /* free IRQ */
+ free_irq(ha->irq, ha);
+
+- IPS_REMOVE_HOST(sh);
++ scsi_remove_host(sh);
+ scsi_host_put(sh);
+
+ ips_released_controllers++;
+@@ -813,7 +789,6 @@
+ ips_ha_t *ha;
+ ips_copp_wait_item_t *item;
+ int ret;
+- unsigned long cpu_flags;
+ struct Scsi_Host *host;
+
+ METHOD_TRACE("ips_eh_abort", 1);
+@@ -830,7 +805,7 @@
+ if (!ha->active)
+ return (FAILED);
+
+- IPS_LOCK_SAVE(host->host_lock, cpu_flags);
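++	/* IPS_LOCK_SAVE was already a bare spin_lock() on 2.6 kernels;
++	 * with the 2.4 compatibility macros gone, take host_lock directly. */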
++ spin_lock(host->host_lock);
+
+ /* See if the command is on the copp queue */
+ item = ha->copp_waitlist.head;
+@@ -851,7 +826,7 @@
+ ret = (FAILED);
+ }
+
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++ spin_unlock(host->host_lock);
+ return ret;
+ }
+
+@@ -1129,7 +1104,7 @@
+ /* A Reset IOCTL is only sent by the boot CD in extreme cases. */
+ /* There can never be any system activity ( network or disk ), but check */
+ /* anyway just as a good practice. */
+- pt = (ips_passthru_t *) SC->request_buffer;
++ pt = (ips_passthru_t *) scsi_sglist(SC);
+ if ((pt->CoppCP.cmd.reset.op_code == IPS_CMD_RESET_CHANNEL) &&
+ (pt->CoppCP.cmd.reset.adapter_flag == 1)) {
+ if (ha->scb_activelist.count != 0) {
+@@ -1176,18 +1151,10 @@
+ /* Set bios geometry for the controller */
+ /* */
+ /****************************************************************************/
+-static int
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-ips_biosparam(Disk * disk, kdev_t dev, int geom[])
+-{
+- ips_ha_t *ha = (ips_ha_t *) disk->device->host->hostdata;
+- unsigned long capacity = disk->capacity;
+-#else
+-ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
++static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ sector_t capacity, int geom[])
+ {
+ ips_ha_t *ha = (ips_ha_t *) sdev->host->hostdata;
+-#endif
+ int heads;
+ int sectors;
+ int cylinders;
+@@ -1225,70 +1192,6 @@
+ return (0);
+ }
+
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-
+-/* ips_proc24_info is a wrapper around ips_proc_info *
+- * for compatibility with the 2.4 scsi parameters */
+-static int
+-ips_proc24_info(char *buffer, char **start, off_t offset, int length,
+- int hostno, int func)
+-{
+- int i;
+-
+- for (i = 0; i < ips_next_controller; i++) {
+- if (ips_sh[i] && ips_sh[i]->host_no == hostno) {
+- return ips_proc_info(ips_sh[i], buffer, start,
+- offset, length, func);
+- }
+- }
+- return -EINVAL;
+-}
+-
+-/****************************************************************************/
+-/* */
+-/* Routine Name: ips_select_queue_depth */
+-/* */
+-/* Routine Description: */
+-/* */
+-/* Select queue depths for the devices on the contoller */
+-/* */
+-/****************************************************************************/
+-static void
+-ips_select_queue_depth(struct Scsi_Host *host, struct scsi_device * scsi_devs)
+-{
+- struct scsi_device *device;
+- ips_ha_t *ha;
+- int count = 0;
+- int min;
+-
+- ha = IPS_HA(host);
+- min = ha->max_cmds / 4;
+-
+- for (device = scsi_devs; device; device = device->next) {
+- if (device->host == host) {
+- if ((device->channel == 0) && (device->type == 0))
+- count++;
+- }
+- }
+-
+- for (device = scsi_devs; device; device = device->next) {
+- if (device->host == host) {
+- if ((device->channel == 0) && (device->type == 0)) {
+- device->queue_depth =
+- (ha->max_cmds - 1) / count;
+- if (device->queue_depth < min)
+- device->queue_depth = min;
+- } else {
+- device->queue_depth = 2;
+- }
+-
+- if (device->queue_depth < 2)
+- device->queue_depth = 2;
+- }
+- }
+-}
+-
+-#else
+ /****************************************************************************/
+ /* */
+ /* Routine Name: ips_slave_configure */
+@@ -1316,7 +1219,6 @@
+ SDptr->skip_ms_page_3f = 1;
+ return 0;
+ }
+-#endif
+
+ /****************************************************************************/
+ /* */
+@@ -1331,7 +1233,6 @@
+ do_ipsintr(int irq, void *dev_id)
+ {
+ ips_ha_t *ha;
+- unsigned long cpu_flags;
+ struct Scsi_Host *host;
+ int irqstatus;
+
+@@ -1347,16 +1248,16 @@
+ return IRQ_HANDLED;
+ }
+
+- IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++ spin_lock(host->host_lock);
+
+ if (!ha->active) {
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++ spin_unlock(host->host_lock);
+ return IRQ_HANDLED;
+ }
+
+ irqstatus = (*ha->func.intr) (ha);
+
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++ spin_unlock(host->host_lock);
+
+ /* start the next command */
+ ips_next(ha, IPS_INTR_ON);
+@@ -1606,15 +1507,8 @@
+ if ((SC->cmnd[0] == IPS_IOCTL_COMMAND) &&
+ (SC->device->channel == 0) &&
+ (SC->device->id == IPS_ADAPTER_ID) &&
+- (SC->device->lun == 0) && SC->request_buffer) {
+- if ((!SC->use_sg) && SC->request_bufflen &&
+- (((char *) SC->request_buffer)[0] == 'C') &&
+- (((char *) SC->request_buffer)[1] == 'O') &&
+- (((char *) SC->request_buffer)[2] == 'P') &&
+- (((char *) SC->request_buffer)[3] == 'P'))
+- return 1;
+- else if (SC->use_sg) {
+- struct scatterlist *sg = SC->request_buffer;
++ (SC->device->lun == 0) && scsi_sglist(SC)) {
++ struct scatterlist *sg = scsi_sglist(SC);
+ char *buffer;
+
+ /* kmap_atomic() ensures addressability of the user buffer.*/
+@@ -1630,7 +1524,6 @@
+ kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+ local_irq_restore(flags);
+ }
+- }
+ return 0;
+ }
+
+@@ -1680,18 +1573,14 @@
+ {
+ ips_passthru_t *pt;
+ int length = 0;
+- int ret;
++ int i, ret;
++ struct scatterlist *sg = scsi_sglist(SC);
+
+ METHOD_TRACE("ips_make_passthru", 1);
+
+- if (!SC->use_sg) {
+- length = SC->request_bufflen;
+- } else {
+- struct scatterlist *sg = SC->request_buffer;
+- int i;
+- for (i = 0; i < SC->use_sg; i++)
++ scsi_for_each_sg(SC, sg, scsi_sg_count(SC), i)
+-		length += sg[i].length;
++		length += sg->length;	/* sg advances each pass; sg[i] would index past it */
+- }
++
+ if (length < sizeof (ips_passthru_t)) {
+ /* wrong size */
+ DEBUG_VAR(1, "(%s%d) Passthru structure wrong size",
+@@ -2115,7 +2004,7 @@
+
+ METHOD_TRACE("ips_cleanup_passthru", 1);
+
+- if ((!scb) || (!scb->scsi_cmd) || (!scb->scsi_cmd->request_buffer)) {
++ if ((!scb) || (!scb->scsi_cmd) || (!scsi_sglist(scb->scsi_cmd))) {
+ DEBUG_VAR(1, "(%s%d) couldn't cleanup after passthru",
+ ips_name, ha->host_num);
+
+@@ -2730,7 +2619,6 @@
+ struct scsi_cmnd *q;
+ ips_copp_wait_item_t *item;
+ int ret;
+- unsigned long cpu_flags = 0;
+ struct Scsi_Host *host;
+ METHOD_TRACE("ips_next", 1);
+
+@@ -2742,7 +2630,7 @@
+ * this command won't time out
+ */
+ if (intr == IPS_INTR_ON)
+- IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++ spin_lock(host->host_lock);
+
+ if ((ha->subsys->param[3] & 0x300000)
+ && (ha->scb_activelist.count == 0)) {
+@@ -2769,14 +2657,14 @@
+ item = ips_removeq_copp_head(&ha->copp_waitlist);
+ ha->num_ioctl++;
+ if (intr == IPS_INTR_ON)
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++ spin_unlock(host->host_lock);
+ scb->scsi_cmd = item->scsi_cmd;
+ kfree(item);
+
+ ret = ips_make_passthru(ha, scb->scsi_cmd, scb, intr);
+
+ if (intr == IPS_INTR_ON)
+- IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++ spin_lock(host->host_lock);
+ switch (ret) {
+ case IPS_FAILURE:
+ if (scb->scsi_cmd) {
+@@ -2846,7 +2734,7 @@
+ SC = ips_removeq_wait(&ha->scb_waitlist, q);
+
+ if (intr == IPS_INTR_ON)
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); /* Unlock HA after command is taken off queue */
++ spin_unlock(host->host_lock); /* Unlock HA after command is taken off queue */
+
+ SC->result = DID_OK;
+ SC->host_scribble = NULL;
+@@ -2866,43 +2754,28 @@
+ /* copy in the CDB */
+ memcpy(scb->cdb, SC->cmnd, SC->cmd_len);
+
+- /* Now handle the data buffer */
+- if (SC->use_sg) {
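++	/* scsi_dma_map() covers both the S/G and the old single-buffer
++	 * cases; zero just means the command carries no data. */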
++ scb->sg_count = scsi_dma_map(SC);
++ BUG_ON(scb->sg_count < 0);
++ if (scb->sg_count) {
+ struct scatterlist *sg;
+ int i;
+
+- sg = SC->request_buffer;
+- scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg,
+- SC->sc_data_direction);
+ scb->flags |= IPS_SCB_MAP_SG;
+- for (i = 0; i < scb->sg_count; i++) {
++
++ scsi_for_each_sg(SC, sg, scb->sg_count, i) {
+ if (ips_fill_scb_sg_single
+- (ha, sg_dma_address(&sg[i]), scb, i,
+- sg_dma_len(&sg[i])) < 0)
++ (ha, sg_dma_address(sg), scb, i,
++ sg_dma_len(sg)) < 0)
+ break;
+ }
+ scb->dcdb.transfer_length = scb->data_len;
+ } else {
+- if (SC->request_bufflen) {
+- scb->data_busaddr =
+- pci_map_single(ha->pcidev,
+- SC->request_buffer,
+- SC->request_bufflen,
+- SC->sc_data_direction);
+- scb->flags |= IPS_SCB_MAP_SINGLE;
+- ips_fill_scb_sg_single(ha, scb->data_busaddr,
+- scb, 0,
+- SC->request_bufflen);
+- scb->dcdb.transfer_length = scb->data_len;
+- } else {
+ scb->data_busaddr = 0L;
+ scb->sg_len = 0;
+ scb->data_len = 0;
+ scb->dcdb.transfer_length = 0;
+ }
+
+- }
+-
+ scb->dcdb.cmd_attribute =
+ ips_command_direction[scb->scsi_cmd->cmnd[0]];
+
+@@ -2919,7 +2792,7 @@
+ scb->dcdb.transfer_length = 0;
+ }
+ if (intr == IPS_INTR_ON)
+- IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++ spin_lock(host->host_lock);
+
+ ret = ips_send_cmd(ha, scb);
+
+@@ -2958,7 +2831,7 @@
+ } /* end while */
+
+ if (intr == IPS_INTR_ON)
+- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++ spin_unlock(host->host_lock);
+ }
+
+ /****************************************************************************/
+@@ -3377,29 +3250,24 @@
+ * the rest of the data and continue.
+ */
+ if ((scb->breakup) || (scb->sg_break)) {
++ struct scatterlist *sg;
++ int sg_dma_index, ips_sg_index = 0;
++
+ /* we had a data breakup */
+ scb->data_len = 0;
+
+- if (scb->sg_count) {
+- /* S/G request */
+- struct scatterlist *sg;
+- int ips_sg_index = 0;
+- int sg_dma_index;
+-
+- sg = scb->scsi_cmd->request_buffer;
++ sg = scsi_sglist(scb->scsi_cmd);
+
+ /* Spin forward to last dma chunk */
+ sg_dma_index = scb->breakup;
+
+ /* Take care of possible partial on last chunk */
+ ips_fill_scb_sg_single(ha,
+- sg_dma_address(&sg
+- [sg_dma_index]),
++ sg_dma_address(&sg[sg_dma_index]),
+ scb, ips_sg_index++,
+- sg_dma_len(&sg
+- [sg_dma_index]));
++ sg_dma_len(&sg[sg_dma_index]));
+
+- for (; sg_dma_index < scb->sg_count;
++ for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd);
+ sg_dma_index++) {
+ if (ips_fill_scb_sg_single
+ (ha,
+@@ -3407,21 +3275,6 @@
+ scb, ips_sg_index++,
+ sg_dma_len(&sg[sg_dma_index])) < 0)
+ break;
+-
+- }
+-
+- } else {
+- /* Non S/G Request */
+- (void) ips_fill_scb_sg_single(ha,
+- scb->
+- data_busaddr +
+- (scb->sg_break *
+- ha->max_xfer),
+- scb, 0,
+- scb->scsi_cmd->
+- request_bufflen -
+- (scb->sg_break *
+- ha->max_xfer));
+ }
+
+ scb->dcdb.transfer_length = scb->data_len;
+@@ -3653,15 +3506,15 @@
+ static void
+ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count)
+ {
+- if (scmd->use_sg) {
+ int i;
+ unsigned int min_cnt, xfer_cnt;
+ char *cdata = (char *) data;
+ unsigned char *buffer;
+ unsigned long flags;
+- struct scatterlist *sg = scmd->request_buffer;
++ struct scatterlist *sg = scsi_sglist(scmd);
++
+ for (i = 0, xfer_cnt = 0;
+- (i < scmd->use_sg) && (xfer_cnt < count); i++) {
++ (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) {
+ min_cnt = min(count - xfer_cnt, sg[i].length);
+
+ /* kmap_atomic() ensures addressability of the data buffer.*/
+@@ -3674,11 +3527,6 @@
+
+ xfer_cnt += min_cnt;
+ }
+-
+- } else {
+- unsigned int min_cnt = min(count, scmd->request_bufflen);
+- memcpy(scmd->request_buffer, data, min_cnt);
+- }
+ }
+
+ /****************************************************************************/
+@@ -3691,15 +3539,15 @@
+ static void
+ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count)
+ {
+- if (scmd->use_sg) {
+ int i;
+ unsigned int min_cnt, xfer_cnt;
+ char *cdata = (char *) data;
+ unsigned char *buffer;
+ unsigned long flags;
+- struct scatterlist *sg = scmd->request_buffer;
++ struct scatterlist *sg = scsi_sglist(scmd);
++
+ for (i = 0, xfer_cnt = 0;
+- (i < scmd->use_sg) && (xfer_cnt < count); i++) {
++ (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) {
+ min_cnt = min(count - xfer_cnt, sg[i].length);
+
+ /* kmap_atomic() ensures addressability of the data buffer.*/
+@@ -3712,11 +3560,6 @@
+
+ xfer_cnt += min_cnt;
+ }
+-
+- } else {
+- unsigned int min_cnt = min(count, scmd->request_bufflen);
+- memcpy(data, scmd->request_buffer, min_cnt);
+- }
+ }
+
+ /****************************************************************************/
+@@ -4350,7 +4193,7 @@
+
+ METHOD_TRACE("ips_rdcap", 1);
+
+- if (scb->scsi_cmd->request_bufflen < 8)
++ if (scsi_bufflen(scb->scsi_cmd) < 8)
+ return (0);
+
+ cap.lba =
+@@ -4735,8 +4578,7 @@
+
+ METHOD_TRACE("ips_freescb", 1);
+ if (scb->flags & IPS_SCB_MAP_SG)
+- pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer,
+- scb->scsi_cmd->use_sg, IPS_DMA_DIR(scb));
++ scsi_dma_unmap(scb->scsi_cmd);
+ else if (scb->flags & IPS_SCB_MAP_SINGLE)
+ pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len,
+ IPS_DMA_DIR(scb));
+@@ -7004,7 +6846,6 @@
+ kfree(oldha);
+ ips_sh[index] = sh;
+ ips_ha[index] = ha;
+- IPS_SCSI_SET_DEVICE(sh, ha);
+
+ /* Store away needed values for later use */
+ sh->io_port = ha->io_addr;
+@@ -7016,17 +6857,16 @@
+ sh->cmd_per_lun = sh->hostt->cmd_per_lun;
+ sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma;
+ sh->use_clustering = sh->hostt->use_clustering;
+-
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7)
+ sh->max_sectors = 128;
+-#endif
+
+ sh->max_id = ha->ntargets;
+ sh->max_lun = ha->nlun;
+ sh->max_channel = ha->nbus - 1;
+ sh->can_queue = ha->max_cmds - 1;
+
+- IPS_ADD_HOST(sh, NULL);
++ scsi_add_host(sh, NULL);
++ scsi_scan_host(sh);
++
+ return 0;
+ }
+
+@@ -7069,7 +6909,7 @@
+ return -ENODEV;
+ ips_driver_template.module = THIS_MODULE;
+ ips_order_controllers();
+- if (IPS_REGISTER_HOSTS(&ips_driver_template)) {
++ if (!ips_detect(&ips_driver_template)) {
+ pci_unregister_driver(&ips_pci_driver);
+ return -ENODEV;
+ }
+@@ -7087,7 +6927,6 @@
+ static void __exit
+ ips_module_exit(void)
+ {
+- IPS_UNREGISTER_HOSTS(&ips_driver_template);
+ pci_unregister_driver(&ips_pci_driver);
+ unregister_reboot_notifier(&ips_notifier);
+ }
+@@ -7443,15 +7282,9 @@
+ return SUCCESS;
+ }
+
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9)
+ MODULE_LICENSE("GPL");
+-#endif
+-
+ MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING);
+-
+-#ifdef MODULE_VERSION
+ MODULE_VERSION(IPS_VER_STRING);
+-#endif
+
+
+ /*
+diff -Nurb linux-2.6.22-570/drivers/scsi/ips.h linux-2.6.22-591/drivers/scsi/ips.h
+--- linux-2.6.22-570/drivers/scsi/ips.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ips.h 2007-12-21 15:36:12.000000000 -0500
+@@ -58,10 +58,6 @@
+ /*
+ * Some handy macros
+ */
+- #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined CONFIG_HIGHIO
+- #define IPS_HIGHIO
+- #endif
+-
+ #define IPS_HA(x) ((ips_ha_t *) x->hostdata)
+ #define IPS_COMMAND_ID(ha, scb) (int) (scb - ha->scbs)
+ #define IPS_IS_TROMBONE(ha) (((ha->device_id == IPS_DEVICEID_COPPERHEAD) && \
+@@ -84,38 +80,8 @@
+ #define IPS_SGLIST_SIZE(ha) (IPS_USE_ENH_SGLIST(ha) ? \
+ sizeof(IPS_ENH_SG_LIST) : sizeof(IPS_STD_SG_LIST))
+
+- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4)
+- #define pci_set_dma_mask(dev,mask) ( mask > 0xffffffff ? 1:0 )
+- #define scsi_set_pci_device(sh,dev) (0)
+- #endif
+-
+- #ifndef IRQ_NONE
+- typedef void irqreturn_t;
+- #define IRQ_NONE
+- #define IRQ_HANDLED
+- #define IRQ_RETVAL(x)
+- #endif
+-
+- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+- #define IPS_REGISTER_HOSTS(SHT) scsi_register_module(MODULE_SCSI_HA,SHT)
+- #define IPS_UNREGISTER_HOSTS(SHT) scsi_unregister_module(MODULE_SCSI_HA,SHT)
+- #define IPS_ADD_HOST(shost,device)
+- #define IPS_REMOVE_HOST(shost)
+- #define IPS_SCSI_SET_DEVICE(sh,ha) scsi_set_pci_device(sh, (ha)->pcidev)
+- #define IPS_PRINTK(level, pcidev, format, arg...) \
+- printk(level "%s %s:" format , "ips" , \
+- (pcidev)->slot_name , ## arg)
+- #define scsi_host_alloc(sh,size) scsi_register(sh,size)
+- #define scsi_host_put(sh) scsi_unregister(sh)
+- #else
+- #define IPS_REGISTER_HOSTS(SHT) (!ips_detect(SHT))
+- #define IPS_UNREGISTER_HOSTS(SHT)
+- #define IPS_ADD_HOST(shost,device) do { scsi_add_host(shost,device); scsi_scan_host(shost); } while (0)
+- #define IPS_REMOVE_HOST(shost) scsi_remove_host(shost)
+- #define IPS_SCSI_SET_DEVICE(sh,ha) do { } while (0)
+ #define IPS_PRINTK(level, pcidev, format, arg...) \
+ dev_printk(level , &((pcidev)->dev) , format , ## arg)
+- #endif
+
+ #define MDELAY(n) \
+ do { \
+@@ -134,7 +100,7 @@
+ #define pci_dma_hi32(a) ((a >> 16) >> 16)
+ #define pci_dma_lo32(a) (a & 0xffffffff)
+
+- #if (BITS_PER_LONG > 32) || (defined CONFIG_HIGHMEM64G && defined IPS_HIGHIO)
++ #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G)
+ #define IPS_ENABLE_DMA64 (1)
+ #else
+ #define IPS_ENABLE_DMA64 (0)
+@@ -451,16 +417,10 @@
+ /*
+ * Scsi_Host Template
+ */
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+- static int ips_proc24_info(char *, char **, off_t, int, int, int);
+- static void ips_select_queue_depth(struct Scsi_Host *, struct scsi_device *);
+- static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]);
+-#else
+ static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
+ static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ sector_t capacity, int geom[]);
+ static int ips_slave_configure(struct scsi_device *SDptr);
+-#endif
+
+ /*
+ * Raid Command Formats
+diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.c linux-2.6.22-591/drivers/scsi/iscsi_tcp.c
+--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -29,14 +29,15 @@
+ #include <linux/types.h>
+ #include <linux/list.h>
+ #include <linux/inet.h>
++#include <linux/file.h>
+ #include <linux/blkdev.h>
+ #include <linux/crypto.h>
+ #include <linux/delay.h>
+ #include <linux/kfifo.h>
+ #include <linux/scatterlist.h>
+-#include <linux/mutex.h>
+ #include <net/tcp.h>
+ #include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_transport_iscsi.h>
+@@ -109,7 +110,7 @@
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+
+ crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc);
+- buf->sg.length = tcp_conn->hdr_size;
++ buf->sg.length += sizeof(u32);
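++	/* Grow the segment by the 4-byte digest just computed into it,
++	 * rather than resetting to a fixed header size. */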
+ }
+
+ static inline int
+@@ -211,16 +212,14 @@
+ static int
+ iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
+- int rc;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+ struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
+ struct iscsi_session *session = conn->session;
++ struct scsi_cmnd *sc = ctask->sc;
+ int datasn = be32_to_cpu(rhdr->datasn);
+
+- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+- if (rc)
+- return rc;
++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
+ /*
+ * setup Data-In byte counter (gets decremented..)
+ */
+@@ -229,31 +228,36 @@
+ if (tcp_conn->in.datalen == 0)
+ return 0;
+
+- if (ctask->datasn != datasn)
++ if (tcp_ctask->exp_datasn != datasn) {
++ debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
++ __FUNCTION__, tcp_ctask->exp_datasn, datasn);
+ return ISCSI_ERR_DATASN;
++ }
+
+- ctask->datasn++;
++ tcp_ctask->exp_datasn++;
+
+ tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
+- if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length)
++ if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) {
++ debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
++ __FUNCTION__, tcp_ctask->data_offset,
++ tcp_conn->in.datalen, scsi_bufflen(sc));
+ return ISCSI_ERR_DATA_OFFSET;
++ }
+
+ if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
+- struct scsi_cmnd *sc = ctask->sc;
+-
+ conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+ if (rhdr->flags & ISCSI_FLAG_DATA_UNDERFLOW) {
+ int res_count = be32_to_cpu(rhdr->residual_count);
+
+ if (res_count > 0 &&
+- res_count <= sc->request_bufflen) {
+- sc->resid = res_count;
++ res_count <= scsi_bufflen(sc)) {
++ scsi_set_resid(sc, res_count);
+ sc->result = (DID_OK << 16) | rhdr->cmd_status;
+ } else
+ sc->result = (DID_BAD_TARGET << 16) |
+ rhdr->cmd_status;
+ } else if (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW) {
+- sc->resid = be32_to_cpu(rhdr->residual_count);
++ scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count));
+ sc->result = (DID_OK << 16) | rhdr->cmd_status;
+ } else
+ sc->result = (DID_OK << 16) | rhdr->cmd_status;
+@@ -281,6 +285,8 @@
+ {
+ struct iscsi_data *hdr;
+ struct scsi_cmnd *sc = ctask->sc;
++ int i, sg_count = 0;
++ struct scatterlist *sg;
+
+ hdr = &r2t->dtask.hdr;
+ memset(hdr, 0, sizeof(struct iscsi_data));
+@@ -308,12 +314,9 @@
+ iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
+ sizeof(struct iscsi_hdr));
+
+- if (sc->use_sg) {
+- int i, sg_count = 0;
+- struct scatterlist *sg = sc->request_buffer;
+-
++ sg = scsi_sglist(sc);
+ r2t->sg = NULL;
+- for (i = 0; i < sc->use_sg; i++, sg += 1) {
++ for (i = 0; i < scsi_sg_count(sc); i++, sg += 1) {
+ /* FIXME: prefetch ? */
+ if (sg_count + sg->length > r2t->data_offset) {
+ int page_offset;
+@@ -335,12 +338,6 @@
+ sg_count += sg->length;
+ }
+ BUG_ON(r2t->sg == NULL);
+- } else {
+- iscsi_buf_init_iov(&r2t->sendbuf,
+- (char*)sc->request_buffer + r2t->data_offset,
+- r2t->data_count);
+- r2t->sg = NULL;
+- }
+ }
+
+ /**
+@@ -365,17 +362,16 @@
+ return ISCSI_ERR_DATALEN;
+ }
+
+- if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
++	if (tcp_ctask->exp_datasn != r2tsn) {
++ debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
++ __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
+ return ISCSI_ERR_R2TSN;
+-
+- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+- if (rc)
+- return rc;
+-
+- /* FIXME: use R2TSN to detect missing R2T */
++ }
+
+ /* fill-in new R2T associated with the task */
+ spin_lock(&session->lock);
++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
++
+ if (!ctask->sc || ctask->mtask ||
+ session->state != ISCSI_STATE_LOGGED_IN) {
+ printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in "
+@@ -401,11 +397,11 @@
+ r2t->data_length, session->max_burst);
+
+ r2t->data_offset = be32_to_cpu(rhdr->data_offset);
+- if (r2t->data_offset + r2t->data_length > ctask->total_length) {
++ if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) {
+ spin_unlock(&session->lock);
+ printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
+ "offset %u and total length %d\n", r2t->data_length,
+- r2t->data_offset, ctask->total_length);
++ r2t->data_offset, scsi_bufflen(ctask->sc));
+ return ISCSI_ERR_DATALEN;
+ }
+
+@@ -414,9 +410,9 @@
+
+ iscsi_solicit_data_init(conn, ctask, r2t);
+
+- tcp_ctask->exp_r2tsn = r2tsn + 1;
++ tcp_ctask->exp_datasn = r2tsn + 1;
+ __kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
+- tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
++ tcp_ctask->xmstate |= XMSTATE_SOL_HDR_INIT;
+ list_move_tail(&ctask->running, &conn->xmitqueue);
+
+ scsi_queue_work(session->host, &conn->xmitwork);
+@@ -600,7 +596,7 @@
+ {
+ struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+ int buf_left = buf_size - (tcp_conn->data_copied + offset);
+- int size = min(tcp_conn->in.copy, buf_left);
++ unsigned size = min(tcp_conn->in.copy, buf_left);
+ int rc;
+
+ size = min(size, ctask->data_count);
+@@ -609,7 +605,7 @@
+ size, tcp_conn->in.offset, tcp_conn->in.copied);
+
+ BUG_ON(size <= 0);
+- BUG_ON(tcp_ctask->sent + size > ctask->total_length);
++ BUG_ON(tcp_ctask->sent + size > scsi_bufflen(ctask->sc));
+
+ rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
+ (char*)buf + (offset + tcp_conn->data_copied), size);
+@@ -707,25 +703,8 @@
+
+ BUG_ON((void*)ctask != sc->SCp.ptr);
+
+- /*
+- * copying Data-In into the Scsi_Cmnd
+- */
+- if (!sc->use_sg) {
+- i = ctask->data_count;
+- rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer,
+- sc->request_bufflen,
+- tcp_ctask->data_offset);
+- if (rc == -EAGAIN)
+- return rc;
+- if (conn->datadgst_en)
+- iscsi_recv_digest_update(tcp_conn, sc->request_buffer,
+- i);
+- rc = 0;
+- goto done;
+- }
+-
+ offset = tcp_ctask->data_offset;
+- sg = sc->request_buffer;
++ sg = scsi_sglist(sc);
+
+ if (tcp_ctask->data_offset)
+ for (i = 0; i < tcp_ctask->sg_count; i++)
+@@ -734,7 +713,7 @@
+ if (offset < 0)
+ offset = 0;
+
+- for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) {
++ for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) {
+ char *dest;
+
+ dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
+@@ -779,7 +758,6 @@
+ }
+ BUG_ON(ctask->data_count);
+
+-done:
+ /* check for non-exceptional status */
+ if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+ debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n",
+@@ -895,11 +873,27 @@
+ }
+ }
+
+- if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) {
++ if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV &&
++ tcp_conn->in.copy) {
+ uint32_t recv_digest;
+
+ debug_tcp("extra data_recv offset %d copy %d\n",
+ tcp_conn->in.offset, tcp_conn->in.copy);
++
++ if (!tcp_conn->data_copied) {
++ if (tcp_conn->in.padding) {
++ debug_tcp("padding -> %d\n",
++ tcp_conn->in.padding);
++ memset(pad, 0, tcp_conn->in.padding);
++ sg_init_one(&sg, pad, tcp_conn->in.padding);
++ crypto_hash_update(&tcp_conn->rx_hash,
++ &sg, sg.length);
++ }
++ crypto_hash_final(&tcp_conn->rx_hash,
++ (u8 *) &tcp_conn->in.datadgst);
++ debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
++ }
++
+ rc = iscsi_tcp_copy(conn, sizeof(uint32_t));
+ if (rc) {
+ if (rc == -EAGAIN)
+@@ -925,7 +919,6 @@
+
+ if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV &&
+ tcp_conn->in.copy) {
+-
+ debug_tcp("data_recv offset %d copy %d\n",
+ tcp_conn->in.offset, tcp_conn->in.copy);
+
+@@ -936,24 +929,32 @@
+ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+ return 0;
+ }
+- tcp_conn->in.copy -= tcp_conn->in.padding;
+- tcp_conn->in.offset += tcp_conn->in.padding;
+- if (conn->datadgst_en) {
+- if (tcp_conn->in.padding) {
+- debug_tcp("padding -> %d\n",
+- tcp_conn->in.padding);
+- memset(pad, 0, tcp_conn->in.padding);
+- sg_init_one(&sg, pad, tcp_conn->in.padding);
+- crypto_hash_update(&tcp_conn->rx_hash,
+- &sg, sg.length);
+- }
+- crypto_hash_final(&tcp_conn->rx_hash,
+- (u8 *) &tcp_conn->in.datadgst);
+- debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
++
++ if (tcp_conn->in.padding)
++ tcp_conn->in_progress = IN_PROGRESS_PAD_RECV;
++ else if (conn->datadgst_en)
+ tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
++ else
++ tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+ tcp_conn->data_copied = 0;
+- } else
++ }
++
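++	/* New PAD_RECV state: padding can itself straddle skbs, so it is
++	 * consumed incrementally before any data digest is checked. */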
++ if (tcp_conn->in_progress == IN_PROGRESS_PAD_RECV &&
++ tcp_conn->in.copy) {
++ int copylen = min(tcp_conn->in.padding - tcp_conn->data_copied,
++ tcp_conn->in.copy);
++
++ tcp_conn->in.copy -= copylen;
++ tcp_conn->in.offset += copylen;
++ tcp_conn->data_copied += copylen;
++
++ if (tcp_conn->data_copied != tcp_conn->in.padding)
++ tcp_conn->in_progress = IN_PROGRESS_PAD_RECV;
++ else if (conn->datadgst_en)
++ tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
++ else
+ tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
++ tcp_conn->data_copied = 0;
+ }
+
+ debug_tcp("f, processed %d from out of %d padding %d\n",
+@@ -1215,7 +1216,6 @@
+ struct iscsi_r2t_info *r2t, int left)
+ {
+ struct iscsi_data *hdr;
+- struct scsi_cmnd *sc = ctask->sc;
+ int new_offset;
+
+ hdr = &r2t->dtask.hdr;
+@@ -1245,15 +1245,8 @@
+ if (iscsi_buf_left(&r2t->sendbuf))
+ return;
+
+- if (sc->use_sg) {
+ iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
+ r2t->sg += 1;
+- } else {
+- iscsi_buf_init_iov(&r2t->sendbuf,
+- (char*)sc->request_buffer + new_offset,
+- r2t->data_count);
+- r2t->sg = NULL;
+- }
+ }
+
+ static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask,
+@@ -1277,41 +1270,10 @@
+ static void
+ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
+ {
+- struct scsi_cmnd *sc = ctask->sc;
+ struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+
+ BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
+-
+- tcp_ctask->sent = 0;
+- tcp_ctask->sg_count = 0;
+-
+- if (sc->sc_data_direction == DMA_TO_DEVICE) {
+- tcp_ctask->xmstate = XMSTATE_W_HDR;
+- tcp_ctask->exp_r2tsn = 0;
+- BUG_ON(ctask->total_length == 0);
+-
+- if (sc->use_sg) {
+- struct scatterlist *sg = sc->request_buffer;
+-
+- iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
+- tcp_ctask->sg = sg + 1;
+- tcp_ctask->bad_sg = sg + sc->use_sg;
+- } else {
+- iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+- sc->request_buffer,
+- sc->request_bufflen);
+- tcp_ctask->sg = NULL;
+- tcp_ctask->bad_sg = NULL;
+- }
+- debug_scsi("cmd [itt 0x%x total %d imm_data %d "
+- "unsol count %d, unsol offset %d]\n",
+- ctask->itt, ctask->total_length, ctask->imm_count,
+- ctask->unsol_count, ctask->unsol_offset);
+- } else
+- tcp_ctask->xmstate = XMSTATE_R_HDR;
+-
+- iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
+- sizeof(struct iscsi_hdr));
++ tcp_ctask->xmstate = XMSTATE_CMD_HDR_INIT;
+ }
+
+ /**
+@@ -1324,9 +1286,11 @@
+ * call it again later, or recover. '0' return code means successful
+ * xmit.
+ *
+- * Management xmit state machine consists of two states:
+- * IN_PROGRESS_IMM_HEAD - PDU Header xmit in progress
+- * IN_PROGRESS_IMM_DATA - PDU Data xmit in progress
++ * Management xmit state machine consists of these states:
++ * XMSTATE_IMM_HDR_INIT - calculate digest of PDU Header
++ * XMSTATE_IMM_HDR - PDU Header xmit in progress
++ * XMSTATE_IMM_DATA - PDU Data xmit in progress
++ * XMSTATE_IDLE - management PDU is done
+ **/
+ static int
+ iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
+@@ -1337,23 +1301,34 @@
+ debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n",
+ conn->id, tcp_mtask->xmstate, mtask->itt);
+
+- if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
+- tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
+- if (mtask->data_count)
++ if (tcp_mtask->xmstate & XMSTATE_IMM_HDR_INIT) {
++ iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
++ sizeof(struct iscsi_hdr));
++
++ if (mtask->data_count) {
+ tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
++ iscsi_buf_init_iov(&tcp_mtask->sendbuf,
++ (char*)mtask->data,
++ mtask->data_count);
++ }
++
+ if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
+ conn->stop_stage != STOP_CONN_RECOVER &&
+ conn->hdrdgst_en)
+ iscsi_hdr_digest(conn, &tcp_mtask->headbuf,
+ (u8*)tcp_mtask->hdrext);
++
++ tcp_mtask->sent = 0;
++ tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR_INIT;
++ tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
++ }
++
++ if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
+ rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf,
+ mtask->data_count);
+- if (rc) {
+- tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
+- if (mtask->data_count)
+- tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
++ if (rc)
+ return rc;
+- }
++ tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
+ }
+
+ if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) {
+@@ -1387,55 +1362,67 @@
+ return 0;
+ }
+
+-static inline int
+-iscsi_send_read_hdr(struct iscsi_conn *conn,
+- struct iscsi_tcp_cmd_task *tcp_ctask)
++static int
++iscsi_send_cmd_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
+- int rc;
++ struct scsi_cmnd *sc = ctask->sc;
++ struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
++ int rc = 0;
+
+- tcp_ctask->xmstate &= ~XMSTATE_R_HDR;
+- if (conn->hdrdgst_en)
+- iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
+- (u8*)tcp_ctask->hdrext);
+- rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0);
+- if (!rc) {
+- BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE);
+- return 0; /* wait for Data-In */
++ if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_INIT) {
++ tcp_ctask->sent = 0;
++ tcp_ctask->sg_count = 0;
++ tcp_ctask->exp_datasn = 0;
++
++ if (sc->sc_data_direction == DMA_TO_DEVICE) {
++ struct scatterlist *sg = scsi_sglist(sc);
++
++ iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
++ tcp_ctask->sg = sg + 1;
++ tcp_ctask->bad_sg = sg + scsi_sg_count(sc);
++
++ debug_scsi("cmd [itt 0x%x total %d imm_data %d "
++ "unsol count %d, unsol offset %d]\n",
++ ctask->itt, scsi_bufflen(sc),
++ ctask->imm_count, ctask->unsol_count,
++ ctask->unsol_offset);
+ }
+- tcp_ctask->xmstate |= XMSTATE_R_HDR;
+- return rc;
+-}
+
+-static inline int
+-iscsi_send_write_hdr(struct iscsi_conn *conn,
+- struct iscsi_cmd_task *ctask)
+-{
+- struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+- int rc;
++ iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
++ sizeof(struct iscsi_hdr));
+
+- tcp_ctask->xmstate &= ~XMSTATE_W_HDR;
+ if (conn->hdrdgst_en)
+ iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
+ (u8*)tcp_ctask->hdrext);
++ tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_INIT;
++ tcp_ctask->xmstate |= XMSTATE_CMD_HDR_XMIT;
++ }
++
++ if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_XMIT) {
+ rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
+- if (rc) {
+- tcp_ctask->xmstate |= XMSTATE_W_HDR;
++ if (rc)
+ return rc;
+- }
++ tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_XMIT;
++
++ if (sc->sc_data_direction != DMA_TO_DEVICE)
++ return 0;
+
+ if (ctask->imm_count) {
+ tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+ iscsi_set_padding(tcp_ctask, ctask->imm_count);
+
+ if (ctask->conn->datadgst_en) {
+- iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
++ iscsi_data_digest_init(ctask->conn->dd_data,
++ tcp_ctask);
+ tcp_ctask->immdigest = 0;
+ }
+ }
+
+ if (ctask->unsol_count)
+- tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
+- return 0;
++ tcp_ctask->xmstate |=
++ XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
++ }
++ return rc;
+ }
+
+ static int
+@@ -1624,9 +1611,7 @@
+ struct iscsi_data_task *dtask;
+ int left, rc;
+
+- if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
+- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+- tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
++ if (tcp_ctask->xmstate & XMSTATE_SOL_HDR_INIT) {
+ if (!tcp_ctask->r2t) {
+ spin_lock_bh(&session->lock);
+ __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
+@@ -1640,13 +1625,20 @@
+ if (conn->hdrdgst_en)
+ iscsi_hdr_digest(conn, &r2t->headbuf,
+ (u8*)dtask->hdrext);
+- rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
+- if (rc) {
+- tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
++ tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR_INIT;
+ tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
+- return rc;
+ }
+
++ if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
++ r2t = tcp_ctask->r2t;
++ dtask = &r2t->dtask;
++
++ rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
++ if (rc)
++ return rc;
++ tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
++ tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
++
+ if (conn->datadgst_en) {
+ iscsi_data_digest_init(conn->dd_data, tcp_ctask);
+ dtask->digest = 0;
+@@ -1677,8 +1669,6 @@
+ left = r2t->data_length - r2t->sent;
+ if (left) {
+ iscsi_solicit_data_cont(conn, ctask, r2t, left);
+- tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+ goto send_hdr;
+ }
+
+@@ -1693,8 +1683,6 @@
+ if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t,
+ sizeof(void*))) {
+ tcp_ctask->r2t = r2t;
+- tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+ spin_unlock_bh(&session->lock);
+ goto send_hdr;
+ }
+@@ -1703,6 +1691,46 @@
+ return 0;
+ }
+
++/**
++ * iscsi_tcp_ctask_xmit - xmit normal PDU task
++ * @conn: iscsi connection
++ * @ctask: iscsi command task
++ *
++ * Notes:
++ * The function can return -EAGAIN, in which case the caller must
++ * call it again later or recover. A return code of '0' means a
++ * successful xmit.
++ * The function is divided into logical helpers (above) for the
++ * different xmit stages.
++ *
++ *iscsi_send_cmd_hdr()
++ * XMSTATE_CMD_HDR_INIT - prepare Header and Data buffers, calculate
++ * the Header Digest
++ * XMSTATE_CMD_HDR_XMIT - header transmit in progress
++ *
++ *iscsi_send_padding
++ * XMSTATE_W_PAD - prepare and send padding
++ * XMSTATE_W_RESEND_PAD - retry sending padding
++ *
++ *iscsi_send_digest
++ * XMSTATE_W_RESEND_DATA_DIGEST - finalize and send (or resend) the
++ * Data Digest
++ *
++ *iscsi_send_unsol_hdr
++ * XMSTATE_UNS_INIT - prepare unsolicited data header and digest
++ * XMSTATE_UNS_HDR - send unsolicited header
++ *
++ *iscsi_send_unsol_pdu
++ * XMSTATE_UNS_DATA - unsolicited data send in progress
++ *
++ *iscsi_send_sol_pdu
++ * XMSTATE_SOL_HDR_INIT - initialize solicited data header and digest
++ * XMSTATE_SOL_HDR - send solicited header
++ * XMSTATE_SOL_DATA - send solicited data
++ *
++ *iscsi_tcp_ctask_xmit
++ * XMSTATE_IMM_DATA - xmit the command's immediate data
++ **/
+ static int
+ iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
+@@ -1712,20 +1740,11 @@
+ debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n",
+ conn->id, tcp_ctask->xmstate, ctask->itt);
+
+- /*
+- * serialize with TMF AbortTask
+- */
+- if (ctask->mtask)
+- return rc;
+-
+- if (tcp_ctask->xmstate & XMSTATE_R_HDR)
+- return iscsi_send_read_hdr(conn, tcp_ctask);
+-
+- if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
+- rc = iscsi_send_write_hdr(conn, ctask);
++ rc = iscsi_send_cmd_hdr(conn, ctask);
+ if (rc)
+ return rc;
+- }
++ if (ctask->sc->sc_data_direction != DMA_TO_DEVICE)
++ return 0;
+
+ if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
+ rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+@@ -1810,18 +1829,22 @@
+ static void
+ iscsi_tcp_release_conn(struct iscsi_conn *conn)
+ {
++ struct iscsi_session *session = conn->session;
+ struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++ struct socket *sock = tcp_conn->sock;
+
+- if (!tcp_conn->sock)
++ if (!sock)
+ return;
+
+- sock_hold(tcp_conn->sock->sk);
++ sock_hold(sock->sk);
+ iscsi_conn_restore_callbacks(tcp_conn);
+- sock_put(tcp_conn->sock->sk);
++ sock_put(sock->sk);
+
+- sock_release(tcp_conn->sock);
++ spin_lock_bh(&session->lock);
+ tcp_conn->sock = NULL;
+ conn->recv_lock = NULL;
++ spin_unlock_bh(&session->lock);
++ sockfd_put(sock);
+ }
+
+ static void
+@@ -1852,6 +1875,46 @@
+ tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
+ }
+
++static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
++ char *buf, int *port,
++ int (*getname)(struct socket *, struct sockaddr *,
++ int *addrlen))
++{
++ struct sockaddr_storage *addr;
++ struct sockaddr_in6 *sin6;
++ struct sockaddr_in *sin;
++ int rc = 0, len;
++
++ addr = kmalloc(sizeof(*addr), GFP_KERNEL);
++ if (!addr)
++ return -ENOMEM;
++
++ if (getname(sock, (struct sockaddr *) addr, &len)) {
++ rc = -ENODEV;
++ goto free_addr;
++ }
++
++ switch (addr->ss_family) {
++ case AF_INET:
++ sin = (struct sockaddr_in *)addr;
++ spin_lock_bh(&conn->session->lock);
++ sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
++ *port = be16_to_cpu(sin->sin_port);
++ spin_unlock_bh(&conn->session->lock);
++ break;
++ case AF_INET6:
++ sin6 = (struct sockaddr_in6 *)addr;
++ spin_lock_bh(&conn->session->lock);
++ sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
++ *port = be16_to_cpu(sin6->sin6_port);
++ spin_unlock_bh(&conn->session->lock);
++ break;
++ }
++free_addr:
++ kfree(addr);
++ return rc;
++}
++
+ static int
+ iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
+ struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+@@ -1869,10 +1932,24 @@
+ printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err);
+ return -EEXIST;
+ }
++ /*
++ * copy these values now because if we drop the session,
++ * userspace may still want to query them, since we will be
++ * using them for the reconnect
++ */
++ err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
++ &conn->portal_port, kernel_getpeername);
++ if (err)
++ goto free_socket;
++
++ err = iscsi_tcp_get_addr(conn, sock, conn->local_address,
++ &conn->local_port, kernel_getsockname);
++ if (err)
++ goto free_socket;
+
+ err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
+ if (err)
+- return err;
++ goto free_socket;
+
+ /* bind iSCSI connection and socket */
+ tcp_conn->sock = sock;
+@@ -1896,25 +1973,19 @@
+ * set receive state machine into initial state
+ */
+ tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+-
+ return 0;
++
++free_socket:
++ sockfd_put(sock);
++ return err;
+ }
+
+ /* called with host lock */
+ static void
+-iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
+- char *data, uint32_t data_size)
++iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
+ {
+ struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
+-
+- iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
+- sizeof(struct iscsi_hdr));
+- tcp_mtask->xmstate = XMSTATE_IMM_HDR;
+- tcp_mtask->sent = 0;
+-
+- if (mtask->data_count)
+- iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data,
+- mtask->data_count);
++ tcp_mtask->xmstate = XMSTATE_IMM_HDR_INIT;
+ }
+
+ static int
+@@ -2026,41 +2097,18 @@
+ enum iscsi_param param, char *buf)
+ {
+ struct iscsi_conn *conn = cls_conn->dd_data;
+- struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+- struct inet_sock *inet;
+- struct ipv6_pinfo *np;
+- struct sock *sk;
+ int len;
+
+ switch(param) {
+ case ISCSI_PARAM_CONN_PORT:
+- mutex_lock(&conn->xmitmutex);
+- if (!tcp_conn->sock) {
+- mutex_unlock(&conn->xmitmutex);
+- return -EINVAL;
+- }
+-
+- inet = inet_sk(tcp_conn->sock->sk);
+- len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport));
+- mutex_unlock(&conn->xmitmutex);
++ spin_lock_bh(&conn->session->lock);
++ len = sprintf(buf, "%hu\n", conn->portal_port);
++ spin_unlock_bh(&conn->session->lock);
+ break;
+ case ISCSI_PARAM_CONN_ADDRESS:
+- mutex_lock(&conn->xmitmutex);
+- if (!tcp_conn->sock) {
+- mutex_unlock(&conn->xmitmutex);
+- return -EINVAL;
+- }
+-
+- sk = tcp_conn->sock->sk;
+- if (sk->sk_family == PF_INET) {
+- inet = inet_sk(sk);
+- len = sprintf(buf, NIPQUAD_FMT "\n",
+- NIPQUAD(inet->daddr));
+- } else {
+- np = inet6_sk(sk);
+- len = sprintf(buf, NIP6_FMT "\n", NIP6(np->daddr));
+- }
+- mutex_unlock(&conn->xmitmutex);
++ spin_lock_bh(&conn->session->lock);
++ len = sprintf(buf, "%s\n", conn->portal_address);
++ spin_unlock_bh(&conn->session->lock);
+ break;
+ default:
+ return iscsi_conn_get_param(cls_conn, param, buf);
+@@ -2069,6 +2117,29 @@
+ return len;
+ }
+
++static int
++iscsi_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++ char *buf)
++{
++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++ int len;
++
++ switch (param) {
++ case ISCSI_HOST_PARAM_IPADDRESS:
++ spin_lock_bh(&session->lock);
++ if (!session->leadconn)
++ len = -ENODEV;
++ else
++ len = sprintf(buf, "%s\n",
++ session->leadconn->local_address);
++ spin_unlock_bh(&session->lock);
++ break;
++ default:
++ return iscsi_host_get_param(shost, param, buf);
++ }
++ return len;
++}
++
+ static void
+ iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
+ {
+@@ -2096,6 +2167,7 @@
+ static struct iscsi_cls_session *
+ iscsi_tcp_session_create(struct iscsi_transport *iscsit,
+ struct scsi_transport_template *scsit,
++ uint16_t cmds_max, uint16_t qdepth,
+ uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+ struct iscsi_cls_session *cls_session;
+@@ -2103,7 +2175,7 @@
+ uint32_t hn;
+ int cmd_i;
+
+- cls_session = iscsi_session_setup(iscsit, scsit,
++ cls_session = iscsi_session_setup(iscsit, scsit, cmds_max, qdepth,
+ sizeof(struct iscsi_tcp_cmd_task),
+ sizeof(struct iscsi_tcp_mgmt_task),
+ initial_cmdsn, &hn);
+@@ -2142,17 +2214,24 @@
+ iscsi_session_teardown(cls_session);
+ }
+
++static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
++{
++ blk_queue_dma_alignment(sdev->request_queue, 0);
++ return 0;
++}
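A note on iscsi_tcp_slave_configure() above: blk_queue_dma_alignment() takes an alignment mask, and a mask of 0 lifts the block layer's default restriction (a mask of 511, i.e. 512-byte alignment) so byte-aligned buffers can be mapped directly; that is reasonable here since iscsi_tcp moves data through the socket layer rather than DMA-ing it from an HBA. For contrast, a hypothetical driver needing 4-byte alignment would set:

	/* illustrative only: require 4-byte-aligned buffers (mask = align - 1) */
	blk_queue_dma_alignment(sdev->request_queue, 4 - 1);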
++
+ static struct scsi_host_template iscsi_sht = {
+ .name = "iSCSI Initiator over TCP/IP",
+ .queuecommand = iscsi_queuecommand,
+ .change_queue_depth = iscsi_change_queue_depth,
+- .can_queue = ISCSI_XMIT_CMDS_MAX - 1,
++ .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1,
+ .sg_tablesize = ISCSI_SG_TABLESIZE,
+ .max_sectors = 0xFFFF,
+ .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
+ .eh_abort_handler = iscsi_eh_abort,
+ .eh_host_reset_handler = iscsi_eh_host_reset,
+ .use_clustering = DISABLE_CLUSTERING,
++ .slave_configure = iscsi_tcp_slave_configure,
+ .proc_name = "iscsi_tcp",
+ .this_id = -1,
+ };
+@@ -2179,8 +2258,12 @@
+ ISCSI_EXP_STATSN |
+ ISCSI_PERSISTENT_PORT |
+ ISCSI_PERSISTENT_ADDRESS |
+- ISCSI_TARGET_NAME |
+- ISCSI_TPGT,
++ ISCSI_TARGET_NAME | ISCSI_TPGT |
++ ISCSI_USERNAME | ISCSI_PASSWORD |
++ ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN,
++ .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
++ ISCSI_HOST_INITIATOR_NAME |
++ ISCSI_HOST_NETDEV_NAME,
+ .host_template = &iscsi_sht,
+ .conndata_size = sizeof(struct iscsi_conn),
+ .max_conn = 1,
+@@ -2197,6 +2280,9 @@
+ .get_session_param = iscsi_session_get_param,
+ .start_conn = iscsi_conn_start,
+ .stop_conn = iscsi_tcp_conn_stop,
++ /* iscsi host params */
++ .get_host_param = iscsi_tcp_host_get_param,
++ .set_host_param = iscsi_host_set_param,
+ /* IO */
+ .send_pdu = iscsi_conn_send_pdu,
+ .get_stats = iscsi_conn_get_stats,
+diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.h linux-2.6.22-591/drivers/scsi/iscsi_tcp.h
+--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.h 2007-12-21 15:36:12.000000000 -0500
+@@ -29,11 +29,12 @@
+ #define IN_PROGRESS_HEADER_GATHER 0x1
+ #define IN_PROGRESS_DATA_RECV 0x2
+ #define IN_PROGRESS_DDIGEST_RECV 0x3
++#define IN_PROGRESS_PAD_RECV 0x4
+
+ /* xmit state machine */
+ #define XMSTATE_IDLE 0x0
+-#define XMSTATE_R_HDR 0x1
+-#define XMSTATE_W_HDR 0x2
++#define XMSTATE_CMD_HDR_INIT 0x1
++#define XMSTATE_CMD_HDR_XMIT 0x2
+ #define XMSTATE_IMM_HDR 0x4
+ #define XMSTATE_IMM_DATA 0x8
+ #define XMSTATE_UNS_INIT 0x10
+@@ -44,6 +45,8 @@
+ #define XMSTATE_W_PAD 0x200
+ #define XMSTATE_W_RESEND_PAD 0x400
+ #define XMSTATE_W_RESEND_DATA_DIGEST 0x800
++#define XMSTATE_IMM_HDR_INIT 0x1000
++#define XMSTATE_SOL_HDR_INIT 0x2000
+
+ #define ISCSI_PAD_LEN 4
+ #define ISCSI_SG_TABLESIZE SG_ALL
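The new *_INIT flags above split one-time setup from the restartable send step. Sketched in isolation (illustrative, mirroring iscsi_send_cmd_hdr() in iscsi_tcp.c), the pattern is: test the flag, do the setup once, hand off to the transmit flag, and on -EAGAIN simply return so the next call re-enters and skips the finished stage:

	if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_INIT) {
		/* one-time work: build buffers, compute the header digest */
		tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_INIT;
		tcp_ctask->xmstate |= XMSTATE_CMD_HDR_XMIT;
	}
	if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_XMIT) {
		rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
		if (rc)
			return rc;	/* flag still set, so we resume here */
		tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_XMIT;
	}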
+@@ -152,7 +155,7 @@
+ struct scatterlist *sg; /* per-cmd SG list */
+ struct scatterlist *bad_sg; /* assert statement */
+ int sg_count; /* SG's to process */
+- uint32_t exp_r2tsn;
++ uint32_t exp_datasn; /* expected target's R2TSN/DataSN */
+ int data_offset;
+ struct iscsi_r2t_info *r2t; /* in progress R2T */
+ struct iscsi_queue r2tpool;
+diff -Nurb linux-2.6.22-570/drivers/scsi/jazz_esp.c linux-2.6.22-591/drivers/scsi/jazz_esp.c
+--- linux-2.6.22-570/drivers/scsi/jazz_esp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/jazz_esp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,6 +1,6 @@
+ /* jazz_esp.c: ESP front-end for MIPS JAZZ systems.
+ *
+- * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende)
++ * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende)
+ */
+
+ #include <linux/kernel.h>
+@@ -143,7 +143,7 @@
+ goto fail;
+
+ host->max_id = 8;
+- esp = host_to_esp(host);
++ esp = shost_priv(host);
+
+ esp->host = host;
+ esp->dev = dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/libiscsi.c linux-2.6.22-591/drivers/scsi/libiscsi.c
+--- linux-2.6.22-570/drivers/scsi/libiscsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/libiscsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -22,7 +22,6 @@
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+ #include <linux/types.h>
+-#include <linux/mutex.h>
+ #include <linux/kfifo.h>
+ #include <linux/delay.h>
+ #include <asm/unaligned.h>
+@@ -46,27 +45,53 @@
+ }
+ EXPORT_SYMBOL_GPL(class_to_transport_session);
+
+-#define INVALID_SN_DELTA 0xffff
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++#define SNA32_CHECK 2147483648UL
+
+-int
+-iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
++static int iscsi_sna_lt(u32 n1, u32 n2)
++{
++ return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++ (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
++
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++static int iscsi_sna_lte(u32 n1, u32 n2)
++{
++ return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++ (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
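The comparison works through unsigned wraparound: when n1 > n2 numerically, the u32 difference n2 - n1 evaluates to 2^32 - (n1 - n2), so the same "< SNA32_CHECK" test covers both sides of the wrap, matching RFC 1982. A small userspace sketch (illustrative only, restating the helper above):

	#include <assert.h>
	#include <stdint.h>

	#define SNA32_CHECK 2147483648UL	/* 2^31, per RFC 1982 */

	static int sna_lt(uint32_t n1, uint32_t n2)
	{
		return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
				    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
	}

	int main(void)
	{
		/* 0xfffffffe precedes 3: (uint32_t)(3 - 0xfffffffe) wraps to 5 */
		assert(sna_lt(0xfffffffeU, 3));
		/* 3 does not precede 0xfffffffe: forward distance >= 2^31 */
		assert(!sna_lt(3, 0xfffffffeU));
		return 0;
	}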
++
++void
++iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
+ {
+ uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn);
+ uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn);
+
+- if (max_cmdsn < exp_cmdsn -1 &&
+- max_cmdsn > exp_cmdsn - INVALID_SN_DELTA)
+- return ISCSI_ERR_MAX_CMDSN;
+- if (max_cmdsn > session->max_cmdsn ||
+- max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA)
+- session->max_cmdsn = max_cmdsn;
+- if (exp_cmdsn > session->exp_cmdsn ||
+- exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA)
++ /*
++ * standard specifies this check for when to update expected and
++ * max sequence numbers
++ */
++ if (iscsi_sna_lt(max_cmdsn, exp_cmdsn - 1))
++ return;
++
++ if (exp_cmdsn != session->exp_cmdsn &&
++ !iscsi_sna_lt(exp_cmdsn, session->exp_cmdsn))
+ session->exp_cmdsn = exp_cmdsn;
+
+- return 0;
++ if (max_cmdsn != session->max_cmdsn &&
++ !iscsi_sna_lt(max_cmdsn, session->max_cmdsn)) {
++ session->max_cmdsn = max_cmdsn;
++ /*
++ * if the window closed with IO queued, then kick the
++ * xmit thread
++ */
++ if (!list_empty(&session->leadconn->xmitqueue) ||
++ __kfifo_len(session->leadconn->mgmtqueue))
++ scsi_queue_work(session->host,
++ &session->leadconn->xmitwork);
++ }
+ }
+-EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn);
++EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
+
+ void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
+ struct iscsi_data *hdr)
+@@ -115,14 +140,17 @@
+ hdr->flags = ISCSI_ATTR_SIMPLE;
+ int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
+ hdr->itt = build_itt(ctask->itt, conn->id, session->age);
+- hdr->data_length = cpu_to_be32(sc->request_bufflen);
++ hdr->data_length = cpu_to_be32(scsi_bufflen(sc));
+ hdr->cmdsn = cpu_to_be32(session->cmdsn);
+ session->cmdsn++;
+ hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
+ memcpy(hdr->cdb, sc->cmnd, sc->cmd_len);
+- memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
++ if (sc->cmd_len < MAX_COMMAND_SIZE)
++ memset(&hdr->cdb[sc->cmd_len], 0,
++ MAX_COMMAND_SIZE - sc->cmd_len);
+
+ ctask->data_count = 0;
++ ctask->imm_count = 0;
+ if (sc->sc_data_direction == DMA_TO_DEVICE) {
+ hdr->flags |= ISCSI_FLAG_CMD_WRITE;
+ /*
+@@ -139,25 +167,24 @@
+ *
+ * pad_count bytes to be sent as zero-padding
+ */
+- ctask->imm_count = 0;
+ ctask->unsol_count = 0;
+ ctask->unsol_offset = 0;
+ ctask->unsol_datasn = 0;
+
+ if (session->imm_data_en) {
+- if (ctask->total_length >= session->first_burst)
++ if (scsi_bufflen(sc) >= session->first_burst)
+ ctask->imm_count = min(session->first_burst,
+ conn->max_xmit_dlength);
+ else
+- ctask->imm_count = min(ctask->total_length,
++ ctask->imm_count = min(scsi_bufflen(sc),
+ conn->max_xmit_dlength);
+ hton24(ctask->hdr->dlength, ctask->imm_count);
+ } else
+ zero_data(ctask->hdr->dlength);
+
+ if (!session->initial_r2t_en) {
+- ctask->unsol_count = min(session->first_burst,
+- ctask->total_length) - ctask->imm_count;
++ ctask->unsol_count = min(session->first_burst,
++ scsi_bufflen(sc)) - ctask->imm_count;
+ ctask->unsol_offset = ctask->imm_count;
+ }
+
+@@ -165,7 +192,6 @@
+ /* No unsolicit Data-Out's */
+ ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ } else {
+- ctask->datasn = 0;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ zero_data(hdr->dlength);
+
+@@ -174,8 +200,13 @@
+ }
+
+ conn->scsicmd_pdus_cnt++;
++
++ debug_scsi("iscsi prep [%s cid %d sc %p cdb 0x%x itt 0x%x len %d "
++ "cmdsn %d win %d]\n",
++ sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
++ conn->id, sc, sc->cmnd[0], ctask->itt, scsi_bufflen(sc),
++ session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+ }
+-EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu);
+
+ /**
+ * iscsi_complete_command - return command back to scsi-ml
+@@ -204,26 +235,12 @@
+ atomic_inc(&ctask->refcount);
+ }
+
+-static void iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+-{
+- spin_lock_bh(&ctask->conn->session->lock);
+- __iscsi_get_ctask(ctask);
+- spin_unlock_bh(&ctask->conn->session->lock);
+-}
+-
+ static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+ {
+ if (atomic_dec_and_test(&ctask->refcount))
+ iscsi_complete_command(ctask);
+ }
+
+-static void iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+-{
+- spin_lock_bh(&ctask->conn->session->lock);
+- __iscsi_put_ctask(ctask);
+- spin_unlock_bh(&ctask->conn->session->lock);
+-}
+-
+ /**
+ * iscsi_cmd_rsp - SCSI Command Response processing
+ * @conn: iscsi connection
+@@ -235,21 +252,15 @@
+ * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and
+ * then completes the command and task.
+ **/
+-static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
++static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ struct iscsi_cmd_task *ctask, char *data,
+ int datalen)
+ {
+- int rc;
+ struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+ struct iscsi_session *session = conn->session;
+ struct scsi_cmnd *sc = ctask->sc;
+
+- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+- if (rc) {
+- sc->result = DID_ERROR << 16;
+- goto out;
+- }
+-
++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
+ conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+
+ sc->result = (DID_OK << 16) | rhdr->cmd_status;
+@@ -286,14 +297,14 @@
+ if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+ int res_count = be32_to_cpu(rhdr->residual_count);
+
+- if (res_count > 0 && res_count <= sc->request_bufflen)
+- sc->resid = res_count;
++ if (res_count > 0 && res_count <= scsi_bufflen(sc))
++ scsi_set_resid(sc, res_count);
+ else
+ sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+ } else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW)
+ sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+ else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW)
+- sc->resid = be32_to_cpu(rhdr->residual_count);
++ scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count));
+
+ out:
+ debug_scsi("done [sc %lx res %d itt 0x%x]\n",
+@@ -301,7 +312,6 @@
+ conn->scsirsp_pdus_cnt++;
+
+ __iscsi_put_ctask(ctask);
+- return rc;
+ }
+
+ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
+@@ -381,7 +391,7 @@
+ switch(opcode) {
+ case ISCSI_OP_SCSI_CMD_RSP:
+ BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+- rc = iscsi_scsi_cmd_rsp(conn, hdr, ctask, data,
++ iscsi_scsi_cmd_rsp(conn, hdr, ctask, data,
+ datalen);
+ break;
+ case ISCSI_OP_SCSI_DATA_IN:
+@@ -405,11 +415,7 @@
+ debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n",
+ opcode, conn->id, mtask->itt, datalen);
+
+- rc = iscsi_check_assign_cmdsn(session,
+- (struct iscsi_nopin*)hdr);
+- if (rc)
+- goto done;
+-
++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
+ switch(opcode) {
+ case ISCSI_OP_LOGOUT_RSP:
+ if (datalen) {
+@@ -458,10 +464,7 @@
+ break;
+ }
+ } else if (itt == ~0U) {
+- rc = iscsi_check_assign_cmdsn(session,
+- (struct iscsi_nopin*)hdr);
+- if (rc)
+- goto done;
++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
+
+ switch(opcode) {
+ case ISCSI_OP_NOOP_IN:
+@@ -491,7 +494,6 @@
+ } else
+ rc = ISCSI_ERR_BAD_ITT;
+
+-done:
+ return rc;
+ }
+ EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
+@@ -578,17 +580,47 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_failure);
+
++static void iscsi_prep_mtask(struct iscsi_conn *conn,
++ struct iscsi_mgmt_task *mtask)
++{
++ struct iscsi_session *session = conn->session;
++ struct iscsi_hdr *hdr = mtask->hdr;
++ struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
++
++ if (hdr->opcode != (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) &&
++ hdr->opcode != (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
++ nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
++ /*
++ * pre-format CmdSN for outgoing PDU.
++ */
++ nop->cmdsn = cpu_to_be32(session->cmdsn);
++ if (hdr->itt != RESERVED_ITT) {
++ hdr->itt = build_itt(mtask->itt, conn->id, session->age);
++ if (conn->c_stage == ISCSI_CONN_STARTED &&
++ !(hdr->opcode & ISCSI_OP_IMMEDIATE))
++ session->cmdsn++;
++ }
++
++ if (session->tt->init_mgmt_task)
++ session->tt->init_mgmt_task(conn, mtask);
++
++ debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
++ hdr->opcode, hdr->itt, mtask->data_count);
++}
++
+ static int iscsi_xmit_mtask(struct iscsi_conn *conn)
+ {
+ struct iscsi_hdr *hdr = conn->mtask->hdr;
+ int rc, was_logout = 0;
+
++ spin_unlock_bh(&conn->session->lock);
+ if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT) {
+ conn->session->state = ISCSI_STATE_IN_RECOVERY;
+ iscsi_block_session(session_to_cls(conn->session));
+ was_logout = 1;
+ }
+ rc = conn->session->tt->xmit_mgmt_task(conn, conn->mtask);
++ spin_lock_bh(&conn->session->lock);
+ if (rc)
+ return rc;
+
+@@ -602,6 +634,45 @@
+ return 0;
+ }
+
++static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
++{
++ struct iscsi_session *session = conn->session;
++
++ /*
++ * Check for iSCSI window and take care of CmdSN wrap-around
++ */
++ if (!iscsi_sna_lte(session->cmdsn, session->max_cmdsn)) {
++ debug_scsi("iSCSI CmdSN closed. MaxCmdSN %u CmdSN %u\n",
++ session->max_cmdsn, session->cmdsn);
++ return -ENOSPC;
++ }
++ return 0;
++}
++
++static int iscsi_xmit_ctask(struct iscsi_conn *conn)
++{
++ struct iscsi_cmd_task *ctask = conn->ctask;
++ int rc = 0;
++
++ /*
++ * serialize with TMF AbortTask
++ */
++ if (ctask->state == ISCSI_TASK_ABORTING)
++ goto done;
++
++ __iscsi_get_ctask(ctask);
++ spin_unlock_bh(&conn->session->lock);
++ rc = conn->session->tt->xmit_cmd_task(conn, ctask);
++ spin_lock_bh(&conn->session->lock);
++ __iscsi_put_ctask(ctask);
++
++done:
++ if (!rc)
++ /* done with this ctask */
++ conn->ctask = NULL;
++ return rc;
++}
++
+ /**
+ * iscsi_data_xmit - xmit any command into the scheduled connection
+ * @conn: iscsi connection
+@@ -613,106 +684,79 @@
+ **/
+ static int iscsi_data_xmit(struct iscsi_conn *conn)
+ {
+- struct iscsi_transport *tt;
+ int rc = 0;
+
++ spin_lock_bh(&conn->session->lock);
+ if (unlikely(conn->suspend_tx)) {
+ debug_scsi("conn %d Tx suspended!\n", conn->id);
++ spin_unlock_bh(&conn->session->lock);
+ return -ENODATA;
+ }
+- tt = conn->session->tt;
+-
+- /*
+- * Transmit in the following order:
+- *
+- * 1) un-finished xmit (ctask or mtask)
+- * 2) immediate control PDUs
+- * 3) write data
+- * 4) SCSI commands
+- * 5) non-immediate control PDUs
+- *
+- * No need to lock around __kfifo_get as long as
+- * there's one producer and one consumer.
+- */
+-
+- BUG_ON(conn->ctask && conn->mtask);
+
+ if (conn->ctask) {
+- iscsi_get_ctask(conn->ctask);
+- rc = tt->xmit_cmd_task(conn, conn->ctask);
+- iscsi_put_ctask(conn->ctask);
++ rc = iscsi_xmit_ctask(conn);
+ if (rc)
+ goto again;
+- /* done with this in-progress ctask */
+- conn->ctask = NULL;
+ }
++
+ if (conn->mtask) {
+ rc = iscsi_xmit_mtask(conn);
+ if (rc)
+ goto again;
+ }
+
+- /* process immediate first */
+- if (unlikely(__kfifo_len(conn->immqueue))) {
+- while (__kfifo_get(conn->immqueue, (void*)&conn->mtask,
++ /*
++ * process mgmt pdus like nops before commands, since we should
++ * only have one nop-out outstanding as a ping from us, and
++ * targets should not overflow us with nop-ins
++ */
++check_mgmt:
++ while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
+ sizeof(void*))) {
+- spin_lock_bh(&conn->session->lock);
+- list_add_tail(&conn->mtask->running,
+- &conn->mgmt_run_list);
+- spin_unlock_bh(&conn->session->lock);
++ iscsi_prep_mtask(conn, conn->mtask);
++ list_add_tail(&conn->mtask->running, &conn->mgmt_run_list);
+ rc = iscsi_xmit_mtask(conn);
+ if (rc)
+ goto again;
+ }
+- }
+
+ /* process command queue */
+- spin_lock_bh(&conn->session->lock);
+ while (!list_empty(&conn->xmitqueue)) {
++ rc = iscsi_check_cmdsn_window_closed(conn);
++ if (rc) {
++ spin_unlock_bh(&conn->session->lock);
++ return rc;
++ }
+ /*
+ * iscsi tcp may readd the task to the xmitqueue to send
+ * write data
+ */
+ conn->ctask = list_entry(conn->xmitqueue.next,
+ struct iscsi_cmd_task, running);
++ if (conn->ctask->state == ISCSI_TASK_PENDING) {
++ iscsi_prep_scsi_cmd_pdu(conn->ctask);
++ conn->session->tt->init_cmd_task(conn->ctask);
++ }
+ conn->ctask->state = ISCSI_TASK_RUNNING;
+ list_move_tail(conn->xmitqueue.next, &conn->run_list);
+- __iscsi_get_ctask(conn->ctask);
+- spin_unlock_bh(&conn->session->lock);
+-
+- rc = tt->xmit_cmd_task(conn, conn->ctask);
+-
+- spin_lock_bh(&conn->session->lock);
+- __iscsi_put_ctask(conn->ctask);
+- if (rc) {
+- spin_unlock_bh(&conn->session->lock);
+- goto again;
+- }
+- }
+- spin_unlock_bh(&conn->session->lock);
+- /* done with this ctask */
+- conn->ctask = NULL;
+-
+- /* process the rest control plane PDUs, if any */
+- if (unlikely(__kfifo_len(conn->mgmtqueue))) {
+- while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
+- sizeof(void*))) {
+- spin_lock_bh(&conn->session->lock);
+- list_add_tail(&conn->mtask->running,
+- &conn->mgmt_run_list);
+- spin_unlock_bh(&conn->session->lock);
+- rc = iscsi_xmit_mtask(conn);
++ rc = iscsi_xmit_ctask(conn);
+ if (rc)
+ goto again;
++ /*
++ * we could continuously get new ctask requests so
++ * we need to check the mgmt queue for nops that need to
++ * be sent to avoid starvation
++ */
++ if (__kfifo_len(conn->mgmtqueue))
++ goto check_mgmt;
+ }
+- }
+-
++ spin_unlock_bh(&conn->session->lock);
+ return -ENODATA;
+
+ again:
+ if (unlikely(conn->suspend_tx))
+- return -ENODATA;
+-
++ rc = -ENODATA;
++ spin_unlock_bh(&conn->session->lock);
+ return rc;
+ }
+
+@@ -724,11 +768,9 @@
+ /*
+ * serialize Xmit worker on a per-connection basis.
+ */
+- mutex_lock(&conn->xmitmutex);
+ do {
+ rc = iscsi_data_xmit(conn);
+ } while (rc >= 0 || rc == -EAGAIN);
+- mutex_unlock(&conn->xmitmutex);
+ }
+
+ enum {
+@@ -786,20 +828,23 @@
+ goto fault;
+ }
+
+- /*
+- * Check for iSCSI window and take care of CmdSN wrap-around
+- */
+- if ((int)(session->max_cmdsn - session->cmdsn) < 0) {
+- reason = FAILURE_WINDOW_CLOSED;
+- goto reject;
+- }
+-
+ conn = session->leadconn;
+ if (!conn) {
+ reason = FAILURE_SESSION_FREED;
+ goto fault;
+ }
+
++ /*
++ * We check this here and in data xmit, because if we get to the
++ * point where this check is hitting the window, we already have
++ * enough IO in flight and enough IO waiting to be transmitted that
++ * it is better to let the scsi/block layer queue up.
++ */
++ if (iscsi_check_cmdsn_window_closed(conn)) {
++ reason = FAILURE_WINDOW_CLOSED;
++ goto reject;
++ }
++
+ if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask,
+ sizeof(void*))) {
+ reason = FAILURE_OOM;
+@@ -814,18 +859,8 @@
+ ctask->conn = conn;
+ ctask->sc = sc;
+ INIT_LIST_HEAD(&ctask->running);
+- ctask->total_length = sc->request_bufflen;
+- iscsi_prep_scsi_cmd_pdu(ctask);
+-
+- session->tt->init_cmd_task(ctask);
+
+ list_add_tail(&ctask->running, &conn->xmitqueue);
+- debug_scsi(
+- "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d "
+- "win %d]\n",
+- sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
+- conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen,
+- session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+ spin_unlock(&session->lock);
+
+ scsi_queue_work(host, &conn->xmitwork);
+@@ -841,7 +876,7 @@
+ printk(KERN_ERR "iscsi: cmd 0x%x is not queued (%d)\n",
+ sc->cmnd[0], reason);
+ sc->result = (DID_NO_CONNECT << 16);
+- sc->resid = sc->request_bufflen;
++ scsi_set_resid(sc, scsi_bufflen(sc));
+ sc->scsi_done(sc);
+ return 0;
+ }
+@@ -856,19 +891,16 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_change_queue_depth);
+
+-static int
+-iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
++static struct iscsi_mgmt_task *
++__iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ char *data, uint32_t data_size)
+ {
+ struct iscsi_session *session = conn->session;
+- struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
+ struct iscsi_mgmt_task *mtask;
+
+- spin_lock_bh(&session->lock);
+- if (session->state == ISCSI_STATE_TERMINATE) {
+- spin_unlock_bh(&session->lock);
+- return -EPERM;
+- }
++ if (session->state == ISCSI_STATE_TERMINATE)
++ return NULL;
++
+ if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) ||
+ hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
+ /*
+@@ -882,27 +914,11 @@
+ BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+ BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+
+- nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
+ if (!__kfifo_get(session->mgmtpool.queue,
+- (void*)&mtask, sizeof(void*))) {
+- spin_unlock_bh(&session->lock);
+- return -ENOSPC;
+- }
++ (void*)&mtask, sizeof(void*)))
++ return NULL;
+ }
+
+- /*
+- * pre-format CmdSN for outgoing PDU.
+- */
+- if (hdr->itt != RESERVED_ITT) {
+- hdr->itt = build_itt(mtask->itt, conn->id, session->age);
+- nop->cmdsn = cpu_to_be32(session->cmdsn);
+- if (conn->c_stage == ISCSI_CONN_STARTED &&
+- !(hdr->opcode & ISCSI_OP_IMMEDIATE))
+- session->cmdsn++;
+- } else
+- /* do not advance CmdSN */
+- nop->cmdsn = cpu_to_be32(session->cmdsn);
+-
+ if (data_size) {
+ memcpy(mtask->data, data, data_size);
+ mtask->data_count = data_size;
+@@ -911,38 +927,23 @@
+
+ INIT_LIST_HEAD(&mtask->running);
+ memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr));
+- if (session->tt->init_mgmt_task)
+- session->tt->init_mgmt_task(conn, mtask, data, data_size);
+- spin_unlock_bh(&session->lock);
+-
+- debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
+- hdr->opcode, hdr->itt, data_size);
+-
+- /*
+- * since send_pdu() could be called at least from two contexts,
+- * we need to serialize __kfifo_put, so we don't have to take
+- * additional lock on fast data-path
+- */
+- if (hdr->opcode & ISCSI_OP_IMMEDIATE)
+- __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*));
+- else
+ __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*));
+-
+- scsi_queue_work(session->host, &conn->xmitwork);
+- return 0;
++ return mtask;
+ }
+
+ int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
+ char *data, uint32_t data_size)
+ {
+ struct iscsi_conn *conn = cls_conn->dd_data;
+- int rc;
+-
+- mutex_lock(&conn->xmitmutex);
+- rc = iscsi_conn_send_generic(conn, hdr, data, data_size);
+- mutex_unlock(&conn->xmitmutex);
++ struct iscsi_session *session = conn->session;
++ int err = 0;
+
+- return rc;
++ spin_lock_bh(&session->lock);
++ if (!__iscsi_conn_send_pdu(conn, hdr, data, data_size))
++ err = -EPERM;
++ spin_unlock_bh(&session->lock);
++ scsi_queue_work(session->host, &conn->xmitwork);
++ return err;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu);
+
+@@ -1027,14 +1028,12 @@
+ spin_unlock(&session->lock);
+ }
+
+-/* must be called with the mutex lock */
+ static int iscsi_exec_abort_task(struct scsi_cmnd *sc,
+ struct iscsi_cmd_task *ctask)
+ {
+ struct iscsi_conn *conn = ctask->conn;
+ struct iscsi_session *session = conn->session;
+ struct iscsi_tm *hdr = &conn->tmhdr;
+- int rc;
+
+ /*
+ * ctask timed out but session is OK requests must be serialized.
+@@ -1047,32 +1046,27 @@
+ hdr->rtt = ctask->hdr->itt;
+ hdr->refcmdsn = ctask->hdr->cmdsn;
+
+- rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr,
++ ctask->mtask = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)hdr,
+ NULL, 0);
+- if (rc) {
++ if (!ctask->mtask) {
+ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+- debug_scsi("abort sent failure [itt 0x%x] %d\n", ctask->itt,
+- rc);
+- return rc;
++ debug_scsi("abort sent failure [itt 0x%x]\n", ctask->itt);
++ return -EPERM;
+ }
++ ctask->state = ISCSI_TASK_ABORTING;
+
+ debug_scsi("abort sent [itt 0x%x]\n", ctask->itt);
+
+- spin_lock_bh(&session->lock);
+- ctask->mtask = (struct iscsi_mgmt_task *)
+- session->mgmt_cmds[get_itt(hdr->itt) -
+- ISCSI_MGMT_ITT_OFFSET];
+-
+ if (conn->tmabort_state == TMABORT_INITIAL) {
+ conn->tmfcmd_pdus_cnt++;
+- conn->tmabort_timer.expires = 10*HZ + jiffies;
++ conn->tmabort_timer.expires = 20*HZ + jiffies;
+ conn->tmabort_timer.function = iscsi_tmabort_timedout;
+ conn->tmabort_timer.data = (unsigned long)ctask;
+ add_timer(&conn->tmabort_timer);
+ debug_scsi("abort set timeout [itt 0x%x]\n", ctask->itt);
+ }
+ spin_unlock_bh(&session->lock);
+- mutex_unlock(&conn->xmitmutex);
++ scsi_queue_work(session->host, &conn->xmitwork);
+
+ /*
+ * block eh thread until:
+@@ -1089,13 +1083,12 @@
+ if (signal_pending(current))
+ flush_signals(current);
+ del_timer_sync(&conn->tmabort_timer);
+-
+- mutex_lock(&conn->xmitmutex);
++ spin_lock_bh(&session->lock);
+ return 0;
+ }
+
+ /*
+- * xmit mutex and session lock must be held
++ * session lock must be held
+ */
+ static struct iscsi_mgmt_task *
+ iscsi_remove_mgmt_task(struct kfifo *fifo, uint32_t itt)
+@@ -1127,7 +1120,7 @@
+ if (!ctask->mtask)
+ return -EINVAL;
+
+- if (!iscsi_remove_mgmt_task(conn->immqueue, ctask->mtask->itt))
++ if (!iscsi_remove_mgmt_task(conn->mgmtqueue, ctask->mtask->itt))
+ list_del(&ctask->mtask->running);
+ __kfifo_put(session->mgmtpool.queue, (void*)&ctask->mtask,
+ sizeof(void*));
+@@ -1136,7 +1129,7 @@
+ }
+
+ /*
+- * session lock and xmitmutex must be held
++ * session lock must be held
+ */
+ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
+ int err)
+@@ -1147,11 +1140,14 @@
+ if (!sc)
+ return;
+
++ if (ctask->state != ISCSI_TASK_PENDING)
+ conn->session->tt->cleanup_cmd_task(conn, ctask);
+ iscsi_ctask_mtask_cleanup(ctask);
+
+ sc->result = err;
+- sc->resid = sc->request_bufflen;
++ scsi_set_resid(sc, scsi_bufflen(sc));
++ if (conn->ctask == ctask)
++ conn->ctask = NULL;
+ /* release ref from queuecommand */
+ __iscsi_put_ctask(ctask);
+ }
+@@ -1179,7 +1175,6 @@
+ conn->eh_abort_cnt++;
+ debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt);
+
+- mutex_lock(&conn->xmitmutex);
+ spin_lock_bh(&session->lock);
+
+ /*
+@@ -1192,9 +1187,8 @@
+
+ /* ctask completed before time out */
+ if (!ctask->sc) {
+- spin_unlock_bh(&session->lock);
+ debug_scsi("sc completed while abort in progress\n");
+- goto success_rel_mutex;
++ goto success;
+ }
+
+ /* what should we do here ? */
+@@ -1204,15 +1198,13 @@
+ goto failed;
+ }
+
+- if (ctask->state == ISCSI_TASK_PENDING)
+- goto success_cleanup;
++ if (ctask->state == ISCSI_TASK_PENDING) {
++ fail_command(conn, ctask, DID_ABORT << 16);
++ goto success;
++ }
+
+ conn->tmabort_state = TMABORT_INITIAL;
+-
+- spin_unlock_bh(&session->lock);
+ rc = iscsi_exec_abort_task(sc, ctask);
+- spin_lock_bh(&session->lock);
+-
+ if (rc || sc->SCp.phase != session->age ||
+ session->state != ISCSI_STATE_LOGGED_IN)
+ goto failed;
+@@ -1220,45 +1212,44 @@
+
+ switch (conn->tmabort_state) {
+ case TMABORT_SUCCESS:
+- goto success_cleanup;
++ spin_unlock_bh(&session->lock);
++ /*
++ * clean up task if aborted. grab the recv lock as a writer
++ */
++ write_lock_bh(conn->recv_lock);
++ spin_lock(&session->lock);
++ fail_command(conn, ctask, DID_ABORT << 16);
++ spin_unlock(&session->lock);
++ write_unlock_bh(conn->recv_lock);
++ /*
++ * make sure xmit thread is not still touching the
++ * ctask/scsi_cmnd
++ */
++ scsi_flush_work(session->host);
++ goto success_unlocked;
+ case TMABORT_NOT_FOUND:
+ if (!ctask->sc) {
+ /* ctask completed before tmf abort response */
+- spin_unlock_bh(&session->lock);
+ debug_scsi("sc completed while abort in progress\n");
+- goto success_rel_mutex;
++ goto success;
+ }
+ /* fall through */
+ default:
+ /* timedout or failed */
+ spin_unlock_bh(&session->lock);
+ iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+- spin_lock_bh(&session->lock);
+- goto failed;
++ goto failed_unlocked;
+ }
+
+-success_cleanup:
+- debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++success:
+ spin_unlock_bh(&session->lock);
+-
+- /*
+- * clean up task if aborted. we have the xmitmutex so grab
+- * the recv lock as a writer
+- */
+- write_lock_bh(conn->recv_lock);
+- spin_lock(&session->lock);
+- fail_command(conn, ctask, DID_ABORT << 16);
+- spin_unlock(&session->lock);
+- write_unlock_bh(conn->recv_lock);
+-
+-success_rel_mutex:
+- mutex_unlock(&conn->xmitmutex);
++success_unlocked:
++ debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+ return SUCCESS;
+
+ failed:
+ spin_unlock_bh(&session->lock);
+- mutex_unlock(&conn->xmitmutex);
+-
++failed_unlocked:
+ debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+ return FAILED;
+ }
+@@ -1339,6 +1330,10 @@
+ * iscsi_session_setup - create iscsi cls session and host and session
+ * @scsit: scsi transport template
+ * @iscsit: iscsi transport template
++ * @cmds_max: max commands the scsi host can queue
++ * @qdepth: scsi host cmds per lun
++ * @cmd_task_size: LLD ctask private data size
++ * @mgmt_task_size: LLD mtask private data size
+ * @initial_cmdsn: initial CmdSN
+ * @hostno: host no allocated
+ *
+@@ -1348,6 +1343,7 @@
+ struct iscsi_cls_session *
+ iscsi_session_setup(struct iscsi_transport *iscsit,
+ struct scsi_transport_template *scsit,
++ uint16_t cmds_max, uint16_t qdepth,
+ int cmd_task_size, int mgmt_task_size,
+ uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+@@ -1356,11 +1352,32 @@
+ struct iscsi_cls_session *cls_session;
+ int cmd_i;
+
++ if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) {
++ if (qdepth != 0)
++ printk(KERN_ERR "iscsi: invalid queue depth of %d. "
++ "Queue depth must be between 1 and %d.\n",
++ qdepth, ISCSI_MAX_CMD_PER_LUN);
++ qdepth = ISCSI_DEF_CMD_PER_LUN;
++ }
++
++ if (cmds_max < 2 || (cmds_max & (cmds_max - 1)) ||
++ cmds_max >= ISCSI_MGMT_ITT_OFFSET) {
++ if (cmds_max != 0)
++ printk(KERN_ERR "iscsi: invalid can_queue of %d. "
++ "can_queue must be a power of 2 and between "
++ "2 and %d - setting to %d.\n", cmds_max,
++ ISCSI_MGMT_ITT_OFFSET, ISCSI_DEF_XMIT_CMDS_MAX);
++ cmds_max = ISCSI_DEF_XMIT_CMDS_MAX;
++ }
++
+ shost = scsi_host_alloc(iscsit->host_template,
+ hostdata_privsize(sizeof(*session)));
+ if (!shost)
+ return NULL;
+
++ /* the iscsi layer reserves one task for its own use */
++ shost->can_queue = cmds_max - 1;
++ shost->cmd_per_lun = qdepth;
+ shost->max_id = 1;
+ shost->max_channel = 0;
+ shost->max_lun = iscsit->max_lun;
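The cmds_max validation above uses the usual power-of-two idiom: x - 1 flips the lowest set bit and every bit below it, so x & (x - 1) is zero exactly when x has a single bit set. Illustrative helper and values (not patch code):

	static inline int is_power_of_two(uint16_t x)
	{
		return x != 0 && (x & (x - 1)) == 0;	/* clears lowest set bit */
	}

	/* 64 & 63 == 0x40 & 0x3f == 0, accepted */
	/* 96 & 95 == 0x60 & 0x5f == 0x40, rejected */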
+@@ -1374,7 +1391,7 @@
+ session->host = shost;
+ session->state = ISCSI_STATE_FREE;
+ session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX;
+- session->cmds_max = ISCSI_XMIT_CMDS_MAX;
++ session->cmds_max = cmds_max;
+ session->cmdsn = initial_cmdsn;
+ session->exp_cmdsn = initial_cmdsn + 1;
+ session->max_cmdsn = initial_cmdsn + 1;
+@@ -1461,7 +1478,14 @@
+ iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+ iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
+
++ kfree(session->password);
++ kfree(session->password_in);
++ kfree(session->username);
++ kfree(session->username_in);
+ kfree(session->targetname);
++ kfree(session->netdev);
++ kfree(session->hwaddress);
++ kfree(session->initiatorname);
+
+ iscsi_destroy_session(cls_session);
+ scsi_host_put(shost);
+@@ -1499,11 +1523,6 @@
+ INIT_LIST_HEAD(&conn->xmitqueue);
+
+ /* initialize general immediate & non-immediate PDU commands queue */
+- conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+- GFP_KERNEL, NULL);
+- if (conn->immqueue == ERR_PTR(-ENOMEM))
+- goto immqueue_alloc_fail;
+-
+ conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+ GFP_KERNEL, NULL);
+ if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
+@@ -1527,7 +1546,6 @@
+ conn->login_mtask->data = conn->data = data;
+
+ init_timer(&conn->tmabort_timer);
+- mutex_init(&conn->xmitmutex);
+ init_waitqueue_head(&conn->ehwait);
+
+ return cls_conn;
+@@ -1538,8 +1556,6 @@
+ login_mtask_alloc_fail:
+ kfifo_free(conn->mgmtqueue);
+ mgmtqueue_alloc_fail:
+- kfifo_free(conn->immqueue);
+-immqueue_alloc_fail:
+ iscsi_destroy_conn(cls_conn);
+ return NULL;
+ }
+@@ -1558,10 +1574,8 @@
+ struct iscsi_session *session = conn->session;
+ unsigned long flags;
+
+- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+- mutex_lock(&conn->xmitmutex);
+-
+ spin_lock_bh(&session->lock);
++ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+ conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
+ if (session->leadconn == conn) {
+ /*
+@@ -1572,8 +1586,6 @@
+ }
+ spin_unlock_bh(&session->lock);
+
+- mutex_unlock(&conn->xmitmutex);
+-
+ /*
+ * Block until all in-progress commands for this connection
+ * time out or fail.
+@@ -1610,7 +1622,6 @@
+ }
+ spin_unlock_bh(&session->lock);
+
+- kfifo_free(conn->immqueue);
+ kfifo_free(conn->mgmtqueue);
+
+ iscsi_destroy_conn(cls_conn);
+@@ -1671,8 +1682,7 @@
+ struct iscsi_mgmt_task *mtask, *tmp;
+
+ /* handle pending */
+- while (__kfifo_get(conn->immqueue, (void*)&mtask, sizeof(void*)) ||
+- __kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) {
++ while (__kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) {
+ if (mtask == conn->login_mtask)
+ continue;
+ debug_scsi("flushing pending mgmt task itt 0x%x\n", mtask->itt);
+@@ -1742,12 +1752,12 @@
+ conn->c_stage = ISCSI_CONN_STOPPED;
+ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+ spin_unlock_bh(&session->lock);
++ scsi_flush_work(session->host);
+
+ write_lock_bh(conn->recv_lock);
+ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+ write_unlock_bh(conn->recv_lock);
+
+- mutex_lock(&conn->xmitmutex);
+ /*
+ * for connection level recovery we should not calculate
+ * header digest. conn->hdr_size used for optimization
+@@ -1771,8 +1781,6 @@
+ fail_all_commands(conn);
+ flush_control_queues(session, conn);
+ spin_unlock_bh(&session->lock);
+-
+- mutex_unlock(&conn->xmitmutex);
+ }
+
+ void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
+@@ -1867,6 +1875,30 @@
+ case ISCSI_PARAM_EXP_STATSN:
+ sscanf(buf, "%u", &conn->exp_statsn);
+ break;
++ case ISCSI_PARAM_USERNAME:
++ kfree(session->username);
++ session->username = kstrdup(buf, GFP_KERNEL);
++ if (!session->username)
++ return -ENOMEM;
++ break;
++ case ISCSI_PARAM_USERNAME_IN:
++ kfree(session->username_in);
++ session->username_in = kstrdup(buf, GFP_KERNEL);
++ if (!session->username_in)
++ return -ENOMEM;
++ break;
++ case ISCSI_PARAM_PASSWORD:
++ kfree(session->password);
++ session->password = kstrdup(buf, GFP_KERNEL);
++ if (!session->password)
++ return -ENOMEM;
++ break;
++ case ISCSI_PARAM_PASSWORD_IN:
++ kfree(session->password_in);
++ session->password_in = kstrdup(buf, GFP_KERNEL);
++ if (!session->password_in)
++ return -ENOMEM;
++ break;
+ case ISCSI_PARAM_TARGET_NAME:
+ /* this should not change between logins */
+ if (session->targetname)
+@@ -1940,6 +1972,18 @@
+ case ISCSI_PARAM_TPGT:
+ len = sprintf(buf, "%d\n", session->tpgt);
+ break;
++ case ISCSI_PARAM_USERNAME:
++ len = sprintf(buf, "%s\n", session->username);
++ break;
++ case ISCSI_PARAM_USERNAME_IN:
++ len = sprintf(buf, "%s\n", session->username_in);
++ break;
++ case ISCSI_PARAM_PASSWORD:
++ len = sprintf(buf, "%s\n", session->password);
++ break;
++ case ISCSI_PARAM_PASSWORD_IN:
++ len = sprintf(buf, "%s\n", session->password_in);
++ break;
+ default:
+ return -ENOSYS;
+ }
+@@ -1990,6 +2034,66 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_get_param);
+
++int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++ char *buf)
++{
++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++ int len;
++
++ switch (param) {
++ case ISCSI_HOST_PARAM_NETDEV_NAME:
++ if (!session->netdev)
++ len = sprintf(buf, "%s\n", "default");
++ else
++ len = sprintf(buf, "%s\n", session->netdev);
++ break;
++ case ISCSI_HOST_PARAM_HWADDRESS:
++ if (!session->hwaddress)
++ len = sprintf(buf, "%s\n", "default");
++ else
++ len = sprintf(buf, "%s\n", session->hwaddress);
++ break;
++ case ISCSI_HOST_PARAM_INITIATOR_NAME:
++ if (!session->initiatorname)
++ len = sprintf(buf, "%s\n", "unknown");
++ else
++ len = sprintf(buf, "%s\n", session->initiatorname);
++ break;
++
++ default:
++ return -ENOSYS;
++ }
++
++ return len;
++}
++EXPORT_SYMBOL_GPL(iscsi_host_get_param);
++
++int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++ char *buf, int buflen)
++{
++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++
++ switch (param) {
++ case ISCSI_HOST_PARAM_NETDEV_NAME:
++ if (!session->netdev)
++ session->netdev = kstrdup(buf, GFP_KERNEL);
++ break;
++ case ISCSI_HOST_PARAM_HWADDRESS:
++ if (!session->hwaddress)
++ session->hwaddress = kstrdup(buf, GFP_KERNEL);
++ break;
++ case ISCSI_HOST_PARAM_INITIATOR_NAME:
++ if (!session->initiatorname)
++ session->initiatorname = kstrdup(buf, GFP_KERNEL);
++ break;
++ default:
++ return -ENOSYS;
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(iscsi_host_set_param);
++
+ MODULE_AUTHOR("Mike Christie");
+ MODULE_DESCRIPTION("iSCSI library functions");
+ MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c
+--- linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c 2007-12-21 15:36:12.000000000 -0500
+@@ -38,8 +38,10 @@
+
+ #if 0
+ /* FIXME: smp needs to migrate into the sas class */
+-static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t);
+-static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t);
++static ssize_t smp_portal_read(struct kobject *, struct bin_attribute *,
++ char *, loff_t, size_t);
++static ssize_t smp_portal_write(struct kobject *, struct bin_attribute *,
++ char *, loff_t, size_t);
+ #endif
+
+ /* ---------- SMP task management ---------- */
+@@ -1368,7 +1370,6 @@
+ memset(bin_attr, 0, sizeof(*bin_attr));
+
+ bin_attr->attr.name = SMP_BIN_ATTR_NAME;
+- bin_attr->attr.owner = THIS_MODULE;
+ bin_attr->attr.mode = 0600;
+
+ bin_attr->size = 0;
+@@ -1846,8 +1847,9 @@
+ #if 0
+ /* ---------- SMP portal ---------- */
+
+-static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs,
+- size_t size)
++static ssize_t smp_portal_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t offs, size_t size)
+ {
+ struct domain_device *dev = to_dom_device(kobj);
+ struct expander_device *ex = &dev->ex_dev;
+@@ -1873,8 +1875,9 @@
+ return size;
+ }
+
+-static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs,
+- size_t size)
++static ssize_t smp_portal_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t offs, size_t size)
+ {
+ struct domain_device *dev = to_dom_device(kobj);
+ struct expander_device *ex = &dev->ex_dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c
+--- linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c 2007-12-21 15:36:12.000000000 -0500
+@@ -40,6 +40,7 @@
+
+ #include <linux/err.h>
+ #include <linux/blkdev.h>
++#include <linux/freezer.h>
+ #include <linux/scatterlist.h>
+
+ /* ---------- SCSI Host glue ---------- */
+@@ -76,8 +77,8 @@
+ hs = DID_NO_CONNECT;
+ break;
+ case SAS_DATA_UNDERRUN:
+- sc->resid = ts->residual;
+- if (sc->request_bufflen - sc->resid < sc->underflow)
++ scsi_set_resid(sc, ts->residual);
++ if (scsi_bufflen(sc) - scsi_get_resid(sc) < sc->underflow)
+ hs = DID_ERROR;
+ break;
+ case SAS_DATA_OVERRUN:
+@@ -161,9 +162,9 @@
+ task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd);
+ memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
+
+- task->scatter = cmd->request_buffer;
+- task->num_scatter = cmd->use_sg;
+- task->total_xfer_len = cmd->request_bufflen;
++ task->scatter = scsi_sglist(cmd);
++ task->num_scatter = scsi_sg_count(cmd);
++ task->total_xfer_len = scsi_bufflen(cmd);
+ task->data_dir = cmd->sc_data_direction;
+
+ task->task_done = sas_scsi_task_done;
+@@ -868,8 +869,6 @@
+ {
+ struct sas_ha_struct *sas_ha = _sas_ha;
+
+- current->flags |= PF_NOFREEZE;
+-
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/Makefile linux-2.6.22-591/drivers/scsi/lpfc/Makefile
+--- linux-2.6.22-570/drivers/scsi/lpfc/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,7 @@
+ #/*******************************************************************
+ # * This file is part of the Emulex Linux Device Driver for *
+ # * Fibre Channel Host Bus Adapters. *
+-# * Copyright (C) 2004-2005 Emulex. All rights reserved. *
++# * Copyright (C) 2004-2006 Emulex. All rights reserved. *
+ # * EMULEX and SLI are trademarks of Emulex. *
+ # * www.emulex.com *
+ # * *
+@@ -27,4 +27,5 @@
+ obj-$(CONFIG_SCSI_LPFC) := lpfc.o
+
+ lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o \
+- lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o
++ lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \
++ lpfc_vport.o lpfc_debugfs.o
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -19,8 +19,9 @@
+ * included with this package. *
+ *******************************************************************/
+
+-struct lpfc_sli2_slim;
++#include <scsi/scsi_host.h>
+
++struct lpfc_sli2_slim;
+
+ #define LPFC_MAX_TARGET 256 /* max number of targets supported */
+ #define LPFC_MAX_DISC_THREADS 64 /* max outstanding discovery els
+@@ -32,6 +33,20 @@
+ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */
+ #define LPFC_Q_RAMP_UP_INTERVAL 120 /* lun q_depth ramp up interval */
+
++/*
++ * The following time intervals are used for adjusting SCSI device
++ * queue depths when there is a driver resource error or a firmware
++ * resource error.
++ */
++#define QUEUE_RAMP_DOWN_INTERVAL (1 * HZ) /* 1 Second */
++#define QUEUE_RAMP_UP_INTERVAL (300 * HZ) /* 5 minutes */
++
++/* Number of exchanges reserved for discovery to complete */
++#define LPFC_DISC_IOCB_BUFF_COUNT 20
++
++#define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */
++#define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */
++
+ /* Define macros for 64 bit support */
+ #define putPaddrLow(addr) ((uint32_t) (0xffffffff & (u64)(addr)))
+ #define putPaddrHigh(addr) ((uint32_t) (0xffffffff & (((u64)(addr))>>32)))
+@@ -61,6 +76,11 @@
+ uint32_t current_count;
+ };
+
++struct hbq_dmabuf {
++ struct lpfc_dmabuf dbuf;
++ uint32_t tag;
++};
++
+ /* Priority bit. Set value to exceed low water mark in lpfc_mem. */
+ #define MEM_PRI 0x100
+
+@@ -90,6 +110,29 @@
+ uint32_t sli2FwRev;
+ uint8_t sli2FwName[16];
+ } rev;
++ struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd2 :24; /* Reserved */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++#else /* __LITTLE_ENDIAN */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t rsvd2 :24; /* Reserved */
++#endif
++ } sli3Feat;
+ } lpfc_vpd_t;
+
+ struct lpfc_scsi_buf;
+@@ -122,6 +165,7 @@
+ uint32_t elsRcvRPS;
+ uint32_t elsRcvRPL;
+ uint32_t elsXmitFLOGI;
++ uint32_t elsXmitFDISC;
+ uint32_t elsXmitPLOGI;
+ uint32_t elsXmitPRLI;
+ uint32_t elsXmitADISC;
+@@ -165,70 +209,53 @@
+ struct lpfcMboxq * mbox;
+ };
+
+-struct lpfc_hba {
+- struct lpfc_sli sli;
+- struct lpfc_sli2_slim *slim2p;
+- dma_addr_t slim2p_mapping;
+- uint16_t pci_cfg_value;
++struct lpfc_hba;
+
+- int32_t hba_state;
+
+-#define LPFC_STATE_UNKNOWN 0 /* HBA state is unknown */
+-#define LPFC_WARM_START 1 /* HBA state after selective reset */
+-#define LPFC_INIT_START 2 /* Initial state after board reset */
+-#define LPFC_INIT_MBX_CMDS 3 /* Initialize HBA with mbox commands */
+-#define LPFC_LINK_DOWN 4 /* HBA initialized, link is down */
+-#define LPFC_LINK_UP 5 /* Link is up - issue READ_LA */
+-#define LPFC_LOCAL_CFG_LINK 6 /* local NPORT Id configured */
+-#define LPFC_FLOGI 7 /* FLOGI sent to Fabric */
+-#define LPFC_FABRIC_CFG_LINK 8 /* Fabric assigned NPORT Id
+- configured */
+-#define LPFC_NS_REG 9 /* Register with NameServer */
+-#define LPFC_NS_QRY 10 /* Query NameServer for NPort ID list */
+-#define LPFC_BUILD_DISC_LIST 11 /* Build ADISC and PLOGI lists for
++enum discovery_state {
++ LPFC_VPORT_UNKNOWN = 0, /* vport state is unknown */
++ LPFC_VPORT_FAILED = 1, /* vport has failed */
++ LPFC_LOCAL_CFG_LINK = 6, /* local NPORT Id configured */
++ LPFC_FLOGI = 7, /* FLOGI sent to Fabric */
++ LPFC_FDISC = 8, /* FDISC sent for vport */
++ LPFC_FABRIC_CFG_LINK = 9, /* Fabric assigned NPORT Id
++ * configured */
++ LPFC_NS_REG = 10, /* Register with NameServer */
++ LPFC_NS_QRY = 11, /* Query NameServer for NPort ID list */
++ LPFC_BUILD_DISC_LIST = 12, /* Build ADISC and PLOGI lists for
+ * device authentication / discovery */
+-#define LPFC_DISC_AUTH 12 /* Processing ADISC list */
+-#define LPFC_CLEAR_LA 13 /* authentication cmplt - issue
+- CLEAR_LA */
+-#define LPFC_HBA_READY 32
+-#define LPFC_HBA_ERROR -1
++ LPFC_DISC_AUTH = 13, /* Processing ADISC list */
++ LPFC_VPORT_READY = 32,
++};
+
+- int32_t stopped; /* HBA has not been restarted since last ERATT */
+- uint8_t fc_linkspeed; /* Link speed after last READ_LA */
++enum hba_state {
++ LPFC_LINK_UNKNOWN = 0, /* HBA state is unknown */
++ LPFC_WARM_START = 1, /* HBA state after selective reset */
++ LPFC_INIT_START = 2, /* Initial state after board reset */
++ LPFC_INIT_MBX_CMDS = 3, /* Initialize HBA with mbox commands */
++ LPFC_LINK_DOWN = 4, /* HBA initialized, link is down */
++ LPFC_LINK_UP = 5, /* Link is up - issue READ_LA */
++ LPFC_CLEAR_LA = 6, /* authentication cmplt - issue
++ * CLEAR_LA */
++ LPFC_HBA_READY = 32,
++ LPFC_HBA_ERROR = -1
++};
+
+- uint32_t fc_eventTag; /* event tag for link attention */
+- uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */
++struct lpfc_vport {
++ struct list_head listentry;
++ struct lpfc_hba *phba;
++ uint8_t port_type;
++#define LPFC_PHYSICAL_PORT 1
++#define LPFC_NPIV_PORT 2
++#define LPFC_FABRIC_PORT 3
++ enum discovery_state port_state;
+
+- uint32_t num_disc_nodes; /*in addition to hba_state */
++ uint16_t vpi;
+
+- struct timer_list fc_estabtmo; /* link establishment timer */
+- struct timer_list fc_disctmo; /* Discovery rescue timer */
+- struct timer_list fc_fdmitmo; /* fdmi timer */
+- /* These fields used to be binfo */
+- struct lpfc_name fc_nodename; /* fc nodename */
+- struct lpfc_name fc_portname; /* fc portname */
+- uint32_t fc_pref_DID; /* preferred D_ID */
+- uint8_t fc_pref_ALPA; /* preferred AL_PA */
+- uint32_t fc_edtov; /* E_D_TOV timer value */
+- uint32_t fc_arbtov; /* ARB_TOV timer value */
+- uint32_t fc_ratov; /* R_A_TOV timer value */
+- uint32_t fc_rttov; /* R_T_TOV timer value */
+- uint32_t fc_altov; /* AL_TOV timer value */
+- uint32_t fc_crtov; /* C_R_TOV timer value */
+- uint32_t fc_citov; /* C_I_TOV timer value */
+- uint32_t fc_myDID; /* fibre channel S_ID */
+- uint32_t fc_prevDID; /* previous fibre channel S_ID */
+-
+- struct serv_parm fc_sparam; /* buffer for our service parameters */
+- struct serv_parm fc_fabparam; /* fabric service parameters buffer */
+- uint8_t alpa_map[128]; /* AL_PA map from READ_LA */
+-
+- uint8_t fc_ns_retry; /* retries for fabric nameserver */
+- uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */
+- uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */
+- struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
+- uint32_t lmt;
+ uint32_t fc_flag; /* FC flags */
++/* Several of these flags are HBA centric and should be moved to
++ * phba->link_flag (e.g. FC_PTP, FC_PUBLIC_LOOP)
++ */
+ #define FC_PT2PT 0x1 /* pt2pt with no fabric */
+ #define FC_PT2PT_PLOGI 0x2 /* pt2pt initiate PLOGI */
+ #define FC_DISC_TMO 0x4 /* Discovery timer running */
+@@ -239,22 +266,14 @@
+ #define FC_OFFLINE_MODE 0x80 /* Interface is offline for diag */
+ #define FC_FABRIC 0x100 /* We are fabric attached */
+ #define FC_ESTABLISH_LINK 0x200 /* Reestablish Link */
+-#define FC_RSCN_DISCOVERY 0x400 /* Authenticate all devices after RSCN*/
+-#define FC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */
+-#define FC_LOADING 0x1000 /* HBA in process of loading drvr */
+-#define FC_UNLOADING 0x2000 /* HBA in process of unloading drvr */
++#define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */
+ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */
+ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */
+ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */
+ #define FC_BYPASSED_MODE 0x20000 /* NPort is in bypassed mode */
+-#define FC_LOOPBACK_MODE 0x40000 /* NPort is in Loopback mode */
+- /* This flag is set while issuing */
+- /* INIT_LINK mailbox command */
+-#define FC_IGNORE_ERATT 0x80000 /* intr handler should ignore ERATT */
+-
+- uint32_t fc_topology; /* link topology, from LINK INIT */
+-
+- struct lpfc_stats fc_stat;
++#define FC_RFF_NOT_SUPPORTED 0x40000 /* RFF_ID was rejected by switch */
++#define FC_VPORT_NEEDS_REG_VPI 0x80000 /* Needs to have its vpi registered */
++#define FC_RSCN_DEFERRED 0x100000 /* A deferred RSCN being processed */
+
+ struct list_head fc_nodes;
+
+@@ -267,10 +286,131 @@
+ uint16_t fc_map_cnt;
+ uint16_t fc_npr_cnt;
+ uint16_t fc_unused_cnt;
++ struct serv_parm fc_sparam; /* buffer for our service parameters */
++
++ uint32_t fc_myDID; /* fibre channel S_ID */
++ uint32_t fc_prevDID; /* previous fibre channel S_ID */
++
++ int32_t stopped; /* HBA has not been restarted since last ERATT */
++ uint8_t fc_linkspeed; /* Link speed after last READ_LA */
++
++ uint32_t num_disc_nodes; /* in addition to hba_state */
++
++ uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */
++ uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */
++ struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
++ struct lpfc_name fc_nodename; /* fc nodename */
++ struct lpfc_name fc_portname; /* fc portname */
++
++ struct lpfc_work_evt disc_timeout_evt;
++
++ struct timer_list fc_disctmo; /* Discovery rescue timer */
++ uint8_t fc_ns_retry; /* retries for fabric nameserver */
++ uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */
++
++ spinlock_t work_port_lock;
++ uint32_t work_port_events; /* Timeout to be handled */
++#define WORKER_DISC_TMO 0x1 /* vport: Discovery timeout */
++#define WORKER_ELS_TMO 0x2 /* vport: ELS timeout */
++#define WORKER_FDMI_TMO 0x4 /* vport: FDMI timeout */
++
++#define WORKER_MBOX_TMO 0x100 /* hba: MBOX timeout */
++#define WORKER_HB_TMO 0x200 /* hba: Heart beat timeout */
++#define WORKER_FABRIC_BLOCK_TMO 0x400 /* hba: fabric block timeout */
++#define WORKER_RAMP_DOWN_QUEUE 0x800 /* hba: Decrease Q depth */
++#define WORKER_RAMP_UP_QUEUE 0x1000 /* hba: Increase Q depth */
++
++ struct timer_list fc_fdmitmo;
++ struct timer_list els_tmofunc;
++
++ int unreg_vpi_cmpl;
++
++ uint8_t load_flag;
++#define FC_LOADING 0x1 /* HBA in process of loading drvr */
++#define FC_UNLOADING 0x2 /* HBA in process of unloading drvr */
++ char *vname; /* Application assigned name */
++ struct fc_vport *fc_vport;
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++ struct dentry *debug_disc_trc;
++ struct dentry *debug_nodelist;
++ struct dentry *vport_debugfs_root;
++ struct lpfc_disc_trc *disc_trc;
++ atomic_t disc_trc_cnt;
++#endif
++};
++
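The WORKER_* bits declared in struct lpfc_vport above are posted under work_port_lock and consumed by the driver's worker thread; a hedged sketch of the producer side (the helper name is hypothetical, but the fields match those declared in struct lpfc_vport and struct lpfc_hba):

	/* Illustrative only: flag a timeout event and wake the worker. */
	static void example_post_event(struct lpfc_vport *vport, uint32_t evt)
	{
		struct lpfc_hba *phba = vport->phba;
		unsigned long flags;

		spin_lock_irqsave(&vport->work_port_lock, flags);
		vport->work_port_events |= evt;	/* e.g. WORKER_DISC_TMO */
		spin_unlock_irqrestore(&vport->work_port_lock, flags);

		if (phba->work_wait)		/* wait_queue_head_t * in lpfc_hba */
			wake_up(phba->work_wait);
	}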
++struct hbq_s {
++ uint16_t entry_count; /* Current number of HBQ slots */
++ uint32_t next_hbqPutIdx; /* Index to next HBQ slot to use */
++ uint32_t hbqPutIdx; /* HBQ slot to use */
++ uint32_t local_hbqGetIdx; /* Local copy of Get index from Port */
++};
++
++#define LPFC_MAX_HBQS 16
++/* this matches the position in the lpfc_hbq_defs array */
++#define LPFC_ELS_HBQ 0
++
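The put index in hbq_s wraps modulo entry_count as the host posts receive buffers to a Host Buffer Queue; a minimal sketch of the advance (hypothetical helper, not driver code):

	/* Illustrative only: compute the next HBQ put slot, wrapping at
	 * entry_count; the caller would store the result in next_hbqPutIdx.
	 */
	static uint32_t example_hbq_next_put(struct hbq_s *hbq)
	{
		uint32_t next = hbq->hbqPutIdx + 1;

		return (next >= hbq->entry_count) ? 0 : next;
	}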
++struct lpfc_hba {
++ struct lpfc_sli sli;
++ uint32_t sli_rev; /* SLI2 or SLI3 */
++ uint32_t sli3_options; /* Mask of enabled SLI3 options */
++#define LPFC_SLI3_ENABLED 0x01
++#define LPFC_SLI3_HBQ_ENABLED 0x02
++#define LPFC_SLI3_NPIV_ENABLED 0x04
++#define LPFC_SLI3_VPORT_TEARDOWN 0x08
++ uint32_t iocb_cmd_size;
++ uint32_t iocb_rsp_size;
++
++ enum hba_state link_state;
++ uint32_t link_flag; /* link state flags */
++#define LS_LOOPBACK_MODE 0x1 /* NPort is in Loopback mode */
++ /* This flag is set while issuing */
++ /* INIT_LINK mailbox command */
++#define LS_NPIV_FAB_SUPPORTED 0x2 /* Fabric supports NPIV */
++#define LS_IGNORE_ERATT 0x4 /* intr handler should ignore ERATT */
++
++ struct lpfc_sli2_slim *slim2p;
++ struct lpfc_dmabuf hbqslimp;
++
++ dma_addr_t slim2p_mapping;
++
++ uint16_t pci_cfg_value;
++
++ uint8_t work_found;
++#define LPFC_MAX_WORKER_ITERATION 4
++
++ uint8_t fc_linkspeed; /* Link speed after last READ_LA */
++
++ uint32_t fc_eventTag; /* event tag for link attention */
++
++
++ struct timer_list fc_estabtmo; /* link establishment timer */
++ /* These fields used to be binfo */
++ uint32_t fc_pref_DID; /* preferred D_ID */
++ uint8_t fc_pref_ALPA; /* preferred AL_PA */
++ uint32_t fc_edtov; /* E_D_TOV timer value */
++ uint32_t fc_arbtov; /* ARB_TOV timer value */
++ uint32_t fc_ratov; /* R_A_TOV timer value */
++ uint32_t fc_rttov; /* R_T_TOV timer value */
++ uint32_t fc_altov; /* AL_TOV timer value */
++ uint32_t fc_crtov; /* C_R_TOV timer value */
++ uint32_t fc_citov; /* C_I_TOV timer value */
++
++ struct serv_parm fc_fabparam; /* fabric service parameters buffer */
++ uint8_t alpa_map[128]; /* AL_PA map from READ_LA */
++
++ uint32_t lmt;
++
++ uint32_t fc_topology; /* link topology, from LINK INIT */
++
++ struct lpfc_stats fc_stat;
++
+ struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */
+ uint32_t nport_event_cnt; /* timestamp for nlplist entry */
+
+- uint32_t wwnn[2];
++ uint8_t wwnn[8];
++ uint8_t wwpn[8];
+ uint32_t RandomData[7];
+
+ uint32_t cfg_log_verbose;
+@@ -278,6 +418,9 @@
+ uint32_t cfg_nodev_tmo;
+ uint32_t cfg_devloss_tmo;
+ uint32_t cfg_hba_queue_depth;
++ uint32_t cfg_peer_port_login;
++ uint32_t cfg_vport_restrict_login;
++ uint32_t cfg_npiv_enable;
+ uint32_t cfg_fcp_class;
+ uint32_t cfg_use_adisc;
+ uint32_t cfg_ack0;
+@@ -304,22 +447,20 @@
+
+ lpfc_vpd_t vpd; /* vital product data */
+
+- struct Scsi_Host *host;
+ struct pci_dev *pcidev;
+ struct list_head work_list;
+ uint32_t work_ha; /* Host Attention Bits for WT */
+ uint32_t work_ha_mask; /* HA Bits owned by WT */
+ uint32_t work_hs; /* HS stored in case of ERRAT */
+ uint32_t work_status[2]; /* Extra status from SLIM */
+- uint32_t work_hba_events; /* Timeout to be handled */
+-#define WORKER_DISC_TMO 0x1 /* Discovery timeout */
+-#define WORKER_ELS_TMO 0x2 /* ELS timeout */
+-#define WORKER_MBOX_TMO 0x4 /* MBOX timeout */
+-#define WORKER_FDMI_TMO 0x8 /* FDMI timeout */
+
+ wait_queue_head_t *work_wait;
+ struct task_struct *worker_thread;
+
++ struct list_head hbq_buffer_list;
++ uint32_t hbq_count; /* Count of configured HBQs */
++ struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indices */
++
+ unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */
+ unsigned long pci_bar2_map; /* Physical address for PCI BAR2 */
+ void __iomem *slim_memmap_p; /* Kernel memory mapped address for
+@@ -334,6 +475,10 @@
+ reg */
+ void __iomem *HCregaddr; /* virtual address for host ctl reg */
+
++ struct lpfc_hgp __iomem *host_gp; /* Host side get/put pointers */
++ uint32_t __iomem *hbq_put; /* Address in SLIM to HBQ put ptrs */
++ uint32_t *hbq_get; /* Host mem address of HBQ get ptrs */
++
+ int brd_no; /* FC board number */
+
+ char SerialNumber[32]; /* adapter Serial Number */
+@@ -353,7 +498,6 @@
+ uint8_t soft_wwn_enable;
+
+ struct timer_list fcp_poll_timer;
+- struct timer_list els_tmofunc;
+
+ /*
+ * stat counters
+@@ -370,31 +514,69 @@
+ uint32_t total_scsi_bufs;
+ struct list_head lpfc_iocb_list;
+ uint32_t total_iocbq_bufs;
++ spinlock_t hbalock;
+
+ /* pci_mem_pools */
+ struct pci_pool *lpfc_scsi_dma_buf_pool;
+ struct pci_pool *lpfc_mbuf_pool;
++ struct pci_pool *lpfc_hbq_pool;
+ struct lpfc_dma_pool lpfc_mbuf_safety_pool;
+
+ mempool_t *mbox_mem_pool;
+ mempool_t *nlp_mem_pool;
+
+ struct fc_host_statistics link_stats;
++
++ struct list_head port_list;
++ struct lpfc_vport *pport; /* physical lpfc_vport pointer */
++ uint16_t max_vpi; /* Maximum virtual nports */
++#define LPFC_MAX_VPI 100 /* Max number of VPorts supported */
++ unsigned long *vpi_bmask; /* vpi allocation table */
++
++ /* Data structure used by fabric iocb scheduler */
++ struct list_head fabric_iocb_list;
++ atomic_t fabric_iocb_count;
++ struct timer_list fabric_block_timer;
++ unsigned long bit_flags;
++#define FABRIC_COMANDS_BLOCKED 0
++ atomic_t num_rsrc_err;
++ atomic_t num_cmd_success;
++ unsigned long last_rsrc_error_time;
++ unsigned long last_ramp_down_time;
++ unsigned long last_ramp_up_time;
++#ifdef CONFIG_LPFC_DEBUG_FS
++ struct dentry *hba_debugfs_root;
++ atomic_t debugfs_vport_count;
++#endif
++
++ /* Fields used for heart beat. */
++ unsigned long last_completion_time;
++ struct timer_list hb_tmofunc;
++ uint8_t hb_outstanding;
+ };
+
++static inline struct Scsi_Host *
++lpfc_shost_from_vport(struct lpfc_vport *vport)
++{
++ return container_of((void *) vport, struct Scsi_Host, hostdata[0]);
++}
++
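lpfc_shost_from_vport() relies on each lpfc_vport being allocated inside the Scsi_Host private area (hostdata), so container_of() can recover the enclosing host. A hedged allocation-side sketch (template is a hypothetical scsi_host_template and phba an existing adapter; error handling omitted):

	struct Scsi_Host *shost = scsi_host_alloc(&template,
						  sizeof(struct lpfc_vport));
	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;

	vport->phba = phba;				/* hypothetical setup */
	BUG_ON(lpfc_shost_from_vport(vport) != shost);	/* round-trips */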
+ static inline void
+-lpfc_set_loopback_flag(struct lpfc_hba *phba) {
++lpfc_set_loopback_flag(struct lpfc_hba *phba)
++{
+ if (phba->cfg_topology == FLAGS_LOCAL_LB)
+- phba->fc_flag |= FC_LOOPBACK_MODE;
++ phba->link_flag |= LS_LOOPBACK_MODE;
+ else
+- phba->fc_flag &= ~FC_LOOPBACK_MODE;
++ phba->link_flag &= ~LS_LOOPBACK_MODE;
+ }
+
+-struct rnidrsp {
+- void *buf;
+- uint32_t uniqueid;
+- struct list_head list;
+- uint32_t data;
+-};
++static inline int
++lpfc_is_link_up(struct lpfc_hba *phba)
++{
++ return phba->link_state == LPFC_LINK_UP ||
++ phba->link_state == LPFC_CLEAR_LA ||
++ phba->link_state == LPFC_HBA_READY;
++}
+
+ #define FC_REG_DUMP_EVENT 0x10 /* Register for Dump events */
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include "lpfc_version.h"
+ #include "lpfc_compat.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+
+ #define LPFC_DEF_DEVLOSS_TMO 30
+ #define LPFC_MIN_DEVLOSS_TMO 1
+@@ -76,116 +77,156 @@
+ lpfc_info_show(struct class_device *cdev, char *buf)
+ {
+ struct Scsi_Host *host = class_to_shost(cdev);
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host));
+ }
+
+ static ssize_t
+ lpfc_serialnum_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber);
+ }
+
+ static ssize_t
+ lpfc_modeldesc_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc);
+ }
+
+ static ssize_t
+ lpfc_modelname_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName);
+ }
+
+ static ssize_t
+ lpfc_programtype_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType);
+ }
+
+ static ssize_t
+-lpfc_portnum_show(struct class_device *cdev, char *buf)
++lpfc_vportnum_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port);
+ }
+
+ static ssize_t
+ lpfc_fwrev_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ char fwrev[32];
++
+ lpfc_decode_firmware_rev(phba, fwrev, 1);
+- return snprintf(buf, PAGE_SIZE, "%s\n",fwrev);
++ return snprintf(buf, PAGE_SIZE, "%s, sli-%d\n", fwrev, phba->sli_rev);
+ }
+
+ static ssize_t
+ lpfc_hdw_show(struct class_device *cdev, char *buf)
+ {
+ char hdw[9];
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ lpfc_vpd_t *vp = &phba->vpd;
++
+ lpfc_jedec_to_ascii(vp->rev.biuRev, hdw);
+ return snprintf(buf, PAGE_SIZE, "%s\n", hdw);
+ }
+ static ssize_t
+ lpfc_option_rom_version_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion);
+ }
+ static ssize_t
+ lpfc_state_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ int len = 0;
+- switch (phba->hba_state) {
+- case LPFC_STATE_UNKNOWN:
++
++ switch (phba->link_state) {
++ case LPFC_LINK_UNKNOWN:
+ case LPFC_WARM_START:
+ case LPFC_INIT_START:
+ case LPFC_INIT_MBX_CMDS:
+ case LPFC_LINK_DOWN:
++ case LPFC_HBA_ERROR:
+ len += snprintf(buf + len, PAGE_SIZE-len, "Link Down\n");
+ break;
+ case LPFC_LINK_UP:
++ case LPFC_CLEAR_LA:
++ case LPFC_HBA_READY:
++ len += snprintf(buf + len, PAGE_SIZE-len, "Link Up - \n");
++
++ switch (vport->port_state) {
+ case LPFC_LOCAL_CFG_LINK:
+- len += snprintf(buf + len, PAGE_SIZE-len, "Link Up\n");
++ len += snprintf(buf + len, PAGE_SIZE-len,
++ "Configuring Link\n");
+ break;
++ case LPFC_FDISC:
+ case LPFC_FLOGI:
+ case LPFC_FABRIC_CFG_LINK:
+ case LPFC_NS_REG:
+ case LPFC_NS_QRY:
+ case LPFC_BUILD_DISC_LIST:
+ case LPFC_DISC_AUTH:
+- case LPFC_CLEAR_LA:
+- len += snprintf(buf + len, PAGE_SIZE-len,
+- "Link Up - Discovery\n");
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "Discovery\n");
+ break;
+- case LPFC_HBA_READY:
+- len += snprintf(buf + len, PAGE_SIZE-len,
+- "Link Up - Ready:\n");
++ case LPFC_VPORT_READY:
++ len += snprintf(buf + len, PAGE_SIZE - len, "Ready\n");
++ break;
++
++ case LPFC_VPORT_FAILED:
++ len += snprintf(buf + len, PAGE_SIZE - len, "Failed\n");
++ break;
++
++ case LPFC_VPORT_UNKNOWN:
++ len += snprintf(buf + len, PAGE_SIZE - len,
++ "Unknown\n");
++ break;
++ }
++
+ if (phba->fc_topology == TOPOLOGY_LOOP) {
+- if (phba->fc_flag & FC_PUBLIC_LOOP)
++ if (vport->fc_flag & FC_PUBLIC_LOOP)
+ len += snprintf(buf + len, PAGE_SIZE-len,
+ " Public Loop\n");
+ else
+ len += snprintf(buf + len, PAGE_SIZE-len,
+ " Private Loop\n");
+ } else {
+- if (phba->fc_flag & FC_FABRIC)
++ if (vport->fc_flag & FC_FABRIC)
+ len += snprintf(buf + len, PAGE_SIZE-len,
+ " Fabric\n");
+ else
+@@ -193,29 +234,32 @@
+ " Point-2-Point\n");
+ }
+ }
++
+ return len;
+ }
+
+ static ssize_t
+ lpfc_num_discovered_ports_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+- return snprintf(buf, PAGE_SIZE, "%d\n", phba->fc_map_cnt +
+- phba->fc_unmap_cnt);
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++
++ return snprintf(buf, PAGE_SIZE, "%d\n",
++ vport->fc_map_cnt + vport->fc_unmap_cnt);
+ }
+
+
+ static int
+-lpfc_issue_lip(struct Scsi_Host *host)
++lpfc_issue_lip(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ LPFC_MBOXQ_t *pmboxq;
+ int mbxstatus = MBXERR_ERROR;
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
+- (phba->fc_flag & FC_BLOCK_MGMT_IO) ||
+- (phba->hba_state != LPFC_HBA_READY))
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
++ (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) ||
++ (vport->port_state != LPFC_VPORT_READY))
+ return -EPERM;
+
+ pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
+@@ -238,9 +282,7 @@
+ }
+
+ lpfc_set_loopback_flag(phba);
+- if (mbxstatus == MBX_TIMEOUT)
+- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+- else
++ if (mbxstatus != MBX_TIMEOUT)
+ mempool_free(pmboxq, phba->mbox_mem_pool);
+
+ if (mbxstatus == MBXERR_ERROR)
+@@ -320,8 +362,10 @@
+ static ssize_t
+ lpfc_issue_reset(struct class_device *cdev, const char *buf, size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ int status = -EINVAL;
+
+ if (strncmp(buf, "selective", sizeof("selective") - 1) == 0)
+@@ -336,23 +380,26 @@
+ static ssize_t
+ lpfc_nport_evt_cnt_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt);
+ }
+
+ static ssize_t
+ lpfc_board_mode_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ char * state;
+
+- if (phba->hba_state == LPFC_HBA_ERROR)
++ if (phba->link_state == LPFC_HBA_ERROR)
+ state = "error";
+- else if (phba->hba_state == LPFC_WARM_START)
++ else if (phba->link_state == LPFC_WARM_START)
+ state = "warm start";
+- else if (phba->hba_state == LPFC_INIT_START)
++ else if (phba->link_state == LPFC_INIT_START)
+ state = "offline";
+ else
+ state = "online";
+@@ -363,8 +410,9 @@
+ static ssize_t
+ lpfc_board_mode_store(struct class_device *cdev, const char *buf, size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct completion online_compl;
+ int status=0;
+
+@@ -389,11 +437,166 @@
+ return -EIO;
+ }
+
++int
++lpfc_get_hba_info(struct lpfc_hba *phba,
++ uint32_t *mxri, uint32_t *axri,
++ uint32_t *mrpi, uint32_t *arpi,
++ uint32_t *mvpi, uint32_t *avpi)
++{
++ struct lpfc_sli *psli = &phba->sli;
++ LPFC_MBOXQ_t *pmboxq;
++ MAILBOX_t *pmb;
++ int rc = 0;
++
++ /*
++ * prevent udev from issuing mailbox commands until the port is
++ * configured.
++ */
++ if (phba->link_state < LPFC_LINK_DOWN ||
++ !phba->mbox_mem_pool ||
++ (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
++ return 0;
++
++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
++ return 0;
++
++ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!pmboxq)
++ return 0;
++ memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
++
++ pmb = &pmboxq->mb;
++ pmb->mbxCommand = MBX_READ_CONFIG;
++ pmb->mbxOwner = OWN_HOST;
++ pmboxq->context1 = NULL;
++
++ if ((phba->pport->fc_flag & FC_OFFLINE_MODE) ||
++ (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
++ rc = MBX_NOT_FINISHED;
++ else
++ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
++
++ if (rc != MBX_SUCCESS) {
++ if (rc != MBX_TIMEOUT)
++ mempool_free(pmboxq, phba->mbox_mem_pool);
++ return 0;
++ }
++
++ if (mrpi)
++ *mrpi = pmb->un.varRdConfig.max_rpi;
++ if (arpi)
++ *arpi = pmb->un.varRdConfig.avail_rpi;
++ if (mxri)
++ *mxri = pmb->un.varRdConfig.max_xri;
++ if (axri)
++ *axri = pmb->un.varRdConfig.avail_xri;
++ if (mvpi)
++ *mvpi = pmb->un.varRdConfig.max_vpi;
++ if (avpi)
++ *avpi = pmb->un.varRdConfig.avail_vpi;
++
++ mempool_free(pmboxq, phba->mbox_mem_pool);
++ return 1;
++}
++
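Callers of lpfc_get_hba_info() pass NULL for any counts they do not need, so each sysfs handler below extracts one resource pair from a single READ_CONFIG mailbox command; a return of 0 means the values could not be read and the attribute reports "Unknown". A usage sketch mirroring lpfc_used_vpi_show():

	uint32_t max_vpi, avail_vpi, used;

	if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL,
			      &max_vpi, &avail_vpi))
		used = max_vpi - avail_vpi;	/* else report "Unknown" */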
++static ssize_t
++lpfc_max_rpi_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt;
++
++ if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, NULL, NULL, NULL))
++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_rpi_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt, acnt;
++
++ if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, &acnt, NULL, NULL))
++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_max_xri_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt;
++
++ if (lpfc_get_hba_info(phba, &cnt, NULL, NULL, NULL, NULL, NULL))
++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_xri_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt, acnt;
++
++ if (lpfc_get_hba_info(phba, &cnt, &acnt, NULL, NULL, NULL, NULL))
++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_max_vpi_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt;
++
++ if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, NULL))
++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_vpi_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ uint32_t cnt, acnt;
++
++ if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, &acnt))
++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++ return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_npiv_info_show(struct class_device *cdev, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
++ if (!(phba->max_vpi))
++ return snprintf(buf, PAGE_SIZE, "NPIV Not Supported\n");
++ if (vport->port_type == LPFC_PHYSICAL_PORT)
++ return snprintf(buf, PAGE_SIZE, "NPIV Physical\n");
++ return snprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi);
++}
++
+ static ssize_t
+ lpfc_poll_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll);
+ }
+@@ -402,8 +605,9 @@
+ lpfc_poll_store(struct class_device *cdev, const char *buf,
+ size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t creg_val;
+ uint32_t old_val;
+ int val=0;
+@@ -417,7 +621,7 @@
+ if ((val & 0x3) != val)
+ return -EINVAL;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ old_val = phba->cfg_poll;
+
+@@ -432,16 +636,16 @@
+ lpfc_poll_start_timer(phba);
+ }
+ } else if (val != 0x0) {
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EINVAL;
+ }
+
+ if (!(val & DISABLE_FCP_RING_INT) &&
+ (old_val & DISABLE_FCP_RING_INT))
+ {
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ del_timer(&phba->fcp_poll_timer);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ creg_val = readl(phba->HCregaddr);
+ creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
+ writel(creg_val, phba->HCregaddr);
+@@ -450,7 +654,7 @@
+
+ phba->cfg_poll = val;
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return strlen(buf);
+ }
+@@ -459,8 +663,9 @@
+ static ssize_t \
+ lpfc_##attr##_show(struct class_device *cdev, char *buf) \
+ { \
+- struct Scsi_Host *host = class_to_shost(cdev);\
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++ struct Scsi_Host *shost = class_to_shost(cdev);\
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++ struct lpfc_hba *phba = vport->phba;\
+ int val = 0;\
+ val = phba->cfg_##attr;\
+ return snprintf(buf, PAGE_SIZE, "%d\n",\
+@@ -471,8 +676,9 @@
+ static ssize_t \
+ lpfc_##attr##_show(struct class_device *cdev, char *buf) \
+ { \
+- struct Scsi_Host *host = class_to_shost(cdev);\
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++ struct Scsi_Host *shost = class_to_shost(cdev);\
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++ struct lpfc_hba *phba = vport->phba;\
+ int val = 0;\
+ val = phba->cfg_##attr;\
+ return snprintf(buf, PAGE_SIZE, "%#x\n",\
+@@ -514,8 +720,9 @@
+ static ssize_t \
+ lpfc_##attr##_store(struct class_device *cdev, const char *buf, size_t count) \
+ { \
+- struct Scsi_Host *host = class_to_shost(cdev);\
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++ struct Scsi_Host *shost = class_to_shost(cdev);\
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++ struct lpfc_hba *phba = vport->phba;\
+ int val=0;\
+ if (!isdigit(buf[0]))\
+ return -EINVAL;\
+@@ -576,7 +783,7 @@
+ static CLASS_DEVICE_ATTR(modeldesc, S_IRUGO, lpfc_modeldesc_show, NULL);
+ static CLASS_DEVICE_ATTR(modelname, S_IRUGO, lpfc_modelname_show, NULL);
+ static CLASS_DEVICE_ATTR(programtype, S_IRUGO, lpfc_programtype_show, NULL);
+-static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_portnum_show, NULL);
++static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_vportnum_show, NULL);
+ static CLASS_DEVICE_ATTR(fwrev, S_IRUGO, lpfc_fwrev_show, NULL);
+ static CLASS_DEVICE_ATTR(hdw, S_IRUGO, lpfc_hdw_show, NULL);
+ static CLASS_DEVICE_ATTR(state, S_IRUGO, lpfc_state_show, NULL);
+@@ -592,6 +799,13 @@
+ static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR,
+ lpfc_board_mode_show, lpfc_board_mode_store);
+ static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset);
++static CLASS_DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL);
++static CLASS_DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL);
++static CLASS_DEVICE_ATTR(max_rpi, S_IRUGO, lpfc_max_rpi_show, NULL);
++static CLASS_DEVICE_ATTR(used_rpi, S_IRUGO, lpfc_used_rpi_show, NULL);
++static CLASS_DEVICE_ATTR(max_xri, S_IRUGO, lpfc_max_xri_show, NULL);
++static CLASS_DEVICE_ATTR(used_xri, S_IRUGO, lpfc_used_xri_show, NULL);
++static CLASS_DEVICE_ATTR(npiv_info, S_IRUGO, lpfc_npiv_info_show, NULL);
+
+
+ static char *lpfc_soft_wwn_key = "C99G71SL8032A";
+@@ -600,8 +814,9 @@
+ lpfc_soft_wwn_enable_store(struct class_device *cdev, const char *buf,
+ size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ unsigned int cnt = count;
+
+ /*
+@@ -634,8 +849,10 @@
+ static ssize_t
+ lpfc_soft_wwpn_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++
+ return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)phba->cfg_soft_wwpn);
+ }
+@@ -644,8 +861,9 @@
+ static ssize_t
+ lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct completion online_compl;
+ int stat1=0, stat2=0;
+ unsigned int i, j, cnt=count;
+@@ -680,9 +898,9 @@
+ }
+ }
+ phba->cfg_soft_wwpn = wwn_to_u64(wwpn);
+- fc_host_port_name(host) = phba->cfg_soft_wwpn;
++ fc_host_port_name(shost) = phba->cfg_soft_wwpn;
+ if (phba->cfg_soft_wwnn)
+- fc_host_node_name(host) = phba->cfg_soft_wwnn;
++ fc_host_node_name(shost) = phba->cfg_soft_wwnn;
+
+ dev_printk(KERN_NOTICE, &phba->pcidev->dev,
+ "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
+@@ -777,6 +995,15 @@
+ static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
+ lpfc_poll_show, lpfc_poll_store);
+
++int lpfc_sli_mode = 0;
++module_param(lpfc_sli_mode, int, 0);
++MODULE_PARM_DESC(lpfc_sli_mode, "SLI mode selector:"
++ " 0 - auto (SLI-3 if supported),"
++ " 2 - select SLI-2 even on SLI-3 capable HBAs,"
++ " 3 - select SLI-3");
++
++LPFC_ATTR_R(npiv_enable, 0, 0, 1, "Enable NPIV functionality");
++
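As a usage note (assuming the driver is built as a module named lpfc): modprobe lpfc lpfc_sli_mode=2 forces SLI-2 operation even on SLI-3 capable HBAs, while modprobe lpfc lpfc_npiv_enable=1 turns on NPIV; since NPIV depends on SLI-3 features, it is only meaningful when the HBA is running SLI-3.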
+ /*
+ # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
+ # until the timer expires. Value range is [0,255]. Default value is 30.
+@@ -790,8 +1017,9 @@
+ static ssize_t
+ lpfc_nodev_tmo_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ int val = 0;
+ val = phba->cfg_devloss_tmo;
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+@@ -832,13 +1060,19 @@
+ static void
+ lpfc_update_rport_devloss_tmo(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport;
++ struct Scsi_Host *shost;
+ struct lpfc_nodelist *ndlp;
+
+- spin_lock_irq(phba->host->host_lock);
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp)
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ shost = lpfc_shost_from_vport(vport);
++ spin_lock_irq(shost->host_lock);
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp)
+ if (ndlp->rport)
+- ndlp->rport->dev_loss_tmo = phba->cfg_devloss_tmo;
+- spin_unlock_irq(phba->host->host_lock);
++ ndlp->rport->dev_loss_tmo =
++ phba->cfg_devloss_tmo;
++ spin_unlock_irq(shost->host_lock);
++ }
+ }
+
+ static int
+@@ -946,6 +1180,33 @@
+ "Max number of FCP commands we can queue to a lpfc HBA");
+
+ /*
++# peer_port_login: This parameter allows or prevents logins
++# between peer ports hosted on the same physical port.
++# When this parameter is set to 0, peer ports on the same physical port
++# are not allowed to log in to each other.
++# When this parameter is set to 1, peer ports on the same physical port
++# are allowed to log in to each other.
++# The default value of this parameter is 0.
++*/
++LPFC_ATTR_R(peer_port_login, 0, 0, 1,
++ "Allow peer ports on the same physical port to login to each "
++ "other.");
++
++/*
++# vport_restrict_login: This parameter allows or prevents logins
++# between Virtual Ports and remote initiators.
++# When this parameter is not set (0), Virtual Ports will accept PLOGIs from
++# other initiators and will attempt to PLOGI to all remote ports.
++# When this parameter is set (1), Virtual Ports will reject PLOGIs from
++# remote ports and will not attempt to PLOGI to other initiators.
++# This parameter does not apply to the physical port.
++# This parameter does not restrict logins to Fabric-resident remote ports.
++# The default value of this parameter is 1.
++*/
++LPFC_ATTR_RW(vport_restrict_login, 1, 0, 1,
++ "Restrict virtual ports login to remote initiators.");
++
++/*
+ # Some disk devices have a "select ID" or "select Target" capability.
+ # From a protocol standpoint "select ID" usually means select the
+ # Fibre channel "ALPA". In the FC-AL Profile there is an "informative
+@@ -1088,7 +1349,8 @@
+ LPFC_ATTR_R(use_msi, 0, 0, 1, "Use Message Signaled Interrupts, if possible");
+
+
+-struct class_device_attribute *lpfc_host_attrs[] = {
++
++struct class_device_attribute *lpfc_hba_attrs[] = {
+ &class_device_attr_info,
+ &class_device_attr_serialnum,
+ &class_device_attr_modeldesc,
+@@ -1104,6 +1366,8 @@
+ &class_device_attr_lpfc_log_verbose,
+ &class_device_attr_lpfc_lun_queue_depth,
+ &class_device_attr_lpfc_hba_queue_depth,
++ &class_device_attr_lpfc_peer_port_login,
++ &class_device_attr_lpfc_vport_restrict_login,
+ &class_device_attr_lpfc_nodev_tmo,
+ &class_device_attr_lpfc_devloss_tmo,
+ &class_device_attr_lpfc_fcp_class,
+@@ -1119,9 +1383,17 @@
+ &class_device_attr_lpfc_multi_ring_type,
+ &class_device_attr_lpfc_fdmi_on,
+ &class_device_attr_lpfc_max_luns,
++ &class_device_attr_lpfc_npiv_enable,
+ &class_device_attr_nport_evt_cnt,
+ &class_device_attr_management_version,
+ &class_device_attr_board_mode,
++ &class_device_attr_max_vpi,
++ &class_device_attr_used_vpi,
++ &class_device_attr_max_rpi,
++ &class_device_attr_used_rpi,
++ &class_device_attr_max_xri,
++ &class_device_attr_used_xri,
++ &class_device_attr_npiv_info,
+ &class_device_attr_issue_reset,
+ &class_device_attr_lpfc_poll,
+ &class_device_attr_lpfc_poll_tmo,
+@@ -1136,9 +1408,11 @@
+ sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+ size_t buf_off;
+- struct Scsi_Host *host = class_to_shost(container_of(kobj,
+- struct class_device, kobj));
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct class_device *cdev = container_of(kobj, struct class_device,
++ kobj);
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ if ((off + count) > FF_REG_AREA_SIZE)
+ return -ERANGE;
+@@ -1148,18 +1422,16 @@
+ if (off % 4 || count % 4 || (unsigned long)buf % 4)
+ return -EINVAL;
+
+- spin_lock_irq(phba->host->host_lock);
+-
+- if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
+- spin_unlock_irq(phba->host->host_lock);
++ if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
+ return -EPERM;
+ }
+
++ spin_lock_irq(&phba->hbalock);
+ for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t))
+ writel(*((uint32_t *)(buf + buf_off)),
+ phba->ctrl_regs_memmap_p + off + buf_off);
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return count;
+ }
+@@ -1169,9 +1441,11 @@
+ {
+ size_t buf_off;
+ uint32_t * tmp_ptr;
+- struct Scsi_Host *host = class_to_shost(container_of(kobj,
+- struct class_device, kobj));
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct class_device *cdev = container_of(kobj, struct class_device,
++ kobj);
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ if (off > FF_REG_AREA_SIZE)
+ return -ERANGE;
+@@ -1184,14 +1458,14 @@
+ if (off % 4 || count % 4 || (unsigned long)buf % 4)
+ return -EINVAL;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) {
+ tmp_ptr = (uint32_t *)(buf + buf_off);
+ *tmp_ptr = readl(phba->ctrl_regs_memmap_p + off + buf_off);
+ }
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return count;
+ }
+@@ -1200,7 +1474,6 @@
+ .attr = {
+ .name = "ctlreg",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 256,
+ .read = sysfs_ctlreg_read,
+@@ -1209,7 +1482,7 @@
+
+
+ static void
+-sysfs_mbox_idle (struct lpfc_hba * phba)
++sysfs_mbox_idle(struct lpfc_hba *phba)
+ {
+ phba->sysfs_mbox.state = SMBOX_IDLE;
+ phba->sysfs_mbox.offset = 0;
+@@ -1224,10 +1497,12 @@
+ static ssize_t
+ sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+- struct Scsi_Host * host =
+- class_to_shost(container_of(kobj, struct class_device, kobj));
+- struct lpfc_hba * phba = (struct lpfc_hba*)host->hostdata;
+- struct lpfcMboxq * mbox = NULL;
++ struct class_device *cdev = container_of(kobj, struct class_device,
++ kobj);
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfcMboxq *mbox = NULL;
+
+ if ((count + off) > MAILBOX_CMD_SIZE)
+ return -ERANGE;
+@@ -1245,7 +1520,7 @@
+ memset(mbox, 0, sizeof (LPFC_MBOXQ_t));
+ }
+
+- spin_lock_irq(host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ if (off == 0) {
+ if (phba->sysfs_mbox.mbox)
+@@ -1256,9 +1531,9 @@
+ } else {
+ if (phba->sysfs_mbox.state != SMBOX_WRITING ||
+ phba->sysfs_mbox.offset != off ||
+- phba->sysfs_mbox.mbox == NULL ) {
++ phba->sysfs_mbox.mbox == NULL) {
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EAGAIN;
+ }
+ }
+@@ -1268,7 +1543,7 @@
+
+ phba->sysfs_mbox.offset = off + count;
+
+- spin_unlock_irq(host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return count;
+ }
+@@ -1276,10 +1551,11 @@
+ static ssize_t
+ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+- struct Scsi_Host *host =
+- class_to_shost(container_of(kobj, struct class_device,
+- kobj));
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct class_device *cdev = container_of(kobj, struct class_device,
++ kobj);
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ int rc;
+
+ if (off > MAILBOX_CMD_SIZE)
+@@ -1294,7 +1570,7 @@
+ if (off && count == 0)
+ return 0;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ if (off == 0 &&
+ phba->sysfs_mbox.state == SMBOX_WRITING &&
+@@ -1317,12 +1593,12 @@
+ case MBX_SET_MASK:
+ case MBX_SET_SLIM:
+ case MBX_SET_DEBUG:
+- if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
++ if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
+ printk(KERN_WARNING "mbox_read:Command 0x%x "
+ "is illegal in on-line state\n",
+ phba->sysfs_mbox.mbox->mb.mbxCommand);
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EPERM;
+ }
+ case MBX_LOAD_SM:
+@@ -1352,48 +1628,48 @@
+ printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n",
+ phba->sysfs_mbox.mbox->mb.mbxCommand);
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EPERM;
+ default:
+ printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n",
+ phba->sysfs_mbox.mbox->mb.mbxCommand);
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EPERM;
+ }
+
+- if (phba->fc_flag & FC_BLOCK_MGMT_IO) {
++ phba->sysfs_mbox.mbox->vport = vport;
++
++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EAGAIN;
+ }
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ rc = lpfc_sli_issue_mbox (phba,
+ phba->sysfs_mbox.mbox,
+ MBX_POLL);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ } else {
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ rc = lpfc_sli_issue_mbox_wait (phba,
+ phba->sysfs_mbox.mbox,
+ lpfc_mbox_tmo_val(phba,
+ phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ }
+
+ if (rc != MBX_SUCCESS) {
+ if (rc == MBX_TIMEOUT) {
+- phba->sysfs_mbox.mbox->mbox_cmpl =
+- lpfc_sli_def_mbox_cmpl;
+ phba->sysfs_mbox.mbox = NULL;
+ }
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return (rc == MBX_TIMEOUT) ? -ETIME : -ENODEV;
+ }
+ phba->sysfs_mbox.state = SMBOX_READING;
+@@ -1402,7 +1678,7 @@
+ phba->sysfs_mbox.state != SMBOX_READING) {
+ printk(KERN_WARNING "mbox_read: Bad State\n");
+ sysfs_mbox_idle(phba);
+- spin_unlock_irq(host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return -EAGAIN;
+ }
+
+@@ -1413,7 +1689,7 @@
+ if (phba->sysfs_mbox.offset == MAILBOX_CMD_SIZE)
+ sysfs_mbox_idle(phba);
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return count;
+ }
+@@ -1422,7 +1698,6 @@
+ .attr = {
+ .name = "mbox",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = MAILBOX_CMD_SIZE,
+ .read = sysfs_mbox_read,
+@@ -1430,35 +1705,35 @@
+ };
+
+ int
+-lpfc_alloc_sysfs_attr(struct lpfc_hba *phba)
++lpfc_alloc_sysfs_attr(struct lpfc_vport *vport)
+ {
+- struct Scsi_Host *host = phba->host;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ int error;
+
+- error = sysfs_create_bin_file(&host->shost_classdev.kobj,
++ error = sysfs_create_bin_file(&shost->shost_classdev.kobj,
+ &sysfs_ctlreg_attr);
+ if (error)
+ goto out;
+
+- error = sysfs_create_bin_file(&host->shost_classdev.kobj,
++ error = sysfs_create_bin_file(&shost->shost_classdev.kobj,
+ &sysfs_mbox_attr);
+ if (error)
+ goto out_remove_ctlreg_attr;
+
+ return 0;
+ out_remove_ctlreg_attr:
+- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++ sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr);
+ out:
+ return error;
+ }
+
+ void
+-lpfc_free_sysfs_attr(struct lpfc_hba *phba)
++lpfc_free_sysfs_attr(struct lpfc_vport *vport)
+ {
+- struct Scsi_Host *host = phba->host;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+
+- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr);
+- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++ sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_mbox_attr);
++ sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr);
+ }
+
+
+@@ -1469,26 +1744,30 @@
+ static void
+ lpfc_get_host_port_id(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++
+ /* note: fc_myDID already in cpu endianness */
+- fc_host_port_id(shost) = phba->fc_myDID;
++ fc_host_port_id(shost) = vport->fc_myDID;
+ }
+
+ static void
+ lpfc_get_host_port_type(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ spin_lock_irq(shost->host_lock);
+
+- if (phba->hba_state == LPFC_HBA_READY) {
++ if (vport->port_type == LPFC_NPIV_PORT) {
++ fc_host_port_type(shost) = FC_PORTTYPE_NPIV;
++ } else if (lpfc_is_link_up(phba)) {
+ if (phba->fc_topology == TOPOLOGY_LOOP) {
+- if (phba->fc_flag & FC_PUBLIC_LOOP)
++ if (vport->fc_flag & FC_PUBLIC_LOOP)
+ fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
+ else
+ fc_host_port_type(shost) = FC_PORTTYPE_LPORT;
+ } else {
+- if (phba->fc_flag & FC_FABRIC)
++ if (vport->fc_flag & FC_FABRIC)
+ fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+ else
+ fc_host_port_type(shost) = FC_PORTTYPE_PTP;
+@@ -1502,29 +1781,20 @@
+ static void
+ lpfc_get_host_port_state(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ spin_lock_irq(shost->host_lock);
+
+- if (phba->fc_flag & FC_OFFLINE_MODE)
++ if (vport->fc_flag & FC_OFFLINE_MODE)
+ fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
+ else {
+- switch (phba->hba_state) {
+- case LPFC_STATE_UNKNOWN:
+- case LPFC_WARM_START:
+- case LPFC_INIT_START:
+- case LPFC_INIT_MBX_CMDS:
++ switch (phba->link_state) {
++ case LPFC_LINK_UNKNOWN:
+ case LPFC_LINK_DOWN:
+ fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+ break;
+ case LPFC_LINK_UP:
+- case LPFC_LOCAL_CFG_LINK:
+- case LPFC_FLOGI:
+- case LPFC_FABRIC_CFG_LINK:
+- case LPFC_NS_REG:
+- case LPFC_NS_QRY:
+- case LPFC_BUILD_DISC_LIST:
+- case LPFC_DISC_AUTH:
+ case LPFC_CLEAR_LA:
+ case LPFC_HBA_READY:
+ /* Links up, beyond this port_type reports state */
+@@ -1545,11 +1815,12 @@
+ static void
+ lpfc_get_host_speed(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+
+ spin_lock_irq(shost->host_lock);
+
+- if (phba->hba_state == LPFC_HBA_READY) {
++ if (lpfc_is_link_up(phba)) {
+ switch(phba->fc_linkspeed) {
+ case LA_1GHZ_LINK:
+ fc_host_speed(shost) = FC_PORTSPEED_1GBIT;
+@@ -1575,38 +1846,30 @@
+ static void
+ lpfc_get_host_fabric_name (struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ u64 node_name;
+
+ spin_lock_irq(shost->host_lock);
+
+- if ((phba->fc_flag & FC_FABRIC) ||
++ if ((vport->fc_flag & FC_FABRIC) ||
+ ((phba->fc_topology == TOPOLOGY_LOOP) &&
+- (phba->fc_flag & FC_PUBLIC_LOOP)))
++ (vport->fc_flag & FC_PUBLIC_LOOP)))
+ node_name = wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn);
+ else
+ /* fabric is local port if there is no F/FL_Port */
+- node_name = wwn_to_u64(phba->fc_nodename.u.wwn);
++ node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
+
+ spin_unlock_irq(shost->host_lock);
+
+ fc_host_fabric_name(shost) = node_name;
+ }
+
+-static void
+-lpfc_get_host_symbolic_name (struct Scsi_Host *shost)
+-{
+- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
+-
+- spin_lock_irq(shost->host_lock);
+- lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
+- spin_unlock_irq(shost->host_lock);
+-}
+-
+ static struct fc_host_statistics *
+ lpfc_get_stats(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli *psli = &phba->sli;
+ struct fc_host_statistics *hs = &phba->link_stats;
+ struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
+@@ -1615,7 +1878,16 @@
+ unsigned long seconds;
+ int rc = 0;
+
+- if (phba->fc_flag & FC_BLOCK_MGMT_IO)
++ /*
++ * prevent udev from issuing mailbox commands until the port is
++ * configured.
++ */
++ if (phba->link_state < LPFC_LINK_DOWN ||
++ !phba->mbox_mem_pool ||
++ (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
++ return NULL;
++
++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
+ return NULL;
+
+ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+@@ -1627,17 +1899,16 @@
+ pmb->mbxCommand = MBX_READ_STATUS;
+ pmb->mbxOwner = OWN_HOST;
+ pmboxq->context1 = NULL;
++ pmboxq->vport = vport;
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ else
+ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+ if (rc != MBX_SUCCESS) {
+- if (rc == MBX_TIMEOUT)
+- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+- else
++ if (rc != MBX_TIMEOUT)
+ mempool_free(pmboxq, phba->mbox_mem_pool);
+ return NULL;
+ }
+@@ -1653,18 +1924,17 @@
+ pmb->mbxCommand = MBX_READ_LNK_STAT;
+ pmb->mbxOwner = OWN_HOST;
+ pmboxq->context1 = NULL;
++ pmboxq->vport = vport;
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ else
+ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+ if (rc != MBX_SUCCESS) {
+- if (rc == MBX_TIMEOUT)
+- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+- else
+- mempool_free( pmboxq, phba->mbox_mem_pool);
++ if (rc != MBX_TIMEOUT)
++ mempool_free(pmboxq, phba->mbox_mem_pool);
+ return NULL;
+ }
+
+@@ -1711,14 +1981,15 @@
+ static void
+ lpfc_reset_stats(struct Scsi_Host *shost)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli *psli = &phba->sli;
+- struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
++ struct lpfc_lnk_stat *lso = &psli->lnk_stat_offsets;
+ LPFC_MBOXQ_t *pmboxq;
+ MAILBOX_t *pmb;
+ int rc = 0;
+
+- if (phba->fc_flag & FC_BLOCK_MGMT_IO)
++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
+ return;
+
+ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+@@ -1731,17 +2002,16 @@
+ pmb->mbxOwner = OWN_HOST;
+ pmb->un.varWords[0] = 0x1; /* reset request */
+ pmboxq->context1 = NULL;
++ pmboxq->vport = vport;
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ else
+ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+ if (rc != MBX_SUCCESS) {
+- if (rc == MBX_TIMEOUT)
+- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+- else
++ if (rc != MBX_TIMEOUT)
+ mempool_free(pmboxq, phba->mbox_mem_pool);
+ return;
+ }
+@@ -1750,17 +2020,16 @@
+ pmb->mbxCommand = MBX_READ_LNK_STAT;
+ pmb->mbxOwner = OWN_HOST;
+ pmboxq->context1 = NULL;
++ pmboxq->vport = vport;
+
+- if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++ if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ else
+ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+
+ if (rc != MBX_SUCCESS) {
+- if (rc == MBX_TIMEOUT)
+- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+- else
++ if (rc != MBX_TIMEOUT)
+ mempool_free( pmboxq, phba->mbox_mem_pool);
+ return;
+ }
+@@ -1790,12 +2059,12 @@
+ lpfc_get_node_by_target(struct scsi_target *starget)
+ {
+ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
+- struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+ struct lpfc_nodelist *ndlp;
+
+ spin_lock_irq(shost->host_lock);
+ /* Search for this, mapped, target ID */
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_MAPPED_NODE &&
+ starget->id == ndlp->nlp_sid) {
+ spin_unlock_irq(shost->host_lock);
+@@ -1885,8 +2154,66 @@
+ .get_host_fabric_name = lpfc_get_host_fabric_name,
+ .show_host_fabric_name = 1,
+
+- .get_host_symbolic_name = lpfc_get_host_symbolic_name,
+- .show_host_symbolic_name = 1,
++ /*
++ * The LPFC driver treats linkdown handling as target loss events
++ * so there are no sysfs handlers for link_down_tmo.
++ */
++
++ .get_fc_host_stats = lpfc_get_stats,
++ .reset_fc_host_stats = lpfc_reset_stats,
++
++ .dd_fcrport_size = sizeof(struct lpfc_rport_data),
++ .show_rport_maxframe_size = 1,
++ .show_rport_supported_classes = 1,
++
++ .set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo,
++ .show_rport_dev_loss_tmo = 1,
++
++ .get_starget_port_id = lpfc_get_starget_port_id,
++ .show_starget_port_id = 1,
++
++ .get_starget_node_name = lpfc_get_starget_node_name,
++ .show_starget_node_name = 1,
++
++ .get_starget_port_name = lpfc_get_starget_port_name,
++ .show_starget_port_name = 1,
++
++ .issue_fc_host_lip = lpfc_issue_lip,
++ .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
++ .terminate_rport_io = lpfc_terminate_rport_io,
++
++ .vport_create = lpfc_vport_create,
++ .vport_delete = lpfc_vport_delete,
++ .dd_fcvport_size = sizeof(struct lpfc_vport *),
++};
++
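++/*
++ * Reduced transport template registered for NPIV vports. It mirrors the
++ * physical-port template above but does not export vport_create or
++ * vport_delete; only vport_disable is wired up further below.
++ */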
++struct fc_function_template lpfc_vport_transport_functions = {
++ /* fixed attributes the driver supports */
++ .show_host_node_name = 1,
++ .show_host_port_name = 1,
++ .show_host_supported_classes = 1,
++ .show_host_supported_fc4s = 1,
++ .show_host_supported_speeds = 1,
++ .show_host_maxframe_size = 1,
++
++ /* dynamic attributes the driver supports */
++ .get_host_port_id = lpfc_get_host_port_id,
++ .show_host_port_id = 1,
++
++ .get_host_port_type = lpfc_get_host_port_type,
++ .show_host_port_type = 1,
++
++ .get_host_port_state = lpfc_get_host_port_state,
++ .show_host_port_state = 1,
++
++ /* active_fc4s is shown but doesn't change (thus no get function) */
++ .show_host_active_fc4s = 1,
++
++ .get_host_speed = lpfc_get_host_speed,
++ .show_host_speed = 1,
++
++ .get_host_fabric_name = lpfc_get_host_fabric_name,
++ .show_host_fabric_name = 1,
+
+ /*
+ * The LPFC driver treats linkdown handling as target loss events
+@@ -1915,6 +2242,8 @@
+ .issue_fc_host_lip = lpfc_issue_lip,
+ .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
+ .terminate_rport_io = lpfc_terminate_rport_io,
++
++ .vport_disable = lpfc_vport_disable,
+ };
+
+ void
+@@ -1937,6 +2266,9 @@
+ lpfc_discovery_threads_init(phba, lpfc_discovery_threads);
+ lpfc_max_luns_init(phba, lpfc_max_luns);
+ lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
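++ /* NPIV-related tunables: peer port login, NPIV enable, vport login restriction */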
++ lpfc_peer_port_login_init(phba, lpfc_peer_port_login);
++ lpfc_npiv_enable_init(phba, lpfc_npiv_enable);
++ lpfc_vport_restrict_login_init(phba, lpfc_vport_restrict_login);
+ lpfc_use_msi_init(phba, lpfc_use_msi);
+ lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo);
+ lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h 2007-12-21 15:36:12.000000000 -0500
+@@ -23,92 +23,114 @@
+ struct fc_rport;
+ void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
+ void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_heart_beat(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb,
+ struct lpfc_dmabuf *mp);
+ void lpfc_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport);
+ void lpfc_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int);
+ void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-int lpfc_reg_login(struct lpfc_hba *, uint32_t, uint8_t *, LPFC_MBOXQ_t *,
+- uint32_t);
+-void lpfc_unreg_login(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
+-void lpfc_unreg_did(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
++int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
++ LPFC_MBOXQ_t *, uint32_t);
++void lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *);
+ void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
+
+-
++void lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove);
+ int lpfc_linkdown(struct lpfc_hba *);
+ void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
+
+ void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-void lpfc_dequeue_node(struct lpfc_hba *, struct lpfc_nodelist *);
+-void lpfc_nlp_set_state(struct lpfc_hba *, struct lpfc_nodelist *, int);
+-void lpfc_drop_node(struct lpfc_hba *, struct lpfc_nodelist *);
+-void lpfc_set_disctmo(struct lpfc_hba *);
+-int lpfc_can_disctmo(struct lpfc_hba *);
+-int lpfc_unreg_rpi(struct lpfc_hba *, struct lpfc_nodelist *);
++void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_nlp_set_state(struct lpfc_vport *, struct lpfc_nodelist *, int);
++void lpfc_drop_node(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_set_disctmo(struct lpfc_vport *);
++int lpfc_can_disctmo(struct lpfc_vport *);
++int lpfc_unreg_rpi(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_unreg_all_rpis(struct lpfc_vport *);
++void lpfc_unreg_default_rpis(struct lpfc_vport *);
++void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *);
++
+ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_iocbq *, struct lpfc_nodelist *);
+-void lpfc_nlp_init(struct lpfc_hba *, struct lpfc_nodelist *, uint32_t);
++void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t);
+ struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
+ int lpfc_nlp_put(struct lpfc_nodelist *);
+-struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_hba *, uint32_t);
+-void lpfc_disc_list_loopmap(struct lpfc_hba *);
+-void lpfc_disc_start(struct lpfc_hba *);
+-void lpfc_disc_flush_list(struct lpfc_hba *);
++struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t);
++void lpfc_disc_list_loopmap(struct lpfc_vport *);
++void lpfc_disc_start(struct lpfc_vport *);
++void lpfc_disc_flush_list(struct lpfc_vport *);
++void lpfc_cleanup_discovery_resources(struct lpfc_vport *);
+ void lpfc_disc_timeout(unsigned long);
+
+-struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi);
+-struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi);
++struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_vport *, uint16_t);
++struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_vport *, uint16_t);
+
++void lpfc_worker_wake_up(struct lpfc_hba *);
+ int lpfc_workq_post_event(struct lpfc_hba *, void *, void *, uint32_t);
+ int lpfc_do_work(void *);
+-int lpfc_disc_state_machine(struct lpfc_hba *, struct lpfc_nodelist *, void *,
++int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *,
+ uint32_t);
+
+-int lpfc_check_sparm(struct lpfc_hba *, struct lpfc_nodelist *,
++void lpfc_register_new_vport(struct lpfc_hba *, struct lpfc_vport *,
++ struct lpfc_nodelist *);
++void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *);
++int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *,
+ struct serv_parm *, uint32_t);
+-int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist * ndlp);
++int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_els_chk_latt(struct lpfc_vport *);
+ int lpfc_els_abort_flogi(struct lpfc_hba *);
+-int lpfc_initial_flogi(struct lpfc_hba *);
+-int lpfc_issue_els_plogi(struct lpfc_hba *, uint32_t, uint8_t);
+-int lpfc_issue_els_prli(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_adisc(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_logo(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_scr(struct lpfc_hba *, uint32_t, uint8_t);
++int lpfc_initial_flogi(struct lpfc_vport *);
++int lpfc_initial_fdisc(struct lpfc_vport *);
++int lpfc_issue_els_fdisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_plogi(struct lpfc_vport *, uint32_t, uint8_t);
++int lpfc_issue_els_prli(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *);
++int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t);
+ int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
+-int lpfc_els_rsp_acc(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
++int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
+ struct lpfc_nodelist *, LPFC_MBOXQ_t *, uint8_t);
+-int lpfc_els_rsp_reject(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++int lpfc_els_rsp_reject(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
++ struct lpfc_nodelist *, LPFC_MBOXQ_t *);
++int lpfc_els_rsp_adisc_acc(struct lpfc_vport *, struct lpfc_iocbq *,
+ struct lpfc_nodelist *);
+-int lpfc_els_rsp_adisc_acc(struct lpfc_hba *, struct lpfc_iocbq *,
++int lpfc_els_rsp_prli_acc(struct lpfc_vport *, struct lpfc_iocbq *,
+ struct lpfc_nodelist *);
+-int lpfc_els_rsp_prli_acc(struct lpfc_hba *, struct lpfc_iocbq *,
+- struct lpfc_nodelist *);
+-void lpfc_cancel_retry_delay_tmo(struct lpfc_hba *, struct lpfc_nodelist *);
++void lpfc_cancel_retry_delay_tmo(struct lpfc_vport *, struct lpfc_nodelist *);
+ void lpfc_els_retry_delay(unsigned long);
+ void lpfc_els_retry_delay_handler(struct lpfc_nodelist *);
++void lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *);
+ void lpfc_els_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_iocbq *);
+-int lpfc_els_handle_rscn(struct lpfc_hba *);
+-int lpfc_els_flush_rscn(struct lpfc_hba *);
+-int lpfc_rscn_payload_check(struct lpfc_hba *, uint32_t);
+-void lpfc_els_flush_cmd(struct lpfc_hba *);
+-int lpfc_els_disc_adisc(struct lpfc_hba *);
+-int lpfc_els_disc_plogi(struct lpfc_hba *);
++int lpfc_els_handle_rscn(struct lpfc_vport *);
++void lpfc_els_flush_rscn(struct lpfc_vport *);
++int lpfc_rscn_payload_check(struct lpfc_vport *, uint32_t);
++void lpfc_els_flush_cmd(struct lpfc_vport *);
++int lpfc_els_disc_adisc(struct lpfc_vport *);
++int lpfc_els_disc_plogi(struct lpfc_vport *);
+ void lpfc_els_timeout(unsigned long);
+-void lpfc_els_timeout_handler(struct lpfc_hba *);
++void lpfc_els_timeout_handler(struct lpfc_vport *);
++void lpfc_hb_timeout(unsigned long);
++void lpfc_hb_timeout_handler(struct lpfc_hba *);
+
+ void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_iocbq *);
+-int lpfc_ns_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
+-int lpfc_fdmi_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
++int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t);
++int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int);
+ void lpfc_fdmi_tmo(unsigned long);
+-void lpfc_fdmi_tmo_handler(struct lpfc_hba *);
++void lpfc_fdmi_timeout_handler(struct lpfc_vport *vport);
+
+ int lpfc_config_port_prep(struct lpfc_hba *);
+ int lpfc_config_port_post(struct lpfc_hba *);
+@@ -136,16 +158,23 @@
+ void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
++void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ int lpfc_mbox_tmo_val(struct lpfc_hba *, int);
+
++void lpfc_config_hbq(struct lpfc_hba *, struct lpfc_hbq_init *, uint32_t ,
++ LPFC_MBOXQ_t *);
++struct lpfc_hbq_entry *lpfc_sli_next_hbq_slot(struct lpfc_hba *, uint32_t);
++
+ int lpfc_mem_alloc(struct lpfc_hba *);
+ void lpfc_mem_free(struct lpfc_hba *);
++void lpfc_stop_vport_timers(struct lpfc_vport *);
+
+ void lpfc_poll_timeout(unsigned long ptr);
+ void lpfc_poll_start_timer(struct lpfc_hba * phba);
+ void lpfc_sli_poll_fcp_ring(struct lpfc_hba * hba);
+ struct lpfc_iocbq * lpfc_sli_get_iocbq(struct lpfc_hba *);
+ void lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
++void __lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
+ uint16_t lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
+
+ void lpfc_reset_barrier(struct lpfc_hba * phba);
+@@ -154,6 +183,7 @@
+ int lpfc_sli_brdreset(struct lpfc_hba *);
+ int lpfc_sli_brdrestart(struct lpfc_hba *);
+ int lpfc_sli_hba_setup(struct lpfc_hba *);
++int lpfc_sli_host_down(struct lpfc_vport *);
+ int lpfc_sli_hba_down(struct lpfc_hba *);
+ int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
+ int lpfc_sli_handle_mb_event(struct lpfc_hba *);
+@@ -164,12 +194,17 @@
+ int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_iocbq *, uint32_t);
+ void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
+-int lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
++void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
+ int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_dmabuf *);
+ struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *,
+ struct lpfc_sli_ring *,
+ dma_addr_t);
++int lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *, uint32_t);
++int lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *, uint32_t);
++void lpfc_sli_hbqbuf_free_all(struct lpfc_hba *);
++struct hbq_dmabuf *lpfc_sli_hbqbuf_find(struct lpfc_hba *, uint32_t);
++int lpfc_sli_hbq_size(void);
+ int lpfc_sli_issue_abort_iotag(struct lpfc_hba *, struct lpfc_sli_ring *,
+ struct lpfc_iocbq *);
+ int lpfc_sli_sum_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t,
+@@ -180,8 +215,12 @@
+ void lpfc_mbox_timeout(unsigned long);
+ void lpfc_mbox_timeout_handler(struct lpfc_hba *);
+
+-struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_hba *, uint32_t);
+-struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_hba *, struct lpfc_name *);
++struct lpfc_nodelist *__lpfc_find_node(struct lpfc_vport *, node_filter,
++ void *);
++struct lpfc_nodelist *lpfc_find_node(struct lpfc_vport *, node_filter, void *);
++struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_vport *, uint32_t);
++struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *,
++ struct lpfc_name *);
+
+ int lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
+ uint32_t timeout);
+@@ -195,25 +234,56 @@
+ struct lpfc_iocbq * cmdiocb,
+ struct lpfc_iocbq * rspiocb);
+
++void *lpfc_hbq_alloc(struct lpfc_hba *, int, dma_addr_t *);
++void lpfc_hbq_free(struct lpfc_hba *, void *, dma_addr_t);
++void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *);
++
+ void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *);
++void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t);
+ void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t);
+
++void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *);
+ /* Function prototypes. */
+ const char* lpfc_info(struct Scsi_Host *);
+-void lpfc_scan_start(struct Scsi_Host *);
+ int lpfc_scan_finished(struct Scsi_Host *, unsigned long);
+
+ void lpfc_get_cfgparam(struct lpfc_hba *);
+-int lpfc_alloc_sysfs_attr(struct lpfc_hba *);
+-void lpfc_free_sysfs_attr(struct lpfc_hba *);
+-extern struct class_device_attribute *lpfc_host_attrs[];
++int lpfc_alloc_sysfs_attr(struct lpfc_vport *);
++void lpfc_free_sysfs_attr(struct lpfc_vport *);
++extern struct class_device_attribute *lpfc_hba_attrs[];
+ extern struct scsi_host_template lpfc_template;
+ extern struct fc_function_template lpfc_transport_functions;
++extern struct fc_function_template lpfc_vport_transport_functions;
++extern int lpfc_sli_mode;
+
+-void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp);
++int lpfc_vport_symbolic_node_name(struct lpfc_vport *, char *, size_t);
+ void lpfc_terminate_rport_io(struct fc_rport *);
+ void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport);
+
++struct lpfc_vport *lpfc_create_port(struct lpfc_hba *, int, struct fc_vport *);
++int lpfc_vport_disable(struct fc_vport *fc_vport, bool disable);
++void lpfc_mbx_unreg_vpi(struct lpfc_vport *);
++void destroy_port(struct lpfc_vport *);
++int lpfc_get_instance(void);
++void lpfc_host_attrib_init(struct Scsi_Host *);
++
++extern void lpfc_debugfs_initialize(struct lpfc_vport *);
++extern void lpfc_debugfs_terminate(struct lpfc_vport *);
++extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t,
++ uint32_t, uint32_t);
++
++/* Interface exported by fabric iocb scheduler */
++int lpfc_issue_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
++void lpfc_fabric_abort_vport(struct lpfc_vport *);
++void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
++void lpfc_fabric_abort_hba(struct lpfc_hba *);
++void lpfc_fabric_abort_flogi(struct lpfc_hba *);
++void lpfc_fabric_block_timeout(unsigned long);
++void lpfc_unblock_fabric_iocbs(struct lpfc_hba *);
++void lpfc_adjust_queue_depth(struct lpfc_hba *);
++void lpfc_ramp_down_queue_handler(struct lpfc_hba *);
++void lpfc_ramp_up_queue_handler(struct lpfc_hba *);
++
+ #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
+ #define HBA_EVENT_RSCN 5
+ #define HBA_EVENT_LINK_UP 2
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c 2007-12-21 15:36:12.000000000 -0500
+@@ -40,6 +40,8 @@
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
+ #include "lpfc_version.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+
+ #define HBA_PORTSPEED_UNKNOWN 0 /* Unknown - transceiver
+ * incapable of reporting */
+@@ -58,24 +60,68 @@
+ /*
+ * lpfc_ct_unsol_event
+ */
++static void
++lpfc_ct_unsol_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
++ struct lpfc_dmabuf *mp, uint32_t size)
++{
++ if (!mp) {
++ printk(KERN_ERR "%s (%d): Unsolicited CT, no buffer, "
++ "piocbq = %p, status = x%x, mp = %p, size = %d\n",
++ __FUNCTION__, __LINE__,
++ piocbq, piocbq->iocb.ulpStatus, mp, size);
++ }
++
++ printk(KERN_ERR "%s (%d): Ignoring unsolicited CT piocbq = %p, "
++ "buffer = %p, size = %d, status = x%x\n",
++ __FUNCTION__, __LINE__,
++ piocbq, mp, size,
++ piocbq->iocb.ulpStatus);
++
++}
++
++static void
++lpfc_ct_ignore_hbq_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
++ struct lpfc_dmabuf *mp, uint32_t size)
++{
++ if (!mp) {
++ printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++ "HBQ buffer, piocbq = %p, status = x%x\n",
++ __FUNCTION__, __LINE__,
++ piocbq, piocbq->iocb.ulpStatus);
++ } else {
++ lpfc_ct_unsol_buffer(phba, piocbq, mp, size);
++ printk(KERN_ERR "%s (%d): Ignoring unsolicited CT "
++ "piocbq = %p, buffer = %p, size = %d, "
++ "status = x%x\n",
++ __FUNCTION__, __LINE__,
++ piocbq, mp, size, piocbq->iocb.ulpStatus);
++ }
++}
++
+ void
+-lpfc_ct_unsol_event(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocbq)
++lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocbq)
+ {
+
+- struct lpfc_iocbq *next_piocbq;
+- struct lpfc_dmabuf *pmbuf = NULL;
+- struct lpfc_dmabuf *matp, *next_matp;
+- uint32_t ctx = 0, size = 0, cnt = 0;
++ struct lpfc_dmabuf *mp = NULL;
+ IOCB_t *icmd = &piocbq->iocb;
+- IOCB_t *save_icmd = icmd;
+- int i, go_exit = 0;
+- struct list_head head;
++ int i;
++ struct lpfc_iocbq *iocbq;
++ dma_addr_t paddr;
++ uint32_t size;
++ struct lpfc_dmabuf *bdeBuf1 = piocbq->context2;
++ struct lpfc_dmabuf *bdeBuf2 = piocbq->context3;
++
++ piocbq->context2 = NULL;
++ piocbq->context3 = NULL;
+
+- if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ if (unlikely(icmd->ulpStatus == IOSTAT_NEED_BUFFER)) {
++ lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
++ } else if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+ ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
+ /* Not enough posted buffers; Try posting more buffers */
+ phba->fc_stat.NoRcvBuf++;
++ if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED))
+ lpfc_post_buffer(phba, pring, 0, 1);
+ return;
+ }
+@@ -86,66 +132,56 @@
+ if (icmd->ulpBdeCount == 0)
+ return;
+
+- INIT_LIST_HEAD(&head);
+- list_add_tail(&head, &piocbq->list);
+-
+- list_for_each_entry_safe(piocbq, next_piocbq, &head, list) {
+- icmd = &piocbq->iocb;
+- if (ctx == 0)
+- ctx = (uint32_t) (icmd->ulpContext);
+- if (icmd->ulpBdeCount == 0)
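++ /*
++ * With HBQs enabled the firmware hands back the receive buffers
++ * directly (bdeBuf1/bdeBuf2 above), so each BDE is logged and
++ * freed in place; otherwise buffers are recovered from the ring
++ * by DMA address, as before.
++ */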
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++ list_for_each_entry(iocbq, &piocbq->list, list) {
++ icmd = &iocbq->iocb;
++ if (icmd->ulpBdeCount == 0) {
++ printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++ "BDE, iocbq = %p, status = x%x\n",
++ __FUNCTION__, __LINE__,
++ iocbq, iocbq->iocb.ulpStatus);
+ continue;
+-
+- for (i = 0; i < icmd->ulpBdeCount; i++) {
+- matp = lpfc_sli_ringpostbuf_get(phba, pring,
+- getPaddr(icmd->un.
+- cont64[i].
+- addrHigh,
+- icmd->un.
+- cont64[i].
+- addrLow));
+- if (!matp) {
+- /* Insert lpfc log message here */
+- lpfc_post_buffer(phba, pring, cnt, 1);
+- go_exit = 1;
+- goto ct_unsol_event_exit_piocbq;
+- }
+-
+- /* Typically for Unsolicited CT requests */
+- if (!pmbuf) {
+- pmbuf = matp;
+- INIT_LIST_HEAD(&pmbuf->list);
+- } else
+- list_add_tail(&matp->list, &pmbuf->list);
+-
+- size += icmd->un.cont64[i].tus.f.bdeSize;
+- cnt++;
+ }
+
+- icmd->ulpBdeCount = 0;
++ size = icmd->un.cont64[0].tus.f.bdeSize;
++ lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf1, size);
++ lpfc_in_buf_free(phba, bdeBuf1);
++ if (icmd->ulpBdeCount == 2) {
++ lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf2,
++ size);
++ lpfc_in_buf_free(phba, bdeBuf2);
++ }
+ }
++ } else {
++ struct lpfc_iocbq *next;
+
+- lpfc_post_buffer(phba, pring, cnt, 1);
+- if (save_icmd->ulpStatus) {
+- go_exit = 1;
++ list_for_each_entry_safe(iocbq, next, &piocbq->list, list) {
++ icmd = &iocbq->iocb;
++ if (icmd->ulpBdeCount == 0) {
++ printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++ "BDE, iocbq = %p, status = x%x\n",
++ __FUNCTION__, __LINE__,
++ iocbq, iocbq->iocb.ulpStatus);
++ continue;
+ }
+
+-ct_unsol_event_exit_piocbq:
+- list_del(&head);
+- if (pmbuf) {
+- list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) {
+- lpfc_mbuf_free(phba, matp->virt, matp->phys);
+- list_del(&matp->list);
+- kfree(matp);
++ for (i = 0; i < icmd->ulpBdeCount; i++) {
++ paddr = getPaddr(icmd->un.cont64[i].addrHigh,
++ icmd->un.cont64[i].addrLow);
++ mp = lpfc_sli_ringpostbuf_get(phba, pring,
++ paddr);
++ size = icmd->un.cont64[i].tus.f.bdeSize;
++ lpfc_ct_unsol_buffer(phba, piocbq, mp, size);
++ lpfc_in_buf_free(phba, mp);
++ }
++ list_del(&iocbq->list);
++ lpfc_sli_release_iocbq(phba, iocbq);
+ }
+- lpfc_mbuf_free(phba, pmbuf->virt, pmbuf->phys);
+- kfree(pmbuf);
+ }
+- return;
+ }
+
+ static void
+-lpfc_free_ct_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mlist)
++lpfc_free_ct_rsp(struct lpfc_hba *phba, struct lpfc_dmabuf *mlist)
+ {
+ struct lpfc_dmabuf *mlast, *next_mlast;
+
+@@ -160,7 +196,7 @@
+ }
+
+ static struct lpfc_dmabuf *
+-lpfc_alloc_ct_rsp(struct lpfc_hba * phba, int cmdcode, struct ulp_bde64 * bpl,
++lpfc_alloc_ct_rsp(struct lpfc_hba *phba, int cmdcode, struct ulp_bde64 *bpl,
+ uint32_t size, int *entries)
+ {
+ struct lpfc_dmabuf *mlist = NULL;
+@@ -181,7 +217,8 @@
+
+ INIT_LIST_HEAD(&mp->list);
+
+- if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT))
++ if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT) ||
++ cmdcode == be16_to_cpu(SLI_CTNS_GFF_ID))
+ mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
+ else
+ mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys));
+@@ -201,8 +238,8 @@
+
+ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+ /* build buffer ptr list for IOCB */
+- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
+- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
+ bpl->tus.f.bdeSize = (uint16_t) cnt;
+ bpl->tus.w = le32_to_cpu(bpl->tus.w);
+ bpl++;
+@@ -215,24 +252,49 @@
+ return mlist;
+ }
+
++int
++lpfc_ct_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *ctiocb)
++{
++ struct lpfc_dmabuf *buf_ptr;
++
++ if (ctiocb->context1) {
++ buf_ptr = (struct lpfc_dmabuf *) ctiocb->context1;
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ ctiocb->context1 = NULL;
++ }
++ if (ctiocb->context2) {
++ lpfc_free_ct_rsp(phba, (struct lpfc_dmabuf *) ctiocb->context2);
++ ctiocb->context2 = NULL;
++ }
++
++ if (ctiocb->context3) {
++ buf_ptr = (struct lpfc_dmabuf *) ctiocb->context3;
++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++ kfree(buf_ptr);
++ ctiocb->context3 = NULL;
++ }
++ lpfc_sli_release_iocbq(phba, ctiocb);
++ return 0;
++}
++
+ static int
+-lpfc_gen_req(struct lpfc_hba *phba, struct lpfc_dmabuf *bmp,
++lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
+ struct lpfc_dmabuf *inp, struct lpfc_dmabuf *outp,
+ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ struct lpfc_iocbq *),
+ struct lpfc_nodelist *ndlp, uint32_t usr_flg, uint32_t num_entry,
+- uint32_t tmo)
++ uint32_t tmo, uint8_t retry)
+ {
+-
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ IOCB_t *icmd;
+ struct lpfc_iocbq *geniocb;
++ int rc;
+
+ /* Allocate buffer for command iocb */
+- spin_lock_irq(phba->host->host_lock);
+ geniocb = lpfc_sli_get_iocbq(phba);
+- spin_unlock_irq(phba->host->host_lock);
+
+ if (geniocb == NULL)
+ return 1;
+@@ -272,31 +334,40 @@
+ icmd->ulpClass = CLASS3;
+ icmd->ulpContext = ndlp->nlp_rpi;
+
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ /* For GEN_REQUEST64_CR, use the RPI */
++ icmd->ulpCt_h = 0;
++ icmd->ulpCt_l = 0;
++ }
++
+ /* Issue GEN REQ IOCB for NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0119 Issue GEN REQ IOCB for NPORT x%x "
+- "Data: x%x x%x\n", phba->brd_no, icmd->un.ulpWord[5],
+- icmd->ulpIoTag, phba->hba_state);
++ "%d (%d):0119 Issue GEN REQ IOCB to NPORT x%x "
++ "Data: x%x x%x\n", phba->brd_no, vport->vpi,
++ ndlp->nlp_DID, icmd->ulpIoTag,
++ vport->port_state);
+ geniocb->iocb_cmpl = cmpl;
+ geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT;
+- spin_lock_irq(phba->host->host_lock);
+- if (lpfc_sli_issue_iocb(phba, pring, geniocb, 0) == IOCB_ERROR) {
++ geniocb->vport = vport;
++ geniocb->retry = retry;
++ rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0);
++
++ if (rc == IOCB_ERROR) {
+ lpfc_sli_release_iocbq(phba, geniocb);
+- spin_unlock_irq(phba->host->host_lock);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+
+ return 0;
+ }
+
+ static int
+-lpfc_ct_cmd(struct lpfc_hba *phba, struct lpfc_dmabuf *inmp,
++lpfc_ct_cmd(struct lpfc_vport *vport, struct lpfc_dmabuf *inmp,
+ struct lpfc_dmabuf *bmp, struct lpfc_nodelist *ndlp,
+ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ struct lpfc_iocbq *),
+- uint32_t rsp_size)
++ uint32_t rsp_size, uint8_t retry)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct ulp_bde64 *bpl = (struct ulp_bde64 *) bmp->virt;
+ struct lpfc_dmabuf *outmp;
+ int cnt = 0, status;
+@@ -310,8 +381,8 @@
+ if (!outmp)
+ return -ENOMEM;
+
+- status = lpfc_gen_req(phba, bmp, inmp, outmp, cmpl, ndlp, 0,
+- cnt+1, 0);
++ status = lpfc_gen_req(vport, bmp, inmp, outmp, cmpl, ndlp, 0,
++ cnt+1, 0, retry);
+ if (status) {
+ lpfc_free_ct_rsp(phba, outmp);
+ return -ENOMEM;
+@@ -319,20 +390,35 @@
+ return 0;
+ }
+
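++/* Find the vport on this HBA, if any, that owns the given DID. */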
++static struct lpfc_vport *
++lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did)
++{
++ struct lpfc_vport *vport_curr;
++
++ list_for_each_entry(vport_curr, &phba->port_list, listentry) {
++ if ((vport_curr->fc_myDID) &&
++ (vport_curr->fc_myDID == did))
++ return vport_curr;
++ }
++
++ return NULL;
++}
++
+ static int
+-lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size)
++lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ct_request *Response =
+ (struct lpfc_sli_ct_request *) mp->virt;
+ struct lpfc_nodelist *ndlp = NULL;
+ struct lpfc_dmabuf *mlast, *next_mp;
+ uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
+- uint32_t Did;
+- uint32_t CTentry;
++ uint32_t Did, CTentry;
+ int Cnt;
+ struct list_head head;
+
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
++ vport->num_disc_nodes = 0;
+
+
+ list_add_tail(&head, &mp->list);
+@@ -350,39 +436,96 @@
+
+ /* Loop through entire NameServer list of DIDs */
+ while (Cnt >= sizeof (uint32_t)) {
+-
+ /* Get next DID from NameServer List */
+ CTentry = *ctptr++;
+ Did = ((be32_to_cpu(CTentry)) & Mask_DID);
+
+ ndlp = NULL;
+- if (Did != phba->fc_myDID) {
+- /* Check for rscn processing or not */
+- ndlp = lpfc_setup_disc_node(phba, Did);
+- }
+- /* Mark all node table entries that are in the
+- Nameserver */
++
++ /*
++ * Check for rscn processing or not
++ * To conserve rpi's, filter out addresses for other
++ * vports on the same physical HBAs.
++ */
++ if ((Did != vport->fc_myDID) &&
++ ((lpfc_find_vport_by_did(phba, Did) == NULL) ||
++ phba->cfg_peer_port_login)) {
++ if ((vport->port_type != LPFC_NPIV_PORT) ||
++ (vport->fc_flag & FC_RFF_NOT_SUPPORTED) ||
++ (!phba->cfg_vport_restrict_login)) {
++ ndlp = lpfc_setup_disc_node(vport, Did);
+ if (ndlp) {
+- /* NameServer Rsp */
+- lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0238 Process x%x NameServer"
+- " Rsp Data: x%x x%x x%x\n",
+- phba->brd_no,
++ lpfc_debugfs_disc_trc(vport,
++ LPFC_DISC_TRC_CT,
++ "Parse GID_FTrsp: "
++ "did:x%x flg:x%x x%x",
+ Did, ndlp->nlp_flag,
+- phba->fc_flag,
+- phba->fc_rscn_id_cnt);
++ vport->fc_flag);
++
++ lpfc_printf_log(phba, KERN_INFO,
++ LOG_DISCOVERY,
++ "%d (%d):0238 Process "
++ "x%x NameServer Rsp"
++ "Data: x%x x%x x%x\n",
++ phba->brd_no,
++ vport->vpi, Did,
++ ndlp->nlp_flag,
++ vport->fc_flag,
++ vport->fc_rscn_id_cnt);
+ } else {
+- /* NameServer Rsp */
+- lpfc_printf_log(phba,
+- KERN_INFO,
++ lpfc_debugfs_disc_trc(vport,
++ LPFC_DISC_TRC_CT,
++ "Skip1 GID_FTrsp: "
++ "did:x%x flg:x%x cnt:%d",
++ Did, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
++
++ lpfc_printf_log(phba, KERN_INFO,
+ LOG_DISCOVERY,
+- "%d:0239 Skip x%x NameServer "
+- "Rsp Data: x%x x%x x%x\n",
++ "%d (%d):0239 Skip x%x "
++ "NameServer Rsp Data: "
++ "x%x x%x\n",
+ phba->brd_no,
+- Did, Size, phba->fc_flag,
+- phba->fc_rscn_id_cnt);
++ vport->vpi, Did,
++ vport->fc_flag,
++ vport->fc_rscn_id_cnt);
+ }
+
++ } else {
++ if (!(vport->fc_flag & FC_RSCN_MODE) ||
++ (lpfc_rscn_payload_check(vport, Did))) {
++ lpfc_debugfs_disc_trc(vport,
++ LPFC_DISC_TRC_CT,
++ "Query GID_FTrsp: "
++ "did:x%x flg:x%x cnt:%d",
++ Did, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
++
++ if (lpfc_ns_cmd(vport,
++ SLI_CTNS_GFF_ID,
++ 0, Did) == 0)
++ vport->num_disc_nodes++;
++ } else {
++ lpfc_debugfs_disc_trc(vport,
++ LPFC_DISC_TRC_CT,
++ "Skip2 GID_FTrsp: "
++ "did:x%x flg:x%x cnt:%d",
++ Did, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
++
++ lpfc_printf_log(phba, KERN_INFO,
++ LOG_DISCOVERY,
++ "%d (%d):0245 Skip x%x "
++ "NameServer Rsp Data: "
++ "x%x x%x\n",
++ phba->brd_no,
++ vport->vpi, Did,
++ vport->fc_flag,
++ vport->fc_rscn_id_cnt);
++ }
++ }
++ }
+ if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY)))
+ goto nsout1;
+ Cnt -= sizeof (uint32_t);
+@@ -393,190 +536,369 @@
+
+ nsout1:
+ list_del(&head);
+-
+- /*
+- * The driver has cycled through all Nports in the RSCN payload.
+- * Complete the handling by cleaning up and marking the
+- * current driver state.
+- */
+- if (phba->hba_state == LPFC_HBA_READY) {
+- lpfc_els_flush_rscn(phba);
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */
+- spin_unlock_irq(phba->host->host_lock);
+- }
+ return 0;
+ }
+
+-
+-
+-
+ static void
+-lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+- struct lpfc_sli *psli;
+ struct lpfc_dmabuf *bmp;
+- struct lpfc_dmabuf *inp;
+ struct lpfc_dmabuf *outp;
+- struct lpfc_nodelist *ndlp;
+ struct lpfc_sli_ct_request *CTrsp;
++ int rc;
+
+- psli = &phba->sli;
+ /* we pass cmdiocb to state machine which needs rspiocb as well */
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+
+- inp = (struct lpfc_dmabuf *) cmdiocb->context1;
+ outp = (struct lpfc_dmabuf *) cmdiocb->context2;
+ bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
+-
+ irsp = &rspiocb->iocb;
+- if (irsp->ulpStatus) {
+- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) ||
+- (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) {
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "GID_FT cmpl: status:x%x/x%x rtry:%d",
++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_ns_retry);
++
++ /* Don't bother processing response if vport is being torn down. */
++ if (vport->load_flag & FC_UNLOADING)
++ goto out;
++
++
++ if (lpfc_els_chk_latt(vport) || lpfc_error_lost_link(irsp)) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0216 Link event during NS query\n",
++ phba->brd_no, vport->vpi);
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+ goto out;
+ }
+
++ if (irsp->ulpStatus) {
+ /* Check for retry */
+- if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
+- phba->fc_ns_retry++;
++ if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++ if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
++ (irsp->un.ulpWord[4] != IOERR_NO_RESOURCES))
++ vport->fc_ns_retry++;
+ /* CT command is being retried */
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
+- if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) ==
+- 0) {
++ rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT,
++ vport->fc_ns_retry, 0);
++ if (rc == 0)
+ goto out;
+ }
+- }
+- }
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0257 GID_FT Query error: 0x%x 0x%x\n",
++ phba->brd_no, vport->vpi, irsp->ulpStatus,
++ vport->fc_ns_retry);
+ } else {
+ /* Good status, continue checking */
+ CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
+ if (CTrsp->CommandResponse.bits.CmdRsp ==
+ be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0208 NameServer Rsp "
++ "%d (%d):0208 NameServer Rsp "
+ "Data: x%x\n",
+- phba->brd_no,
+- phba->fc_flag);
+- lpfc_ns_rsp(phba, outp,
++ phba->brd_no, vport->vpi,
++ vport->fc_flag);
++ lpfc_ns_rsp(vport, outp,
+ (uint32_t) (irsp->un.genreq64.bdl.bdeSize));
+ } else if (CTrsp->CommandResponse.bits.CmdRsp ==
+ be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
+ /* NameServer Rsp Error */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0240 NameServer Rsp Error "
++ "%d (%d):0240 NameServer Rsp Error "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t) CTrsp->ReasonCode,
+ (uint32_t) CTrsp->Explanation,
+- phba->fc_flag);
++ vport->fc_flag);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "GID_FT rsp err1 cmd:x%x rsn:x%x exp:x%x",
++ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
++ (uint32_t) CTrsp->ReasonCode,
++ (uint32_t) CTrsp->Explanation);
++
+ } else {
+ /* NameServer Rsp Error */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0241 NameServer Rsp Error "
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0241 NameServer Rsp Error "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ CTrsp->CommandResponse.bits.CmdRsp,
+ (uint32_t) CTrsp->ReasonCode,
+ (uint32_t) CTrsp->Explanation,
+- phba->fc_flag);
++ vport->fc_flag);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "GID_FT rsp err2 cmd:x%x rsn:x%x exp:x%x",
++ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
++ (uint32_t) CTrsp->ReasonCode,
++ (uint32_t) CTrsp->Explanation);
+ }
+ }
+ /* Link up / RSCN discovery */
+- lpfc_disc_start(phba);
++ if (vport->num_disc_nodes == 0) {
++ /*
++ * The driver has cycled through all Nports in the RSCN payload.
++ * Complete the handling by cleaning up and marking the
++ * current driver state.
++ */
++ if (vport->port_state >= LPFC_DISC_AUTH) {
++ if (vport->fc_flag & FC_RSCN_MODE) {
++ lpfc_els_flush_rscn(vport);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
++ spin_unlock_irq(shost->host_lock);
++ } else
++ lpfc_els_flush_rscn(vport);
++ }
++
++ lpfc_disc_start(vport);
++ }
+ out:
+- lpfc_free_ct_rsp(phba, outp);
+- lpfc_mbuf_free(phba, inp->virt, inp->phys);
+- lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+- kfree(inp);
+- kfree(bmp);
+- spin_lock_irq(phba->host->host_lock);
+- lpfc_sli_release_iocbq(phba, cmdiocb);
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_ct_free_iocb(phba, cmdiocb);
++ return;
++}
++
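++/*
++ * GFF_ID completion handler: inspect the FC-4 feature bits returned for
++ * the queried DID and skip ports that registered as FCP initiators only;
++ * targets, unregistered ports and failed queries fall through to normal
++ * discovery node setup.
++ */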
++void
++lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ IOCB_t *irsp = &rspiocb->iocb;
++ struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *) cmdiocb->context1;
++ struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *) cmdiocb->context2;
++ struct lpfc_sli_ct_request *CTrsp;
++ int did;
++ uint8_t fbits;
++ struct lpfc_nodelist *ndlp;
++
++ did = ((struct lpfc_sli_ct_request *) inp->virt)->un.gff.PortId;
++ did = be32_to_cpu(did);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "GFF_ID cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4], did);
++
++ if (irsp->ulpStatus == IOSTAT_SUCCESS) {
++ /* Good status, continue checking */
++ CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
++ fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET];
++
++ if (CTrsp->CommandResponse.bits.CmdRsp ==
++ be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
++ if ((fbits & FC4_FEATURE_INIT) &&
++ !(fbits & FC4_FEATURE_TARGET)) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0245 Skip x%x GFF "
++ "NameServer Rsp Data: (init) "
++ "x%x x%x\n", phba->brd_no,
++ vport->vpi, did, fbits,
++ vport->fc_rscn_id_cnt);
++ goto out;
++ }
++ }
++ } else {
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0267 NameServer GFF Rsp"
++ " x%x Error (%d %d) Data: x%x x%x\n",
++ phba->brd_no, vport->vpi, did,
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ vport->fc_flag, vport->fc_rscn_id_cnt);
++ }
++
++ /* This is a target port, unregistered port, or the GFF_ID failed */
++ ndlp = lpfc_setup_disc_node(vport, did);
++ if (ndlp) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0242 Process x%x GFF "
++ "NameServer Rsp Data: x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
++ did, ndlp->nlp_flag, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
++ } else {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0243 Skip x%x GFF "
++ "NameServer Rsp Data: x%x x%x\n",
++ phba->brd_no, vport->vpi, did,
++ vport->fc_flag, vport->fc_rscn_id_cnt);
++ }
++out:
++ /* Link up / RSCN discovery */
++ if (vport->num_disc_nodes)
++ vport->num_disc_nodes--;
++ if (vport->num_disc_nodes == 0) {
++ /*
++ * The driver has cycled through all Nports in the RSCN payload.
++ * Complete the handling by cleaning up and marking the
++ * current driver state.
++ */
++ if (vport->port_state >= LPFC_DISC_AUTH) {
++ if (vport->fc_flag & FC_RSCN_MODE) {
++ lpfc_els_flush_rscn(vport);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
++ spin_unlock_irq(shost->host_lock);
++ } else
++ lpfc_els_flush_rscn(vport);
++ }
++ lpfc_disc_start(vport);
++ }
++ lpfc_ct_free_iocb(phba, cmdiocb);
+ return;
+ }
+
++
+ static void
+-lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- struct lpfc_sli *psli;
+- struct lpfc_dmabuf *bmp;
++ struct lpfc_vport *vport = cmdiocb->vport;
+ struct lpfc_dmabuf *inp;
+ struct lpfc_dmabuf *outp;
+ IOCB_t *irsp;
+ struct lpfc_sli_ct_request *CTrsp;
++ int cmdcode, rc;
++ uint8_t retry;
++ uint32_t latt;
+
+- psli = &phba->sli;
+ /* we pass cmdiocb to state machine which needs rspiocb as well */
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+
+ inp = (struct lpfc_dmabuf *) cmdiocb->context1;
+ outp = (struct lpfc_dmabuf *) cmdiocb->context2;
+- bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
+ irsp = &rspiocb->iocb;
+
++ cmdcode = be16_to_cpu(((struct lpfc_sli_ct_request *) inp->virt)->
++ CommandResponse.bits.CmdRsp);
+ CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
+
++ latt = lpfc_els_chk_latt(vport);
++
+ /* RFT request completes status <ulpStatus> CmdRsp <CmdRsp> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0209 RFT request completes ulpStatus x%x "
+- "CmdRsp x%x\n", phba->brd_no, irsp->ulpStatus,
+- CTrsp->CommandResponse.bits.CmdRsp);
++ "%d (%d):0209 RFT request completes, latt %d, "
++ "ulpStatus x%x CmdRsp x%x, Context x%x, Tag x%x\n",
++ phba->brd_no, vport->vpi, latt, irsp->ulpStatus,
++ CTrsp->CommandResponse.bits.CmdRsp,
++ cmdiocb->iocb.ulpContext, cmdiocb->iocb.ulpIoTag);
+
+- lpfc_free_ct_rsp(phba, outp);
+- lpfc_mbuf_free(phba, inp->virt, inp->phys);
+- lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+- kfree(inp);
+- kfree(bmp);
+- spin_lock_irq(phba->host->host_lock);
+- lpfc_sli_release_iocbq(phba, cmdiocb);
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "CT cmd cmpl: status:x%x/x%x cmd:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4], cmdcode);
++
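++ /*
++ * On failure, retry the same NS command up to LPFC_MAX_NS_RETRY
++ * times, but give up immediately if the SLI went down or the
++ * command was aborted.
++ */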
++ if (irsp->ulpStatus) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0268 NS cmd %x Error (%d %d)\n",
++ phba->brd_no, vport->vpi, cmdcode,
++ irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) ||
++ (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED)))
++ goto out;
++
++ retry = cmdiocb->retry;
++ if (retry >= LPFC_MAX_NS_RETRY)
++ goto out;
++
++ retry++;
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0216 Retrying NS cmd %x\n",
++ phba->brd_no, vport->vpi, cmdcode);
++ rc = lpfc_ns_cmd(vport, cmdcode, retry, 0);
++ if (rc == 0)
++ goto out;
++ }
++
++out:
++ lpfc_ct_free_iocb(phba, cmdiocb);
+ return;
+ }
+
+ static void
+-lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ return;
+ }
+
+ static void
+-lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rspn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ return;
+ }
+
+ static void
+-lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ return;
+ }
+
+-void
+-lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp)
++static void
++lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- char fwrev[16];
++ IOCB_t *irsp = &rspiocb->iocb;
++ struct lpfc_vport *vport = cmdiocb->vport;
+
+- lpfc_decode_firmware_rev(phba, fwrev, 0);
++ if (irsp->ulpStatus != IOSTAT_SUCCESS)
++ vport->fc_flag |= FC_RFF_NOT_SUPPORTED;
+
+- sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName,
+- fwrev, lpfc_release_version);
++ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ return;
+ }
+
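++/*
++ * Format the symbolic port name into the caller's buffer. Appends are
++ * bounded by snprintf, and the return value follows the snprintf
++ * convention: it may exceed 'size' if the name was truncated.
++ */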
++int
++lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol,
++ size_t size)
++{
++ int n;
++ uint8_t *wwn = vport->phba->wwpn;
++
++ n = snprintf(symbol, size,
++ "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
++ wwn[0], wwn[1], wwn[2], wwn[3],
++ wwn[4], wwn[5], wwn[6], wwn[7]);
++
++ if (vport->port_type == LPFC_PHYSICAL_PORT)
++ return n;
++
++ if (n < size)
++ n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi);
++
++ if (n < size && vport->vname)
++ n += snprintf(symbol + n, size - n, " VName-%s", vport->vname);
++ return n;
++}
++
++int
++lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol,
++ size_t size)
++{
++ char fwrev[16];
++ int n;
++
++ lpfc_decode_firmware_rev(vport->phba, fwrev, 0);
++
++ n = snprintf(symbol, size, "Emulex %s FV%s DV%s",
++ vport->phba->ModelName, fwrev, lpfc_release_version);
++ return n;
++}
++
+ /*
+ * lpfc_ns_cmd
+ * Description:
+@@ -585,55 +907,76 @@
+ * LI_CTNS_RFT_ID
+ */
+ int
+-lpfc_ns_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
++ uint8_t retry, uint32_t context)
+ {
++ struct lpfc_nodelist * ndlp;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *mp, *bmp;
+ struct lpfc_sli_ct_request *CtReq;
+ struct ulp_bde64 *bpl;
+ void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ struct lpfc_iocbq *) = NULL;
+ uint32_t rsp_size = 1024;
++ size_t size;
++ int rc = 0;
++
++ ndlp = lpfc_findnode_did(vport, NameServer_DID);
++ if (ndlp == NULL || ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) {
++ rc = 1;
++ goto ns_cmd_exit;
++ }
+
+ /* fill in BDEs for command */
+ /* Allocate buffer for command payload */
+ mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
+- if (!mp)
++ if (!mp) {
++ rc = 2;
+ goto ns_cmd_exit;
++ }
+
+ INIT_LIST_HEAD(&mp->list);
+ mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
+- if (!mp->virt)
++ if (!mp->virt) {
++ rc = 3;
+ goto ns_cmd_free_mp;
++ }
+
+ /* Allocate buffer for Buffer ptr list */
+ bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
+- if (!bmp)
++ if (!bmp) {
++ rc = 4;
+ goto ns_cmd_free_mpvirt;
++ }
+
+ INIT_LIST_HEAD(&bmp->list);
+ bmp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(bmp->phys));
+- if (!bmp->virt)
++ if (!bmp->virt) {
++ rc = 5;
+ goto ns_cmd_free_bmp;
++ }
+
+ /* NameServer Req */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0236 NameServer Req Data: x%x x%x x%x\n",
+- phba->brd_no, cmdcode, phba->fc_flag,
+- phba->fc_rscn_id_cnt);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0236 NameServer Req Data: x%x x%x x%x\n",
++ phba->brd_no, vport->vpi, cmdcode, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
+
+ bpl = (struct ulp_bde64 *) bmp->virt;
+ memset(bpl, 0, sizeof(struct ulp_bde64));
+- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
+- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
+ bpl->tus.f.bdeFlags = 0;
+ if (cmdcode == SLI_CTNS_GID_FT)
+ bpl->tus.f.bdeSize = GID_REQUEST_SZ;
++ else if (cmdcode == SLI_CTNS_GFF_ID)
++ bpl->tus.f.bdeSize = GFF_REQUEST_SZ;
+ else if (cmdcode == SLI_CTNS_RFT_ID)
+ bpl->tus.f.bdeSize = RFT_REQUEST_SZ;
+ else if (cmdcode == SLI_CTNS_RNN_ID)
+ bpl->tus.f.bdeSize = RNN_REQUEST_SZ;
++ else if (cmdcode == SLI_CTNS_RSPN_ID)
++ bpl->tus.f.bdeSize = RSPN_REQUEST_SZ;
+ else if (cmdcode == SLI_CTNS_RSNN_NN)
+ bpl->tus.f.bdeSize = RSNN_REQUEST_SZ;
+ else if (cmdcode == SLI_CTNS_RFF_ID)
+@@ -654,56 +997,78 @@
+ CtReq->CommandResponse.bits.CmdRsp =
+ be16_to_cpu(SLI_CTNS_GID_FT);
+ CtReq->un.gid.Fc4Type = SLI_CTPT_FCP;
+- if (phba->hba_state < LPFC_HBA_READY)
+- phba->hba_state = LPFC_NS_QRY;
+- lpfc_set_disctmo(phba);
++ if (vport->port_state < LPFC_NS_QRY)
++ vport->port_state = LPFC_NS_QRY;
++ lpfc_set_disctmo(vport);
+ cmpl = lpfc_cmpl_ct_cmd_gid_ft;
+ rsp_size = FC_MAX_NS_RSP;
+ break;
+
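++ /*
++ * GFF_ID asks the name server for the FC-4 features of a single
++ * remote port; the port being queried arrives in 'context'.
++ */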
++ case SLI_CTNS_GFF_ID:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_GFF_ID);
++ CtReq->un.gff.PortId = be32_to_cpu(context);
++ cmpl = lpfc_cmpl_ct_cmd_gff_id;
++ break;
++
+ case SLI_CTNS_RFT_ID:
+ CtReq->CommandResponse.bits.CmdRsp =
+ be16_to_cpu(SLI_CTNS_RFT_ID);
+- CtReq->un.rft.PortId = be32_to_cpu(phba->fc_myDID);
++ CtReq->un.rft.PortId = be32_to_cpu(vport->fc_myDID);
+ CtReq->un.rft.fcpReg = 1;
+ cmpl = lpfc_cmpl_ct_cmd_rft_id;
+ break;
+
+- case SLI_CTNS_RFF_ID:
+- CtReq->CommandResponse.bits.CmdRsp =
+- be16_to_cpu(SLI_CTNS_RFF_ID);
+- CtReq->un.rff.PortId = be32_to_cpu(phba->fc_myDID);
+- CtReq->un.rff.feature_res = 0;
+- CtReq->un.rff.feature_tgt = 0;
+- CtReq->un.rff.type_code = FC_FCP_DATA;
+- CtReq->un.rff.feature_init = 1;
+- cmpl = lpfc_cmpl_ct_cmd_rff_id;
+- break;
+-
+ case SLI_CTNS_RNN_ID:
+ CtReq->CommandResponse.bits.CmdRsp =
+ be16_to_cpu(SLI_CTNS_RNN_ID);
+- CtReq->un.rnn.PortId = be32_to_cpu(phba->fc_myDID);
+- memcpy(CtReq->un.rnn.wwnn, &phba->fc_nodename,
++ CtReq->un.rnn.PortId = be32_to_cpu(vport->fc_myDID);
++ memcpy(CtReq->un.rnn.wwnn, &vport->fc_nodename,
+ sizeof (struct lpfc_name));
+ cmpl = lpfc_cmpl_ct_cmd_rnn_id;
+ break;
+
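++ /*
++ * RSPN_ID and RSNN_NN register the symbolic port and node names
++ * built by the lpfc_vport_symbolic_*_name() helpers, truncated
++ * to the CT payload field.
++ */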
++ case SLI_CTNS_RSPN_ID:
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_RSPN_ID);
++ CtReq->un.rspn.PortId = be32_to_cpu(vport->fc_myDID);
++ size = sizeof(CtReq->un.rspn.symbname);
++ CtReq->un.rspn.len =
++ lpfc_vport_symbolic_port_name(vport,
++ CtReq->un.rspn.symbname, size);
++ cmpl = lpfc_cmpl_ct_cmd_rspn_id;
++ break;
+ case SLI_CTNS_RSNN_NN:
+ CtReq->CommandResponse.bits.CmdRsp =
+ be16_to_cpu(SLI_CTNS_RSNN_NN);
+- memcpy(CtReq->un.rsnn.wwnn, &phba->fc_nodename,
++ memcpy(CtReq->un.rsnn.wwnn, &vport->fc_nodename,
+ sizeof (struct lpfc_name));
+- lpfc_get_hba_sym_node_name(phba, CtReq->un.rsnn.symbname);
+- CtReq->un.rsnn.len = strlen(CtReq->un.rsnn.symbname);
++ size = sizeof(CtReq->un.rsnn.symbname);
++ CtReq->un.rsnn.len =
++ lpfc_vport_symbolic_node_name(vport,
++ CtReq->un.rsnn.symbname, size);
+ cmpl = lpfc_cmpl_ct_cmd_rsnn_nn;
+ break;
++ case SLI_CTNS_RFF_ID:
++ vport->fc_flag &= ~FC_RFF_NOT_SUPPORTED;
++ CtReq->CommandResponse.bits.CmdRsp =
++ be16_to_cpu(SLI_CTNS_RFF_ID);
++ CtReq->un.rff.PortId = be32_to_cpu(vport->fc_myDID);
++ CtReq->un.rff.fbits = FC4_FEATURE_INIT;
++ CtReq->un.rff.type_code = FC_FCP_DATA;
++ cmpl = lpfc_cmpl_ct_cmd_rff_id;
++ break;
+ }
+
+- if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, rsp_size))
++ if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, rsp_size, retry)) {
+ /* On success, The cmpl function will free the buffers */
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "Issue CT cmd: cmd:x%x did:x%x",
++ cmdcode, ndlp->nlp_DID, 0);
+ return 0;
++ }
+
++ rc = 6;
+ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+ ns_cmd_free_bmp:
+ kfree(bmp);
+@@ -712,14 +1077,17 @@
+ ns_cmd_free_mp:
+ kfree(mp);
+ ns_cmd_exit:
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0266 Issue NameServer Req x%x err %d Data: x%x x%x\n",
++ phba->brd_no, vport->vpi, cmdcode, rc, vport->fc_flag,
++ vport->fc_rscn_id_cnt);
+ return 1;
+ }
+
+ static void
+-lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq * rspiocb)
+ {
+- struct lpfc_dmabuf *bmp = cmdiocb->context3;
+ struct lpfc_dmabuf *inp = cmdiocb->context1;
+ struct lpfc_dmabuf *outp = cmdiocb->context2;
+ struct lpfc_sli_ct_request *CTrsp = outp->virt;
+@@ -727,48 +1095,60 @@
+ struct lpfc_nodelist *ndlp;
+ uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp;
+ uint16_t fdmi_rsp = CTrsp->CommandResponse.bits.CmdRsp;
++ struct lpfc_vport *vport = cmdiocb->vport;
++ IOCB_t *irsp = &rspiocb->iocb;
++ uint32_t latt;
++
++ latt = lpfc_els_chk_latt(vport);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++ "FDMI cmpl: status:x%x/x%x latt:%d",
++ irsp->ulpStatus, irsp->un.ulpWord[4], latt);
+
+- ndlp = lpfc_findnode_did(phba, FDMI_DID);
++ if (latt || irsp->ulpStatus) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0229 FDMI cmd %04x failed, latt = %d "
++ "ulpStatus: x%x, rid x%x\n",
++ phba->brd_no, vport->vpi,
++ be16_to_cpu(fdmi_cmd), latt, irsp->ulpStatus,
++ irsp->un.ulpWord[4]);
++ lpfc_ct_free_iocb(phba, cmdiocb);
++ return;
++ }
++
++ ndlp = lpfc_findnode_did(vport, FDMI_DID);
+ if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
+ /* FDMI rsp failed */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0220 FDMI rsp failed Data: x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0220 FDMI rsp failed Data: x%x\n",
++ phba->brd_no, vport->vpi,
+ be16_to_cpu(fdmi_cmd));
+ }
+
+ switch (be16_to_cpu(fdmi_cmd)) {
+ case SLI_MGMT_RHBA:
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RPA);
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RPA);
+ break;
+
+ case SLI_MGMT_RPA:
+ break;
+
+ case SLI_MGMT_DHBA:
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DPRT);
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DPRT);
+ break;
+
+ case SLI_MGMT_DPRT:
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RHBA);
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RHBA);
+ break;
+ }
+-
+- lpfc_free_ct_rsp(phba, outp);
+- lpfc_mbuf_free(phba, inp->virt, inp->phys);
+- lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+- kfree(inp);
+- kfree(bmp);
+- spin_lock_irq(phba->host->host_lock);
+- lpfc_sli_release_iocbq(phba, cmdiocb);
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_ct_free_iocb(phba, cmdiocb);
+ return;
+ }
++
+ int
+-lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *mp, *bmp;
+ struct lpfc_sli_ct_request *CtReq;
+ struct ulp_bde64 *bpl;
+@@ -805,12 +1185,10 @@
+ INIT_LIST_HEAD(&bmp->list);
+
+ /* FDMI request */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0218 FDMI Request Data: x%x x%x x%x\n",
+- phba->brd_no,
+- phba->fc_flag, phba->hba_state, cmdcode);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0218 FDMI Request Data: x%x x%x x%x\n",
++ phba->brd_no, vport->vpi, vport->fc_flag,
++ vport->port_state, cmdcode);
+
+ CtReq = (struct lpfc_sli_ct_request *) mp->virt;
+
+@@ -833,11 +1211,11 @@
+ be16_to_cpu(SLI_MGMT_RHBA);
+ CtReq->CommandResponse.bits.Size = 0;
+ rh = (REG_HBA *) & CtReq->un.PortID;
+- memcpy(&rh->hi.PortName, &phba->fc_sparam.portName,
++ memcpy(&rh->hi.PortName, &vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+ /* One entry (port) per adapter */
+ rh->rpl.EntryCnt = be32_to_cpu(1);
+- memcpy(&rh->rpl.pe, &phba->fc_sparam.portName,
++ memcpy(&rh->rpl.pe, &vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+
+ /* point to the HBA attribute block */
+@@ -853,7 +1231,7 @@
+ ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME);
+ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES
+ + sizeof (struct lpfc_name));
+- memcpy(&ae->un.NodeName, &phba->fc_sparam.nodeName,
++ memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName,
+ sizeof (struct lpfc_name));
+ ab->EntryCnt++;
+ size += FOURBYTES + sizeof (struct lpfc_name);
+@@ -991,7 +1369,7 @@
+ pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID;
+ size = sizeof (struct lpfc_name) + FOURBYTES;
+ memcpy((uint8_t *) & pab->PortName,
+- (uint8_t *) & phba->fc_sparam.portName,
++ (uint8_t *) & vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+ pab->ab.EntryCnt = 0;
+
+@@ -1053,7 +1431,7 @@
+ ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
+ ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE);
+ ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
+- hsp = (struct serv_parm *) & phba->fc_sparam;
++ hsp = (struct serv_parm *) & vport->fc_sparam;
+ ae->un.MaxFrameSize =
+ (((uint32_t) hsp->cmn.
+ bbRcvSizeMsb) << 8) | (uint32_t) hsp->cmn.
+@@ -1097,7 +1475,7 @@
+ CtReq->CommandResponse.bits.Size = 0;
+ pe = (PORT_ENTRY *) & CtReq->un.PortID;
+ memcpy((uint8_t *) & pe->PortName,
+- (uint8_t *) & phba->fc_sparam.portName,
++ (uint8_t *) & vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+ size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
+ break;
+@@ -1107,22 +1485,22 @@
+ CtReq->CommandResponse.bits.Size = 0;
+ pe = (PORT_ENTRY *) & CtReq->un.PortID;
+ memcpy((uint8_t *) & pe->PortName,
+- (uint8_t *) & phba->fc_sparam.portName,
++ (uint8_t *) & vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+ size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
+ break;
+ }
+
+ bpl = (struct ulp_bde64 *) bmp->virt;
+- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
+- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
+ bpl->tus.f.bdeFlags = 0;
+ bpl->tus.f.bdeSize = size;
+ bpl->tus.w = le32_to_cpu(bpl->tus.w);
+
+ cmpl = lpfc_cmpl_ct_cmd_fdmi;
+
+- if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP))
++ if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP, 0))
+ return 0;
+
+ lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+@@ -1134,49 +1512,50 @@
+ kfree(mp);
+ fdmi_cmd_exit:
+ /* Issue FDMI request failed */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0244 Issue FDMI request failed Data: x%x\n",
+- phba->brd_no,
+- cmdcode);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0244 Issue FDMI request failed Data: x%x\n",
++ phba->brd_no, vport->vpi, cmdcode);
+ return 1;
+ }
+
+ void
+ lpfc_fdmi_tmo(unsigned long ptr)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ struct lpfc_vport *vport = (struct lpfc_vport *)ptr;
++ struct lpfc_hba *phba = vport->phba;
+ unsigned long iflag;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- if (!(phba->work_hba_events & WORKER_FDMI_TMO)) {
+- phba->work_hba_events |= WORKER_FDMI_TMO;
++ spin_lock_irqsave(&vport->work_port_lock, iflag);
++ if (!(vport->work_port_events & WORKER_FDMI_TMO)) {
++ vport->work_port_events |= WORKER_FDMI_TMO;
++ spin_unlock_irqrestore(&vport->work_port_lock, iflag);
++
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+- spin_unlock_irqrestore(phba->host->host_lock,iflag);
++ else
++ spin_unlock_irqrestore(&vport->work_port_lock, iflag);
+ }
+
+ void
+-lpfc_fdmi_tmo_handler(struct lpfc_hba *phba)
++lpfc_fdmi_timeout_handler(struct lpfc_vport *vport)
+ {
+ struct lpfc_nodelist *ndlp;
+
+- ndlp = lpfc_findnode_did(phba, FDMI_DID);
++ ndlp = lpfc_findnode_did(vport, FDMI_DID);
+ if (ndlp) {
+- if (init_utsname()->nodename[0] != '\0') {
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
+- } else {
+- mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
+- }
++ if (init_utsname()->nodename[0] != '\0')
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
++ else
++ mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
+ }
+ return;
+ }
+
+-
+ void
+-lpfc_decode_firmware_rev(struct lpfc_hba * phba, char *fwrevision, int flag)
++lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag)
+ {
+ struct lpfc_sli *psli = &phba->sli;
+ lpfc_vpd_t *vp = &phba->vpd;
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,508 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2007 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/idr.h>
++#include <linux/interrupt.h>
++#include <linux/kthread.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/ctype.h>
++#include <linux/version.h>
++
++#include <scsi/scsi.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_fc.h>
++
++#include "lpfc_hw.h"
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_version.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++/* debugfs interface
++ *
++ * To access this interface the user should:
++ * # mkdir /debug
++ * # mount -t debugfs none /debug
++ *
++ * The lpfc debugfs directory hierarchy is:
++ * lpfc/lpfcX/vportY
++ * where X is the lpfc hba unique_id
++ * where Y is the vport VPI on that hba
++ *
++ * Debugging services available per vport:
++ * discovery_trace
++ * This is an ASCII readable file that contains a trace of the last
++ * lpfc_debugfs_max_disc_trc events that happened on a specific vport.
++ * See lpfc_debugfs.h for different categories of
++ * discovery events. To enable the discovery trace, the following
++ * module parameters must be set:
++ * lpfc_debugfs_enable=1 Turns on lpfc debugfs filesystem support
++ * lpfc_debugfs_max_disc_trc=X Where X is the event trace depth for
++ * EACH vport. X MUST also be a power of 2.
++ * lpfc_debugfs_mask_disc_trc=Y Where Y is an event mask as defined in
++ * lpfc_debugfs.h.
++ */
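++/*
++ * Example usage (parameter values and instance names are illustrative
++ * only):
++ * # modprobe lpfc lpfc_debugfs_enable=1 lpfc_debugfs_max_disc_trc=64 \
++ * lpfc_debugfs_mask_disc_trc=0x7
++ * # cat /debug/lpfc/lpfc0/vport0/discovery_trace
++ */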
++static int lpfc_debugfs_enable = 0;
++module_param(lpfc_debugfs_enable, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_enable, "Enable debugfs services");
++
++static int lpfc_debugfs_max_disc_trc = 0; /* This MUST be a power of 2 */
++module_param(lpfc_debugfs_max_disc_trc, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_max_disc_trc,
++ "Set debugfs discovery trace depth");
++
++static int lpfc_debugfs_mask_disc_trc = 0;
++module_param(lpfc_debugfs_mask_disc_trc, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc,
++ "Set debugfs discovery trace mask");
++
++#include <linux/debugfs.h>
++
++/* size of discovery_trace output line */
++#define LPFC_DISC_TRC_ENTRY_SIZE 80
++
++/* nodelist output buffer size */
++#define LPFC_NODELIST_SIZE 8192
++#define LPFC_NODELIST_ENTRY_SIZE 120
++
++struct lpfc_debug {
++ char *buffer;
++ int len;
++};
++
++atomic_t lpfc_debugfs_disc_trc_cnt = ATOMIC_INIT(0);
++unsigned long lpfc_debugfs_start_time = 0L;
++
++static int
++lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size)
++{
++ int i, index, len, enable;
++ uint32_t ms;
++ struct lpfc_disc_trc *dtp;
++ char buffer[80];
++
++
++ enable = lpfc_debugfs_enable;
++ lpfc_debugfs_enable = 0;
++
++ len = 0;
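++ /* The trace array is a power-of-2 ring buffer: the slot after the
++ * last one written is the oldest entry, so dump from there to the
++ * end of the array, then wrap from slot 0 back up to the index.
++ */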
++ index = (atomic_read(&vport->disc_trc_cnt) + 1) &
++ (lpfc_debugfs_max_disc_trc - 1);
++ for (i = index; i < lpfc_debugfs_max_disc_trc; i++) {
++ dtp = vport->disc_trc + i;
++ if (!dtp->fmt)
++ continue;
++ ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time);
++ snprintf(buffer, 80, "%010d:%010d ms:%s\n",
++ dtp->seq_cnt, ms, dtp->fmt);
++ len += snprintf(buf+len, size-len, buffer,
++ dtp->data1, dtp->data2, dtp->data3);
++ }
++ for (i = 0; i < index; i++) {
++ dtp = vport->disc_trc + i;
++ if (!dtp->fmt)
++ continue;
++ ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time);
++ snprintf(buffer, 80, "%010d:%010d ms:%s\n",
++ dtp->seq_cnt, ms, dtp->fmt);
++ len += snprintf(buf+len, size-len, buffer,
++ dtp->data1, dtp->data2, dtp->data3);
++ }
++
++ lpfc_debugfs_enable = enable;
++ return len;
++}
++
++static int
++lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
++{
++ int len = 0;
++ int cnt;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_nodelist *ndlp;
++ unsigned char *statep, *name;
++
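++ /* cnt is the worst-case number of entries that fit in the output
++ * buffer; the loop below flags truncation once it is exhausted.
++ */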
++ cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE);
++
++ spin_lock_irq(shost->host_lock);
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++ if (!cnt) {
++ len += snprintf(buf+len, size-len,
++ "Missing Nodelist Entries\n");
++ break;
++ }
++ cnt--;
++ switch (ndlp->nlp_state) {
++ case NLP_STE_UNUSED_NODE:
++ statep = "UNUSED";
++ break;
++ case NLP_STE_PLOGI_ISSUE:
++ statep = "PLOGI ";
++ break;
++ case NLP_STE_ADISC_ISSUE:
++ statep = "ADISC ";
++ break;
++ case NLP_STE_REG_LOGIN_ISSUE:
++ statep = "REGLOG";
++ break;
++ case NLP_STE_PRLI_ISSUE:
++ statep = "PRLI ";
++ break;
++ case NLP_STE_UNMAPPED_NODE:
++ statep = "UNMAP ";
++ break;
++ case NLP_STE_MAPPED_NODE:
++ statep = "MAPPED";
++ break;
++ case NLP_STE_NPR_NODE:
++ statep = "NPR ";
++ break;
++ default:
++ statep = "UNKNOWN";
++ }
++ len += snprintf(buf+len, size-len, "%s DID:x%06x ",
++ statep, ndlp->nlp_DID);
++ name = (unsigned char *)&ndlp->nlp_portname;
++ len += snprintf(buf+len, size-len,
++ "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ",
++ *name, *(name+1), *(name+2), *(name+3),
++ *(name+4), *(name+5), *(name+6), *(name+7));
++ name = (unsigned char *)&ndlp->nlp_nodename;
++ len += snprintf(buf+len, size-len,
++ "WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ",
++ *name, *(name+1), *(name+2), *(name+3),
++ *(name+4), *(name+5), *(name+6), *(name+7));
++ len += snprintf(buf+len, size-len, "RPI:%03d flag:x%08x ",
++ ndlp->nlp_rpi, ndlp->nlp_flag);
++ if (!ndlp->nlp_type)
++ len += snprintf(buf+len, size-len, "UNKNOWN_TYPE");
++ if (ndlp->nlp_type & NLP_FC_NODE)
++ len += snprintf(buf+len, size-len, "FC_NODE ");
++ if (ndlp->nlp_type & NLP_FABRIC)
++ len += snprintf(buf+len, size-len, "FABRIC ");
++ if (ndlp->nlp_type & NLP_FCP_TARGET)
++ len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ",
++ ndlp->nlp_sid);
++ if (ndlp->nlp_type & NLP_FCP_INITIATOR)
++ len += snprintf(buf+len, size-len, "FCP_INITIATOR");
++ len += snprintf(buf+len, size-len, "\n");
++ }
++ spin_unlock_irq(shost->host_lock);
++ return len;
++}
++#endif
++
++
++inline void
++lpfc_debugfs_disc_trc(struct lpfc_vport *vport, int mask, char *fmt,
++ uint32_t data1, uint32_t data2, uint32_t data3)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++ struct lpfc_disc_trc *dtp;
++ int index;
++
++ if (!(lpfc_debugfs_mask_disc_trc & mask))
++ return;
++
++ if (!lpfc_debugfs_enable || !lpfc_debugfs_max_disc_trc ||
++ !vport || !vport->disc_trc)
++ return;
++
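++ /* Claim the next ring-buffer slot; once the writer laps the
++ * (power-of-2) trace depth, the oldest entries are overwritten.
++ */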
++ index = atomic_inc_return(&vport->disc_trc_cnt) &
++ (lpfc_debugfs_max_disc_trc - 1);
++ dtp = vport->disc_trc + index;
++ dtp->fmt = fmt;
++ dtp->data1 = data1;
++ dtp->data2 = data2;
++ dtp->data3 = data3;
++ dtp->seq_cnt = atomic_inc_return(&lpfc_debugfs_disc_trc_cnt);
++ dtp->jif = jiffies;
++#endif
++ return;
++}
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++static int
++lpfc_debugfs_disc_trc_open(struct inode *inode, struct file *file)
++{
++ struct lpfc_vport *vport = inode->i_private;
++ struct lpfc_debug *debug;
++ int size;
++ int rc = -ENOMEM;
++
++ if (!lpfc_debugfs_max_disc_trc) {
++ rc = -ENOSPC;
++ goto out;
++ }
++
++ debug = kmalloc(sizeof(*debug), GFP_KERNEL);
++ if (!debug)
++ goto out;
++
++ /* Round to page boundary */
++ size = (lpfc_debugfs_max_disc_trc * LPFC_DISC_TRC_ENTRY_SIZE);
++ size = PAGE_ALIGN(size);
++
++ debug->buffer = kmalloc(size, GFP_KERNEL);
++ if (!debug->buffer) {
++ kfree(debug);
++ goto out;
++ }
++
++ debug->len = lpfc_debugfs_disc_trc_data(vport, debug->buffer, size);
++ file->private_data = debug;
++
++ rc = 0;
++out:
++ return rc;
++}
++
++static int
++lpfc_debugfs_nodelist_open(struct inode *inode, struct file *file)
++{
++ struct lpfc_vport *vport = inode->i_private;
++ struct lpfc_debug *debug;
++ int rc = -ENOMEM;
++
++ debug = kmalloc(sizeof(*debug), GFP_KERNEL);
++ if (!debug)
++ goto out;
++
++ /* Allocate the fixed-size nodelist buffer */
++ debug->buffer = kmalloc(LPFC_NODELIST_SIZE, GFP_KERNEL);
++ if (!debug->buffer) {
++ kfree(debug);
++ goto out;
++ }
++
++ debug->len = lpfc_debugfs_nodelist_data(vport, debug->buffer,
++ LPFC_NODELIST_SIZE);
++ file->private_data = debug;
++
++ rc = 0;
++out:
++ return rc;
++}
++
++static loff_t
++lpfc_debugfs_lseek(struct file *file, loff_t off, int whence)
++{
++ struct lpfc_debug *debug;
++ loff_t pos = -1;
++
++ debug = file->private_data;
++
++ switch (whence) {
++ case 0: /* SEEK_SET */
++ pos = off;
++ break;
++ case 1: /* SEEK_CUR */
++ pos = file->f_pos + off;
++ break;
++ case 2: /* SEEK_END */
++ pos = debug->len - off;
++ }
++ return (pos < 0 || pos > debug->len) ? -EINVAL : (file->f_pos = pos);
++}
++
++static ssize_t
++lpfc_debugfs_read(struct file *file, char __user *buf,
++ size_t nbytes, loff_t *ppos)
++{
++ struct lpfc_debug *debug = file->private_data;
++ return simple_read_from_buffer(buf, nbytes, ppos, debug->buffer,
++ debug->len);
++}
++
++static int
++lpfc_debugfs_release(struct inode *inode, struct file *file)
++{
++ struct lpfc_debug *debug = file->private_data;
++
++ kfree(debug->buffer);
++ kfree(debug);
++
++ return 0;
++}
++
++#undef lpfc_debugfs_op_disc_trc
++static struct file_operations lpfc_debugfs_op_disc_trc = {
++ .owner = THIS_MODULE,
++ .open = lpfc_debugfs_disc_trc_open,
++ .llseek = lpfc_debugfs_lseek,
++ .read = lpfc_debugfs_read,
++ .release = lpfc_debugfs_release,
++};
++
++#undef lpfc_debugfs_op_nodelist
++static struct file_operations lpfc_debugfs_op_nodelist = {
++ .owner = THIS_MODULE,
++ .open = lpfc_debugfs_nodelist_open,
++ .llseek = lpfc_debugfs_lseek,
++ .read = lpfc_debugfs_read,
++ .release = lpfc_debugfs_release,
++};
++
++static struct dentry *lpfc_debugfs_root = NULL;
++static atomic_t lpfc_debugfs_hba_count;
++#endif
++
++inline void
++lpfc_debugfs_initialize(struct lpfc_vport *vport)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++ struct lpfc_hba *phba = vport->phba;
++ char name[64];
++ uint32_t num, i;
++
++ if (!lpfc_debugfs_enable)
++ return;
++
++ if (lpfc_debugfs_max_disc_trc) {
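++ /* The trace depth is used as an index mask, so it must be a
++ * power of 2; x & (x - 1) is nonzero iff x is not, in which
++ * case round it down to the nearest power of 2.
++ */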
++ num = lpfc_debugfs_max_disc_trc - 1;
++ if (num & lpfc_debugfs_max_disc_trc) {
++ /* Change to be a power of 2 */
++ num = lpfc_debugfs_max_disc_trc;
++ i = 0;
++ while (num > 1) {
++ num = num >> 1;
++ i++;
++ }
++ lpfc_debugfs_max_disc_trc = (1 << i);
++ printk(KERN_ERR
++ "lpfc_debugfs_max_disc_trc changed to %d\n",
++ lpfc_debugfs_max_disc_trc);
++ }
++ }
++
++ if (!lpfc_debugfs_root) {
++ lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL);
++ atomic_set(&lpfc_debugfs_hba_count, 0);
++ if (!lpfc_debugfs_root)
++ goto debug_failed;
++ }
++
++ snprintf(name, sizeof(name), "lpfc%d", phba->brd_no);
++ if (!phba->hba_debugfs_root) {
++ phba->hba_debugfs_root =
++ debugfs_create_dir(name, lpfc_debugfs_root);
++ if (!phba->hba_debugfs_root)
++ goto debug_failed;
++ atomic_inc(&lpfc_debugfs_hba_count);
++ atomic_set(&phba->debugfs_vport_count, 0);
++ }
++
++ snprintf(name, sizeof(name), "vport%d", vport->vpi);
++ if (!vport->vport_debugfs_root) {
++ vport->vport_debugfs_root =
++ debugfs_create_dir(name, phba->hba_debugfs_root);
++ if (!vport->vport_debugfs_root)
++ goto debug_failed;
++ atomic_inc(&phba->debugfs_vport_count);
++ }
++
++ if (!lpfc_debugfs_start_time)
++ lpfc_debugfs_start_time = jiffies;
++
++ vport->disc_trc = kzalloc(
++ (sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc),
++ GFP_KERNEL);
++
++ if (!vport->disc_trc)
++ goto debug_failed;
++
++ snprintf(name, sizeof(name), "discovery_trace");
++ vport->debug_disc_trc =
++ debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
++ vport->vport_debugfs_root,
++ vport, &lpfc_debugfs_op_disc_trc);
++ if (!vport->debug_disc_trc) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0409 Cannot create debugfs",
++ phba->brd_no);
++ goto debug_failed;
++ }
++ snprintf(name, sizeof(name), "nodelist");
++ vport->debug_nodelist =
++ debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
++ vport->vport_debugfs_root,
++ vport, &lpfc_debugfs_op_nodelist);
++ if (!vport->debug_nodelist) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0409 Cannot create debugfs",
++ phba->brd_no);
++ goto debug_failed;
++ }
++debug_failed:
++ return;
++#endif
++}
++
++
++inline void
++lpfc_debugfs_terminate(struct lpfc_vport *vport)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++ struct lpfc_hba *phba = vport->phba;
++
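++ /* Tear down in the reverse order of lpfc_debugfs_initialize(),
++ * removing the per-HBA and top-level directories once their
++ * use counts drop to zero.
++ */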
++ if (vport->disc_trc) {
++ kfree(vport->disc_trc);
++ vport->disc_trc = NULL;
++ }
++ if (vport->debug_disc_trc) {
++ debugfs_remove(vport->debug_disc_trc); /* discovery_trace */
++ vport->debug_disc_trc = NULL;
++ }
++ if (vport->debug_nodelist) {
++ debugfs_remove(vport->debug_nodelist); /* nodelist */
++ vport->debug_nodelist = NULL;
++ }
++ if (vport->vport_debugfs_root) {
++ debugfs_remove(vport->vport_debugfs_root); /* vportX */
++ vport->vport_debugfs_root = NULL;
++ atomic_dec(&phba->debugfs_vport_count);
++ }
++ if (atomic_read(&phba->debugfs_vport_count) == 0) {
++ debugfs_remove(vport->phba->hba_debugfs_root); /* lpfcX */
++ vport->phba->hba_debugfs_root = NULL;
++ atomic_dec(&lpfc_debugfs_hba_count);
++ if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
++ debugfs_remove(lpfc_debugfs_root); /* lpfc */
++ lpfc_debugfs_root = NULL;
++ }
++ }
++#endif
++}
++
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,50 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2007 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++#ifndef _H_LPFC_DEBUG_FS
++#define _H_LPFC_DEBUG_FS
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++struct lpfc_disc_trc {
++ char *fmt;
++ uint32_t data1;
++ uint32_t data2;
++ uint32_t data3;
++ uint32_t seq_cnt;
++ unsigned long jif;
++};
++#endif
++
++/* Mask for discovery_trace */
++#define LPFC_DISC_TRC_ELS_CMD 0x1 /* Trace ELS commands */
++#define LPFC_DISC_TRC_ELS_RSP 0x2 /* Trace ELS response */
++#define LPFC_DISC_TRC_ELS_UNSOL 0x4 /* Trace ELS rcv'ed */
++#define LPFC_DISC_TRC_ELS_ALL 0x7 /* Trace ELS */
++#define LPFC_DISC_TRC_MBOX_VPORT 0x8 /* Trace vport MBOXs */
++#define LPFC_DISC_TRC_MBOX 0x10 /* Trace other MBOXs */
++#define LPFC_DISC_TRC_MBOX_ALL 0x18 /* Trace all MBOXs */
++#define LPFC_DISC_TRC_CT 0x20 /* Trace disc CT requests */
++#define LPFC_DISC_TRC_DSM 0x40 /* Trace DSM events */
++#define LPFC_DISC_TRC_RPORT 0x80 /* Trace rport events */
++#define LPFC_DISC_TRC_NODE 0x100 /* Trace ndlp state changes */
++
++#define LPFC_DISC_TRC_DISCOVERY 0xef /* common mask for general
++ * discovery */
++#endif /* _H_LPFC_DEBUG_FS */
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -36,21 +36,23 @@
+ LPFC_EVT_WARM_START,
+ LPFC_EVT_KILL,
+ LPFC_EVT_ELS_RETRY,
++ LPFC_EVT_DEV_LOSS_DELAY,
++ LPFC_EVT_DEV_LOSS,
+ };
+
+ /* structure used to queue event to the discovery tasklet */
+ struct lpfc_work_evt {
+ struct list_head evt_listp;
+- void * evt_arg1;
+- void * evt_arg2;
++ void *evt_arg1;
++ void *evt_arg2;
+ enum lpfc_work_type evt;
+ };
+
+
+ struct lpfc_nodelist {
+ struct list_head nlp_listp;
+- struct lpfc_name nlp_portname; /* port name */
+- struct lpfc_name nlp_nodename; /* node name */
++ struct lpfc_name nlp_portname;
++ struct lpfc_name nlp_nodename;
+ uint32_t nlp_flag; /* entry flags */
+ uint32_t nlp_DID; /* FC D_ID of entry */
+ uint32_t nlp_last_elscmd; /* Last ELS cmd sent */
+@@ -75,8 +77,9 @@
+ struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */
+ struct fc_rport *rport; /* Corresponding FC transport
+ port structure */
+- struct lpfc_hba *nlp_phba;
++ struct lpfc_vport *vport;
+ struct lpfc_work_evt els_retry_evt;
++ struct lpfc_work_evt dev_loss_evt;
+ unsigned long last_ramp_up_time; /* jiffy of last ramp up */
+ unsigned long last_q_full_time; /* jiffy of last queue full */
+ struct kref kref;
+@@ -98,7 +101,9 @@
+ ACC */
+ #define NLP_NPR_ADISC 0x2000000 /* Issue ADISC when dq'ed from
+ NPR list */
++#define NLP_RM_DFLT_RPI 0x4000000 /* need to remove leftover dflt RPI */
+ #define NLP_NODEV_REMOVE 0x8000000 /* Defer removal till discovery ends */
++#define NLP_TARGET_REMOVE 0x10000000 /* Target remove in process */
+
+ /* There are 4 different double linked lists nodelist entries can reside on.
+ * The Port Login (PLOGI) list and Address Discovery (ADISC) list are used
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c 2007-12-21 15:36:12.000000000 -0500
+@@ -35,38 +35,38 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+
+ static int lpfc_els_retry(struct lpfc_hba *, struct lpfc_iocbq *,
+ struct lpfc_iocbq *);
++static void lpfc_cmpl_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *);
++
+ static int lpfc_max_els_tries = 3;
+
+-static int
+-lpfc_els_chk_latt(struct lpfc_hba * phba)
++int
++lpfc_els_chk_latt(struct lpfc_vport *vport)
+ {
+- struct lpfc_sli *psli;
+- LPFC_MBOXQ_t *mbox;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t ha_copy;
+- int rc;
+-
+- psli = &phba->sli;
+
+- if ((phba->hba_state >= LPFC_HBA_READY) ||
+- (phba->hba_state == LPFC_LINK_DOWN))
++ if (vport->port_state >= LPFC_VPORT_READY ||
++ phba->link_state == LPFC_LINK_DOWN)
+ return 0;
+
+ /* Read the HBA Host Attention Register */
+- spin_lock_irq(phba->host->host_lock);
+ ha_copy = readl(phba->HAregaddr);
+- spin_unlock_irq(phba->host->host_lock);
+
+ if (!(ha_copy & HA_LATT))
+ return 0;
+
+ /* Pending Link Event during Discovery */
+- lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY,
+- "%d:0237 Pending Link Event during "
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0237 Pending Link Event during "
+ "Discovery: State x%x\n",
+- phba->brd_no, phba->hba_state);
++ phba->brd_no, vport->vpi, phba->pport->port_state);
+
+ /* CLEAR_LA should re-enable link attention events and
+ * we should then immediately take a LATT event. The
+@@ -74,48 +74,34 @@
+ * will clean up any leftover in-progress discovery
+ * events.
+ */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_ABORT_DISCOVERY;
+- spin_unlock_irq(phba->host->host_lock);
+-
+- if (phba->hba_state != LPFC_CLEAR_LA) {
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+- phba->hba_state = LPFC_CLEAR_LA;
+- lpfc_clear_la(phba, mbox);
+- mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox (phba, mbox,
+- (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free(mbox, phba->mbox_mem_pool);
+- phba->hba_state = LPFC_HBA_ERROR;
+- }
+- }
+- }
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_ABORT_DISCOVERY;
++ spin_unlock_irq(shost->host_lock);
+
+- return 1;
++ if (phba->link_state != LPFC_CLEAR_LA)
++ lpfc_issue_clear_la(phba, vport);
+
++ return 1;
+ }
+
+ static struct lpfc_iocbq *
+-lpfc_prep_els_iocb(struct lpfc_hba * phba, uint8_t expectRsp,
+- uint16_t cmdSize, uint8_t retry, struct lpfc_nodelist * ndlp,
+- uint32_t did, uint32_t elscmd)
++lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp,
++ uint16_t cmdSize, uint8_t retry,
++ struct lpfc_nodelist *ndlp, uint32_t did,
++ uint32_t elscmd)
+ {
+- struct lpfc_sli_ring *pring;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *elsiocb;
+ struct lpfc_dmabuf *pcmd, *prsp, *pbuflist;
+ struct ulp_bde64 *bpl;
+ IOCB_t *icmd;
+
+- pring = &phba->sli.ring[LPFC_ELS_RING];
+
+- if (phba->hba_state < LPFC_LINK_UP)
++ if (!lpfc_is_link_up(phba))
+ return NULL;
+
+ /* Allocate buffer for command iocb */
+- spin_lock_irq(phba->host->host_lock);
+ elsiocb = lpfc_sli_get_iocbq(phba);
+- spin_unlock_irq(phba->host->host_lock);
+
+ if (elsiocb == NULL)
+ return NULL;
+@@ -123,14 +109,12 @@
+
+ /* fill in BDEs for command */
+ /* Allocate buffer for command payload */
+- if (((pcmd = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL)) == 0) ||
++ if (((pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL)) == 0) ||
+ ((pcmd->virt = lpfc_mbuf_alloc(phba,
+ MEM_PRI, &(pcmd->phys))) == 0)) {
+ kfree(pcmd);
+
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, elsiocb);
+- spin_unlock_irq(phba->host->host_lock);
+ return NULL;
+ }
+
+@@ -138,7 +122,7 @@
+
+ /* Allocate buffer for response payload */
+ if (expectRsp) {
+- prsp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
++ prsp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+ if (prsp)
+ prsp->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
+ &prsp->phys);
+@@ -146,9 +130,7 @@
+ kfree(prsp);
+ lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
+ kfree(pcmd);
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, elsiocb);
+- spin_unlock_irq(phba->host->host_lock);
+ return NULL;
+ }
+ INIT_LIST_HEAD(&prsp->list);
+@@ -157,14 +139,12 @@
+ }
+
+ /* Allocate buffer for Buffer ptr list */
+- pbuflist = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
++ pbuflist = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+ if (pbuflist)
+ pbuflist->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
+ &pbuflist->phys);
+ if (pbuflist == 0 || pbuflist->virt == 0) {
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, elsiocb);
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
+ lpfc_mbuf_free(phba, prsp->virt, prsp->phys);
+ kfree(pcmd);
+@@ -178,20 +158,28 @@
+ icmd->un.elsreq64.bdl.addrHigh = putPaddrHigh(pbuflist->phys);
+ icmd->un.elsreq64.bdl.addrLow = putPaddrLow(pbuflist->phys);
+ icmd->un.elsreq64.bdl.bdeFlags = BUFF_TYPE_BDL;
+- if (expectRsp) {
+- icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof (struct ulp_bde64));
+ icmd->un.elsreq64.remoteID = did; /* DID */
++ if (expectRsp) {
++ icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64));
+ icmd->ulpCommand = CMD_ELS_REQUEST64_CR;
+ icmd->ulpTimeout = phba->fc_ratov * 2;
+ } else {
+- icmd->un.elsreq64.bdl.bdeSize = sizeof (struct ulp_bde64);
++ icmd->un.elsreq64.bdl.bdeSize = sizeof(struct ulp_bde64);
+ icmd->ulpCommand = CMD_XMIT_ELS_RSP64_CX;
+ }
+-
+ icmd->ulpBdeCount = 1;
+ icmd->ulpLe = 1;
+ icmd->ulpClass = CLASS3;
+
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ icmd->un.elsreq64.myID = vport->fc_myDID;
++
++ /* For ELS_REQUEST64_CR, use the VPI by default */
++ icmd->ulpContext = vport->vpi;
++ icmd->ulpCt_h = 0;
++ icmd->ulpCt_l = 1;
++ }
++
+ bpl = (struct ulp_bde64 *) pbuflist->virt;
+ bpl->addrLow = le32_to_cpu(putPaddrLow(pcmd->phys));
+ bpl->addrHigh = le32_to_cpu(putPaddrHigh(pcmd->phys));
+@@ -209,10 +197,12 @@
+ }
+
+ /* Save for completion so we can release these resources */
++ if (elscmd != ELS_CMD_LS_RJT)
+ elsiocb->context1 = lpfc_nlp_get(ndlp);
+ elsiocb->context2 = pcmd;
+ elsiocb->context3 = pbuflist;
+ elsiocb->retry = retry;
++ elsiocb->vport = vport;
+ elsiocb->drvrTimeout = (phba->fc_ratov << 1) + LPFC_DRVR_TIMEOUT;
+
+ if (prsp) {
+@@ -222,16 +212,16 @@
+ if (expectRsp) {
+ /* Xmit ELS command <elsCmd> to remote NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0116 Xmit ELS command x%x to remote "
+- "NPORT x%x I/O tag: x%x, HBA state: x%x\n",
+- phba->brd_no, elscmd,
+- did, elsiocb->iotag, phba->hba_state);
++ "%d (%d):0116 Xmit ELS command x%x to remote "
++ "NPORT x%x I/O tag: x%x, port state: x%x\n",
++ phba->brd_no, vport->vpi, elscmd, did,
++ elsiocb->iotag, vport->port_state);
+ } else {
+ /* Xmit ELS response <elsCmd> to remote NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0117 Xmit ELS response x%x to remote "
++ "%d (%d):0117 Xmit ELS response x%x to remote "
+ "NPORT x%x I/O tag: x%x, size: x%x\n",
+- phba->brd_no, elscmd,
++ phba->brd_no, vport->vpi, elscmd,
+ ndlp->nlp_DID, elsiocb->iotag, cmdSize);
+ }
+
+@@ -240,16 +230,79 @@
+
+
+ static int
+-lpfc_cmpl_els_flogi_fabric(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+- struct serv_parm *sp, IOCB_t *irsp)
++lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ LPFC_MBOXQ_t *mbox;
+ struct lpfc_dmabuf *mp;
++ struct lpfc_nodelist *ndlp;
++ struct serv_parm *sp;
+ int rc;
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_FABRIC;
+- spin_unlock_irq(phba->host->host_lock);
++ sp = &phba->fc_fabparam;
++ ndlp = lpfc_findnode_did(vport, Fabric_DID);
++ if (!ndlp)
++ goto fail;
++
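++ /* Registering the fabric login is a two-step mailbox sequence:
++ * CONFIG_LINK first, then REG_LOGIN for the fabric DID.
++ */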
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox)
++ goto fail;
++
++ vport->port_state = LPFC_FABRIC_CFG_LINK;
++ lpfc_config_link(phba, mbox);
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ mbox->vport = vport;
++
++ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
++ if (rc == MBX_NOT_FINISHED)
++ goto fail_free_mbox;
++
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox)
++ goto fail;
++ rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox,
++ 0);
++ if (rc)
++ goto fail_free_mbox;
++
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login;
++ mbox->vport = vport;
++ mbox->context2 = lpfc_nlp_get(ndlp);
++
++ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
++ if (rc == MBX_NOT_FINISHED)
++ goto fail_issue_reg_login;
++
++ return 0;
++
++fail_issue_reg_login:
++ lpfc_nlp_put(ndlp);
++ mp = (struct lpfc_dmabuf *) mbox->context1;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++fail_free_mbox:
++ mempool_free(mbox, phba->mbox_mem_pool);
++
++fail:
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0249 Cannot issue Register Fabric login\n",
++ phba->brd_no, vport->vpi);
++ return -ENXIO;
++}
++
++static int
++lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ struct serv_parm *sp, IOCB_t *irsp)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_nodelist *np;
++ struct lpfc_nodelist *next_np;
++
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_FABRIC;
++ spin_unlock_irq(shost->host_lock);
+
+ phba->fc_edtov = be32_to_cpu(sp->cmn.e_d_tov);
+ if (sp->cmn.edtovResolution) /* E_D_TOV ticks are in nanoseconds */
+@@ -258,20 +311,20 @@
+ phba->fc_ratov = (be32_to_cpu(sp->cmn.w2.r_a_tov) + 999) / 1000;
+
+ if (phba->fc_topology == TOPOLOGY_LOOP) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_PUBLIC_LOOP;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PUBLIC_LOOP;
++ spin_unlock_irq(shost->host_lock);
+ } else {
+ /*
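++ /* Runs in timer context: just record the event and wake the
++ * worker thread, which calls lpfc_fdmi_timeout_handler().
++ */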
+ * If we are a N-port connected to a Fabric, fixup sparam's so
+ * logins to devices on remote loops work.
+ */
+- phba->fc_sparam.cmn.altBbCredit = 1;
++ vport->fc_sparam.cmn.altBbCredit = 1;
+ }
+
+- phba->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
++ vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
+ memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name));
+- memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name));
++ memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof(struct lpfc_name));
+ ndlp->nlp_class_sup = 0;
+ if (sp->cls1.classValid)
+ ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -285,68 +338,85 @@
+ sp->cmn.bbRcvSizeLsb;
+ memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm));
+
+- mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (!mbox)
+- goto fail;
+-
+- phba->hba_state = LPFC_FABRIC_CFG_LINK;
+- lpfc_config_link(phba, mbox);
+- mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-
+- rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
+- if (rc == MBX_NOT_FINISHED)
+- goto fail_free_mbox;
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ if (sp->cmn.response_multiple_NPort) {
++ lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT,
++ "%d:1816 FLOGI NPIV supported, "
++ "response data 0x%x\n",
++ phba->brd_no,
++ sp->cmn.response_multiple_NPort);
++ phba->link_flag |= LS_NPIV_FAB_SUPPORTED;
+
+- mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (!mbox)
+- goto fail;
++ } else {
++ /* Because we asked f/w for NPIV it still expects us
++ to call reg_vnpid at least for the physical host */
++ lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT,
++ "%d:1817 Fabric does not support NPIV "
++ "- configuring single port mode.\n",
++ phba->brd_no);
++ phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED;
++ }
++ }
+
+- if (lpfc_reg_login(phba, Fabric_DID, (uint8_t *) sp, mbox, 0))
+- goto fail_free_mbox;
++ if ((vport->fc_prevDID != vport->fc_myDID) &&
++ !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+
+- mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login;
+- mbox->context2 = lpfc_nlp_get(ndlp);
++ /* If our NportID changed, we need to ensure all
++ * remaining NPORTs get unreg_login'ed.
++ */
++ list_for_each_entry_safe(np, next_np,
++ &vport->fc_nodes, nlp_listp) {
++ if ((np->nlp_state != NLP_STE_NPR_NODE) ||
++ !(np->nlp_flag & NLP_NPR_ADISC))
++ continue;
++ spin_lock_irq(shost->host_lock);
++ np->nlp_flag &= ~NLP_NPR_ADISC;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_unreg_rpi(vport, np);
++ }
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ lpfc_mbx_unreg_vpi(vport);
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++ }
++ }
+
+- rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
+- if (rc == MBX_NOT_FINISHED)
+- goto fail_issue_reg_login;
++ ndlp->nlp_sid = irsp->un.ulpWord[4] & Mask_DID;
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
+
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
++ vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) {
++ lpfc_register_new_vport(phba, vport, ndlp);
++ return 0;
++ }
++ lpfc_issue_fabric_reglogin(vport);
+ return 0;
+-
+- fail_issue_reg_login:
+- lpfc_nlp_put(ndlp);
+- mp = (struct lpfc_dmabuf *) mbox->context1;
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- fail_free_mbox:
+- mempool_free(mbox, phba->mbox_mem_pool);
+- fail:
+- return -ENXIO;
+ }
+
+ /*
+ * We FLOGIed into an NPort, initiate pt2pt protocol
+ */
+ static int
+-lpfc_cmpl_els_flogi_nport(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
++lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct serv_parm *sp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ LPFC_MBOXQ_t *mbox;
+ int rc;
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
+
+ phba->fc_edtov = FF_DEF_EDTOV;
+ phba->fc_ratov = FF_DEF_RATOV;
+- rc = memcmp(&phba->fc_portname, &sp->portName,
+- sizeof(struct lpfc_name));
++ rc = memcmp(&vport->fc_portname, &sp->portName,
++ sizeof(vport->fc_portname));
+ if (rc >= 0) {
+ /* This side will initiate the PLOGI */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_PT2PT_PLOGI;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PT2PT_PLOGI;
++ spin_unlock_irq(shost->host_lock);
+
+ /*
+ * N_Port ID cannot be 0, set our to LocalID the other
+@@ -355,7 +425,7 @@
+
+ /* not equal */
+ if (rc)
+- phba->fc_myDID = PT2PT_LocalID;
++ vport->fc_myDID = PT2PT_LocalID;
+
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!mbox)
+@@ -364,6 +434,7 @@
+ lpfc_config_link(phba, mbox);
+
+ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ mbox->vport = vport;
+ rc = lpfc_sli_issue_mbox(phba, mbox,
+ MBX_NOWAIT | MBX_STOP_IOCB);
+ if (rc == MBX_NOT_FINISHED) {
+@@ -372,7 +443,7 @@
+ }
+ lpfc_nlp_put(ndlp);
+
+- ndlp = lpfc_findnode_did(phba, PT2PT_RemoteID);
++ ndlp = lpfc_findnode_did(vport, PT2PT_RemoteID);
+ if (!ndlp) {
+ /*
+ * Cannot find existing Fabric ndlp, so allocate a
+@@ -382,28 +453,30 @@
+ if (!ndlp)
+ goto fail;
+
+- lpfc_nlp_init(phba, ndlp, PT2PT_RemoteID);
++ lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID);
+ }
+
+ memcpy(&ndlp->nlp_portname, &sp->portName,
+ sizeof(struct lpfc_name));
+ memcpy(&ndlp->nlp_nodename, &sp->nodeName,
+ sizeof(struct lpfc_name));
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ spin_unlock_irq(shost->host_lock);
+ } else {
+ /* This side will wait for the PLOGI */
+ lpfc_nlp_put(ndlp);
+ }
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_PT2PT;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PT2PT;
++ spin_unlock_irq(shost->host_lock);
+
+ /* Start discovery - this should just do CLEAR_LA */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ return 0;
+- fail:
++fail:
+ return -ENXIO;
+ }
+
+@@ -411,6 +484,8 @@
+ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp = &rspiocb->iocb;
+ struct lpfc_nodelist *ndlp = cmdiocb->context1;
+ struct lpfc_dmabuf *pcmd = cmdiocb->context2, *prsp;
+@@ -418,21 +493,25 @@
+ int rc;
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
++ if (lpfc_els_chk_latt(vport)) {
+ lpfc_nlp_put(ndlp);
+ goto out;
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "FLOGI cmpl: status:x%x/x%x state:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ vport->port_state);
++
+ if (irsp->ulpStatus) {
+ /* Check for retry */
+- if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+- /* ELS command is being retried */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+ goto out;
+- }
++
+ /* FLOGI failed, so there is no fabric */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
+
+ /* If private loop, then allow max outstanding els to be
+ * LPFC_MAX_DISC_THREADS (32). Scanning in the case of no
+@@ -443,11 +522,10 @@
+ }
+
+ /* FLOGI failure */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0100 FLOGI failure Data: x%x x%x x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0100 FLOGI failure Data: x%x x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi,
+ irsp->ulpStatus, irsp->un.ulpWord[4],
+ irsp->ulpTimeout);
+ goto flogifail;
+@@ -463,21 +541,21 @@
+
+ /* FLOGI completes successfully */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0101 FLOGI completes sucessfully "
++ "%d (%d):0101 FLOGI completes sucessfully "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ irsp->un.ulpWord[4], sp->cmn.e_d_tov,
+ sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution);
+
+- if (phba->hba_state == LPFC_FLOGI) {
++ if (vport->port_state == LPFC_FLOGI) {
+ /*
+ * If Common Service Parameters indicate Nport
+ * we are point to point, if Fport we are Fabric.
+ */
+ if (sp->cmn.fPort)
+- rc = lpfc_cmpl_els_flogi_fabric(phba, ndlp, sp, irsp);
++ rc = lpfc_cmpl_els_flogi_fabric(vport, ndlp, sp, irsp);
+ else
+- rc = lpfc_cmpl_els_flogi_nport(phba, ndlp, sp);
++ rc = lpfc_cmpl_els_flogi_nport(vport, ndlp, sp);
+
+ if (!rc)
+ goto out;
+@@ -486,14 +564,12 @@
+ flogifail:
+ lpfc_nlp_put(ndlp);
+
+- if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT ||
+- (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED &&
+- irsp->un.ulpWord[4] != IOERR_SLI_DOWN)) {
++ if (!lpfc_error_lost_link(irsp)) {
+ /* FLOGI failed, so just use loop map to make discovery list */
+- lpfc_disc_list_loopmap(phba);
++ lpfc_disc_list_loopmap(vport);
+
+ /* Start discovery */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ }
+
+ out:
+@@ -501,9 +577,10 @@
+ }
+
+ static int
+-lpfc_issue_els_flogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ uint8_t retry)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct serv_parm *sp;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+@@ -515,9 +592,10 @@
+
+ pring = &phba->sli.ring[LPFC_ELS_RING];
+
+- cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_FLOGI);
++
+ if (!elsiocb)
+ return 1;
+
+@@ -526,8 +604,8 @@
+
+ /* For FLOGI request, remainder of payload is service parameters */
+ *((uint32_t *) (pcmd)) = ELS_CMD_FLOGI;
+- pcmd += sizeof (uint32_t);
+- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ pcmd += sizeof(uint32_t);
++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
+ sp = (struct serv_parm *) pcmd;
+
+ /* Setup CSPs accordingly for Fabric */
+@@ -541,16 +619,32 @@
+ if (sp->cmn.fcphHigh < FC_PH3)
+ sp->cmn.fcphHigh = FC_PH3;
+
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ sp->cmn.request_multiple_Nport = 1;
++
++ /* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */
++ icmd->ulpCt_h = 1;
++ icmd->ulpCt_l = 0;
++ }
++
++ if (phba->fc_topology != TOPOLOGY_LOOP) {
++ icmd->un.elsreq64.myID = 0;
++ icmd->un.elsreq64.fl = 1;
++ }
++
+ tmo = phba->fc_ratov;
+ phba->fc_ratov = LPFC_DISC_FLOGI_TMO;
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
+ phba->fc_ratov = tmo;
+
+ phba->fc_stat.elsXmitFLOGI++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi;
+- spin_lock_irq(phba->host->host_lock);
+- rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue FLOGI: opt:x%x",
++ phba->sli3_options, 0, 0);
++
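++ /* Fabric ELS commands are funneled through
++ * lpfc_issue_fabric_iocb() so the driver can serialize
++ * outstanding fabric traffic.
++ */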
++ rc = lpfc_issue_fabric_iocb(phba, elsiocb);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -559,7 +653,7 @@
+ }
+
+ int
+-lpfc_els_abort_flogi(struct lpfc_hba * phba)
++lpfc_els_abort_flogi(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli_ring *pring;
+ struct lpfc_iocbq *iocb, *next_iocb;
+@@ -577,73 +671,99 @@
+ * Check the txcmplq for an iocb that matches the nport the driver is
+ * searching for.
+ */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
+ icmd = &iocb->iocb;
+- if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) {
++ if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR &&
++ icmd->un.elsreq64.bdl.ulpIoTag32) {
+ ndlp = (struct lpfc_nodelist *)(iocb->context1);
+- if (ndlp && (ndlp->nlp_DID == Fabric_DID))
++ if (ndlp && (ndlp->nlp_DID == Fabric_DID)) {
+ lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ }
++ spin_unlock_irq(&phba->hbalock);
+
+ return 0;
+ }
+
+ int
+-lpfc_initial_flogi(struct lpfc_hba *phba)
++lpfc_initial_flogi(struct lpfc_vport *vport)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_nodelist *ndlp;
+
+ /* First look for the Fabric ndlp */
+- ndlp = lpfc_findnode_did(phba, Fabric_DID);
++ ndlp = lpfc_findnode_did(vport, Fabric_DID);
+ if (!ndlp) {
+ /* Cannot find existing Fabric ndlp, so allocate a new one */
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ if (!ndlp)
+ return 0;
+- lpfc_nlp_init(phba, ndlp, Fabric_DID);
++ lpfc_nlp_init(vport, ndlp, Fabric_DID);
+ } else {
+- lpfc_dequeue_node(phba, ndlp);
++ lpfc_dequeue_node(vport, ndlp);
+ }
+- if (lpfc_issue_els_flogi(phba, ndlp, 0)) {
++ if (lpfc_issue_els_flogi(vport, ndlp, 0)) {
+ lpfc_nlp_put(ndlp);
+ }
+ return 1;
+ }
+
++int
++lpfc_initial_fdisc(struct lpfc_vport *vport)
++{
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_nodelist *ndlp;
++
++ /* First look for the Fabric ndlp */
++ ndlp = lpfc_findnode_did(vport, Fabric_DID);
++ if (!ndlp) {
++ /* Cannot find existing Fabric ndlp, so allocate a new one */
++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++ if (!ndlp)
++ return 0;
++ lpfc_nlp_init(vport, ndlp, Fabric_DID);
++ } else {
++ lpfc_dequeue_node(vport, ndlp);
++ }
++ if (lpfc_issue_els_fdisc(vport, ndlp, 0)) {
++ lpfc_nlp_put(ndlp);
++ }
++ return 1;
++}
++
+ static void
+-lpfc_more_plogi(struct lpfc_hba * phba)
++lpfc_more_plogi(struct lpfc_vport *vport)
+ {
+ int sentplogi;
++ struct lpfc_hba *phba = vport->phba;
+
+- if (phba->num_disc_nodes)
+- phba->num_disc_nodes--;
++ if (vport->num_disc_nodes)
++ vport->num_disc_nodes--;
+
+ /* Continue discovery with <num_disc_nodes> PLOGIs to go */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0232 Continue discovery with %d PLOGIs to go "
++ "%d (%d):0232 Continue discovery with %d PLOGIs to go "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->num_disc_nodes, phba->fc_plogi_cnt,
+- phba->fc_flag, phba->hba_state);
++ phba->brd_no, vport->vpi, vport->num_disc_nodes,
++ vport->fc_plogi_cnt, vport->fc_flag, vport->port_state);
+
+ /* Check to see if there are more PLOGIs to be sent */
+- if (phba->fc_flag & FC_NLP_MORE) {
+- /* go thru NPR list and issue any remaining ELS PLOGIs */
+- sentplogi = lpfc_els_disc_plogi(phba);
+- }
++ if (vport->fc_flag & FC_NLP_MORE)
++ /* go thru NPR nodes and issue any remaining ELS PLOGIs */
++ sentplogi = lpfc_els_disc_plogi(vport);
++
+ return;
+ }
+
+ static struct lpfc_nodelist *
+-lpfc_plogi_confirm_nport(struct lpfc_hba *phba, struct lpfc_dmabuf *prsp,
++lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
+ struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_vport *vport = ndlp->vport;
+ struct lpfc_nodelist *new_ndlp;
+- uint32_t *lp;
+ struct serv_parm *sp;
+- uint8_t name[sizeof (struct lpfc_name)];
++ uint8_t name[sizeof(struct lpfc_name)];
+ uint32_t rc;
+
+ /* Fabric nodes can have the same WWPN so we don't bother searching
+@@ -652,50 +772,51 @@
+ if (ndlp->nlp_type & NLP_FABRIC)
+ return ndlp;
+
+- lp = (uint32_t *) prsp->virt;
+- sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++ sp = (struct serv_parm *) ((uint8_t *) prsp + sizeof(uint32_t));
+ memset(name, 0, sizeof(struct lpfc_name));
+
+ /* Now we find out if the NPort we are logging into, matches the WWPN
+ * we have for that ndlp. If not, we have some work to do.
+ */
+- new_ndlp = lpfc_findnode_wwpn(phba, &sp->portName);
++ new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName);
+
+ if (new_ndlp == ndlp)
+ return ndlp;
+
+ if (!new_ndlp) {
+- rc =
+- memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name));
++ rc = memcmp(&ndlp->nlp_portname, name,
++ sizeof(struct lpfc_name));
+ if (!rc)
+ return ndlp;
+ new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+ if (!new_ndlp)
+ return ndlp;
+
+- lpfc_nlp_init(phba, new_ndlp, ndlp->nlp_DID);
++ lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID);
+ }
+
+- lpfc_unreg_rpi(phba, new_ndlp);
++ lpfc_unreg_rpi(vport, new_ndlp);
+ new_ndlp->nlp_DID = ndlp->nlp_DID;
+ new_ndlp->nlp_prev_state = ndlp->nlp_prev_state;
+- lpfc_nlp_set_state(phba, new_ndlp, ndlp->nlp_state);
++ lpfc_nlp_set_state(vport, new_ndlp, ndlp->nlp_state);
+
+- /* Move this back to NPR list */
++ /* Move this back to NPR state */
+ if (memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)) == 0)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ else {
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ ndlp->nlp_DID = 0; /* Two ndlps cannot have the same did */
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ }
+ return new_ndlp;
+ }
+
+ static void
+-lpfc_cmpl_els_plogi(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+ struct lpfc_nodelist *ndlp;
+ struct lpfc_dmabuf *prsp;
+@@ -705,32 +826,43 @@
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+
+ irsp = &rspiocb->iocb;
+- ndlp = lpfc_findnode_did(phba, irsp->un.elsreq64.remoteID);
+- if (!ndlp)
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "PLOGI cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ irsp->un.elsreq64.remoteID);
++
++ ndlp = lpfc_findnode_did(vport, irsp->un.elsreq64.remoteID);
++ if (!ndlp) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0136 PLOGI completes to NPort x%x "
++ "with no ndlp. Data: x%x x%x x%x\n",
++ phba->brd_no, vport->vpi, irsp->un.elsreq64.remoteID,
++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpIoTag);
+ goto out;
++ }
+
+ /* Since ndlp can be freed in the disc state machine, note if this node
+ * is being used during discovery.
+ */
++ spin_lock_irq(shost->host_lock);
+ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
+- spin_lock_irq(phba->host->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ rc = 0;
+
+ /* PLOGI completes to NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0102 PLOGI completes to NPort x%x "
++ "%d (%d):0102 PLOGI completes to NPort x%x "
+ "Data: x%x x%x x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+- irsp->un.ulpWord[4], irsp->ulpTimeout, disc,
+- phba->num_disc_nodes);
++ phba->brd_no, vport->vpi, ndlp->nlp_DID,
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ irsp->ulpTimeout, disc, vport->num_disc_nodes);
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
+- spin_lock_irq(phba->host->host_lock);
++ if (lpfc_els_chk_latt(vport)) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ goto out;
+ }
+
+@@ -743,22 +875,28 @@
+ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+ /* ELS command is being retried */
+ if (disc) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ }
+ goto out;
+ }
+
+ /* PLOGI failed */
++ if (ndlp->nlp_DID == NameServer_DID) {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0250 Nameserver login error: "
++ "0x%x / 0x%x\n",
++ phba->brd_no, vport->vpi,
++ irsp->ulpStatus, irsp->un.ulpWord[4]);
++ }
++
+ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ if (lpfc_error_lost_link(irsp)) {
+ rc = NLP_STE_FREED_NODE;
+ } else {
+- rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_PLOGI);
+ }
+ } else {
+@@ -766,33 +904,33 @@
+ prsp = list_entry(((struct lpfc_dmabuf *)
+ cmdiocb->context2)->list.next,
+ struct lpfc_dmabuf, list);
+- ndlp = lpfc_plogi_confirm_nport(phba, prsp, ndlp);
+- rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ ndlp = lpfc_plogi_confirm_nport(phba, prsp->virt, ndlp);
++ rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_PLOGI);
+ }
+
+- if (disc && phba->num_disc_nodes) {
++ if (disc && vport->num_disc_nodes) {
+ /* Check to see if there are more PLOGIs to be sent */
+- lpfc_more_plogi(phba);
++ lpfc_more_plogi(vport);
+
+- if (phba->num_disc_nodes == 0) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_NDISC_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ if (vport->num_disc_nodes == 0) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(shost->host_lock);
+
+- lpfc_can_disctmo(phba);
+- if (phba->fc_flag & FC_RSCN_MODE) {
++ lpfc_can_disctmo(vport);
++ if (vport->fc_flag & FC_RSCN_MODE) {
+ /*
+ * Check to see if more RSCNs came in while
+ * we were processing this one.
+ */
+- if ((phba->fc_rscn_id_cnt == 0) &&
+- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_RSCN_MODE;
+- spin_unlock_irq(phba->host->host_lock);
++ if ((vport->fc_rscn_id_cnt == 0) &&
++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
+ } else {
+- lpfc_els_handle_rscn(phba);
++ lpfc_els_handle_rscn(vport);
+ }
+ }
+ }
+@@ -804,8 +942,9 @@
+ }
+
+ int
+-lpfc_issue_els_plogi(struct lpfc_hba * phba, uint32_t did, uint8_t retry)
++lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct serv_parm *sp;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+@@ -813,12 +952,13 @@
+ struct lpfc_sli *psli;
+ uint8_t *pcmd;
+ uint16_t cmdsize;
++ int ret;
+
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+
+- cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, NULL, did,
++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, NULL, did,
+ ELS_CMD_PLOGI);
+ if (!elsiocb)
+ return 1;
+@@ -828,8 +968,8 @@
+
+ /* For PLOGI request, remainder of payload is service parameters */
+ *((uint32_t *) (pcmd)) = ELS_CMD_PLOGI;
+- pcmd += sizeof (uint32_t);
+- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ pcmd += sizeof(uint32_t);
++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
+ sp = (struct serv_parm *) pcmd;
+
+ if (sp->cmn.fcphLow < FC_PH_4_3)
+@@ -838,22 +978,27 @@
+ if (sp->cmn.fcphHigh < FC_PH3)
+ sp->cmn.fcphHigh = FC_PH3;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue PLOGI: did:x%x",
++ did, 0, 0);
++
+ phba->fc_stat.elsXmitPLOGI++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi;
+- spin_lock_irq(phba->host->host_lock);
+- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+- spin_unlock_irq(phba->host->host_lock);
++ ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
++
++ if (ret == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ return 0;
+ }
+
+ static void
+-lpfc_cmpl_els_prli(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+ struct lpfc_sli *psli;
+ struct lpfc_nodelist *ndlp;
+@@ -864,21 +1009,26 @@
+
+ irsp = &(rspiocb->iocb);
+ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_PRLI_SND;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "PRLI cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ ndlp->nlp_DID);
+
+ /* PRLI completes to NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0103 PRLI completes to NPort x%x "
++ "%d (%d):0103 PRLI completes to NPort x%x "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+- irsp->un.ulpWord[4], irsp->ulpTimeout,
+- phba->num_disc_nodes);
++ phba->brd_no, vport->vpi, ndlp->nlp_DID,
++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++ vport->num_disc_nodes);
+
+- phba->fc_prli_sent--;
++ vport->fc_prli_sent--;
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba))
++ if (lpfc_els_chk_latt(vport))
+ goto out;
+
+ if (irsp->ulpStatus) {
+@@ -889,18 +1039,16 @@
+ }
+ /* PRLI failed */
+ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ if (lpfc_error_lost_link(irsp)) {
+ goto out;
+ } else {
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_PRLI);
+ }
+ } else {
+ /* Good status, call state machine */
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI);
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++ NLP_EVT_CMPL_PRLI);
+ }
+
+ out:
+@@ -909,9 +1057,11 @@
+ }
+
+ int
+-lpfc_issue_els_prli(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ uint8_t retry)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ PRLI *npr;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+@@ -923,8 +1073,8 @@
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+
+- cmdsize = (sizeof (uint32_t) + sizeof (PRLI));
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ cmdsize = (sizeof(uint32_t) + sizeof(PRLI));
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_PRLI);
+ if (!elsiocb)
+ return 1;
+@@ -933,9 +1083,9 @@
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ /* For PRLI request, remainder of payload is service parameters */
+- memset(pcmd, 0, (sizeof (PRLI) + sizeof (uint32_t)));
++ memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t)));
+ *((uint32_t *) (pcmd)) = ELS_CMD_PRLI;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* For PRLI, remainder of payload is PRLI parameter page */
+ npr = (PRLI *) pcmd;
+@@ -955,81 +1105,88 @@
+ npr->prliType = PRLI_FCP_TYPE;
+ npr->initiatorFunc = 1;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue PRLI: did:x%x",
++ ndlp->nlp_DID, 0, 0);
++
+ phba->fc_stat.elsXmitPRLI++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_prli;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_PRLI_SND;
++ spin_unlock_irq(shost->host_lock);
+ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_PRLI_SND;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+- phba->fc_prli_sent++;
++ vport->fc_prli_sent++;
+ return 0;
+ }
+
+ static void
+-lpfc_more_adisc(struct lpfc_hba * phba)
++lpfc_more_adisc(struct lpfc_vport *vport)
+ {
+ int sentadisc;
++ struct lpfc_hba *phba = vport->phba;
+
+- if (phba->num_disc_nodes)
+- phba->num_disc_nodes--;
++ if (vport->num_disc_nodes)
++ vport->num_disc_nodes--;
+
+ /* Continue discovery with <num_disc_nodes> ADISCs to go */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0210 Continue discovery with %d ADISCs to go "
++ "%d (%d):0210 Continue discovery with %d ADISCs to go "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->num_disc_nodes, phba->fc_adisc_cnt,
+- phba->fc_flag, phba->hba_state);
++ phba->brd_no, vport->vpi, vport->num_disc_nodes,
++ vport->fc_adisc_cnt, vport->fc_flag, vport->port_state);
+
+ /* Check to see if there are more ADISCs to be sent */
+- if (phba->fc_flag & FC_NLP_MORE) {
+- lpfc_set_disctmo(phba);
+-
+- /* go thru NPR list and issue any remaining ELS ADISCs */
+- sentadisc = lpfc_els_disc_adisc(phba);
++ if (vport->fc_flag & FC_NLP_MORE) {
++ lpfc_set_disctmo(vport);
++ /* go thru NPR nodes and issue any remaining ELS ADISCs */
++ sentadisc = lpfc_els_disc_adisc(vport);
+ }
+ return;
+ }
+
+ static void
+-lpfc_rscn_disc(struct lpfc_hba * phba)
++lpfc_rscn_disc(struct lpfc_vport *vport)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ lpfc_can_disctmo(vport);
++
+ /* RSCN discovery */
+- /* go thru NPR list and issue ELS PLOGIs */
+- if (phba->fc_npr_cnt) {
+- if (lpfc_els_disc_plogi(phba))
++ /* go thru NPR nodes and issue ELS PLOGIs */
++ if (vport->fc_npr_cnt)
++ if (lpfc_els_disc_plogi(vport))
+ return;
+- }
+- if (phba->fc_flag & FC_RSCN_MODE) {
++
++ if (vport->fc_flag & FC_RSCN_MODE) {
+ /* Check to see if more RSCNs came in while we were
+ * processing this one.
+ */
+- if ((phba->fc_rscn_id_cnt == 0) &&
+- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_RSCN_MODE;
+- spin_unlock_irq(phba->host->host_lock);
++ if ((vport->fc_rscn_id_cnt == 0) &&
++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
+ } else {
+- lpfc_els_handle_rscn(phba);
++ lpfc_els_handle_rscn(vport);
+ }
+ }
+ }
+
+ static void
+-lpfc_cmpl_els_adisc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+- struct lpfc_sli *psli;
+ struct lpfc_nodelist *ndlp;
+- LPFC_MBOXQ_t *mbox;
+- int disc, rc;
+-
+- psli = &phba->sli;
++ int disc;
+
+ /* we pass cmdiocb to state machine which needs rspiocb as well */
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+@@ -1037,27 +1194,32 @@
+ irsp = &(rspiocb->iocb);
+ ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "ADISC cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ ndlp->nlp_DID);
++
+ /* Since ndlp can be freed in the disc state machine, note if this node
+ * is being used during discovery.
+ */
++ spin_lock_irq(shost->host_lock);
+ disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
+- spin_lock_irq(phba->host->host_lock);
+ ndlp->nlp_flag &= ~(NLP_ADISC_SND | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+
+ /* ADISC completes to NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0104 ADISC completes to NPort x%x "
++ "%d (%d):0104 ADISC completes to NPort x%x "
+ "Data: x%x x%x x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+- irsp->un.ulpWord[4], irsp->ulpTimeout, disc,
+- phba->num_disc_nodes);
++ phba->brd_no, vport->vpi, ndlp->nlp_DID,
++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++ disc, vport->num_disc_nodes);
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
+- spin_lock_irq(phba->host->host_lock);
++ if (lpfc_els_chk_latt(vport)) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ goto out;
+ }
+
+@@ -1066,67 +1228,68 @@
+ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+ /* ELS command is being retried */
+ if (disc) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_set_disctmo(phba);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_set_disctmo(vport);
+ }
+ goto out;
+ }
+ /* ADISC failed */
+ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+- if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
+- ((irsp->un.ulpWord[4] != IOERR_SLI_ABORTED) &&
+- (irsp->un.ulpWord[4] != IOERR_LINK_DOWN) &&
+- (irsp->un.ulpWord[4] != IOERR_SLI_DOWN))) {
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ if (!lpfc_error_lost_link(irsp)) {
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_ADISC);
+ }
+ } else {
+ /* Good status, call state machine */
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_ADISC);
+ }
+
+- if (disc && phba->num_disc_nodes) {
++ if (disc && vport->num_disc_nodes) {
+ /* Check to see if there are more ADISCs to be sent */
+- lpfc_more_adisc(phba);
++ lpfc_more_adisc(vport);
+
+ /* Check to see if we are done with ADISC authentication */
+- if (phba->num_disc_nodes == 0) {
+- lpfc_can_disctmo(phba);
+- /* If we get here, there is nothing left to wait for */
+- if ((phba->hba_state < LPFC_HBA_READY) &&
+- (phba->hba_state != LPFC_CLEAR_LA)) {
+- /* Link up discovery */
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool,
+- GFP_KERNEL))) {
+- phba->hba_state = LPFC_CLEAR_LA;
+- lpfc_clear_la(phba, mbox);
+- mbox->mbox_cmpl =
+- lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox
+- (phba, mbox,
+- (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free(mbox,
+- phba->mbox_mem_pool);
+- lpfc_disc_flush_list(phba);
+- psli->ring[(psli->extra_ring)].
+- flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->fcp_ring)].
+- flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->next_ring)].
+- flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state =
+- LPFC_HBA_READY;
++ if (vport->num_disc_nodes == 0) {
++ /* If we get here, there is nothing left to ADISC */
++ /*
++ * For NPIV, cmpl_reg_vpi will set port_state to READY,
++ * and continue discovery.
++ */
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ !(vport->fc_flag & FC_RSCN_MODE)) {
++ lpfc_issue_reg_vpi(phba, vport);
++ goto out;
++ }
++ /*
++ * For SLI2, we need to set port_state to READY
++ * and continue discovery.
++ */
++ if (vport->port_state < LPFC_VPORT_READY) {
++ /* If we get here, there is nothing to ADISC */
++ if (vport->port_type == LPFC_PHYSICAL_PORT)
++ lpfc_issue_clear_la(phba, vport);
++
++ if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
++ vport->num_disc_nodes = 0;
++ /* go thru NPR list, issue ELS PLOGIs */
++ if (vport->fc_npr_cnt)
++ lpfc_els_disc_plogi(vport);
++
++ if (!vport->num_disc_nodes) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &=
++ ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(
++ shost->host_lock);
++ lpfc_can_disctmo(vport);
+ }
+ }
++ vport->port_state = LPFC_VPORT_READY;
+ } else {
+- lpfc_rscn_disc(phba);
++ lpfc_rscn_disc(vport);
+ }
+ }
+ }
+@@ -1136,22 +1299,21 @@
+ }
+
+ int
+-lpfc_issue_els_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ uint8_t retry)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ ADISC *ap;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+- struct lpfc_sli_ring *pring;
+- struct lpfc_sli *psli;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ uint8_t *pcmd;
+ uint16_t cmdsize;
+
+- psli = &phba->sli;
+- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+-
+- cmdsize = (sizeof (uint32_t) + sizeof (ADISC));
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ cmdsize = (sizeof(uint32_t) + sizeof(ADISC));
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_ADISC);
+ if (!elsiocb)
+ return 1;
+@@ -1161,81 +1323,97 @@
+
+ /* For ADISC request, remainder of payload is service parameters */
+ *((uint32_t *) (pcmd)) = ELS_CMD_ADISC;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* Fill in ADISC payload */
+ ap = (ADISC *) pcmd;
+ ap->hardAL_PA = phba->fc_pref_ALPA;
+- memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+- memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+- ap->DID = be32_to_cpu(phba->fc_myDID);
++ memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++ memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++ ap->DID = be32_to_cpu(vport->fc_myDID);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue ADISC: did:x%x",
++ ndlp->nlp_DID, 0, 0);
+
+ phba->fc_stat.elsXmitADISC++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_ADISC_SND;
++ spin_unlock_irq(shost->host_lock);
+ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_ADISC_SND;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ return 0;
+ }
+
+ static void
+-lpfc_cmpl_els_logo(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ struct lpfc_vport *vport = ndlp->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp;
+ struct lpfc_sli *psli;
+- struct lpfc_nodelist *ndlp;
+
+ psli = &phba->sli;
+ /* we pass cmdiocb to state machine which needs rspiocb as well */
+ cmdiocb->context_un.rsp_iocb = rspiocb;
+
+ irsp = &(rspiocb->iocb);
+- ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_LOGO_SND;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "LOGO cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ ndlp->nlp_DID);
+
+ /* LOGO completes to NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0105 LOGO completes to NPort x%x "
++ "%d (%d):0105 LOGO completes to NPort x%x "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+- irsp->un.ulpWord[4], irsp->ulpTimeout,
+- phba->num_disc_nodes);
++ phba->brd_no, vport->vpi, ndlp->nlp_DID,
++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++ vport->num_disc_nodes);
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba))
++ if (lpfc_els_chk_latt(vport))
++ goto out;
++
++ if (ndlp->nlp_flag & NLP_TARGET_REMOVE) {
++ /* NLP_EVT_DEVICE_RM should unregister the RPI
++ * which should abort all outstanding IOs.
++ */
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++ NLP_EVT_DEVICE_RM);
+ goto out;
++ }
+
+ if (irsp->ulpStatus) {
+ /* Check for retry */
+- if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+ /* ELS command is being retried */
+ goto out;
+- }
+ /* LOGO failed */
+ /* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++ if (lpfc_error_lost_link(irsp))
+ goto out;
+- } else {
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++ else
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ NLP_EVT_CMPL_LOGO);
+- }
+ } else {
+ /* Good status, call state machine.
+ * This will unregister the rpi if needed.
+ */
+- lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO);
++ lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++ NLP_EVT_CMPL_LOGO);
+ }
+
+ out:
+@@ -1244,21 +1422,24 @@
+ }
+
+ int
+-lpfc_issue_els_logo(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ uint8_t retry)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_sli *psli;
+ uint8_t *pcmd;
+ uint16_t cmdsize;
++ int rc;
+
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING];
+
+- cmdsize = (2 * sizeof (uint32_t)) + sizeof (struct lpfc_name);
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ cmdsize = (2 * sizeof(uint32_t)) + sizeof(struct lpfc_name);
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_LOGO);
+ if (!elsiocb)
+ return 1;
+@@ -1266,53 +1447,66 @@
+ icmd = &elsiocb->iocb;
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ *((uint32_t *) (pcmd)) = ELS_CMD_LOGO;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* Fill in LOGO payload */
+- *((uint32_t *) (pcmd)) = be32_to_cpu(phba->fc_myDID);
+- pcmd += sizeof (uint32_t);
+- memcpy(pcmd, &phba->fc_portname, sizeof (struct lpfc_name));
++ *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID);
++ pcmd += sizeof(uint32_t);
++ memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name));
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue LOGO: did:x%x",
++ ndlp->nlp_DID, 0, 0);
+
+ phba->fc_stat.elsXmitLOGO++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_logo;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_LOGO_SND;
+- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ spin_unlock_irq(shost->host_lock);
++ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
++
++ if (rc == IOCB_ERROR) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_LOGO_SND;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ return 0;
+ }
+
+ static void
+-lpfc_cmpl_els_cmd(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
+ IOCB_t *irsp;
+
+ irsp = &rspiocb->iocb;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "ELS cmd cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ irsp->un.elsreq64.remoteID);
++
+ /* ELS cmd tag <ulpIoTag> completes */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0106 ELS cmd tag x%x completes Data: x%x x%x x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0106 ELS cmd tag x%x completes Data: x%x x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi,
+ irsp->ulpIoTag, irsp->ulpStatus,
+ irsp->un.ulpWord[4], irsp->ulpTimeout);
+
+ /* Check to see if link went down during discovery */
+- lpfc_els_chk_latt(phba);
++ lpfc_els_chk_latt(vport);
+ lpfc_els_free_iocb(phba, cmdiocb);
+ return;
+ }
+
+ int
+-lpfc_issue_els_scr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+ struct lpfc_sli_ring *pring;
+@@ -1323,15 +1517,16 @@
+
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+- cmdsize = (sizeof (uint32_t) + sizeof (SCR));
++ cmdsize = (sizeof(uint32_t) + sizeof(SCR));
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ if (!ndlp)
+ return 1;
+
+- lpfc_nlp_init(phba, ndlp, nportid);
++ lpfc_nlp_init(vport, ndlp, nportid);
+
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_SCR);
++
+ if (!elsiocb) {
+ lpfc_nlp_put(ndlp);
+ return 1;
+@@ -1341,29 +1536,31 @@
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_SCR;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* For SCR, remainder of payload is SCR parameter page */
+- memset(pcmd, 0, sizeof (SCR));
++ memset(pcmd, 0, sizeof(SCR));
+ ((SCR *) pcmd)->Function = SCR_FUNC_FULL;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue SCR: did:x%x",
++ ndlp->nlp_DID, 0, 0);
++
+ phba->fc_stat.elsXmitSCR++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
+- spin_lock_irq(phba->host->host_lock);
+ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_nlp_put(ndlp);
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_nlp_put(ndlp);
+ return 0;
+ }
+
+ static int
+-lpfc_issue_els_farpr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *icmd;
+ struct lpfc_iocbq *elsiocb;
+ struct lpfc_sli_ring *pring;
+@@ -1377,13 +1574,14 @@
+
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+- cmdsize = (sizeof (uint32_t) + sizeof (FARP));
++ cmdsize = (sizeof(uint32_t) + sizeof(FARP));
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ if (!ndlp)
+ return 1;
+- lpfc_nlp_init(phba, ndlp, nportid);
+
+- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++ lpfc_nlp_init(vport, ndlp, nportid);
++
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ ndlp->nlp_DID, ELS_CMD_RNID);
+ if (!elsiocb) {
+ lpfc_nlp_put(ndlp);
+@@ -1394,44 +1592,71 @@
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_FARPR;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* Fill in FARPR payload */
+ fp = (FARP *) (pcmd);
+- memset(fp, 0, sizeof (FARP));
++ memset(fp, 0, sizeof(FARP));
+ lp = (uint32_t *) pcmd;
+ *lp++ = be32_to_cpu(nportid);
+- *lp++ = be32_to_cpu(phba->fc_myDID);
++ *lp++ = be32_to_cpu(vport->fc_myDID);
+ fp->Rflags = 0;
+ fp->Mflags = (FARP_MATCH_PORT | FARP_MATCH_NODE);
+
+- memcpy(&fp->RportName, &phba->fc_portname, sizeof (struct lpfc_name));
+- memcpy(&fp->RnodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+- if ((ondlp = lpfc_findnode_did(phba, nportid))) {
++ memcpy(&fp->RportName, &vport->fc_portname, sizeof(struct lpfc_name));
++ memcpy(&fp->RnodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++ ondlp = lpfc_findnode_did(vport, nportid);
++ if (ondlp) {
+ memcpy(&fp->OportName, &ondlp->nlp_portname,
+- sizeof (struct lpfc_name));
++ sizeof(struct lpfc_name));
+ memcpy(&fp->OnodeName, &ondlp->nlp_nodename,
+- sizeof (struct lpfc_name));
++ sizeof(struct lpfc_name));
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue FARPR: did:x%x",
++ ndlp->nlp_DID, 0, 0);
++
+ phba->fc_stat.elsXmitFARPR++;
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
+- spin_lock_irq(phba->host->host_lock);
+ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_nlp_put(ndlp);
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_nlp_put(ndlp);
+ return 0;
+ }
+
++static void
++lpfc_end_rscn(struct lpfc_vport *vport)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (vport->fc_flag & FC_RSCN_MODE) {
++ /*
++ * Check to see if more RSCNs came in while we were
++ * processing this one.
++ */
++ if (vport->fc_rscn_id_cnt ||
++ (vport->fc_flag & FC_RSCN_DISCOVERY) != 0)
++ lpfc_els_handle_rscn(vport);
++ else {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
++ }
++ }
++}
++
+ void
+-lpfc_cancel_retry_delay_tmo(struct lpfc_hba *phba, struct lpfc_nodelist * nlp)
++lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ spin_lock_irq(shost->host_lock);
+ nlp->nlp_flag &= ~NLP_DELAY_TMO;
++ spin_unlock_irq(shost->host_lock);
+ del_timer_sync(&nlp->nlp_delayfunc);
+ nlp->nlp_last_elscmd = 0;
+
+@@ -1439,30 +1664,21 @@
+ list_del_init(&nlp->els_retry_evt.evt_listp);
+
+ if (nlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ nlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+- if (phba->num_disc_nodes) {
++ spin_unlock_irq(shost->host_lock);
++ if (vport->num_disc_nodes) {
+ /* Check to see if there are more
+ * PLOGIs to be sent
+ */
+- lpfc_more_plogi(phba);
++ lpfc_more_plogi(vport);
+
+- if (phba->num_disc_nodes == 0) {
+- phba->fc_flag &= ~FC_NDISC_ACTIVE;
+- lpfc_can_disctmo(phba);
+- if (phba->fc_flag & FC_RSCN_MODE) {
+- /*
+- * Check to see if more RSCNs
+- * came in while we were
+- * processing this one.
+- */
+- if((phba->fc_rscn_id_cnt==0) &&
+- !(phba->fc_flag & FC_RSCN_DISCOVERY)) {
+- phba->fc_flag &= ~FC_RSCN_MODE;
+- }
+- else {
+- lpfc_els_handle_rscn(phba);
+- }
+- }
++ if (vport->num_disc_nodes == 0) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
++ lpfc_end_rscn(vport);
+ }
+ }
+ }
+@@ -1472,18 +1688,19 @@
+ void
+ lpfc_els_retry_delay(unsigned long ptr)
+ {
+- struct lpfc_nodelist *ndlp;
+- struct lpfc_hba *phba;
+- unsigned long iflag;
+- struct lpfc_work_evt *evtp;
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr;
++ struct lpfc_vport *vport = ndlp->vport;
++ struct lpfc_hba *phba = vport->phba;
++ unsigned long flags;
++ struct lpfc_work_evt *evtp = &ndlp->els_retry_evt;
+
+- ndlp = (struct lpfc_nodelist *)ptr;
+- phba = ndlp->nlp_phba;
++ ndlp = (struct lpfc_nodelist *) ptr;
++ phba = ndlp->vport->phba;
+ evtp = &ndlp->els_retry_evt;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, flags);
+ if (!list_empty(&evtp->evt_listp)) {
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
+ return;
+ }
+
+@@ -1491,33 +1708,31 @@
+ evtp->evt = LPFC_EVT_ELS_RETRY;
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
+
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
+ return;
+ }
+
+ void
+ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp)
+ {
+- struct lpfc_hba *phba;
+- uint32_t cmd;
+- uint32_t did;
+- uint8_t retry;
++ struct lpfc_vport *vport = ndlp->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ uint32_t cmd, did, retry;
+
+- phba = ndlp->nlp_phba;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ did = ndlp->nlp_DID;
+ cmd = ndlp->nlp_last_elscmd;
+ ndlp->nlp_last_elscmd = 0;
+
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ return;
+ }
+
+ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ /*
+ * If a discovery event readded nlp_delayfunc after timer
+ * firing and before processing the timer, cancel the
+@@ -1528,57 +1743,54 @@
+
+ switch (cmd) {
+ case ELS_CMD_FLOGI:
+- lpfc_issue_els_flogi(phba, ndlp, retry);
++ lpfc_issue_els_flogi(vport, ndlp, retry);
+ break;
+ case ELS_CMD_PLOGI:
+- if(!lpfc_issue_els_plogi(phba, ndlp->nlp_DID, retry)) {
++ if (!lpfc_issue_els_plogi(vport, ndlp->nlp_DID, retry)) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
+ }
+ break;
+ case ELS_CMD_ADISC:
+- if (!lpfc_issue_els_adisc(phba, ndlp, retry)) {
++ if (!lpfc_issue_els_adisc(vport, ndlp, retry)) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
+ }
+ break;
+ case ELS_CMD_PRLI:
+- if (!lpfc_issue_els_prli(phba, ndlp, retry)) {
++ if (!lpfc_issue_els_prli(vport, ndlp, retry)) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
+ }
+ break;
+ case ELS_CMD_LOGO:
+- if (!lpfc_issue_els_logo(phba, ndlp, retry)) {
++ if (!lpfc_issue_els_logo(vport, ndlp, retry)) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ }
+ break;
++ case ELS_CMD_FDISC:
++ lpfc_issue_els_fdisc(vport, ndlp, retry);
++ break;
+ }
+ return;
+ }
+
+ static int
+-lpfc_els_retry(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- IOCB_t *irsp;
+- struct lpfc_dmabuf *pcmd;
+- struct lpfc_nodelist *ndlp;
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ IOCB_t *irsp = &rspiocb->iocb;
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ uint32_t *elscmd;
+ struct ls_rjt stat;
+- int retry, maxretry;
+- int delay;
+- uint32_t cmd;
++ int retry = 0, maxretry = lpfc_max_els_tries, delay = 0;
++ uint32_t cmd = 0;
+ uint32_t did;
+
+- retry = 0;
+- delay = 0;
+- maxretry = lpfc_max_els_tries;
+- irsp = &rspiocb->iocb;
+- ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+- pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+- cmd = 0;
+
+ /* Note: context2 may be 0 for internal driver abort
+ * of delays ELS command.
+@@ -1594,11 +1806,15 @@
+ else {
+ /* We should only hit this case for retrying PLOGI */
+ did = irsp->un.elsreq64.remoteID;
+- ndlp = lpfc_findnode_did(phba, did);
++ ndlp = lpfc_findnode_did(vport, did);
+ if (!ndlp && (cmd != ELS_CMD_PLOGI))
+ return 1;
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Retry ELS: wd7:x%x wd4:x%x did:x%x",
++ *(((uint32_t *) irsp) + 7), irsp->un.ulpWord[4], ndlp->nlp_DID);
++
+ switch (irsp->ulpStatus) {
+ case IOSTAT_FCP_RSP_ERROR:
+ case IOSTAT_REMOTE_STOP:
+@@ -1607,25 +1823,37 @@
+ case IOSTAT_LOCAL_REJECT:
+ switch ((irsp->un.ulpWord[4] & 0xff)) {
+ case IOERR_LOOP_OPEN_FAILURE:
+- if (cmd == ELS_CMD_PLOGI) {
+- if (cmdiocb->retry == 0) {
+- delay = 1;
+- }
+- }
++ if (cmd == ELS_CMD_PLOGI && cmdiocb->retry == 0)
++ delay = 1000;
+ retry = 1;
+ break;
+
+- case IOERR_SEQUENCE_TIMEOUT:
++ case IOERR_ILLEGAL_COMMAND:
++ if ((phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) &&
++ (cmd == ELS_CMD_FDISC)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0124 FDISC failed (3/6) retrying...\n",
++ phba->brd_no, vport->vpi);
++ lpfc_mbx_unreg_vpi(vport);
+ retry = 1;
++ /* Always retry for this case */
++ cmdiocb->retry = 0;
++ }
+ break;
+
+ case IOERR_NO_RESOURCES:
+- if (cmd == ELS_CMD_PLOGI) {
+- delay = 1;
+- }
++ retry = 1;
++ if (cmdiocb->retry > 100)
++ delay = 100;
++ maxretry = 250;
++ break;
++
++ case IOERR_ILLEGAL_FRAME:
++ delay = 100;
+ retry = 1;
+ break;
+
++ case IOERR_SEQUENCE_TIMEOUT:
+ case IOERR_INVALID_RPI:
+ retry = 1;
+ break;
+@@ -1655,27 +1883,57 @@
+ if (stat.un.b.lsRjtRsnCodeExp ==
+ LSEXP_CMD_IN_PROGRESS) {
+ if (cmd == ELS_CMD_PLOGI) {
+- delay = 1;
++ delay = 1000;
+ maxretry = 48;
+ }
+ retry = 1;
+ break;
+ }
+ if (cmd == ELS_CMD_PLOGI) {
+- delay = 1;
++ delay = 1000;
+ maxretry = lpfc_max_els_tries + 1;
+ retry = 1;
+ break;
+ }
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ (cmd == ELS_CMD_FDISC) &&
++ (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0125 FDISC Failed (x%x)."
++ " Fabric out of resources\n",
++ phba->brd_no, vport->vpi, stat.un.lsRjtError);
++ lpfc_vport_set_state(vport,
++ FC_VPORT_NO_FABRIC_RSCS);
++ }
+ break;
+
+ case LSRJT_LOGICAL_BSY:
+- if (cmd == ELS_CMD_PLOGI) {
+- delay = 1;
++ if ((cmd == ELS_CMD_PLOGI) ||
++ (cmd == ELS_CMD_PRLI)) {
++ delay = 1000;
+ maxretry = 48;
++ } else if (cmd == ELS_CMD_FDISC) {
++ /* Always retry for this case */
++ cmdiocb->retry = 0;
+ }
+ retry = 1;
+ break;
++
++ case LSRJT_LOGICAL_ERR:
++ case LSRJT_PROTOCOL_ERR:
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ (cmd == ELS_CMD_FDISC) &&
++ ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) ||
++ (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID))
++ ) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0123 FDISC Failed (x%x)."
++ " Fabric Detected Bad WWN\n",
++ phba->brd_no, vport->vpi, stat.un.lsRjtError);
++ lpfc_vport_set_state(vport,
++ FC_VPORT_FABRIC_REJ_WWN);
++ }
++ break;
+ }
+ break;
+
+@@ -1695,21 +1953,27 @@
+ retry = 0;
+ }
+
++ if ((vport->load_flag & FC_UNLOADING) != 0)
++ retry = 0;
++
+ if (retry) {
+
+ /* Retry ELS command <elsCmd> to remote NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0107 Retry ELS command x%x to remote "
++ "%d (%d):0107 Retry ELS command x%x to remote "
+ "NPORT x%x Data: x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ cmd, did, cmdiocb->retry, delay);
+
+- if ((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) {
++ if (((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) &&
++ ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
++ ((irsp->un.ulpWord[4] & 0xff) != IOERR_NO_RESOURCES))) {
++ /* Don't reset timer for no resources */
++
+ /* If discovery / RSCN timer is running, reset it */
+- if (timer_pending(&phba->fc_disctmo) ||
+- (phba->fc_flag & FC_RSCN_MODE)) {
+- lpfc_set_disctmo(phba);
+- }
++ if (timer_pending(&vport->fc_disctmo) ||
++ (vport->fc_flag & FC_RSCN_MODE))
++ lpfc_set_disctmo(vport);
+ }
+
+ phba->fc_stat.elsXmitRetry++;
+@@ -1717,50 +1981,62 @@
+ phba->fc_stat.elsDelayRetry++;
+ ndlp->nlp_retry = cmdiocb->retry;
+
+- mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
++ /* delay is specified in milliseconds */
++ mod_timer(&ndlp->nlp_delayfunc,
++ jiffies + msecs_to_jiffies(delay));
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
++ spin_unlock_irq(shost->host_lock);
+
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ if (cmd == ELS_CMD_PRLI)
++ lpfc_nlp_set_state(vport, ndlp,
++ NLP_STE_REG_LOGIN_ISSUE);
++ else
++ lpfc_nlp_set_state(vport, ndlp,
++ NLP_STE_NPR_NODE);
+ ndlp->nlp_last_elscmd = cmd;
+
+ return 1;
+ }
+ switch (cmd) {
+ case ELS_CMD_FLOGI:
+- lpfc_issue_els_flogi(phba, ndlp, cmdiocb->retry);
++ lpfc_issue_els_flogi(vport, ndlp, cmdiocb->retry);
++ return 1;
++ case ELS_CMD_FDISC:
++ lpfc_issue_els_fdisc(vport, ndlp, cmdiocb->retry);
+ return 1;
+ case ELS_CMD_PLOGI:
+ if (ndlp) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp,
++ lpfc_nlp_set_state(vport, ndlp,
+ NLP_STE_PLOGI_ISSUE);
+ }
+- lpfc_issue_els_plogi(phba, did, cmdiocb->retry);
++ lpfc_issue_els_plogi(vport, did, cmdiocb->retry);
+ return 1;
+ case ELS_CMD_ADISC:
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+- lpfc_issue_els_adisc(phba, ndlp, cmdiocb->retry);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++ lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry);
+ return 1;
+ case ELS_CMD_PRLI:
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
+- lpfc_issue_els_prli(phba, ndlp, cmdiocb->retry);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
++ lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry);
+ return 1;
+ case ELS_CMD_LOGO:
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- lpfc_issue_els_logo(phba, ndlp, cmdiocb->retry);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ lpfc_issue_els_logo(vport, ndlp, cmdiocb->retry);
+ return 1;
+ }
+ }
+
+ /* No retry ELS command <elsCmd> to remote NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0108 No retry ELS command x%x to remote NPORT x%x "
+- "Data: x%x\n",
+- phba->brd_no,
++ "%d (%d):0108 No retry ELS command x%x to remote "
++ "NPORT x%x Data: x%x\n",
++ phba->brd_no, vport->vpi,
+ cmd, did, cmdiocb->retry);
+
+ return 0;
+@@ -1795,33 +2071,36 @@
+ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
+ kfree(buf_ptr);
+ }
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, elsiocb);
+- spin_unlock_irq(phba->host->host_lock);
+ return 0;
+ }
+
+ static void
+-lpfc_cmpl_els_logo_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- struct lpfc_nodelist *ndlp;
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ struct lpfc_vport *vport = cmdiocb->vport;
++ IOCB_t *irsp;
+
+- ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ irsp = &rspiocb->iocb;
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "ACC LOGO cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID);
+
+ /* ACC to LOGO completes to NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0109 ACC to LOGO completes to NPort x%x "
++ "%d (%d):0109 ACC to LOGO completes to NPort x%x "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
+- ndlp->nlp_state, ndlp->nlp_rpi);
++ phba->brd_no, vport->vpi, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+ switch (ndlp->nlp_state) {
+ case NLP_STE_UNUSED_NODE: /* node is just allocated */
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case NLP_STE_NPR_NODE: /* NPort Recovery mode */
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ break;
+ default:
+ break;
+@@ -1830,24 +2109,38 @@
+ return;
+ }
+
++void
++lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++
++ pmb->context1 = NULL;
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free(pmb, phba->mbox_mem_pool);
++ lpfc_nlp_put(ndlp);
++ return;
++}
++
+ static void
+-lpfc_cmpl_els_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ struct lpfc_vport *vport = ndlp ? ndlp->vport : NULL;
++ struct Scsi_Host *shost = vport ? lpfc_shost_from_vport(vport) : NULL;
+ IOCB_t *irsp;
+- struct lpfc_nodelist *ndlp;
+ LPFC_MBOXQ_t *mbox = NULL;
+- struct lpfc_dmabuf *mp;
++ struct lpfc_dmabuf *mp = NULL;
+
+ irsp = &rspiocb->iocb;
+
+- ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+ if (cmdiocb->context_un.mbox)
+ mbox = cmdiocb->context_un.mbox;
+
+-
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba) || !ndlp) {
++ if (!ndlp || lpfc_els_chk_latt(vport)) {
+ if (mbox) {
+ mp = (struct lpfc_dmabuf *) mbox->context1;
+ if (mp) {
+@@ -1859,11 +2152,16 @@
+ goto out;
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "ACC cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ irsp->un.rcvels.remoteID);
++
+ /* ELS response tag <ulpIoTag> completes */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0110 ELS response tag x%x completes "
++ "%d (%d):0110 ELS response tag x%x completes "
+ "Data: x%x x%x x%x x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus,
+ rspiocb->iocb.un.ulpWord[4], rspiocb->iocb.ulpTimeout,
+ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
+@@ -1872,11 +2170,19 @@
+ if (mbox) {
+ if ((rspiocb->iocb.ulpStatus == 0)
+ && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) {
+- lpfc_unreg_rpi(phba, ndlp);
+- mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
++ lpfc_unreg_rpi(vport, ndlp);
+ mbox->context2 = lpfc_nlp_get(ndlp);
++ mbox->vport = vport;
++ if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) {
++ mbox->mbox_flag |= LPFC_MBX_IMED_UNREG;
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
++ }
++ else {
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE);
++ lpfc_nlp_set_state(vport, ndlp,
++ NLP_STE_REG_LOGIN_ISSUE);
++ }
+ if (lpfc_sli_issue_mbox(phba, mbox,
+ (MBX_NOWAIT | MBX_STOP_IOCB))
+ != MBX_NOT_FINISHED) {
+@@ -1886,17 +2192,13 @@
+ /* NOTE: we should have messages for unsuccessful
+ reglogin */
+ } else {
+- /* Do not call NO_LIST for lpfc_els_abort'ed ELS cmds */
+- if (!((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN)))) {
+- if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
+- lpfc_drop_node(phba, ndlp);
++ /* Do not drop node for lpfc_els_abort'ed ELS cmds */
++ if (!lpfc_error_lost_link(irsp) &&
++ ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
++ lpfc_drop_node(vport, ndlp);
+ ndlp = NULL;
+ }
+ }
+- }
+ mp = (struct lpfc_dmabuf *) mbox->context1;
+ if (mp) {
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -1906,19 +2208,21 @@
+ }
+ out:
+ if (ndlp) {
+- spin_lock_irq(phba->host->host_lock);
+- ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI);
++ spin_unlock_irq(shost->host_lock);
+ }
+ lpfc_els_free_iocb(phba, cmdiocb);
+ return;
+ }
+
+ int
+-lpfc_els_rsp_acc(struct lpfc_hba * phba, uint32_t flag,
+- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp,
+- LPFC_MBOXQ_t * mbox, uint8_t newnode)
++lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp,
++ LPFC_MBOXQ_t *mbox, uint8_t newnode)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *icmd;
+ IOCB_t *oldcmd;
+ struct lpfc_iocbq *elsiocb;
+@@ -1935,22 +2239,29 @@
+
+ switch (flag) {
+ case ELS_CMD_ACC:
+- cmdsize = sizeof (uint32_t);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ cmdsize = sizeof(uint32_t);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ if (!elsiocb) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_LOGO_ACC;
++ spin_unlock_irq(shost->host_lock);
+ return 1;
+ }
++
+ icmd = &elsiocb->iocb;
+ icmd->ulpContext = oldcmd->ulpContext; /* Xri */
+ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ break;
+ case ELS_CMD_PLOGI:
+- cmdsize = (sizeof (struct serv_parm) + sizeof (uint32_t));
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ cmdsize = (sizeof(struct serv_parm) + sizeof(uint32_t));
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ if (!elsiocb)
+ return 1;
+@@ -1963,12 +2274,16 @@
+ elsiocb->context_un.mbox = mbox;
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint32_t);
+- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++ pcmd += sizeof(uint32_t);
++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC PLOGI: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ break;
+ case ELS_CMD_PRLO:
+- cmdsize = sizeof (uint32_t) + sizeof (PRLO);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++ cmdsize = sizeof(uint32_t) + sizeof(PRLO);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ ndlp, ndlp->nlp_DID, ELS_CMD_PRLO);
+ if (!elsiocb)
+ return 1;
+@@ -1978,10 +2293,14 @@
+ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ memcpy(pcmd, ((struct lpfc_dmabuf *) oldiocb->context2)->virt,
+- sizeof (uint32_t) + sizeof (PRLO));
++ sizeof(uint32_t) + sizeof(PRLO));
+ *((uint32_t *) (pcmd)) = ELS_CMD_PRLO_ACC;
+ els_pkt_ptr = (ELS_PKT *) pcmd;
+ els_pkt_ptr->un.prlo.acceptRspCode = PRLO_REQ_EXECUTED;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC PRLO: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ break;
+ default:
+ return 1;
+@@ -1994,25 +2313,23 @@
+
+ /* Xmit ELS ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0128 Xmit ELS ACC response tag x%x, XRI: x%x, "
++ "%d (%d):0128 Xmit ELS ACC response tag x%x, XRI: x%x, "
+ "DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ phba->brd_no, vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+ if (ndlp->nlp_flag & NLP_LOGO_ACC) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_LOGO_ACC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc;
+ } else {
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ }
+
+ phba->fc_stat.elsXmitACC++;
+- spin_lock_irq(phba->host->host_lock);
+ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -2021,9 +2338,11 @@
+ }
+
+ int
+-lpfc_els_rsp_reject(struct lpfc_hba * phba, uint32_t rejectError,
+- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError,
++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp,
++ LPFC_MBOXQ_t *mbox)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *icmd;
+ IOCB_t *oldcmd;
+ struct lpfc_iocbq *elsiocb;
+@@ -2036,9 +2355,9 @@
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+
+- cmdsize = 2 * sizeof (uint32_t);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+- ndlp, ndlp->nlp_DID, ELS_CMD_LS_RJT);
++ cmdsize = 2 * sizeof(uint32_t);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++ ndlp->nlp_DID, ELS_CMD_LS_RJT);
+ if (!elsiocb)
+ return 1;
+
+@@ -2048,22 +2367,30 @@
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_LS_RJT;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+ *((uint32_t *) (pcmd)) = rejectError;
+
++ if (mbox) {
++ elsiocb->context_un.mbox = mbox;
++ elsiocb->context1 = lpfc_nlp_get(ndlp);
++ }
++
+ /* Xmit ELS RJT <err> response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0129 Xmit ELS RJT x%x response tag x%x xri x%x, "
+- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+- phba->brd_no, rejectError, elsiocb->iotag,
++ "%d (%d):0129 Xmit ELS RJT x%x response tag x%x "
++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++ "rpi x%x\n",
++ phba->brd_no, vport->vpi, rejectError, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue LS_RJT: did:x%x flg:x%x err:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, rejectError);
++
+ phba->fc_stat.elsXmitLSRJT++;
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
+- spin_lock_irq(phba->host->host_lock);
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -2072,25 +2399,22 @@
+ }
+
+ int
+-lpfc_els_rsp_adisc_acc(struct lpfc_hba * phba,
+- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
++ struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ ADISC *ap;
+- IOCB_t *icmd;
+- IOCB_t *oldcmd;
++ IOCB_t *icmd, *oldcmd;
+ struct lpfc_iocbq *elsiocb;
+- struct lpfc_sli_ring *pring;
+- struct lpfc_sli *psli;
+ uint8_t *pcmd;
+ uint16_t cmdsize;
+ int rc;
+
+- psli = &phba->sli;
+- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+-
+- cmdsize = sizeof (uint32_t) + sizeof (ADISC);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+- ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
++ cmdsize = sizeof(uint32_t) + sizeof(ADISC);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++ ndlp->nlp_DID, ELS_CMD_ACC);
+ if (!elsiocb)
+ return 1;
+
+@@ -2100,28 +2424,30 @@
+
+ /* Xmit ADISC ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0130 Xmit ADISC ACC response iotag x%x xri: "
++ "%d (%d):0130 Xmit ADISC ACC response iotag x%x xri: "
+ "x%x, did x%x, nlp_flag x%x, nlp_state x%x rpi x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ phba->brd_no, vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ ap = (ADISC *) (pcmd);
+ ap->hardAL_PA = phba->fc_pref_ALPA;
+- memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+- memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+- ap->DID = be32_to_cpu(phba->fc_myDID);
++ memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++ memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++ ap->DID = be32_to_cpu(vport->fc_myDID);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC ADISC: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
+
+ phba->fc_stat.elsXmitACC++;
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
+- spin_lock_irq(phba->host->host_lock);
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -2130,9 +2456,10 @@
+ }
+
+ int
+-lpfc_els_rsp_prli_acc(struct lpfc_hba *phba, struct lpfc_iocbq *oldiocb,
++lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
+ struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ PRLI *npr;
+ lpfc_vpd_t *vpd;
+ IOCB_t *icmd;
+@@ -2147,8 +2474,8 @@
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
+
+- cmdsize = sizeof (uint32_t) + sizeof (PRLI);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ndlp,
++ cmdsize = sizeof(uint32_t) + sizeof(PRLI);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
+ ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)));
+ if (!elsiocb)
+ return 1;
+@@ -2159,19 +2486,19 @@
+
+ /* Xmit PRLI ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0131 Xmit PRLI ACC response tag x%x xri x%x, "
++ "%d (%d):0131 Xmit PRLI ACC response tag x%x xri x%x, "
+ "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ phba->brd_no, vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK));
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+ /* For PRLI, remainder of payload is PRLI parameter page */
+- memset(pcmd, 0, sizeof (PRLI));
++ memset(pcmd, 0, sizeof(PRLI));
+
+ npr = (PRLI *) pcmd;
+ vpd = &phba->vpd;
+@@ -2193,12 +2520,14 @@
+ npr->prliType = PRLI_FCP_TYPE;
+ npr->initiatorFunc = 1;
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC PRLI: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
+ phba->fc_stat.elsXmitACC++;
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+
+- spin_lock_irq(phba->host->host_lock);
+ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -2207,12 +2536,12 @@
+ }
+
+ static int
+-lpfc_els_rsp_rnid_acc(struct lpfc_hba *phba, uint8_t format,
++lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format,
+ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ RNID *rn;
+- IOCB_t *icmd;
+- IOCB_t *oldcmd;
++ IOCB_t *icmd, *oldcmd;
+ struct lpfc_iocbq *elsiocb;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_sli *psli;
+@@ -2223,13 +2552,13 @@
+ psli = &phba->sli;
+ pring = &psli->ring[LPFC_ELS_RING];
+
+- cmdsize = sizeof (uint32_t) + sizeof (uint32_t)
+- + (2 * sizeof (struct lpfc_name));
++ cmdsize = sizeof(uint32_t) + sizeof(uint32_t)
++ + (2 * sizeof(struct lpfc_name));
+ if (format)
+- cmdsize += sizeof (RNID_TOP_DISC);
++ cmdsize += sizeof(RNID_TOP_DISC);
+
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+- ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++ ndlp->nlp_DID, ELS_CMD_ACC);
+ if (!elsiocb)
+ return 1;
+
+@@ -2239,30 +2568,30 @@
+
+ /* Xmit RNID ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0132 Xmit RNID ACC response tag x%x "
++ "%d (%d):0132 Xmit RNID ACC response tag x%x "
+ "xri x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ phba->brd_no, vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext);
+
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint32_t);
++ pcmd += sizeof(uint32_t);
+
+- memset(pcmd, 0, sizeof (RNID));
++ memset(pcmd, 0, sizeof(RNID));
+ rn = (RNID *) (pcmd);
+ rn->Format = format;
+- rn->CommonLen = (2 * sizeof (struct lpfc_name));
+- memcpy(&rn->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+- memcpy(&rn->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++ rn->CommonLen = (2 * sizeof(struct lpfc_name));
++ memcpy(&rn->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++ memcpy(&rn->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
+ switch (format) {
+ case 0:
+ rn->SpecificLen = 0;
+ break;
+ case RNID_TOPOLOGY_DISC:
+- rn->SpecificLen = sizeof (RNID_TOP_DISC);
++ rn->SpecificLen = sizeof(RNID_TOP_DISC);
+ memcpy(&rn->un.topologyDisc.portName,
+- &phba->fc_portname, sizeof (struct lpfc_name));
++ &vport->fc_portname, sizeof(struct lpfc_name));
+ rn->un.topologyDisc.unitType = RNID_HBA;
+ rn->un.topologyDisc.physPort = 0;
+ rn->un.topologyDisc.attachedNodes = 0;
+@@ -2273,15 +2602,17 @@
+ break;
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++ "Issue ACC RNID: did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
+ phba->fc_stat.elsXmitACC++;
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ lpfc_nlp_put(ndlp);
+ elsiocb->context1 = NULL; /* Don't need ndlp for cmpl,
+ * it could be freed */
+
+- spin_lock_irq(phba->host->host_lock);
+ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+- spin_unlock_irq(phba->host->host_lock);
+ if (rc == IOCB_ERROR) {
+ lpfc_els_free_iocb(phba, elsiocb);
+ return 1;
+@@ -2290,168 +2621,153 @@
+ }
+
+ int
+-lpfc_els_disc_adisc(struct lpfc_hba *phba)
++lpfc_els_disc_adisc(struct lpfc_vport *vport)
+ {
+- int sentadisc;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp, *next_ndlp;
++ int sentadisc = 0;
+
+- sentadisc = 0;
+ /* go thru NPR nodes and issue any remaining ELS ADISCs */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+ (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+ (ndlp->nlp_flag & NLP_NPR_ADISC) != 0) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+- lpfc_issue_els_adisc(phba, ndlp, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++ lpfc_issue_els_adisc(vport, ndlp, 0);
+ sentadisc++;
+- phba->num_disc_nodes++;
+- if (phba->num_disc_nodes >=
+- phba->cfg_discovery_threads) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_NLP_MORE;
+- spin_unlock_irq(phba->host->host_lock);
++ vport->num_disc_nodes++;
++ if (vport->num_disc_nodes >=
++ vport->phba->cfg_discovery_threads) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_NLP_MORE;
++ spin_unlock_irq(shost->host_lock);
+ break;
+ }
+ }
+ }
+ if (sentadisc == 0) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_NLP_MORE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NLP_MORE;
++ spin_unlock_irq(shost->host_lock);
+ }
+ return sentadisc;
+ }
+
+ int
+-lpfc_els_disc_plogi(struct lpfc_hba * phba)
++lpfc_els_disc_plogi(struct lpfc_vport *vport)
+ {
+- int sentplogi;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp, *next_ndlp;
++ int sentplogi = 0;
+
+- sentplogi = 0;
+- /* go thru NPR list and issue any remaining ELS PLOGIs */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
++ /* go thru NPR nodes and issue any remaining ELS PLOGIs */
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+ (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+ (ndlp->nlp_flag & NLP_DELAY_TMO) == 0 &&
+ (ndlp->nlp_flag & NLP_NPR_ADISC) == 0) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ sentplogi++;
+- phba->num_disc_nodes++;
+- if (phba->num_disc_nodes >=
+- phba->cfg_discovery_threads) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_NLP_MORE;
+- spin_unlock_irq(phba->host->host_lock);
++ vport->num_disc_nodes++;
++ if (vport->num_disc_nodes >=
++ vport->phba->cfg_discovery_threads) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_NLP_MORE;
++ spin_unlock_irq(shost->host_lock);
+ break;
+ }
+ }
+ }
+ if (sentplogi == 0) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_NLP_MORE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NLP_MORE;
++ spin_unlock_irq(shost->host_lock);
+ }
+ return sentplogi;
+ }
+
+-int
+-lpfc_els_flush_rscn(struct lpfc_hba * phba)
++void
++lpfc_els_flush_rscn(struct lpfc_vport *vport)
+ {
+- struct lpfc_dmabuf *mp;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ int i;
+
+- for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
+- mp = phba->fc_rscn_id_list[i];
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- phba->fc_rscn_id_list[i] = NULL;
+- }
+- phba->fc_rscn_id_cnt = 0;
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_can_disctmo(phba);
+- return 0;
++ for (i = 0; i < vport->fc_rscn_id_cnt; i++) {
++ lpfc_in_buf_free(phba, vport->fc_rscn_id_list[i]);
++ vport->fc_rscn_id_list[i] = NULL;
++ }
++ spin_lock_irq(shost->host_lock);
++ vport->fc_rscn_id_cnt = 0;
++ vport->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
+ }
+
+ int
+-lpfc_rscn_payload_check(struct lpfc_hba * phba, uint32_t did)
++lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did)
+ {
+ D_ID ns_did;
+ D_ID rscn_did;
+- struct lpfc_dmabuf *mp;
+ uint32_t *lp;
+- uint32_t payload_len, cmd, i, match;
++ uint32_t payload_len, i;
++ struct lpfc_hba *phba = vport->phba;
+
+ ns_did.un.word = did;
+- match = 0;
+
+ /* Never match fabric nodes for RSCNs */
+ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK)
+- return(0);
++ return 0;
+
+ /* If we are doing a FULL RSCN rediscovery, match everything */
+- if (phba->fc_flag & FC_RSCN_DISCOVERY) {
++ if (vport->fc_flag & FC_RSCN_DISCOVERY)
+ return did;
+- }
+
+- for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
+- mp = phba->fc_rscn_id_list[i];
+- lp = (uint32_t *) mp->virt;
+- cmd = *lp++;
+- payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */
+- payload_len -= sizeof (uint32_t); /* take off word 0 */
++ for (i = 0; i < vport->fc_rscn_id_cnt; i++) {
++ lp = vport->fc_rscn_id_list[i]->virt;
++ payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK);
++ payload_len -= sizeof(uint32_t); /* take off word 0 */
+ while (payload_len) {
+- rscn_did.un.word = *lp++;
+- rscn_did.un.word = be32_to_cpu(rscn_did.un.word);
+- payload_len -= sizeof (uint32_t);
++ rscn_did.un.word = be32_to_cpu(*lp++);
++ payload_len -= sizeof(uint32_t);
+ switch (rscn_did.un.b.resv) {
+ case 0: /* Single N_Port ID effected */
+- if (ns_did.un.word == rscn_did.un.word) {
+- match = did;
+- }
++ if (ns_did.un.word == rscn_did.un.word)
++ return did;
+ break;
+ case 1: /* Whole N_Port Area effected */
+ if ((ns_did.un.b.domain == rscn_did.un.b.domain)
+ && (ns_did.un.b.area == rscn_did.un.b.area))
+- {
+- match = did;
+- }
++ return did;
+ break;
+ case 2: /* Whole N_Port Domain effected */
+ if (ns_did.un.b.domain == rscn_did.un.b.domain)
+- {
+- match = did;
+- }
+- break;
+- case 3: /* Whole Fabric effected */
+- match = did;
++ return did;
+ break;
+ default:
+- /* Unknown Identifier in RSCN list */
++ /* Unknown Identifier in RSCN node */
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0217 Unknown Identifier in "
+- "RSCN payload Data: x%x\n",
+- phba->brd_no, rscn_did.un.word);
+- break;
+- }
+- if (match) {
+- break;
++ "%d (%d):0217 Unknown "
++ "Identifier in RSCN payload "
++ "Data: x%x\n",
++ phba->brd_no, vport->vpi,
++ rscn_did.un.word);
++ case 3: /* Whole Fabric effected */
++ return did;
+ }
+ }
+ }
+- return match;
++ return 0;
+ }
+
+ static int
+-lpfc_rscn_recovery_check(struct lpfc_hba *phba)
++lpfc_rscn_recovery_check(struct lpfc_vport *vport)
+ {
+ struct lpfc_nodelist *ndlp = NULL;
+
+@@ -2459,12 +2775,12 @@
+ * them to NPR state.
+ */
+
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE ||
+- lpfc_rscn_payload_check(phba, ndlp->nlp_DID) == 0)
++ lpfc_rscn_payload_check(vport, ndlp->nlp_DID) == 0)
+ continue;
+
+- lpfc_disc_state_machine(phba, ndlp, NULL,
++ lpfc_disc_state_machine(vport, ndlp, NULL,
+ NLP_EVT_DEVICE_RECOVERY);
+
+ /*
+@@ -2472,175 +2788,248 @@
+ * recovery event.
+ */
+ if (ndlp->nlp_flag & NLP_DELAY_TMO)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ }
+
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_rscn(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb,
+- struct lpfc_nodelist * ndlp, uint8_t newnode)
++lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp, uint8_t newnode)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *pcmd;
+- uint32_t *lp;
++ struct lpfc_vport *next_vport;
++ uint32_t *lp, *datap;
+ IOCB_t *icmd;
+- uint32_t payload_len, cmd;
++ uint32_t payload_len, length, nportid, *cmd;
++ int rscn_cnt = vport->fc_rscn_id_cnt;
++ int rscn_id = 0, hba_id = 0;
+ int i;
+
+ icmd = &cmdiocb->iocb;
+ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ lp = (uint32_t *) pcmd->virt;
+
+- cmd = *lp++;
+- payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */
+- payload_len -= sizeof (uint32_t); /* take off word 0 */
+- cmd &= ELS_CMD_MASK;
++ payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK);
++ payload_len -= sizeof(uint32_t); /* take off word 0 */
+
+ /* RSCN received */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0214 RSCN received Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
+- phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0214 RSCN received Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi, vport->fc_flag, payload_len,
++ *lp, rscn_cnt);
+
+ for (i = 0; i < payload_len/sizeof(uint32_t); i++)
+- fc_host_post_event(phba->host, fc_get_event_number(),
++ fc_host_post_event(shost, fc_get_event_number(),
+ FCH_EVT_RSCN, lp[i]);
+
+ /* If we are about to begin discovery, just ACC the RSCN.
+ * Discovery processing will satisfy it.
+ */
+- if (phba->hba_state <= LPFC_NS_QRY) {
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++ if (vport->port_state <= LPFC_NS_QRY) {
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RSCN ignore: did:x%x/ste:x%x flg:x%x",
++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
+ newnode);
+ return 0;
+ }
+
++ /* If this RSCN just contains NPortIDs for other vports on this HBA,
++ * just ACC and ignore it.
++ */
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ !(phba->cfg_peer_port_login)) {
++ i = payload_len;
++ datap = lp;
++ while (i > 0) {
++ nportid = *datap++;
++ nportid = ((be32_to_cpu(nportid)) & Mask_DID);
++ i -= sizeof(uint32_t);
++ rscn_id++;
++ list_for_each_entry(next_vport, &phba->port_list,
++ listentry) {
++ if (nportid == next_vport->fc_myDID) {
++ hba_id++;
++ break;
++ }
++ }
++ }
++ if (rscn_id == hba_id) {
++ /* ALL NPortIDs in RSCN are on HBA */
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0214 Ignore RSCN Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi, vport->fc_flag, payload_len,
++ *lp, rscn_cnt);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RSCN vport: did:x%x/ste:x%x flg:x%x",
++ ndlp->nlp_DID, vport->port_state,
++ ndlp->nlp_flag);
++
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb,
++ ndlp, NULL, newnode);
++ return 0;
++ }
++ }
++
+ /* If we are already processing an RSCN, save the received
+ * RSCN payload buffer, cmdiocb->context2 to process later.
+ */
+- if (phba->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
+- if ((phba->fc_rscn_id_cnt < FC_MAX_HOLD_RSCN) &&
+- !(phba->fc_flag & FC_RSCN_DISCOVERY)) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_RSCN_MODE;
+- spin_unlock_irq(phba->host->host_lock);
+- phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
+-
++ if (vport->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RSCN defer: did:x%x/ste:x%x flg:x%x",
++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
++ vport->fc_flag |= FC_RSCN_DEFERRED;
++ if ((rscn_cnt < FC_MAX_HOLD_RSCN) &&
++ !(vport->fc_flag & FC_RSCN_DISCOVERY)) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
++ if (rscn_cnt) {
++ cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt;
++ length = be32_to_cpu(*cmd & ~ELS_CMD_MASK);
++ }
++ if ((rscn_cnt) &&
++ (payload_len + length <= LPFC_BPL_SIZE)) {
++ *cmd &= ELS_CMD_MASK;
++ *cmd |= be32_to_cpu(payload_len + length);
++ memcpy(((uint8_t *)cmd) + length, lp,
++ payload_len);
++ } else {
++ vport->fc_rscn_id_list[rscn_cnt] = pcmd;
++ vport->fc_rscn_id_cnt++;
+ /* If we zero, cmdiocb->context2, the calling
+ * routine will not try to free it.
+ */
+ cmdiocb->context2 = NULL;
++ }
+
+ /* Deferred RSCN */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0235 Deferred RSCN "
++ "%d (%d):0235 Deferred RSCN "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->fc_rscn_id_cnt,
+- phba->fc_flag, phba->hba_state);
++ phba->brd_no, vport->vpi,
++ vport->fc_rscn_id_cnt, vport->fc_flag,
++ vport->port_state);
+ } else {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_RSCN_DISCOVERY;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_RSCN_DISCOVERY;
++ spin_unlock_irq(shost->host_lock);
+ /* ReDiscovery RSCN */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0234 ReDiscovery RSCN "
++ "%d (%d):0234 ReDiscovery RSCN "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->fc_rscn_id_cnt,
+- phba->fc_flag, phba->hba_state);
++ phba->brd_no, vport->vpi,
++ vport->fc_rscn_id_cnt, vport->fc_flag,
++ vport->port_state);
+ }
+ /* Send back ACC */
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
+ newnode);
+
+ /* send RECOVERY event for ALL nodes that match RSCN payload */
+- lpfc_rscn_recovery_check(phba);
++ lpfc_rscn_recovery_check(vport);
++ vport->fc_flag &= ~FC_RSCN_DEFERRED;
+ return 0;
+ }
+
+- phba->fc_flag |= FC_RSCN_MODE;
+- phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RSCN: did:x%x/ste:x%x flg:x%x",
++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
++ vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd;
+ /*
+ * If we zero, cmdiocb->context2, the calling routine will
+ * not try to free it.
+ */
+ cmdiocb->context2 = NULL;
+
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
+
+ /* Send back ACC */
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode);
+
+ /* send RECOVERY event for ALL nodes that match RSCN payload */
+- lpfc_rscn_recovery_check(phba);
++ lpfc_rscn_recovery_check(vport);
+
+- return lpfc_els_handle_rscn(phba);
++ return lpfc_els_handle_rscn(vport);
+ }
+
+ int
+-lpfc_els_handle_rscn(struct lpfc_hba * phba)
++lpfc_els_handle_rscn(struct lpfc_vport *vport)
+ {
+ struct lpfc_nodelist *ndlp;
++ struct lpfc_hba *phba = vport->phba;
++
++ /* Ignore RSCN if the port is being torn down. */
++ if (vport->load_flag & FC_UNLOADING) {
++ lpfc_els_flush_rscn(vport);
++ return 0;
++ }
+
+ /* Start timer for RSCN processing */
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
+
+ /* RSCN processed */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0215 RSCN processed Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
+- phba->fc_flag, 0, phba->fc_rscn_id_cnt,
+- phba->hba_state);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0215 RSCN processed Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
++ vport->fc_flag, 0, vport->fc_rscn_id_cnt,
++ vport->port_state);
+
+ /* To process RSCN, first compare RSCN data with NameServer */
+- phba->fc_ns_retry = 0;
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
++ vport->fc_ns_retry = 0;
++ ndlp = lpfc_findnode_did(vport, NameServer_DID);
+ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+ /* Good ndlp, issue CT Request to NameServer */
+- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == 0) {
++ if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0)
+ /* Wait for NameServer query cmpl before we can
+ continue */
+ return 1;
+- }
+ } else {
+ /* If login to NameServer does not exist, issue one */
+ /* Good status, issue PLOGI to NameServer */
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
+- if (ndlp) {
++ ndlp = lpfc_findnode_did(vport, NameServer_DID);
++ if (ndlp)
+ /* Wait for NameServer login cmpl before we can
+ continue */
+ return 1;
+- }
++
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ if (!ndlp) {
+- lpfc_els_flush_rscn(phba);
++ lpfc_els_flush_rscn(vport);
+ return 0;
+ } else {
+- lpfc_nlp_init(phba, ndlp, NameServer_DID);
++ lpfc_nlp_init(vport, ndlp, NameServer_DID);
+ ndlp->nlp_type |= NLP_FABRIC;
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, NameServer_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_issue_els_plogi(vport, NameServer_DID, 0);
+ /* Wait for NameServer login cmpl before we can
+ continue */
+ return 1;
+ }
+ }
+
+- lpfc_els_flush_rscn(phba);
++ lpfc_els_flush_rscn(vport);
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_flogi(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb,
+- struct lpfc_nodelist * ndlp, uint8_t newnode)
++lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp, uint8_t newnode)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ uint32_t *lp = (uint32_t *) pcmd->virt;
+ IOCB_t *icmd = &cmdiocb->iocb;
+@@ -2655,7 +3044,7 @@
+
+ /* FLOGI received */
+
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
+
+ if (phba->fc_topology == TOPOLOGY_LOOP) {
+ /* We should never receive a FLOGI in loop mode, ignore it */
+@@ -2664,33 +3053,34 @@
+ /* An FLOGI ELS command <elsCmd> was received from DID <did> in
+ Loop Mode */
+ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+- "%d:0113 An FLOGI ELS command x%x was received "
+- "from DID x%x in Loop Mode\n",
+- phba->brd_no, cmd, did);
++ "%d (%d):0113 An FLOGI ELS command x%x was "
++ "received from DID x%x in Loop Mode\n",
++ phba->brd_no, vport->vpi, cmd, did);
+ return 1;
+ }
+
+ did = Fabric_DID;
+
+- if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) {
++ if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3))) {
+ /* For a FLOGI we accept, then if our portname is greater
+ * then the remote portname we initiate Nport login.
+ */
+
+- rc = memcmp(&phba->fc_portname, &sp->portName,
+- sizeof (struct lpfc_name));
++ rc = memcmp(&vport->fc_portname, &sp->portName,
++ sizeof(struct lpfc_name));
+
+ if (!rc) {
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool,
+- GFP_KERNEL)) == 0) {
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox)
+ return 1;
+- }
++
+ lpfc_linkdown(phba);
+ lpfc_init_link(phba, mbox,
+ phba->cfg_topology,
+ phba->cfg_link_speed);
+ mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ mbox->vport = vport;
+ rc = lpfc_sli_issue_mbox
+ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
+ lpfc_set_loopback_flag(phba);
+@@ -2699,31 +3089,34 @@
+ }
+ return 1;
+ } else if (rc > 0) { /* greater than */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_PT2PT_PLOGI;
+- spin_unlock_irq(phba->host->host_lock);
+- }
+- phba->fc_flag |= FC_PT2PT;
+- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PT2PT_PLOGI;
++ spin_unlock_irq(shost->host_lock);
++ }
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PT2PT;
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
+ } else {
+ /* Reject this request because invalid parameters */
+ stat.un.b.lsRjtRsvd0 = 0;
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ return 1;
+ }
+
+ /* Send back ACC */
+- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode);
+
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_rnid(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rnid(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp)
+ {
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+@@ -2746,7 +3139,7 @@
+ case 0:
+ case RNID_TOPOLOGY_DISC:
+ /* Send back ACC */
+- lpfc_els_rsp_rnid_acc(phba, rn->Format, cmdiocb, ndlp);
++ lpfc_els_rsp_rnid_acc(vport, rn->Format, cmdiocb, ndlp);
+ break;
+ default:
+ /* Reject this request because format not supported */
+@@ -2754,13 +3147,14 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ }
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_lirr(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++lpfc_els_rcv_lirr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_nodelist *ndlp)
+ {
+ struct ls_rjt stat;
+@@ -2770,15 +3164,15 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ return 0;
+ }
+
+ static void
+ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ MAILBOX_t *mb;
+ IOCB_t *icmd;
+ RPS_RSP *rps_rsp;
+@@ -2788,8 +3182,6 @@
+ uint16_t xri, status;
+ uint32_t cmdsize;
+
+- psli = &phba->sli;
+- pring = &psli->ring[LPFC_ELS_RING];
+ mb = &pmb->mb;
+
+ ndlp = (struct lpfc_nodelist *) pmb->context2;
+@@ -2804,7 +3196,8 @@
+
+ cmdsize = sizeof(RPS_RSP) + sizeof(uint32_t);
+ mempool_free(pmb, phba->mbox_mem_pool);
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, lpfc_max_els_tries, ndlp,
++ elsiocb = lpfc_prep_els_iocb(phba->pport, 0, cmdsize,
++ lpfc_max_els_tries, ndlp,
+ ndlp->nlp_DID, ELS_CMD_ACC);
+ lpfc_nlp_put(ndlp);
+ if (!elsiocb)
+@@ -2815,14 +3208,14 @@
+
+ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint32_t); /* Skip past command */
++ pcmd += sizeof(uint32_t); /* Skip past command */
+ rps_rsp = (RPS_RSP *)pcmd;
+
+ if (phba->fc_topology != TOPOLOGY_LOOP)
+ status = 0x10;
+ else
+ status = 0x8;
+- if (phba->fc_flag & FC_FABRIC)
++ if (phba->pport->fc_flag & FC_FABRIC)
+ status |= 0x4;
+
+ rps_rsp->rsvd1 = 0;
+@@ -2836,25 +3229,25 @@
+
+ /* Xmit ELS RPS ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0118 Xmit ELS RPS ACC response tag x%x xri x%x, "
+- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ "%d (%d):0118 Xmit ELS RPS ACC response tag x%x "
++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++ "rpi x%x\n",
++ phba->brd_no, ndlp->vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ phba->fc_stat.elsXmitACC++;
+-
+- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR)
+ lpfc_els_free_iocb(phba, elsiocb);
+- }
+ return;
+ }
+
+ static int
+-lpfc_els_rcv_rps(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t *lp;
+ uint8_t flag;
+ LPFC_MBOXQ_t *mbox;
+@@ -2868,7 +3261,8 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ }
+
+ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+@@ -2878,19 +3272,24 @@
+
+ if ((flag == 0) ||
+ ((flag == 1) && (be32_to_cpu(rps->un.portNum) == 0)) ||
+- ((flag == 2) && (memcmp(&rps->un.portName, &phba->fc_portname,
+- sizeof (struct lpfc_name)) == 0))) {
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++ ((flag == 2) && (memcmp(&rps->un.portName, &vport->fc_portname,
++ sizeof(struct lpfc_name)) == 0))) {
++
++ printk("Fix me....\n");
++ dump_stack();
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
++ if (mbox) {
+ lpfc_read_lnk_stat(phba, mbox);
+ mbox->context1 =
+- (void *)((unsigned long)cmdiocb->iocb.ulpContext);
++ (void *)((unsigned long) cmdiocb->iocb.ulpContext);
+ mbox->context2 = lpfc_nlp_get(ndlp);
++ mbox->vport = vport;
+ mbox->mbox_cmpl = lpfc_els_rsp_rps_acc;
+ if (lpfc_sli_issue_mbox (phba, mbox,
+- (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) {
++ (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED)
+ /* Mbox completion will send ELS Response */
+ return 0;
+- }
++
+ lpfc_nlp_put(ndlp);
+ mempool_free(mbox, phba->mbox_mem_pool);
+ }
+@@ -2899,27 +3298,25 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ return 0;
+ }
+
+ static int
+-lpfc_els_rsp_rpl_acc(struct lpfc_hba * phba, uint16_t cmdsize,
+- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize,
++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp)
+ {
+- IOCB_t *icmd;
+- IOCB_t *oldcmd;
++ struct lpfc_hba *phba = vport->phba;
++ IOCB_t *icmd, *oldcmd;
+ RPL_RSP rpl_rsp;
+ struct lpfc_iocbq *elsiocb;
+- struct lpfc_sli_ring *pring;
+- struct lpfc_sli *psli;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ uint8_t *pcmd;
+
+- psli = &phba->sli;
+- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */
++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++ ndlp->nlp_DID, ELS_CMD_ACC);
+
+- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+- ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ if (!elsiocb)
+ return 1;
+
+@@ -2929,7 +3326,7 @@
+
+ pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ *((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+- pcmd += sizeof (uint16_t);
++ pcmd += sizeof(uint16_t);
+ *((uint16_t *)(pcmd)) = be16_to_cpu(cmdsize);
+ pcmd += sizeof(uint16_t);
+
+@@ -2937,8 +3334,8 @@
+ rpl_rsp.listLen = be32_to_cpu(1);
+ rpl_rsp.index = 0;
+ rpl_rsp.port_num_blk.portNum = 0;
+- rpl_rsp.port_num_blk.portID = be32_to_cpu(phba->fc_myDID);
+- memcpy(&rpl_rsp.port_num_blk.portName, &phba->fc_portname,
++ rpl_rsp.port_num_blk.portID = be32_to_cpu(vport->fc_myDID);
++ memcpy(&rpl_rsp.port_num_blk.portName, &vport->fc_portname,
+ sizeof(struct lpfc_name));
+
+ memcpy(pcmd, &rpl_rsp, cmdsize - sizeof(uint32_t));
+@@ -2946,13 +3343,14 @@
+
+ /* Xmit ELS RPL ACC response tag <ulpIoTag> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0120 Xmit ELS RPL ACC response tag x%x xri x%x, "
+- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+- phba->brd_no, elsiocb->iotag,
++ "%d (%d):0120 Xmit ELS RPL ACC response tag x%x "
++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++ "rpi x%x\n",
++ phba->brd_no, vport->vpi, elsiocb->iotag,
+ elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+
+ phba->fc_stat.elsXmitACC++;
+ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+@@ -2963,8 +3361,8 @@
+ }
+
+ static int
+-lpfc_els_rcv_rpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rpl(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp)
+ {
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+@@ -2979,7 +3377,8 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ }
+
+ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+@@ -2996,15 +3395,16 @@
+ } else {
+ cmdsize = sizeof(uint32_t) + maxsize * sizeof(uint32_t);
+ }
+- lpfc_els_rsp_rpl_acc(phba, cmdsize, cmdiocb, ndlp);
++ lpfc_els_rsp_rpl_acc(vport, cmdsize, cmdiocb, ndlp);
+
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_farp(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_farp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+ IOCB_t *icmd;
+@@ -3020,11 +3420,9 @@
+ fp = (FARP *) lp;
+
+ /* FARP-REQ received from DID <did> */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0601 FARP-REQ received from DID x%x\n",
+- phba->brd_no, did);
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0601 FARP-REQ received from DID x%x\n",
++ phba->brd_no, vport->vpi, did);
+
+ /* We will only support match on WWPN or WWNN */
+ if (fp->Mflags & ~(FARP_MATCH_NODE | FARP_MATCH_PORT)) {
+@@ -3034,15 +3432,15 @@
+ cnt = 0;
+ /* If this FARP command is searching for my portname */
+ if (fp->Mflags & FARP_MATCH_PORT) {
+- if (memcmp(&fp->RportName, &phba->fc_portname,
+- sizeof (struct lpfc_name)) == 0)
++ if (memcmp(&fp->RportName, &vport->fc_portname,
++ sizeof(struct lpfc_name)) == 0)
+ cnt = 1;
+ }
+
+ /* If this FARP command is searching for my nodename */
+ if (fp->Mflags & FARP_MATCH_NODE) {
+- if (memcmp(&fp->RnodeName, &phba->fc_nodename,
+- sizeof (struct lpfc_name)) == 0)
++ if (memcmp(&fp->RnodeName, &vport->fc_nodename,
++ sizeof(struct lpfc_name)) == 0)
+ cnt = 1;
+ }
+
+@@ -3052,28 +3450,28 @@
+ /* Log back into the node before sending the FARP. */
+ if (fp->Rflags & FARP_REQUEST_PLOGI) {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp,
++ lpfc_nlp_set_state(vport, ndlp,
+ NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ }
+
+ /* Send a FARP response to that node */
+- if (fp->Rflags & FARP_REQUEST_FARPR) {
+- lpfc_issue_els_farpr(phba, did, 0);
+- }
++ if (fp->Rflags & FARP_REQUEST_FARPR)
++ lpfc_issue_els_farpr(vport, did, 0);
+ }
+ }
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_farpr(struct lpfc_hba * phba,
+- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *ndlp)
+ {
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+ IOCB_t *icmd;
+ uint32_t cmd, did;
++ struct lpfc_hba *phba = vport->phba;
+
+ icmd = &cmdiocb->iocb;
+ did = icmd->un.elsreq64.remoteID;
+@@ -3082,21 +3480,18 @@
+
+ cmd = *lp++;
+ /* FARP-RSP received from DID <did> */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0600 FARP-RSP received from DID x%x\n",
+- phba->brd_no, did);
+-
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0600 FARP-RSP received from DID x%x\n",
++ phba->brd_no, vport->vpi, did);
+ /* ACCEPT the Farp resp request */
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+
+ return 0;
+ }
+
+ static int
+-lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_nodelist * fan_ndlp)
++lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_nodelist *fan_ndlp)
+ {
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+@@ -3104,10 +3499,12 @@
+ uint32_t cmd, did;
+ FAN *fp;
+ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct lpfc_hba *phba = vport->phba;
+
+ /* FAN received */
+- lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:0265 FAN received\n",
+- phba->brd_no);
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0265 FAN received\n",
++ phba->brd_no, vport->vpi);
+
+ icmd = &cmdiocb->iocb;
+ did = icmd->un.elsreq64.remoteID;
+@@ -3115,11 +3512,11 @@
+ lp = (uint32_t *)pcmd->virt;
+
+ cmd = *lp++;
+- fp = (FAN *)lp;
++ fp = (FAN *) lp;
+
+ /* FAN received; Fan does not have a reply sequence */
+
+- if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++ if (phba->pport->port_state == LPFC_LOCAL_CFG_LINK) {
+ if ((memcmp(&phba->fc_fabparam.nodeName, &fp->FnodeName,
+ sizeof(struct lpfc_name)) != 0) ||
+ (memcmp(&phba->fc_fabparam.portName, &fp->FportName,
+@@ -3130,7 +3527,7 @@
+ */
+
+ list_for_each_entry_safe(ndlp, next_ndlp,
+- &phba->fc_nodes, nlp_listp) {
++ &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ continue;
+ if (ndlp->nlp_type & NLP_FABRIC) {
+@@ -3138,24 +3535,24 @@
+ * Clean up old Fabric, Nameserver and
+ * other NLP_FABRIC logins
+ */
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+ /* Fail outstanding I/O now since this
+ * device is marked for PLOGI
+ */
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ }
+ }
+
+- phba->hba_state = LPFC_FLOGI;
+- lpfc_set_disctmo(phba);
+- lpfc_initial_flogi(phba);
++ vport->port_state = LPFC_FLOGI;
++ lpfc_set_disctmo(vport);
++ lpfc_initial_flogi(vport);
+ return 0;
+ }
+ /* Discovery not needed,
+ * move the nodes to their original state.
+ */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ nlp_listp) {
+ if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ continue;
+@@ -3163,13 +3560,13 @@
+ switch (ndlp->nlp_prev_state) {
+ case NLP_STE_UNMAPPED_NODE:
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp,
++ lpfc_nlp_set_state(vport, ndlp,
+ NLP_STE_UNMAPPED_NODE);
+ break;
+
+ case NLP_STE_MAPPED_NODE:
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp,
++ lpfc_nlp_set_state(vport, ndlp,
+ NLP_STE_MAPPED_NODE);
+ break;
+
+@@ -3179,7 +3576,7 @@
+ }
+
+ /* Start discovery - this should just do CLEAR_LA */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ }
+ return 0;
+ }
+@@ -3187,42 +3584,42 @@
+ void
+ lpfc_els_timeout(unsigned long ptr)
+ {
+- struct lpfc_hba *phba;
++ struct lpfc_vport *vport = (struct lpfc_vport *) ptr;
++ struct lpfc_hba *phba = vport->phba;
+ unsigned long iflag;
+
+- phba = (struct lpfc_hba *)ptr;
+- if (phba == 0)
+- return;
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
+- phba->work_hba_events |= WORKER_ELS_TMO;
++ spin_lock_irqsave(&vport->work_port_lock, iflag);
++ if ((vport->work_port_events & WORKER_ELS_TMO) == 0) {
++ vport->work_port_events |= WORKER_ELS_TMO;
++ spin_unlock_irqrestore(&vport->work_port_lock, iflag);
++
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ else
++ spin_unlock_irqrestore(&vport->work_port_lock, iflag);
+ return;
+ }
+
+ void
+-lpfc_els_timeout_handler(struct lpfc_hba *phba)
++lpfc_els_timeout_handler(struct lpfc_vport *vport)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_iocbq *tmp_iocb, *piocb;
+ IOCB_t *cmd = NULL;
+ struct lpfc_dmabuf *pcmd;
+- uint32_t *elscmd;
+- uint32_t els_command=0;
++ uint32_t els_command = 0;
+ uint32_t timeout;
+- uint32_t remote_ID;
++ uint32_t remote_ID = 0xffffffff;
+
+- if (phba == 0)
+- return;
+- spin_lock_irq(phba->host->host_lock);
+ /* If the timer is already canceled do nothing */
+- if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
+- spin_unlock_irq(phba->host->host_lock);
++ if ((vport->work_port_events & WORKER_ELS_TMO) == 0) {
+ return;
+ }
++ spin_lock_irq(&phba->hbalock);
+ timeout = (uint32_t)(phba->fc_ratov << 1);
+
+ pring = &phba->sli.ring[LPFC_ELS_RING];
+@@ -3230,63 +3627,70 @@
+ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
+ cmd = &piocb->iocb;
+
+- if ((piocb->iocb_flag & LPFC_IO_LIBDFC) ||
+- (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN) ||
+- (piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)) {
++ if ((piocb->iocb_flag & LPFC_IO_LIBDFC) != 0 ||
++ piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
++ piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+ continue;
+- }
++
++ if (piocb->vport != vport)
++ continue;
++
+ pcmd = (struct lpfc_dmabuf *) piocb->context2;
+- if (pcmd) {
+- elscmd = (uint32_t *) (pcmd->virt);
+- els_command = *elscmd;
+- }
++ if (pcmd)
++ els_command = *(uint32_t *) (pcmd->virt);
+
+- if ((els_command == ELS_CMD_FARP)
+- || (els_command == ELS_CMD_FARPR)) {
++ if (els_command == ELS_CMD_FARP ||
++ els_command == ELS_CMD_FARPR ||
++ els_command == ELS_CMD_FDISC)
++ continue;
++
++ if (vport != piocb->vport)
+ continue;
+- }
+
+ if (piocb->drvrTimeout > 0) {
+- if (piocb->drvrTimeout >= timeout) {
++ if (piocb->drvrTimeout >= timeout)
+ piocb->drvrTimeout -= timeout;
+- } else {
++ else
+ piocb->drvrTimeout = 0;
+- }
+ continue;
+ }
+
+- if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) {
++ remote_ID = 0xffffffff;
++ if (cmd->ulpCommand != CMD_GEN_REQUEST64_CR)
++ remote_ID = cmd->un.elsreq64.remoteID;
++ else {
+ struct lpfc_nodelist *ndlp;
+- ndlp = __lpfc_findnode_rpi(phba, cmd->ulpContext);
++ ndlp = __lpfc_findnode_rpi(vport, cmd->ulpContext);
++ if (ndlp)
+ remote_ID = ndlp->nlp_DID;
+- } else {
+- remote_ID = cmd->un.elsreq64.remoteID;
+ }
+
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_ELS,
+- "%d:0127 ELS timeout Data: x%x x%x x%x x%x\n",
+- phba->brd_no, els_command,
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0127 ELS timeout Data: x%x x%x x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi, els_command,
+ remote_ID, cmd->ulpCommand, cmd->ulpIoTag);
+
+ lpfc_sli_issue_abort_iotag(phba, pring, piocb);
+ }
+- if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt)
+- mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout);
++ spin_unlock_irq(&phba->hbalock);
+
+- spin_unlock_irq(phba->host->host_lock);
++ if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt)
++ mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
+ }
+
+ void
+-lpfc_els_flush_cmd(struct lpfc_hba *phba)
++lpfc_els_flush_cmd(struct lpfc_vport *vport)
+ {
+ LIST_HEAD(completions);
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+ struct lpfc_iocbq *tmp_iocb, *piocb;
+ IOCB_t *cmd = NULL;
+
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_fabric_abort_vport(vport);
++
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) {
+ cmd = &piocb->iocb;
+
+@@ -3301,271 +3705,1042 @@
+ cmd->ulpCommand == CMD_ABORT_XRI_CN)
+ continue;
+
++ if (piocb->vport != vport)
++ continue;
++
+ list_move_tail(&piocb->list, &completions);
+ pring->txq_cnt--;
+-
+ }
+
+ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
+- cmd = &piocb->iocb;
+-
+ if (piocb->iocb_flag & LPFC_IO_LIBDFC) {
+ continue;
+ }
+
++ if (piocb->vport != vport)
++ continue;
++
+ lpfc_sli_issue_abort_iotag(phba, pring, piocb);
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+- while(!list_empty(&completions)) {
++ while (!list_empty(&completions)) {
+ piocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ cmd = &piocb->iocb;
+- list_del(&piocb->list);
++ list_del_init(&piocb->list);
+
+- if (piocb->iocb_cmpl) {
++ if (!piocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, piocb);
++ else {
+ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ (piocb->iocb_cmpl) (phba, piocb, piocb);
+- } else
+- lpfc_sli_release_iocbq(phba, piocb);
++ }
+ }
+
+ return;
+ }
+
+-void
+-lpfc_els_unsol_event(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring, struct lpfc_iocbq * elsiocb)
++static void
++lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_vport *vport, struct lpfc_iocbq *elsiocb)
+ {
+- struct lpfc_sli *psli;
+ struct lpfc_nodelist *ndlp;
+- struct lpfc_dmabuf *mp;
+- uint32_t *lp;
+- IOCB_t *icmd;
+ struct ls_rjt stat;
+- uint32_t cmd;
+- uint32_t did;
+- uint32_t newnode;
+- uint32_t drop_cmd = 0; /* by default do NOT drop received cmd */
+- uint32_t rjt_err = 0;
+-
+- psli = &phba->sli;
+- icmd = &elsiocb->iocb;
+-
+- if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+- ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
+- /* Not enough posted buffers; Try posting more buffers */
+- phba->fc_stat.NoRcvBuf++;
+- lpfc_post_buffer(phba, pring, 0, 1);
+- return;
+- }
+-
+- /* If there are no BDEs associated with this IOCB,
+- * there is nothing to do.
+- */
+- if (icmd->ulpBdeCount == 0)
+- return;
++ uint32_t *payload;
++ uint32_t cmd, did, newnode, rjt_err = 0;
++ IOCB_t *icmd = &elsiocb->iocb;
+
+- /* type of ELS cmd is first 32bit word in packet */
+- mp = lpfc_sli_ringpostbuf_get(phba, pring, getPaddr(icmd->un.
+- cont64[0].
+- addrHigh,
+- icmd->un.
+- cont64[0].addrLow));
+- if (mp == 0) {
+- drop_cmd = 1;
++ if (vport == NULL || elsiocb->context2 == NULL)
+ goto dropit;
+- }
+
+ newnode = 0;
+- lp = (uint32_t *) mp->virt;
+- cmd = *lp++;
+- lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], 1, 1);
++ payload = ((struct lpfc_dmabuf *)elsiocb->context2)->virt;
++ cmd = *payload;
++ if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) == 0)
++ lpfc_post_buffer(phba, pring, 1, 1);
+
++ did = icmd->un.rcvels.remoteID;
+ if (icmd->ulpStatus) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV Unsol ELS: status:x%x/x%x did:x%x",
++ icmd->ulpStatus, icmd->un.ulpWord[4], did);
+ goto dropit;
+ }
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ if (lpfc_els_chk_latt(vport))
+ goto dropit;
+- }
+
+- did = icmd->un.rcvels.remoteID;
+- ndlp = lpfc_findnode_did(phba, did);
++ /* Ignore traffic received during vport shutdown. */
++ if (vport->load_flag & FC_UNLOADING)
++ goto dropit;
++
++ ndlp = lpfc_findnode_did(vport, did);
+ if (!ndlp) {
+ /* Cannot find existing Fabric ndlp, so allocate a new one */
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+- if (!ndlp) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ if (!ndlp)
+ goto dropit;
+- }
+
+- lpfc_nlp_init(phba, ndlp, did);
++ lpfc_nlp_init(vport, ndlp, did);
+ newnode = 1;
+ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) {
+ ndlp->nlp_type |= NLP_FABRIC;
+ }
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ }
+
+ phba->fc_stat.elsRcvFrame++;
+ if (elsiocb->context1)
+ lpfc_nlp_put(elsiocb->context1);
+ elsiocb->context1 = lpfc_nlp_get(ndlp);
+- elsiocb->context2 = mp;
++ elsiocb->vport = vport;
+
+ if ((cmd & ELS_CMD_MASK) == ELS_CMD_RSCN) {
+ cmd &= ELS_CMD_MASK;
+ }
+ /* ELS command <elsCmd> received from NPORT <did> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0112 ELS command x%x received from NPORT x%x "
+- "Data: x%x\n", phba->brd_no, cmd, did, phba->hba_state);
++ "%d (%d):0112 ELS command x%x received from NPORT x%x "
++ "Data: x%x\n", phba->brd_no, vport->vpi, cmd, did,
++ vport->port_state);
+
+ switch (cmd) {
+ case ELS_CMD_PLOGI:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV PLOGI: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvPLOGI++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ ndlp = lpfc_plogi_confirm_nport(phba, payload, ndlp);
++
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- ndlp = lpfc_plogi_confirm_nport(phba, mp, ndlp);
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PLOGI);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb,
++ NLP_EVT_RCV_PLOGI);
++
+ break;
+ case ELS_CMD_FLOGI:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV FLOGI: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvFLOGI++;
+- lpfc_els_rcv_flogi(phba, elsiocb, ndlp, newnode);
++ lpfc_els_rcv_flogi(vport, elsiocb, ndlp, newnode);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case ELS_CMD_LOGO:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV LOGO: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvLOGO++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
+ break;
+ case ELS_CMD_PRLO:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV PRLO: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvPRLO++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
+ break;
+ case ELS_CMD_RSCN:
+ phba->fc_stat.elsRcvRSCN++;
+- lpfc_els_rcv_rscn(phba, elsiocb, ndlp, newnode);
++ lpfc_els_rcv_rscn(vport, elsiocb, ndlp, newnode);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case ELS_CMD_ADISC:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV ADISC: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvADISC++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_ADISC);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb,
++ NLP_EVT_RCV_ADISC);
+ break;
+ case ELS_CMD_PDISC:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV PDISC: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvPDISC++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PDISC);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb,
++ NLP_EVT_RCV_PDISC);
+ break;
+ case ELS_CMD_FARPR:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV FARPR: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvFARPR++;
+- lpfc_els_rcv_farpr(phba, elsiocb, ndlp);
++ lpfc_els_rcv_farpr(vport, elsiocb, ndlp);
+ break;
+ case ELS_CMD_FARP:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV FARP: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvFARP++;
+- lpfc_els_rcv_farp(phba, elsiocb, ndlp);
++ lpfc_els_rcv_farp(vport, elsiocb, ndlp);
+ break;
+ case ELS_CMD_FAN:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV FAN: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvFAN++;
+- lpfc_els_rcv_fan(phba, elsiocb, ndlp);
++ lpfc_els_rcv_fan(vport, elsiocb, ndlp);
+ break;
+ case ELS_CMD_PRLI:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV PRLI: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvPRLI++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
+ break;
+ case ELS_CMD_LIRR:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV LIRR: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvLIRR++;
+- lpfc_els_rcv_lirr(phba, elsiocb, ndlp);
++ lpfc_els_rcv_lirr(vport, elsiocb, ndlp);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case ELS_CMD_RPS:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RPS: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvRPS++;
+- lpfc_els_rcv_rps(phba, elsiocb, ndlp);
++ lpfc_els_rcv_rps(vport, elsiocb, ndlp);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case ELS_CMD_RPL:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RPL: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvRPL++;
+- lpfc_els_rcv_rpl(phba, elsiocb, ndlp);
++ lpfc_els_rcv_rpl(vport, elsiocb, ndlp);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ case ELS_CMD_RNID:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV RNID: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvRNID++;
+- lpfc_els_rcv_rnid(phba, elsiocb, ndlp);
++ lpfc_els_rcv_rnid(vport, elsiocb, ndlp);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ default:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x",
++ cmd, did, vport->port_state);
++
+ /* Unsupported ELS command, reject */
+- rjt_err = 1;
++ rjt_err = LSRJT_INVALID_CMD;
+
+ /* Unknown ELS command <elsCmd> received from NPORT <did> */
+ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+- "%d:0115 Unknown ELS command x%x received from "
+- "NPORT x%x\n", phba->brd_no, cmd, did);
++ "%d (%d):0115 Unknown ELS command x%x "
++ "received from NPORT x%x\n",
++ phba->brd_no, vport->vpi, cmd, did);
+ if (newnode)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ break;
+ }
+
+ /* check if need to LS_RJT received ELS cmd */
+ if (rjt_err) {
+- stat.un.b.lsRjtRsvd0 = 0;
+- stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++ memset(&stat, 0, sizeof(stat));
++ stat.un.b.lsRjtRsnCode = rjt_err;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+- stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, elsiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp,
++ NULL);
++ if (newnode)
++ lpfc_drop_node(vport, ndlp);
+ }
+
+- lpfc_nlp_put(elsiocb->context1);
+- elsiocb->context1 = NULL;
+- if (elsiocb->context2) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- }
++ return;
++
+ dropit:
+- /* check if need to drop received ELS cmd */
+- if (drop_cmd == 1) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+- "%d:0111 Dropping received ELS cmd "
+- "Data: x%x x%x x%x\n", phba->brd_no,
++ "%d (%d):0111 Dropping received ELS cmd "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, vport ? vport->vpi : 0xffff,
+ icmd->ulpStatus, icmd->un.ulpWord[4],
+ icmd->ulpTimeout);
+ phba->fc_stat.elsRcvDrop++;
++}
++
++static struct lpfc_vport *
++lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi)
++{
++ struct lpfc_vport *vport;
++
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ if (vport->vpi == vpi)
++ return vport;
++ }
++ return NULL;
++}
++
++void
++lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *elsiocb)
++{
++ struct lpfc_vport *vport = phba->pport;
++ IOCB_t *icmd = &elsiocb->iocb;
++ dma_addr_t paddr;
++ struct lpfc_dmabuf *bdeBuf1 = elsiocb->context2;
++ struct lpfc_dmabuf *bdeBuf2 = elsiocb->context3;
++
++ elsiocb->context2 = NULL;
++ elsiocb->context3 = NULL;
++
++ if (icmd->ulpStatus == IOSTAT_NEED_BUFFER) {
++ lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
++ } else if (icmd->ulpStatus == IOSTAT_LOCAL_REJECT &&
++ (icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING) {
++ phba->fc_stat.NoRcvBuf++;
++ /* Not enough posted buffers; Try posting more buffers */
++ if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED))
++ lpfc_post_buffer(phba, pring, 0, 1);
++ return;
++ }
++
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ (icmd->ulpCommand == CMD_IOCB_RCV_ELS64_CX ||
++ icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
++ if (icmd->unsli3.rcvsli3.vpi == 0xffff)
++ vport = phba->pport;
++ else {
++ uint16_t vpi = icmd->unsli3.rcvsli3.vpi;
++ vport = lpfc_find_vport_by_vpid(phba, vpi);
++ }
++ }
++ /* If there are no BDEs associated
++ * with this IOCB, there is nothing to do.
++ */
++ if (icmd->ulpBdeCount == 0)
++ return;
++
++ /* type of ELS cmd is first 32bit word
++ * in packet
++ */
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++ elsiocb->context2 = bdeBuf1;
++ } else {
++ paddr = getPaddr(icmd->un.cont64[0].addrHigh,
++ icmd->un.cont64[0].addrLow);
++ elsiocb->context2 = lpfc_sli_ringpostbuf_get(phba, pring,
++ paddr);
++ }
++
++ lpfc_els_unsol_buffer(phba, pring, vport, elsiocb);
++ /*
++ * The different unsolicited event handlers would tell us
++ * if they are done with "mp" by setting context2 to NULL.
++ */
++ lpfc_nlp_put(elsiocb->context1);
++ elsiocb->context1 = NULL;
++ if (elsiocb->context2) {
++ lpfc_in_buf_free(phba, (struct lpfc_dmabuf *)elsiocb->context2);
++ elsiocb->context2 = NULL;
++ }
++
++ /* RCV_ELS64_CX provides for 2 BDEs - process 2nd if included */
++ if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) &&
++ icmd->ulpBdeCount == 2) {
++ elsiocb->context2 = bdeBuf2;
++ lpfc_els_unsol_buffer(phba, pring, vport, elsiocb);
++ /* free mp if we are done with it */
++ if (elsiocb->context2) {
++ lpfc_in_buf_free(phba, elsiocb->context2);
++ elsiocb->context2 = NULL;
++ }
++ }
++}
++
++void
++lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++ struct lpfc_nodelist *ndlp, *ndlp_fdmi;
++
++ ndlp = lpfc_findnode_did(vport, NameServer_DID);
++ if (!ndlp) {
++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++ if (!ndlp) {
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ lpfc_disc_start(vport);
++ return;
++ }
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0251 NameServer login: no memory\n",
++ phba->brd_no, vport->vpi);
++ return;
++ }
++ lpfc_nlp_init(vport, ndlp, NameServer_DID);
++ ndlp->nlp_type |= NLP_FABRIC;
++ }
++
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++
++ if (lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0)) {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0252 Cannot issue NameServer login\n",
++ phba->brd_no, vport->vpi);
++ return;
++ }
++
++ if (phba->cfg_fdmi_on) {
++ ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool,
++ GFP_KERNEL);
++ if (ndlp_fdmi) {
++ lpfc_nlp_init(vport, ndlp_fdmi, FDMI_DID);
++ ndlp_fdmi->nlp_type |= NLP_FABRIC;
++ ndlp_fdmi->nlp_state =
++ NLP_STE_PLOGI_ISSUE;
++ lpfc_issue_els_plogi(vport, ndlp_fdmi->nlp_DID,
++ 0);
++ }
++ }
++ return;
++}
++
++static void
++lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++ struct lpfc_vport *vport = pmb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++ MAILBOX_t *mb = &pmb->mb;
++
++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++ lpfc_nlp_put(ndlp);
++
++ if (mb->mbxStatus) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d (%d):0915 Register VPI failed: 0x%x\n",
++ phba->brd_no, vport->vpi, mb->mbxStatus);
++
++ switch (mb->mbxStatus) {
++ case 0x11: /* unsupported feature */
++ case 0x9603: /* max_vpi exceeded */
++ /* giving up on vport registration */
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
++ break;
++ default:
++ /* Try to recover from this error */
++ lpfc_mbx_unreg_vpi(vport);
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++ lpfc_initial_fdisc(vport);
++ break;
++ }
++
++ } else {
++ if (vport == phba->pport)
++ lpfc_issue_fabric_reglogin(vport);
++ else
++ lpfc_do_scr_ns_plogi(phba, vport);
+ }
++ mempool_free(pmb, phba->mbox_mem_pool);
+ return;
+ }
++
++void
++lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp)
++{
++ LPFC_MBOXQ_t *mbox;
++
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mbox) {
++ lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox);
++ mbox->vport = vport;
++ mbox->context2 = lpfc_nlp_get(ndlp);
++ mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport;
++ if (lpfc_sli_issue_mbox(phba, mbox,
++ MBX_NOWAIT | MBX_STOP_IOCB)
++ == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d (%d):0253 Register VPI: Cannot send mbox\n",
++ phba->brd_no, vport->vpi);
++ }
++ } else {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d (%d):0254 Register VPI: no memory\n",
++ phba->brd_no, vport->vpi);
++
++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++ lpfc_nlp_put(ndlp);
++ }
++}
++
++static void
++lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++ struct lpfc_nodelist *np;
++ struct lpfc_nodelist *next_np;
++ IOCB_t *irsp = &rspiocb->iocb;
++ struct lpfc_iocbq *piocb;
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0123 FDISC completes. x%x/x%x prevDID: x%x\n",
++ phba->brd_no, vport->vpi,
++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID);
++
++ /* Since all FDISCs are single-threaded, we
++ * must reset the discovery timer for ALL vports
++ * waiting to send FDISC when one completes.
++ */
++ list_for_each_entry(piocb, &phba->fabric_iocb_list, list) {
++ lpfc_set_disctmo(piocb->vport);
++ }
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "FDISC cmpl: status:x%x/x%x prevdid:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID);
++
++ if (irsp->ulpStatus) {
++ /* Check for retry */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb))
++ goto out;
++
++ /* FDISC failed */
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0124 FDISC failed. (%d/%d)\n",
++ phba->brd_no, vport->vpi,
++ irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++ if (vport->fc_vport->vport_state == FC_VPORT_INITIALIZING)
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++ lpfc_nlp_put(ndlp);
++ /* giving up on FDISC. Cancel discovery timer */
++ lpfc_can_disctmo(vport);
++ } else {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_FABRIC;
++ if (vport->phba->fc_topology == TOPOLOGY_LOOP)
++ vport->fc_flag |= FC_PUBLIC_LOOP;
++ spin_unlock_irq(shost->host_lock);
++
++ vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
++ lpfc_vport_set_state(vport, FC_VPORT_ACTIVE);
++ if ((vport->fc_prevDID != vport->fc_myDID) &&
++ !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
++ /* If our NportID changed, we need to ensure all
++ * remaining NPORTs get unreg_login'ed so we can
++ * issue unreg_vpi.
++ */
++ list_for_each_entry_safe(np, next_np,
++ &vport->fc_nodes, nlp_listp) {
++ if (np->nlp_state != NLP_STE_NPR_NODE
++ || !(np->nlp_flag & NLP_NPR_ADISC))
++ continue;
++ spin_lock_irq(shost->host_lock);
++ np->nlp_flag &= ~NLP_NPR_ADISC;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_unreg_rpi(vport, np);
++ }
++ lpfc_mbx_unreg_vpi(vport);
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++ }
++
++ if (vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
++ lpfc_register_new_vport(phba, vport, ndlp);
++ else
++ lpfc_do_scr_ns_plogi(phba, vport);
++
++ lpfc_nlp_put(ndlp); /* Free Fabric ndlp for vports */
++ }
++
++out:
++ lpfc_els_free_iocb(phba, cmdiocb);
++}
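Since the fabric channel is single-threaded, every vport queued behind the in-flight FDISC has its discovery timer pushed out again when that FDISC completes; a waiter's timeout depends on when the channel frees up, not on when it was queued. A minimal userspace model of that restart-all-waiters pattern (the names below are illustrative, not driver API):

#include <stdio.h>
#include <time.h>

#define MAX_WAITERS 4
#define DISC_TMO_SECS 30

struct waiter {
    int queued;          /* still waiting to send its command */
    time_t deadline;     /* per-waiter discovery timeout */
};

/* On completion of the single in-flight command, push every queued
 * waiter's deadline out again, mirroring lpfc_set_disctmo() being
 * called for each vport on the fabric_iocb_list. */
static void restart_waiter_timers(struct waiter *w, int n)
{
    time_t now = time(NULL);
    for (int i = 0; i < n; i++)
        if (w[i].queued)
            w[i].deadline = now + DISC_TMO_SECS;
}

int main(void)
{
    struct waiter w[MAX_WAITERS] = { {1, 0}, {1, 0}, {0, 0}, {1, 0} };
    restart_waiter_timers(w, MAX_WAITERS);
    for (int i = 0; i < MAX_WAITERS; i++)
        printf("waiter %d deadline %ld\n", i,
               w[i].queued ? (long)w[i].deadline : -1L);
    return 0;
}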
++
++int
++lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ uint8_t retry)
++{
++ struct lpfc_hba *phba = vport->phba;
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ struct serv_parm *sp;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++ int did = ndlp->nlp_DID;
++ int rc;
++
++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, did,
++ ELS_CMD_FDISC);
++ if (!elsiocb) {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0255 Issue FDISC: no IOCB\n",
++ phba->brd_no, vport->vpi);
++ return 1;
++ }
++
++ icmd = &elsiocb->iocb;
++ icmd->un.elsreq64.myID = 0;
++ icmd->un.elsreq64.fl = 1;
++
++ /* For FDISC, Let FDISC rsp set the NPortID for this VPI */
++ icmd->ulpCt_h = 1;
++ icmd->ulpCt_l = 0;
++
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++ *((uint32_t *) (pcmd)) = ELS_CMD_FDISC;
++ pcmd += sizeof(uint32_t); /* CSP Word 1 */
++ memcpy(pcmd, &vport->phba->pport->fc_sparam, sizeof(struct serv_parm));
++ sp = (struct serv_parm *) pcmd;
++ /* Setup CSPs accordingly for Fabric */
++ sp->cmn.e_d_tov = 0;
++ sp->cmn.w2.r_a_tov = 0;
++ sp->cls1.classValid = 0;
++ sp->cls2.seqDelivery = 1;
++ sp->cls3.seqDelivery = 1;
++
++ pcmd += sizeof(uint32_t); /* CSP Word 2 */
++ pcmd += sizeof(uint32_t); /* CSP Word 3 */
++ pcmd += sizeof(uint32_t); /* CSP Word 4 */
++ pcmd += sizeof(uint32_t); /* Port Name */
++ memcpy(pcmd, &vport->fc_portname, 8);
++ pcmd += sizeof(uint32_t); /* Node Name */
++ pcmd += sizeof(uint32_t); /* Node Name */
++ memcpy(pcmd, &vport->fc_nodename, 8);
++
++ lpfc_set_disctmo(vport);
++
++ phba->fc_stat.elsXmitFDISC++;
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_fdisc;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue FDISC: did:x%x",
++ did, 0, 0);
++
++ rc = lpfc_issue_fabric_iocb(phba, elsiocb);
++ if (rc == IOCB_ERROR) {
++ lpfc_els_free_iocb(phba, elsiocb);
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0256 Issue FDISC: Cannot send IOCB\n",
++ phba->brd_no, vport->vpi);
++
++ return 1;
++ }
++ lpfc_vport_set_state(vport, FC_VPORT_INITIALIZING);
++ vport->port_state = LPFC_FDISC;
++ return 0;
++}
++
++static void
++lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ struct lpfc_vport *vport = cmdiocb->vport;
++ IOCB_t *irsp;
++
++ irsp = &rspiocb->iocb;
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "LOGO npiv cmpl: status:x%x/x%x did:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->un.rcvels.remoteID);
++
++ lpfc_els_free_iocb(phba, cmdiocb);
++ vport->unreg_vpi_cmpl = VPORT_ERROR;
++}
++
++int
++lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++ IOCB_t *icmd;
++ struct lpfc_iocbq *elsiocb;
++ uint8_t *pcmd;
++ uint16_t cmdsize;
++
++ cmdsize = 2 * sizeof(uint32_t) + sizeof(struct lpfc_name);
++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, 0, ndlp, ndlp->nlp_DID,
++ ELS_CMD_LOGO);
++ if (!elsiocb)
++ return 1;
++
++ icmd = &elsiocb->iocb;
++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++ *((uint32_t *) (pcmd)) = ELS_CMD_LOGO;
++ pcmd += sizeof(uint32_t);
++
++ /* Fill in LOGO payload */
++ *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID);
++ pcmd += sizeof(uint32_t);
++ memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name));
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Issue LOGO npiv did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
++ elsiocb->iocb_cmpl = lpfc_cmpl_els_npiv_logo;
++ spin_lock_irq(shost->host_lock);
++ ndlp->nlp_flag |= NLP_LOGO_SND;
++ spin_unlock_irq(shost->host_lock);
++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++ spin_lock_irq(shost->host_lock);
++ ndlp->nlp_flag &= ~NLP_LOGO_SND;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_els_free_iocb(phba, elsiocb);
++ return 1;
++ }
++ return 0;
++}
++
++void
++lpfc_fabric_block_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr;
++ unsigned long iflags;
++ uint32_t tmo_posted;
++ spin_lock_irqsave(&phba->pport->work_port_lock, iflags);
++ tmo_posted = phba->pport->work_port_events & WORKER_FABRIC_BLOCK_TMO;
++ if (!tmo_posted)
++ phba->pport->work_port_events |= WORKER_FABRIC_BLOCK_TMO;
++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags);
++
++ if (!tmo_posted) {
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ if (phba->work_wait)
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ }
++}
++
++static void
++lpfc_resume_fabric_iocbs(struct lpfc_hba *phba)
++{
++ struct lpfc_iocbq *iocb;
++ unsigned long iflags;
++ int ret;
++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++ IOCB_t *cmd;
++
++repeat:
++ iocb = NULL;
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ /* Post any pending iocb to the SLI layer */
++ if (atomic_read(&phba->fabric_iocb_count) == 0) {
++ list_remove_head(&phba->fabric_iocb_list, iocb, typeof(*iocb),
++ list);
++ if (iocb)
++ atomic_inc(&phba->fabric_iocb_count);
++ }
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ if (iocb) {
++ iocb->fabric_iocb_cmpl = iocb->iocb_cmpl;
++ iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb;
++ iocb->iocb_flag |= LPFC_IO_FABRIC;
++
++ lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD,
++ "Fabric sched1: ste:x%x",
++ iocb->vport->port_state, 0, 0);
++
++ ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
++
++ if (ret == IOCB_ERROR) {
++ iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
++ iocb->fabric_iocb_cmpl = NULL;
++ iocb->iocb_flag &= ~LPFC_IO_FABRIC;
++ cmd = &iocb->iocb;
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ iocb->iocb_cmpl(phba, iocb, iocb);
++
++ atomic_dec(&phba->fabric_iocb_count);
++ goto repeat;
++ }
++ }
++
++ return;
++}
++
++void
++lpfc_unblock_fabric_iocbs(struct lpfc_hba *phba)
++{
++ clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++
++ lpfc_resume_fabric_iocbs(phba);
++ return;
++}
++
++static void
++lpfc_block_fabric_iocbs(struct lpfc_hba *phba)
++{
++ int blocked;
++
++ blocked = test_and_set_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++ /* Start a timer to unblock fabric
++ * iocbs after 100ms
++ */
++ if (!blocked)
++ mod_timer(&phba->fabric_block_timer, jiffies + HZ/10 );
++
++ return;
++}
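The block path uses test_and_set_bit() so that only the first caller arms the 100ms unblock timer; concurrent callers see the bit already set and return without touching the timer. A portable C11 sketch of that arm-once idiom (hypothetical names, no kernel calls):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag blocked = ATOMIC_FLAG_INIT;

/* Returns true only for the caller that actually armed the timer.
 * atomic_flag_test_and_set() plays the role of test_and_set_bit():
 * exactly one concurrent caller observes "was clear". */
static bool block_once(void)
{
    if (atomic_flag_test_and_set(&blocked))
        return false;   /* already blocked; timer already armed */
    printf("arming 100ms unblock timer\n");  /* mod_timer() in the driver */
    return true;
}

static void unblock(void)
{
    atomic_flag_clear(&blocked);  /* clear_bit() + resume in the driver */
}

int main(void)
{
    block_once();   /* arms the timer */
    block_once();   /* no-op, bit already set */
    unblock();
    block_once();   /* arms again */
    return 0;
}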
++
++static void
++lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ struct ls_rjt stat;
++
++ if ((cmdiocb->iocb_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC)
++ BUG();
++
++ switch (rspiocb->iocb.ulpStatus) {
++ case IOSTAT_NPORT_RJT:
++ case IOSTAT_FABRIC_RJT:
++ if (rspiocb->iocb.un.ulpWord[4] & RJT_UNAVAIL_TEMP) {
++ lpfc_block_fabric_iocbs(phba);
++ }
++ break;
++
++ case IOSTAT_NPORT_BSY:
++ case IOSTAT_FABRIC_BSY:
++ lpfc_block_fabric_iocbs(phba);
++ break;
++
++ case IOSTAT_LS_RJT:
++ stat.un.lsRjtError =
++ be32_to_cpu(rspiocb->iocb.un.ulpWord[4]);
++ if ((stat.un.b.lsRjtRsnCode == LSRJT_UNABLE_TPC) ||
++ (stat.un.b.lsRjtRsnCode == LSRJT_LOGICAL_BSY))
++ lpfc_block_fabric_iocbs(phba);
++ break;
++ }
++
++ if (atomic_read(&phba->fabric_iocb_count) == 0)
++ BUG();
++
++ cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl;
++ cmdiocb->fabric_iocb_cmpl = NULL;
++ cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC;
++ cmdiocb->iocb_cmpl(phba, cmdiocb, rspiocb);
++
++ atomic_dec(&phba->fabric_iocb_count);
++ if (!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags)) {
++ /* Post any pending iocbs to HBA */
++ lpfc_resume_fabric_iocbs(phba);
++ }
++}
++
++int
++lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb)
++{
++ unsigned long iflags;
++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++ int ready;
++ int ret;
++
++ if (atomic_read(&phba->fabric_iocb_count) > 1)
++ BUG();
++
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ ready = atomic_read(&phba->fabric_iocb_count) == 0 &&
++ !test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ if (ready) {
++ iocb->fabric_iocb_cmpl = iocb->iocb_cmpl;
++ iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb;
++ iocb->iocb_flag |= LPFC_IO_FABRIC;
++
++ lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD,
++ "Fabric sched2: ste:x%x",
++ iocb->vport->port_state, 0, 0);
++
++ atomic_inc(&phba->fabric_iocb_count);
++ ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
++
++ if (ret == IOCB_ERROR) {
++ iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
++ iocb->fabric_iocb_cmpl = NULL;
++ iocb->iocb_flag &= ~LPFC_IO_FABRIC;
++ atomic_dec(&phba->fabric_iocb_count);
++ }
++ } else {
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ list_add_tail(&iocb->list, &phba->fabric_iocb_list);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ ret = IOCB_SUCCESS;
++ }
++ return ret;
++}
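lpfc_issue_fabric_iocb() and lpfc_resume_fabric_iocbs() together enforce one invariant: at most one fabric command is outstanding, and everything else parks on fabric_iocb_list until the completion handler drains it. A compact single-threaded model of that throttle, with illustrative names only:

#include <stdio.h>

#define QLEN 8

static int inflight;                 /* fabric_iocb_count, capped at 1 */
static int blocked;                  /* stands in for FABRIC_COMANDS_BLOCKED */
static int queue[QLEN], qhead, qtail;

static void issue(int cmd)
{
    if (!inflight && !blocked) {
        inflight = 1;
        printf("issue cmd %d\n", cmd);
    } else {
        queue[qtail++ % QLEN] = cmd;  /* defer to fabric_iocb_list */
    }
}

/* Completion path: drop the in-flight count, then resume one deferred
 * command, as lpfc_cmpl_fabric_iocb() -> lpfc_resume_fabric_iocbs(). */
static void complete(void)
{
    inflight = 0;
    if (!blocked && qhead != qtail)
        issue(queue[qhead++ % QLEN]);
}

int main(void)
{
    issue(1); issue(2); issue(3);    /* 2 and 3 are deferred */
    complete();                      /* 2 goes out */
    complete();                      /* 3 goes out */
    return 0;
}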
++
++
++void lpfc_fabric_abort_vport(struct lpfc_vport *vport)
++{
++ LIST_HEAD(completions);
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_iocbq *tmp_iocb, *piocb;
++ IOCB_t *cmd;
++
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++ list) {
++
++ if (piocb->vport != vport)
++ continue;
++
++ list_move_tail(&piocb->list, &completions);
++ }
++ spin_unlock_irq(&phba->hbalock);
++
++ while (!list_empty(&completions)) {
++ piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++ list_del_init(&piocb->list);
++
++ cmd = &piocb->iocb;
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ }
++}
++
++void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp)
++{
++ LIST_HEAD(completions);
++ struct lpfc_hba *phba = ndlp->vport->phba;
++ struct lpfc_iocbq *tmp_iocb, *piocb;
++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++ IOCB_t *cmd;
++
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++ list) {
++ if ((lpfc_check_sli_ndlp(phba, pring, piocb, ndlp))) {
++
++ list_move_tail(&piocb->list, &completions);
++ }
++ }
++ spin_unlock_irq(&phba->hbalock);
++
++ while (!list_empty(&completions)) {
++ piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++ list_del_init(&piocb->list);
++
++ cmd = &piocb->iocb;
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ }
++}
++
++void lpfc_fabric_abort_hba(struct lpfc_hba *phba)
++{
++ LIST_HEAD(completions);
++ struct lpfc_iocbq *piocb;
++ IOCB_t *cmd;
++
++ spin_lock_irq(&phba->hbalock);
++ list_splice_init(&phba->fabric_iocb_list, &completions);
++ spin_unlock_irq(&phba->hbalock);
++
++ while (!list_empty(&completions)) {
++ piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++ list_del_init(&piocb->list);
++
++ cmd = &piocb->iocb;
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ }
++}
++
++
++void lpfc_fabric_abort_flogi(struct lpfc_hba *phba)
++{
++ LIST_HEAD(completions);
++ struct lpfc_iocbq *tmp_iocb, *piocb;
++ IOCB_t *cmd;
++ struct lpfc_nodelist *ndlp;
++
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++ list) {
++
++ cmd = &piocb->iocb;
++ ndlp = (struct lpfc_nodelist *) piocb->context1;
++ if (cmd->ulpCommand == CMD_ELS_REQUEST64_CR &&
++ ndlp != NULL &&
++ ndlp->nlp_DID == Fabric_DID)
++ list_move_tail(&piocb->list, &completions);
++ }
++ spin_unlock_irq(&phba->hbalock);
++
++ while (!list_empty(&completions)) {
++ piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++ list_del_init(&piocb->list);
++
++ cmd = &piocb->iocb;
++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (piocb->iocb_cmpl) (phba, piocb, piocb);
++ }
++}
++
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -36,6 +36,8 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+
+ /* AlpaArray for assignment of scsid for scan-down and bind_method */
+ static uint8_t lpfcAlpaArray[] = {
+@@ -54,7 +56,7 @@
+ 0x10, 0x0F, 0x08, 0x04, 0x02, 0x01
+ };
+
+-static void lpfc_disc_timeout_handler(struct lpfc_hba *);
++static void lpfc_disc_timeout_handler(struct lpfc_vport *);
+
+ void
+ lpfc_terminate_rport_io(struct fc_rport *rport)
+@@ -74,14 +76,16 @@
+ return;
+ }
+
+- phba = ndlp->nlp_phba;
++ phba = ndlp->vport->phba;
++
++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT,
++ "rport terminate: sid:x%x did:x%x flg:x%x",
++ ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
+
+- spin_lock_irq(phba->host->host_lock);
+ if (ndlp->nlp_sid != NLP_NO_SID) {
+ lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+ }
+- spin_unlock_irq(phba->host->host_lock);
+
+ return;
+ }
+@@ -94,28 +98,98 @@
+ {
+ struct lpfc_rport_data *rdata;
+ struct lpfc_nodelist * ndlp;
+- uint8_t *name;
+- int warn_on = 0;
++ struct lpfc_vport *vport;
+ struct lpfc_hba *phba;
++ struct completion devloss_compl;
++ struct lpfc_work_evt *evtp;
+
+ rdata = rport->dd_data;
+ ndlp = rdata->pnode;
+
+ if (!ndlp) {
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++ if (rport->scsi_target_id != -1) {
+ printk(KERN_ERR "Cannot find remote node"
+ " for rport in dev_loss_tmo_callbk x%x\n",
+ rport->port_id);
++ }
+ return;
+ }
+
+- if (ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++ vport = ndlp->vport;
++ phba = vport->phba;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport devlosscb: sid:x%x did:x%x flg:x%x",
++ ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
++
++ init_completion(&devloss_compl);
++ evtp = &ndlp->dev_loss_evt;
++
++ if (!list_empty(&evtp->evt_listp))
++ return;
++
++ spin_lock_irq(&phba->hbalock);
++ evtp->evt_arg1 = ndlp;
++ evtp->evt_arg2 = &devloss_compl;
++ evtp->evt = LPFC_EVT_DEV_LOSS;
++ list_add_tail(&evtp->evt_listp, &phba->work_list);
++ if (phba->work_wait)
++ wake_up(phba->work_wait);
++
++ spin_unlock_irq(&phba->hbalock);
++
++ wait_for_completion(&devloss_compl);
++
++ return;
++}
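The callback above hands the event to the driver's worker thread and then sleeps on a completion until the handler has run. The same hand-off can be modeled in userspace with a mutex and condition variable standing in for struct completion (all names here are illustrative):

#include <pthread.h>
#include <stdio.h>

struct completion {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    int done;
};

static void complete(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    c->done = 1;
    pthread_cond_signal(&c->cond);
    pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    while (!c->done)
        pthread_cond_wait(&c->cond, &c->lock);
    pthread_mutex_unlock(&c->lock);
}

static void *worker(void *arg)
{
    printf("worker: handling dev_loss event\n");
    complete(arg);               /* complete(evtp->evt_arg2) in the driver */
    return NULL;
}

int main(void)
{
    struct completion c = { PTHREAD_MUTEX_INITIALIZER,
                            PTHREAD_COND_INITIALIZER, 0 };
    pthread_t t;
    pthread_create(&t, NULL, worker, &c);
    wait_for_completion(&c);     /* caller blocks until the handler ran */
    pthread_join(&t, NULL);
    return 0;
}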
++
++/*
++ * This function is called from the worker thread when dev_loss_tmo
++ * expires.
++ */
++void
++lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
++{
++ struct lpfc_rport_data *rdata;
++ struct fc_rport *rport;
++ struct lpfc_vport *vport;
++ struct lpfc_hba *phba;
++ uint8_t *name;
++ int warn_on = 0;
++
++ rport = ndlp->rport;
++
++ if (!rport)
+ return;
+
+- name = (uint8_t *)&ndlp->nlp_portname;
+- phba = ndlp->nlp_phba;
++ rdata = rport->dd_data;
++ name = (uint8_t *) &ndlp->nlp_portname;
++ vport = ndlp->vport;
++ phba = vport->phba;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport devlosstmo:did:x%x type:x%x id:x%x",
++ ndlp->nlp_DID, ndlp->nlp_type, rport->scsi_target_id);
+
+- spin_lock_irq(phba->host->host_lock);
++ if (!(vport->load_flag & FC_UNLOADING) &&
++ ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++ return;
++
++ if (ndlp->nlp_type & NLP_FABRIC) {
++ int put_node;
++ int put_rport;
++
++ /* We will clean up these Nodes in linkup */
++ put_node = rdata->pnode != NULL;
++ put_rport = ndlp->rport != NULL;
++ rdata->pnode = NULL;
++ ndlp->rport = NULL;
++ if (put_node)
++ lpfc_nlp_put(ndlp);
++ if (put_rport)
++ put_device(&rport->dev);
++ return;
++ }
+
+ if (ndlp->nlp_sid != NLP_NO_SID) {
+ warn_on = 1;
+@@ -123,76 +197,114 @@
+ lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+ }
+- if (phba->fc_flag & FC_UNLOADING)
++ if (vport->load_flag & FC_UNLOADING)
+ warn_on = 0;
+
+- spin_unlock_irq(phba->host->host_lock);
+-
+ if (warn_on) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0203 Devloss timeout on "
++ "%d (%d):0203 Devloss timeout on "
+ "WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ "NPort x%x Data: x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ *name, *(name+1), *(name+2), *(name+3),
+ *(name+4), *(name+5), *(name+6), *(name+7),
+ ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->nlp_state, ndlp->nlp_rpi);
+ } else {
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0204 Devloss timeout on "
++ "%d (%d):0204 Devloss timeout on "
+ "WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ "NPort x%x Data: x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ *name, *(name+1), *(name+2), *(name+3),
+ *(name+4), *(name+5), *(name+6), *(name+7),
+ ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->nlp_state, ndlp->nlp_rpi);
+ }
+
+- if (!(phba->fc_flag & FC_UNLOADING) &&
++ if (!(vport->load_flag & FC_UNLOADING) &&
+ !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+ !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
+ (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE))
+- lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
++ lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+ else {
++ int put_node;
++ int put_rport;
++
++ put_node = rdata->pnode != NULL;
++ put_rport = ndlp->rport != NULL;
+ rdata->pnode = NULL;
+ ndlp->rport = NULL;
++ if (put_node)
+ lpfc_nlp_put(ndlp);
++ if (put_rport)
+ put_device(&rport->dev);
+ }
++}
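The else branch above snapshots which cross-references still exist (put_node, put_rport) before clearing them, so each side's reference is dropped at most once even if the other side already detached. A small refcount model of that idiom, using hypothetical types:

#include <stdio.h>

struct obj {
    int refs;
    const char *name;
};

static void put(struct obj *o)
{
    if (--o->refs == 0)
        printf("%s freed\n", o->name);
}

struct node  { struct obj *rport_ref; struct obj obj; };
struct rport { struct obj *node_ref;  struct obj obj; };

/* Break the node<->rport cycle: record which links were held, clear
 * both pointers, then drop each reference at most once. */
static void detach(struct node *n, struct rport *r)
{
    int put_node = (r->node_ref != NULL);
    int put_rport = (n->rport_ref != NULL);

    r->node_ref = NULL;
    n->rport_ref = NULL;
    if (put_node)
        put(&n->obj);
    if (put_rport)
        put(&r->obj);
}

int main(void)
{
    struct node  n = { NULL, { 1, "node"  } };
    struct rport r = { NULL, { 1, "rport" } };

    n.rport_ref = &r.obj;
    r.node_ref = &n.obj;
    detach(&n, &r);      /* both freed exactly once */
    return 0;
}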
++
+
++void
++lpfc_worker_wake_up(struct lpfc_hba *phba)
++{
++ wake_up(phba->work_wait);
+ return;
+ }
+
+ static void
+-lpfc_work_list_done(struct lpfc_hba * phba)
++lpfc_work_list_done(struct lpfc_hba *phba)
+ {
+ struct lpfc_work_evt *evtp = NULL;
+ struct lpfc_nodelist *ndlp;
++ struct lpfc_vport *vport;
+ int free_evt;
+
+- spin_lock_irq(phba->host->host_lock);
+- while(!list_empty(&phba->work_list)) {
++ spin_lock_irq(&phba->hbalock);
++ while (!list_empty(&phba->work_list)) {
+ list_remove_head((&phba->work_list), evtp, typeof(*evtp),
+ evt_listp);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ free_evt = 1;
+ switch (evtp->evt) {
++ case LPFC_EVT_DEV_LOSS_DELAY:
++ free_evt = 0; /* evt is part of ndlp */
++ ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
++ vport = ndlp->vport;
++ if (!vport)
++ break;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport devlossdly:did:x%x flg:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
++ if (!(vport->load_flag & FC_UNLOADING) &&
++ !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
++ !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
++ lpfc_disc_state_machine(vport, ndlp, NULL,
++ NLP_EVT_DEVICE_RM);
++ }
++ break;
+ case LPFC_EVT_ELS_RETRY:
+- ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++ ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
+ lpfc_els_retry_delay_handler(ndlp);
++ free_evt = 0; /* evt is part of ndlp */
++ break;
++ case LPFC_EVT_DEV_LOSS:
++ ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++ lpfc_nlp_get(ndlp);
++ lpfc_dev_loss_tmo_handler(ndlp);
+ free_evt = 0;
++ complete((struct completion *)(evtp->evt_arg2));
++ lpfc_nlp_put(ndlp);
+ break;
+ case LPFC_EVT_ONLINE:
+- if (phba->hba_state < LPFC_LINK_DOWN)
+- *(int *)(evtp->evt_arg1) = lpfc_online(phba);
++ if (phba->link_state < LPFC_LINK_DOWN)
++ *(int *) (evtp->evt_arg1) = lpfc_online(phba);
+ else
+- *(int *)(evtp->evt_arg1) = 0;
++ *(int *) (evtp->evt_arg1) = 0;
+ complete((struct completion *)(evtp->evt_arg2));
+ break;
+ case LPFC_EVT_OFFLINE_PREP:
+- if (phba->hba_state >= LPFC_LINK_DOWN)
++ if (phba->link_state >= LPFC_LINK_DOWN)
+ lpfc_offline_prep(phba);
+ *(int *)(evtp->evt_arg1) = 0;
+ complete((struct completion *)(evtp->evt_arg2));
+@@ -218,33 +330,31 @@
+ case LPFC_EVT_KILL:
+ lpfc_offline(phba);
+ *(int *)(evtp->evt_arg1)
+- = (phba->stopped) ? 0 : lpfc_sli_brdkill(phba);
++ = (phba->pport->stopped)
++ ? 0 : lpfc_sli_brdkill(phba);
+ lpfc_unblock_mgmt_io(phba);
+ complete((struct completion *)(evtp->evt_arg2));
+ break;
+ }
+ if (free_evt)
+ kfree(evtp);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ }
+
+-static void
+-lpfc_work_done(struct lpfc_hba * phba)
++void
++lpfc_work_done(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli_ring *pring;
+- int i;
+- uint32_t ha_copy;
+- uint32_t control;
+- uint32_t work_hba_events;
++ uint32_t ha_copy, status, control, work_port_events;
++ struct lpfc_vport *vport;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ ha_copy = phba->work_ha;
+ phba->work_ha = 0;
+- work_hba_events=phba->work_hba_events;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ if (ha_copy & HA_ERATT)
+ lpfc_handle_eratt(phba);
+@@ -255,66 +365,111 @@
+ if (ha_copy & HA_LATT)
+ lpfc_handle_latt(phba);
+
+- if (work_hba_events & WORKER_DISC_TMO)
+- lpfc_disc_timeout_handler(phba);
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (!scsi_host_get(shost)) {
++ continue;
++ }
++ spin_unlock_irq(&phba->hbalock);
++ work_port_events = vport->work_port_events;
++
++ if (work_port_events & WORKER_DISC_TMO)
++ lpfc_disc_timeout_handler(vport);
+
+- if (work_hba_events & WORKER_ELS_TMO)
+- lpfc_els_timeout_handler(phba);
++ if (work_port_events & WORKER_ELS_TMO)
++ lpfc_els_timeout_handler(vport);
+
+- if (work_hba_events & WORKER_MBOX_TMO)
++ if (work_port_events & WORKER_HB_TMO)
++ lpfc_hb_timeout_handler(phba);
++
++ if (work_port_events & WORKER_MBOX_TMO)
+ lpfc_mbox_timeout_handler(phba);
+
+- if (work_hba_events & WORKER_FDMI_TMO)
+- lpfc_fdmi_tmo_handler(phba);
++ if (work_port_events & WORKER_FABRIC_BLOCK_TMO)
++ lpfc_unblock_fabric_iocbs(phba);
++
++ if (work_port_events & WORKER_FDMI_TMO)
++ lpfc_fdmi_timeout_handler(vport);
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->work_hba_events &= ~work_hba_events;
+- spin_unlock_irq(phba->host->host_lock);
+-
+- for (i = 0; i < phba->sli.num_rings; i++, ha_copy >>= 4) {
+- pring = &phba->sli.ring[i];
+- if ((ha_copy & HA_RXATT)
++ if (work_port_events & WORKER_RAMP_DOWN_QUEUE)
++ lpfc_ramp_down_queue_handler(phba);
++
++ if (work_port_events & WORKER_RAMP_UP_QUEUE)
++ lpfc_ramp_up_queue_handler(phba);
++
++ spin_lock_irq(&vport->work_port_lock);
++ vport->work_port_events &= ~work_port_events;
++ spin_unlock_irq(&vport->work_port_lock);
++ scsi_host_put(shost);
++ spin_lock_irq(&phba->hbalock);
++ }
++ spin_unlock_irq(&phba->hbalock);
++
++ pring = &phba->sli.ring[LPFC_ELS_RING];
++ status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING)));
++ status >>= (4*LPFC_ELS_RING);
++ if ((status & HA_RXMASK)
+ || (pring->flag & LPFC_DEFERRED_RING_EVENT)) {
+ if (pring->flag & LPFC_STOP_IOCB_MASK) {
+ pring->flag |= LPFC_DEFERRED_RING_EVENT;
+ } else {
+ lpfc_sli_handle_slow_ring_event(phba, pring,
+- (ha_copy &
++ (status &
+ HA_RXMASK));
+ pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
+ }
+ /*
+ * Turn on Ring interrupts
+ */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ control = readl(phba->HCregaddr);
+- control |= (HC_R0INT_ENA << i);
++ if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
++ control |= (HC_R0INT_ENA << LPFC_ELS_RING);
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
+ }
++ spin_unlock_irq(&phba->hbalock);
+ }
+-
+- lpfc_work_list_done (phba);
+-
++ lpfc_work_list_done(phba);
+ }
+
+ static int
+-check_work_wait_done(struct lpfc_hba *phba) {
++check_work_wait_done(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
++ struct lpfc_sli_ring *pring;
++ int rc = 0;
+
+- spin_lock_irq(phba->host->host_lock);
+- if (phba->work_ha ||
+- phba->work_hba_events ||
+- (!list_empty(&phba->work_list)) ||
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ if (vport->work_port_events) {
++ rc = 1;
++ goto exit;
++ }
++ }
++
++ if (phba->work_ha || (!list_empty(&phba->work_list)) ||
+ kthread_should_stop()) {
+- spin_unlock_irq(phba->host->host_lock);
+- return 1;
+- } else {
+- spin_unlock_irq(phba->host->host_lock);
+- return 0;
++ rc = 1;
++ goto exit;
+ }
++
++ pring = &phba->sli.ring[LPFC_ELS_RING];
++ if (pring->flag & LPFC_DEFERRED_RING_EVENT)
++ rc = 1;
++exit:
++ if (rc)
++ phba->work_found++;
++ else
++ phba->work_found = 0;
++
++ spin_unlock_irq(&phba->hbalock);
++ return rc;
+ }
+
++
+ int
+ lpfc_do_work(void *p)
+ {
+@@ -324,11 +479,13 @@
+
+ set_user_nice(current, -20);
+ phba->work_wait = &work_waitq;
++ phba->work_found = 0;
+
+ while (1) {
+
+ rc = wait_event_interruptible(work_waitq,
+ check_work_wait_done(phba));
++
+ BUG_ON(rc);
+
+ if (kthread_should_stop())
+@@ -336,6 +493,17 @@
+
+ lpfc_work_done(phba);
+
++ /* If there is a lot of slow ring work, like during link up
++ * check_work_wait_done() may cause this thread to not give
++ * up the CPU for very long periods of time. This may cause
++ * soft lockups or other problems. To avoid these situations,
++ * give up the CPU here after LPFC_MAX_WORKER_ITERATION
++ * consecutive iterations.
++ */
++ if (phba->work_found >= LPFC_MAX_WORKER_ITERATION) {
++ phba->work_found = 0;
++ schedule();
++ }
+ }
+ phba->work_wait = NULL;
+ return 0;
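The comment in this hunk explains why the worker caps consecutive busy passes at LPFC_MAX_WORKER_ITERATION before calling schedule(). A userspace analogue of that bounded loop, using sched_yield() and invented stand-ins for the work queue:

#include <sched.h>
#include <stdio.h>

#define MAX_WORKER_ITERATION 4

static int pending = 10;    /* pretend backlog of slow-ring work */

static int work_available(void) { return pending > 0; }
static void do_work(void) { pending--; }

int main(void)
{
    int work_found = 0;

    while (work_available()) {
        do_work();
        /* Give up the CPU after too many consecutive busy passes so
         * one hot ring cannot starve everything else (soft lockup). */
        if (++work_found >= MAX_WORKER_ITERATION) {
            work_found = 0;
            printf("yielding after %d passes\n", MAX_WORKER_ITERATION);
            sched_yield();
        }
    }
    return 0;
}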
+@@ -347,16 +515,17 @@
+ * embedding it in the IOCB.
+ */
+ int
+-lpfc_workq_post_event(struct lpfc_hba * phba, void *arg1, void *arg2,
++lpfc_workq_post_event(struct lpfc_hba *phba, void *arg1, void *arg2,
+ uint32_t evt)
+ {
+ struct lpfc_work_evt *evtp;
++ unsigned long flags;
+
+ /*
+ * All Mailbox completions and LPFC_ELS_RING rcv ring IOCB events will
+ * be queued to worker thread for processing
+ */
+- evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_KERNEL);
++ evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_ATOMIC);
+ if (!evtp)
+ return 0;
+
+@@ -364,136 +533,210 @@
+ evtp->evt_arg2 = arg2;
+ evtp->evt = evt;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irqsave(&phba->hbalock, flags);
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
+
+ return 1;
+ }
+
+-int
+-lpfc_linkdown(struct lpfc_hba *phba)
++void
++lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
+ {
+- struct lpfc_sli *psli;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_nodelist *ndlp, *next_ndlp;
+- LPFC_MBOXQ_t *mb;
+ int rc;
+
+- psli = &phba->sli;
+- /* sysfs or selective reset may call this routine to clean up */
+- if (phba->hba_state >= LPFC_LINK_DOWN) {
+- if (phba->hba_state == LPFC_LINK_DOWN)
+- return 0;
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++ continue;
++
++ if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN)
++ lpfc_unreg_rpi(vport, ndlp);
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->hba_state = LPFC_LINK_DOWN;
+- spin_unlock_irq(phba->host->host_lock);
++ /* Leave Fabric nodes alone on link down */
++ if (!remove && ndlp->nlp_type & NLP_FABRIC)
++ continue;
++ rc = lpfc_disc_state_machine(vport, ndlp, NULL,
++ remove
++ ? NLP_EVT_DEVICE_RM
++ : NLP_EVT_DEVICE_RECOVERY);
++ }
++ if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) {
++ lpfc_mbx_unreg_vpi(vport);
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+ }
++}
++
++static void
++lpfc_linkdown_port(struct lpfc_vport *vport)
++{
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+
+- fc_host_post_event(phba->host, fc_get_event_number(),
+- FCH_EVT_LINKDOWN, 0);
++ fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0);
+
+- /* Clean up any firmware default rpi's */
+- if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+- lpfc_unreg_did(phba, 0xffffffff, mb);
+- mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+- if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
+- == MBX_NOT_FINISHED) {
+- mempool_free( mb, phba->mbox_mem_pool);
+- }
+- }
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Link Down: state:x%x rtry:x%x flg:x%x",
++ vport->port_state, vport->fc_ns_retry, vport->fc_flag);
+
+ /* Cleanup any outstanding RSCN activity */
+- lpfc_els_flush_rscn(phba);
++ lpfc_els_flush_rscn(vport);
+
+ /* Cleanup any outstanding ELS commands */
+- lpfc_els_flush_cmd(phba);
++ lpfc_els_flush_cmd(vport);
++
++ lpfc_cleanup_rpis(vport, 0);
+
+- /*
+- * Issue a LINK DOWN event to all nodes.
+- */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
+ /* free any ndlp's on unused list */
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
++ /* free any ndlp's in unused state */
+ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
+- lpfc_drop_node(phba, ndlp);
+- else /* otherwise, force node recovery. */
+- rc = lpfc_disc_state_machine(phba, ndlp, NULL,
+- NLP_EVT_DEVICE_RECOVERY);
++ lpfc_drop_node(vport, ndlp);
++
++ /* Turn off discovery timer if it's running */
++ lpfc_can_disctmo(vport);
++}
++
++int
++lpfc_linkdown(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport = phba->pport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_vport *port_iterator;
++ LPFC_MBOXQ_t *mb;
++
++ if (phba->link_state == LPFC_LINK_DOWN) {
++ return 0;
++ }
++ spin_lock_irq(&phba->hbalock);
++ if (phba->link_state > LPFC_LINK_DOWN) {
++ phba->link_state = LPFC_LINK_DOWN;
++ phba->pport->fc_flag &= ~FC_LBIT;
++ }
++ spin_unlock_irq(&phba->hbalock);
++
++ list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++
++ /* Issue a LINK DOWN event to all nodes */
++ lpfc_linkdown_port(port_iterator);
++ }
++
++ /* Clean up any firmware default rpi's */
++ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mb) {
++ lpfc_unreg_did(phba, 0xffff, 0xffffffff, mb);
++ mb->vport = vport;
++ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(mb, phba->mbox_mem_pool);
++ }
+ }
+
+ /* Setup myDID for link up if we are in pt2pt mode */
+- if (phba->fc_flag & FC_PT2PT) {
+- phba->fc_myDID = 0;
+- if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
++ if (phba->pport->fc_flag & FC_PT2PT) {
++ phba->pport->fc_myDID = 0;
++ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mb) {
+ lpfc_config_link(phba, mb);
+- mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+- if (lpfc_sli_issue_mbox
+- (phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ mb->vport = vport;
++ if (lpfc_sli_issue_mbox(phba, mb,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
+ == MBX_NOT_FINISHED) {
+- mempool_free( mb, phba->mbox_mem_pool);
++ mempool_free(mb, phba->mbox_mem_pool);
+ }
+ }
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
++ spin_unlock_irq(shost->host_lock);
+ }
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_LBIT;
+- spin_unlock_irq(phba->host->host_lock);
+-
+- /* Turn off discovery timer if its running */
+- lpfc_can_disctmo(phba);
+
+- /* Must process IOCBs on all rings to handle ABORTed I/Os */
+ return 0;
+ }
+
+-static int
+-lpfc_linkup(struct lpfc_hba *phba)
++static void
++lpfc_linkup_cleanup_nodes(struct lpfc_vport *vport)
+ {
+- struct lpfc_nodelist *ndlp, *next_ndlp;
+-
+- fc_host_post_event(phba->host, fc_get_event_number(),
+- FCH_EVT_LINKUP, 0);
+-
+- spin_lock_irq(phba->host->host_lock);
+- phba->hba_state = LPFC_LINK_UP;
+- phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
+- FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY);
+- phba->fc_flag |= FC_NDISC_ACTIVE;
+- phba->fc_ns_retry = 0;
+- spin_unlock_irq(phba->host->host_lock);
++ struct lpfc_nodelist *ndlp;
+
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++ continue;
+
+- if (phba->fc_flag & FC_LBIT) {
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
+- if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) {
+ if (ndlp->nlp_type & NLP_FABRIC) {
+- /*
+- * On Linkup its safe to clean up the
+- * ndlp from Fabric connections.
++ /* On Linkup it's safe to clean up the ndlp
++ * from Fabric connections.
+ */
+- lpfc_nlp_set_state(phba, ndlp,
+- NLP_STE_UNUSED_NODE);
++ if (ndlp->nlp_DID != Fabric_DID)
++ lpfc_unreg_rpi(vport, ndlp);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+- /*
+- * Fail outstanding IO now since
+- * device is marked for PLOGI.
++ /* Fail outstanding IO now since device is
++ * marked for PLOGI.
+ */
+- lpfc_unreg_rpi(phba, ndlp);
+- }
+- }
++ lpfc_unreg_rpi(vport, ndlp);
+ }
+ }
++}
+
+- /* free any ndlp's on unused list */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
+- nlp_listp) {
++static void
++lpfc_linkup_port(struct lpfc_vport *vport)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct lpfc_hba *phba = vport->phba;
++
++ if ((vport->load_flag & FC_UNLOADING) != 0)
++ return;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "Link Up: top:x%x speed:x%x flg:x%x",
++ phba->fc_topology, phba->fc_linkspeed, phba->link_flag);
++
++ /* If NPIV is not enabled, only bring the physical port up */
++ if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ (vport != phba->pport))
++ return;
++
++ fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0);
++
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
++ FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY);
++ vport->fc_flag |= FC_NDISC_ACTIVE;
++ vport->fc_ns_retry = 0;
++ spin_unlock_irq(shost->host_lock);
++
++ if (vport->fc_flag & FC_LBIT)
++ lpfc_linkup_cleanup_nodes(vport);
++
++ /* free any ndlp's in unused state */
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
++ nlp_listp)
+ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
++}
++
++static int
++lpfc_linkup(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
++
++ phba->link_state = LPFC_LINK_UP;
++
++ /* Unblock fabric iocbs if they are blocked */
++ clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++ del_timer_sync(&phba->fabric_block_timer);
++
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ lpfc_linkup_port(vport);
+ }
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++ lpfc_issue_clear_la(phba, phba->pport);
+
+ return 0;
+ }
+@@ -505,14 +748,14 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
++ struct lpfc_vport *vport = pmb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_sli *psli = &phba->sli;
++ MAILBOX_t *mb = &pmb->mb;
+ uint32_t control;
+
+- psli = &phba->sli;
+- mb = &pmb->mb;
+ /* Since we don't do discovery right now, turn these off here */
+ psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
+ psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
+@@ -522,69 +765,74 @@
+ if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) {
+ /* CLEAR_LA mbox error <mbxStatus> state <hba_state> */
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+- "%d:0320 CLEAR_LA mbxStatus error x%x hba "
++ "%d (%d):0320 CLEAR_LA mbxStatus error x%x hba "
+ "state x%x\n",
+- phba->brd_no, mb->mbxStatus, phba->hba_state);
++ phba->brd_no, vport->vpi, mb->mbxStatus,
++ vport->port_state);
+
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ goto out;
+ }
+
+- if (phba->fc_flag & FC_ABORT_DISCOVERY)
+- goto out;
++ if (vport->port_type == LPFC_PHYSICAL_PORT)
++ phba->link_state = LPFC_HBA_READY;
+
+- phba->num_disc_nodes = 0;
+- /* go thru NPR list and issue ELS PLOGIs */
+- if (phba->fc_npr_cnt) {
+- lpfc_els_disc_plogi(phba);
+- }
++ spin_lock_irq(&phba->hbalock);
++ psli->sli_flag |= LPFC_PROCESS_LA;
++ control = readl(phba->HCregaddr);
++ control |= HC_LAINT_ENA;
++ writel(control, phba->HCregaddr);
++ readl(phba->HCregaddr); /* flush */
++ spin_unlock_irq(&phba->hbalock);
++ return;
++
++ vport->num_disc_nodes = 0;
++ /* go thru NPR nodes and issue ELS PLOGIs */
++ if (vport->fc_npr_cnt)
++ lpfc_els_disc_plogi(vport);
+
+- if (!phba->num_disc_nodes) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_NDISC_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ if (!vport->num_disc_nodes) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(shost->host_lock);
+ }
+
+- phba->hba_state = LPFC_HBA_READY;
++ vport->port_state = LPFC_VPORT_READY;
+
+ out:
+ /* Device Discovery completes */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0225 Device Discovery completes\n",
+- phba->brd_no);
+-
+- mempool_free( pmb, phba->mbox_mem_pool);
+-
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_ABORT_DISCOVERY;
+- if (phba->fc_flag & FC_ESTABLISH_LINK) {
+- phba->fc_flag &= ~FC_ESTABLISH_LINK;
+- }
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0225 Device Discovery completes\n",
++ phba->brd_no, vport->vpi);
++
++ mempool_free(pmb, phba->mbox_mem_pool);
++
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_ABORT_DISCOVERY | FC_ESTABLISH_LINK);
++ spin_unlock_irq(shost->host_lock);
+
+ del_timer_sync(&phba->fc_estabtmo);
+
+- lpfc_can_disctmo(phba);
++ lpfc_can_disctmo(vport);
+
+ /* turn on Link Attention interrupts */
+- spin_lock_irq(phba->host->host_lock);
++
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag |= LPFC_PROCESS_LA;
+ control = readl(phba->HCregaddr);
+ control |= HC_LAINT_ENA;
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return;
+ }
+
++
+ static void
+ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli = &phba->sli;
+- int rc;
++ struct lpfc_vport *vport = pmb->vport;
+
+ if (pmb->mb.mbxStatus)
+ goto out;
+@@ -592,127 +840,110 @@
+ mempool_free(pmb, phba->mbox_mem_pool);
+
+ if (phba->fc_topology == TOPOLOGY_LOOP &&
+- phba->fc_flag & FC_PUBLIC_LOOP &&
+- !(phba->fc_flag & FC_LBIT)) {
++ vport->fc_flag & FC_PUBLIC_LOOP &&
++ !(vport->fc_flag & FC_LBIT)) {
+ /* Need to wait for FAN - use discovery timer
+- * for timeout. hba_state is identically
++ * for timeout. port_state is identically
+ * LPFC_LOCAL_CFG_LINK while waiting for FAN
+ */
+- lpfc_set_disctmo(phba);
++ lpfc_set_disctmo(vport);
+ return;
+ }
+
+- /* Start discovery by sending a FLOGI. hba_state is identically
++ /* Start discovery by sending a FLOGI. port_state is identically
+ * LPFC_FLOGI while waiting for FLOGI cmpl
+ */
+- phba->hba_state = LPFC_FLOGI;
+- lpfc_set_disctmo(phba);
+- lpfc_initial_flogi(phba);
++ if (vport->port_state != LPFC_FLOGI) {
++ vport->port_state = LPFC_FLOGI;
++ lpfc_set_disctmo(vport);
++ lpfc_initial_flogi(vport);
++ }
+ return;
+
+ out:
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+- "%d:0306 CONFIG_LINK mbxStatus error x%x "
++ "%d (%d):0306 CONFIG_LINK mbxStatus error x%x "
+ "HBA state x%x\n",
+- phba->brd_no, pmb->mb.mbxStatus, phba->hba_state);
++ phba->brd_no, vport->vpi, pmb->mb.mbxStatus,
++ vport->port_state);
+
+- lpfc_linkdown(phba);
++ mempool_free(pmb, phba->mbox_mem_pool);
+
+- phba->hba_state = LPFC_HBA_ERROR;
++ lpfc_linkdown(phba);
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0200 CONFIG_LINK bad hba state x%x\n",
+- phba->brd_no, phba->hba_state);
++ "%d (%d):0200 CONFIG_LINK bad hba state x%x\n",
++ phba->brd_no, vport->vpi, vport->port_state);
+
+- lpfc_clear_la(phba, pmb);
+- pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free(pmb, phba->mbox_mem_pool);
+- lpfc_disc_flush_list(phba);
+- psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state = LPFC_HBA_READY;
+- }
++ lpfc_issue_clear_la(phba, vport);
+ return;
+ }
+
+ static void
+-lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli = &phba->sli;
+ MAILBOX_t *mb = &pmb->mb;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
++ struct lpfc_vport *vport = pmb->vport;
+
+
+ /* Check for error */
+ if (mb->mbxStatus) {
+ /* READ_SPARAM mbox error <mbxStatus> state <hba_state> */
+ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+- "%d:0319 READ_SPARAM mbxStatus error x%x "
++ "%d (%d):0319 READ_SPARAM mbxStatus error x%x "
+ "hba state x%x>\n",
+- phba->brd_no, mb->mbxStatus, phba->hba_state);
++ phba->brd_no, vport->vpi, mb->mbxStatus,
++ vport->port_state);
+
+ lpfc_linkdown(phba);
+- phba->hba_state = LPFC_HBA_ERROR;
+ goto out;
+ }
+
+- memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt,
++ memcpy((uint8_t *) &vport->fc_sparam, (uint8_t *) mp->virt,
+ sizeof (struct serv_parm));
+ if (phba->cfg_soft_wwnn)
+- u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn);
++ u64_to_wwn(phba->cfg_soft_wwnn,
++ vport->fc_sparam.nodeName.u.wwn);
+ if (phba->cfg_soft_wwpn)
+- u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
+- memcpy((uint8_t *) & phba->fc_nodename,
+- (uint8_t *) & phba->fc_sparam.nodeName,
+- sizeof (struct lpfc_name));
+- memcpy((uint8_t *) & phba->fc_portname,
+- (uint8_t *) & phba->fc_sparam.portName,
+- sizeof (struct lpfc_name));
++ u64_to_wwn(phba->cfg_soft_wwpn,
++ vport->fc_sparam.portName.u.wwn);
++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
++ sizeof(vport->fc_nodename));
++ memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
++ sizeof(vport->fc_portname));
++ if (vport->port_type == LPFC_PHYSICAL_PORT) {
++ memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn));
++ memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn));
++ }
++
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+- mempool_free( pmb, phba->mbox_mem_pool);
++ mempool_free(pmb, phba->mbox_mem_pool);
+ return;
+
+ out:
+ pmb->context1 = NULL;
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+- if (phba->hba_state != LPFC_CLEAR_LA) {
+- lpfc_clear_la(phba, pmb);
+- pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB))
+- == MBX_NOT_FINISHED) {
+- mempool_free( pmb, phba->mbox_mem_pool);
+- lpfc_disc_flush_list(phba);
+- psli->ring[(psli->extra_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->fcp_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->next_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state = LPFC_HBA_READY;
+- }
+- } else {
+- mempool_free( pmb, phba->mbox_mem_pool);
+- }
++ lpfc_issue_clear_la(phba, vport);
++ mempool_free(pmb, phba->mbox_mem_pool);
+ return;
+ }
+
+ static void
+ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
+ {
+- int i;
++ struct lpfc_vport *vport = phba->pport;
+ LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox;
++ int i;
+ struct lpfc_dmabuf *mp;
+ int rc;
+
+ sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ switch (la->UlnkSpeed) {
+ case LA_1GHZ_LINK:
+ phba->fc_linkspeed = LA_1GHZ_LINK;
+@@ -732,14 +963,16 @@
+ }
+
+ phba->fc_topology = la->topology;
++ phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED;
+
+ if (phba->fc_topology == TOPOLOGY_LOOP) {
+- /* Get Loop Map information */
++ phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED;
+
++ /* Get Loop Map information */
+ if (la->il)
+- phba->fc_flag |= FC_LBIT;
++ vport->fc_flag |= FC_LBIT;
+
+- phba->fc_myDID = la->granted_AL_PA;
++ vport->fc_myDID = la->granted_AL_PA;
+ i = la->un.lilpBde64.tus.f.bdeSize;
+
+ if (i == 0) {
+@@ -781,14 +1014,20 @@
+ }
+ }
+ } else {
+- phba->fc_myDID = phba->fc_pref_DID;
+- phba->fc_flag |= FC_LBIT;
++ if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) {
++ if (phba->max_vpi && phba->cfg_npiv_enable &&
++ (phba->sli_rev == 3))
++ phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
++ }
++ vport->fc_myDID = phba->fc_pref_DID;
++ vport->fc_flag |= FC_LBIT;
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ lpfc_linkup(phba);
+ if (sparam_mbox) {
+- lpfc_read_sparam(phba, sparam_mbox);
++ lpfc_read_sparam(phba, sparam_mbox, 0);
++ sparam_mbox->vport = vport;
+ sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam;
+ rc = lpfc_sli_issue_mbox(phba, sparam_mbox,
+ (MBX_NOWAIT | MBX_STOP_IOCB));
+@@ -799,36 +1038,48 @@
+ mempool_free(sparam_mbox, phba->mbox_mem_pool);
+ if (cfglink_mbox)
+ mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+- return;
++ goto out;
+ }
+ }
+
+ if (cfglink_mbox) {
+- phba->hba_state = LPFC_LOCAL_CFG_LINK;
++ vport->port_state = LPFC_LOCAL_CFG_LINK;
+ lpfc_config_link(phba, cfglink_mbox);
++ cfglink_mbox->vport = vport;
+ cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
+ rc = lpfc_sli_issue_mbox(phba, cfglink_mbox,
+ (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED)
++ if (rc != MBX_NOT_FINISHED)
++ return;
+ mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+ }
++out:
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d (%d):0263 Discovery Mailbox error: state: 0x%x : %p %p\n",
++ phba->brd_no, vport->vpi,
++ vport->port_state, sparam_mbox, cfglink_mbox);
++
++ lpfc_issue_clear_la(phba, vport);
++ return;
+ }
+
+ static void
+-lpfc_mbx_issue_link_down(struct lpfc_hba *phba) {
++lpfc_mbx_issue_link_down(struct lpfc_hba *phba)
++{
+ uint32_t control;
+ struct lpfc_sli *psli = &phba->sli;
+
+ lpfc_linkdown(phba);
+
+ /* turn on Link Attention interrupts - no CLEAR_LA needed */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag |= LPFC_PROCESS_LA;
+ control = readl(phba->HCregaddr);
+ control |= HC_LAINT_ENA;
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ }
+
+ /*
+@@ -838,22 +1089,21 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
++ struct lpfc_vport *vport = pmb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ READ_LA_VAR *la;
+ MAILBOX_t *mb = &pmb->mb;
+ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+
+ /* Check for error */
+ if (mb->mbxStatus) {
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_LINK_EVENT,
++ lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT,
+ "%d:1307 READ_LA mbox error x%x state x%x\n",
+- phba->brd_no,
+- mb->mbxStatus, phba->hba_state);
++ phba->brd_no, mb->mbxStatus, vport->port_state);
+ lpfc_mbx_issue_link_down(phba);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ goto lpfc_mbx_cmpl_read_la_free_mbuf;
+ }
+
+@@ -861,27 +1111,26 @@
+
+ memcpy(&phba->alpa_map[0], mp->virt, 128);
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ if (la->pb)
+- phba->fc_flag |= FC_BYPASSED_MODE;
++ vport->fc_flag |= FC_BYPASSED_MODE;
+ else
+- phba->fc_flag &= ~FC_BYPASSED_MODE;
+- spin_unlock_irq(phba->host->host_lock);
++ vport->fc_flag &= ~FC_BYPASSED_MODE;
++ spin_unlock_irq(shost->host_lock);
+
+ if (((phba->fc_eventTag + 1) < la->eventTag) ||
+ (phba->fc_eventTag == la->eventTag)) {
+ phba->fc_stat.LinkMultiEvent++;
+- if (la->attType == AT_LINK_UP) {
++ if (la->attType == AT_LINK_UP)
+ if (phba->fc_eventTag != 0)
+ lpfc_linkdown(phba);
+ }
+- }
+
+ phba->fc_eventTag = la->eventTag;
+
+ if (la->attType == AT_LINK_UP) {
+ phba->fc_stat.LinkUp++;
+- if (phba->fc_flag & FC_LOOPBACK_MODE) {
++ if (phba->link_flag & LS_LOOPBACK_MODE) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT,
+ "%d:1306 Link Up Event in loop back mode "
+ "x%x received Data: x%x x%x x%x x%x\n",
+@@ -903,7 +1152,7 @@
+ "%d:1305 Link Down Event x%x received "
+ "Data: x%x x%x x%x\n",
+ phba->brd_no, la->eventTag, phba->fc_eventTag,
+- phba->hba_state, phba->fc_flag);
++ phba->pport->port_state, vport->fc_flag);
+ lpfc_mbx_issue_link_down(phba);
+ }
+
+@@ -921,31 +1170,115 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
+- struct lpfc_dmabuf *mp;
+- struct lpfc_nodelist *ndlp;
+-
+- psli = &phba->sli;
+- mb = &pmb->mb;
+-
+- ndlp = (struct lpfc_nodelist *) pmb->context2;
+- mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_vport *vport = pmb->vport;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+
+ pmb->context1 = NULL;
+
+ /* Good status, call state machine */
+- lpfc_disc_state_machine(phba, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN);
++ lpfc_disc_state_machine(vport, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN);
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+- mempool_free( pmb, phba->mbox_mem_pool);
++ mempool_free(pmb, phba->mbox_mem_pool);
+ lpfc_nlp_put(ndlp);
+
+ return;
+ }
+
++static void
++lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_vport *vport = pmb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ switch (mb->mbxStatus) {
++ case 0x0011:
++ case 0x0020:
++ case 0x9700:
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d (%d):0911 cmpl_unreg_vpi, "
++ "mb status = 0x%x\n",
++ phba->brd_no, vport->vpi, mb->mbxStatus);
++ break;
++ }
++ vport->unreg_vpi_cmpl = VPORT_OK;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ /*
++ * This shost reference might have been taken at the beginning of
++ * lpfc_vport_delete()
++ */
++ if (vport->load_flag & FC_UNLOADING)
++ scsi_host_put(shost);
++}
++
++void
++lpfc_mbx_unreg_vpi(struct lpfc_vport *vport)
++{
++ struct lpfc_hba *phba = vport->phba;
++ LPFC_MBOXQ_t *mbox;
++ int rc;
++
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox)
++ return;
++
++ lpfc_unreg_vpi(phba, vport->vpi, mbox);
++ mbox->vport = vport;
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_unreg_vpi;
++ rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++ if (rc == MBX_NOT_FINISHED) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
++ "%d (%d):1800 Could not issue unreg_vpi\n",
++ phba->brd_no, vport->vpi);
++ mempool_free(mbox, phba->mbox_mem_pool);
++ vport->unreg_vpi_cmpl = VPORT_ERROR;
++ }
++}
++
++static void
++lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++ struct lpfc_vport *vport = pmb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ MAILBOX_t *mb = &pmb->mb;
++
++ switch (mb->mbxStatus) {
++ case 0x0011:
++ case 0x9601:
++ case 0x9602:
++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++ "%d (%d):0912 cmpl_reg_vpi, mb status = 0x%x\n",
++ phba->brd_no, vport->vpi, mb->mbxStatus);
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
++ vport->fc_myDID = 0;
++ goto out;
++ }
++
++ vport->num_disc_nodes = 0;
++ /* go thru NPR list and issue ELS PLOGIs */
++ if (vport->fc_npr_cnt)
++ lpfc_els_disc_plogi(vport);
++
++ if (!vport->num_disc_nodes) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
++ }
++ vport->port_state = LPFC_VPORT_READY;
++
++out:
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return;
++}
++
+ /*
+ * This routine handles processing a Fabric REG_LOGIN mailbox
+ * command upon completion. It is setup in the LPFC_MBOXQ
+@@ -953,20 +1286,14 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
+- struct lpfc_dmabuf *mp;
++ struct lpfc_vport *vport = pmb->vport;
++ struct lpfc_vport *next_vport;
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+ struct lpfc_nodelist *ndlp;
+- struct lpfc_nodelist *ndlp_fdmi;
+-
+-
+- psli = &phba->sli;
+- mb = &pmb->mb;
+-
+ ndlp = (struct lpfc_nodelist *) pmb->context2;
+- mp = (struct lpfc_dmabuf *) (pmb->context1);
+
+ pmb->context1 = NULL;
+ pmb->context2 = NULL;
+@@ -977,60 +1304,46 @@
+ mempool_free(pmb, phba->mbox_mem_pool);
+ lpfc_nlp_put(ndlp);
+
+- /* FLOGI failed, so just use loop map to make discovery list */
+- lpfc_disc_list_loopmap(phba);
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ /* FLOGI failed, use loop map to make discovery list */
++ lpfc_disc_list_loopmap(vport);
+
+ /* Start discovery */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
++ return;
++ }
++
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++ "%d (%d):0258 Register Fabric login error: 0x%x\n",
++ phba->brd_no, vport->vpi, mb->mbxStatus);
++
+ return;
+ }
+
+ ndlp->nlp_rpi = mb->un.varWords[0];
+ ndlp->nlp_type |= NLP_FABRIC;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+
+ lpfc_nlp_put(ndlp); /* Drop the reference from the mbox */
+
+- if (phba->hba_state == LPFC_FABRIC_CFG_LINK) {
+- /* This NPort has been assigned an NPort_ID by the fabric as a
+- * result of the completed fabric login. Issue a State Change
+- * Registration (SCR) ELS request to the fabric controller
+- * (SCR_DID) so that this NPort gets RSCN events from the
+- * fabric.
+- */
+- lpfc_issue_els_scr(phba, SCR_DID, 0);
++ if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
++ list_for_each_entry(next_vport, &phba->port_list, listentry) {
++ if (next_vport->port_type == LPFC_PHYSICAL_PORT)
++ continue;
+
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
+- if (!ndlp) {
+- /* Allocate a new node instance. If the pool is empty,
+- * start the discovery process and skip the Nameserver
+- * login process. This is attempted again later on.
+- * Otherwise, issue a Port Login (PLOGI) to NameServer.
+- */
+- ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+- if (!ndlp) {
+- lpfc_disc_start(phba);
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- mempool_free(pmb, phba->mbox_mem_pool);
+- return;
+- } else {
+- lpfc_nlp_init(phba, ndlp, NameServer_DID);
+- ndlp->nlp_type |= NLP_FABRIC;
+- }
+- }
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, NameServer_DID, 0);
+- if (phba->cfg_fdmi_on) {
+- ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool,
+- GFP_KERNEL);
+- if (ndlp_fdmi) {
+- lpfc_nlp_init(phba, ndlp_fdmi, FDMI_DID);
+- ndlp_fdmi->nlp_type |= NLP_FABRIC;
+- ndlp_fdmi->nlp_state = NLP_STE_PLOGI_ISSUE;
+- lpfc_issue_els_plogi(phba, FDMI_DID, 0);
++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
++ lpfc_initial_fdisc(next_vport);
++ else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++ lpfc_vport_set_state(vport,
++ FC_VPORT_NO_FABRIC_SUPP);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0259 No NPIV Fabric "
++ "support\n",
++ phba->brd_no, vport->vpi);
+ }
+ }
++ lpfc_do_scr_ns_plogi(phba, vport);
+ }
+
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -1046,32 +1359,36 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
+- struct lpfc_dmabuf *mp;
+- struct lpfc_nodelist *ndlp;
+-
+- psli = &phba->sli;
+- mb = &pmb->mb;
+-
+- ndlp = (struct lpfc_nodelist *) pmb->context2;
+- mp = (struct lpfc_dmabuf *) (pmb->context1);
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++ struct lpfc_vport *vport = pmb->vport;
+
+ if (mb->mbxStatus) {
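++ /*
++ * The success path below branches back to this label when the
++ * NameServer query cannot be issued, so failure and fallback
++ * share the same cleanup of the node and mailbox resources.
++ */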
++out:
+ lpfc_nlp_put(ndlp);
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ mempool_free(pmb, phba->mbox_mem_pool);
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+
+- /* RegLogin failed, so just use loop map to make discovery
+- list */
+- lpfc_disc_list_loopmap(phba);
++ if (phba->fc_topology == TOPOLOGY_LOOP) {
++ /*
++ * RegLogin failed, use loop map to make discovery
++ * list
++ */
++ lpfc_disc_list_loopmap(vport);
+
+ /* Start discovery */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
++ return;
++ }
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0260 Register NameServer error: 0x%x\n",
++ phba->brd_no, vport->vpi, mb->mbxStatus);
+ return;
+ }
+
+@@ -1079,37 +1396,43 @@
+
+ ndlp->nlp_rpi = mb->un.varWords[0];
+ ndlp->nlp_type |= NLP_FABRIC;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+
+- if (phba->hba_state < LPFC_HBA_READY) {
+- /* Link up discovery requires Fabrib registration. */
+- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RNN_ID);
+- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RSNN_NN);
+- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFT_ID);
+- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFF_ID);
++ if (vport->port_state < LPFC_VPORT_READY) {
++ /* Link up discovery requires Fabric registration. */
++ lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); /* Do this first! */
++ lpfc_ns_cmd(vport, SLI_CTNS_RNN_ID, 0, 0);
++ lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0);
++ lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0);
++ lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0);
++
++ /* Issue SCR just before NameServer GID_FT Query */
++ lpfc_issue_els_scr(vport, SCR_DID, 0);
+ }
+
+- phba->fc_ns_retry = 0;
++ vport->fc_ns_retry = 0;
+ /* Good status, issue CT Request to NameServer */
+- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT)) {
++ if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) {
+ /* Cannot issue NameServer Query, so finish up discovery */
+- lpfc_disc_start(phba);
++ goto out;
+ }
+
+ lpfc_nlp_put(ndlp);
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+- mempool_free( pmb, phba->mbox_mem_pool);
++ mempool_free(pmb, phba->mbox_mem_pool);
+
+ return;
+ }
+
+ static void
+-lpfc_register_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct fc_rport *rport;
+ struct lpfc_rport_data *rdata;
+ struct fc_rport_identifiers rport_ids;
++ struct lpfc_hba *phba = vport->phba;
+
+ /* Remote port has reappeared. Re-register w/ FC transport */
+ rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
+@@ -1125,10 +1448,15 @@
+ * registered the port.
+ */
+ if (ndlp->rport && ndlp->rport->dd_data &&
+- *(struct lpfc_rport_data **) ndlp->rport->dd_data) {
++ ((struct lpfc_rport_data *) ndlp->rport->dd_data)->pnode == ndlp) {
+ lpfc_nlp_put(ndlp);
+ }
+- ndlp->rport = rport = fc_remote_port_add(phba->host, 0, &rport_ids);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport add: did:x%x flg:x%x type x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
++
++ ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids);
+ if (!rport || !get_device(&rport->dev)) {
+ dev_printk(KERN_WARNING, &phba->pcidev->dev,
+ "Warning: fc_remote_port_add failed\n");
+@@ -1154,22 +1482,17 @@
+ (rport->scsi_target_id < LPFC_MAX_TARGET)) {
+ ndlp->nlp_sid = rport->scsi_target_id;
+ }
+-
+ return;
+ }
+
+ static void
+-lpfc_unregister_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
+ {
+ struct fc_rport *rport = ndlp->rport;
+- struct lpfc_rport_data *rdata = rport->dd_data;
+
+- if (rport->scsi_target_id == -1) {
+- ndlp->rport = NULL;
+- rdata->pnode = NULL;
+- lpfc_nlp_put(ndlp);
+- put_device(&rport->dev);
+- }
++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT,
++ "rport delete: did:x%x flg:x%x type x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
+
+ fc_remote_port_delete(rport);
+
+@@ -1177,42 +1500,46 @@
+ }
+
+ static void
+-lpfc_nlp_counters(struct lpfc_hba *phba, int state, int count)
++lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
+ {
+- spin_lock_irq(phba->host->host_lock);
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ spin_lock_irq(shost->host_lock);
+ switch (state) {
+ case NLP_STE_UNUSED_NODE:
+- phba->fc_unused_cnt += count;
++ vport->fc_unused_cnt += count;
+ break;
+ case NLP_STE_PLOGI_ISSUE:
+- phba->fc_plogi_cnt += count;
++ vport->fc_plogi_cnt += count;
+ break;
+ case NLP_STE_ADISC_ISSUE:
+- phba->fc_adisc_cnt += count;
++ vport->fc_adisc_cnt += count;
+ break;
+ case NLP_STE_REG_LOGIN_ISSUE:
+- phba->fc_reglogin_cnt += count;
++ vport->fc_reglogin_cnt += count;
+ break;
+ case NLP_STE_PRLI_ISSUE:
+- phba->fc_prli_cnt += count;
++ vport->fc_prli_cnt += count;
+ break;
+ case NLP_STE_UNMAPPED_NODE:
+- phba->fc_unmap_cnt += count;
++ vport->fc_unmap_cnt += count;
+ break;
+ case NLP_STE_MAPPED_NODE:
+- phba->fc_map_cnt += count;
++ vport->fc_map_cnt += count;
+ break;
+ case NLP_STE_NPR_NODE:
+- phba->fc_npr_cnt += count;
++ vport->fc_npr_cnt += count;
+ break;
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ }
+
+ static void
+-lpfc_nlp_state_cleanup(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
++lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ int old_state, int new_state)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ if (new_state == NLP_STE_UNMAPPED_NODE) {
+ ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR);
+ ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
+@@ -1226,21 +1553,20 @@
+ /* Transport interface */
+ if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE ||
+ old_state == NLP_STE_UNMAPPED_NODE)) {
+- phba->nport_event_cnt++;
+- lpfc_unregister_remote_port(phba, ndlp);
++ vport->phba->nport_event_cnt++;
++ lpfc_unregister_remote_port(ndlp);
+ }
+
+ if (new_state == NLP_STE_MAPPED_NODE ||
+ new_state == NLP_STE_UNMAPPED_NODE) {
+- phba->nport_event_cnt++;
++ vport->phba->nport_event_cnt++;
+ /*
+ * Tell the fc transport about the port, if we haven't
+ * already. If we have, and it's a scsi entity, be
+ * sure to unblock any attached scsi devices
+ */
+- lpfc_register_remote_port(phba, ndlp);
++ lpfc_register_remote_port(vport, ndlp);
+ }
+-
+ /*
+ * if we added to Mapped list, but the remote port
+ * registration failed or assigned a target id outside
+@@ -1251,10 +1577,10 @@
+ (!ndlp->rport ||
+ ndlp->rport->scsi_target_id == -1 ||
+ ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_TGT_NO_SCSIID;
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ }
+ }
+
+@@ -1280,61 +1606,74 @@
+ }
+
+ void
+-lpfc_nlp_set_state(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int state)
++lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ int state)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ int old_state = ndlp->nlp_state;
+ char name1[16], name2[16];
+
+- lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+- "%d:0904 NPort state transition x%06x, %s -> %s\n",
+- phba->brd_no,
++ lpfc_printf_log(vport->phba, KERN_INFO, LOG_NODE,
++ "%d (%d):0904 NPort state transition x%06x, %s -> %s\n",
++ vport->phba->brd_no, vport->vpi,
+ ndlp->nlp_DID,
+ lpfc_nlp_state_name(name1, sizeof(name1), old_state),
+ lpfc_nlp_state_name(name2, sizeof(name2), state));
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE,
++ "node statechg did:x%x old:%d ste:%d",
++ ndlp->nlp_DID, old_state, state);
++
+ if (old_state == NLP_STE_NPR_NODE &&
+ (ndlp->nlp_flag & NLP_DELAY_TMO) != 0 &&
+ state != NLP_STE_NPR_NODE)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ if (old_state == NLP_STE_UNMAPPED_NODE) {
+ ndlp->nlp_flag &= ~NLP_TGT_NO_SCSIID;
+ ndlp->nlp_type &= ~NLP_FC_NODE;
+ }
+
+ if (list_empty(&ndlp->nlp_listp)) {
+- spin_lock_irq(phba->host->host_lock);
+- list_add_tail(&ndlp->nlp_listp, &phba->fc_nodes);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes);
++ spin_unlock_irq(shost->host_lock);
+ } else if (old_state)
+- lpfc_nlp_counters(phba, old_state, -1);
++ lpfc_nlp_counters(vport, old_state, -1);
+
+ ndlp->nlp_state = state;
+- lpfc_nlp_counters(phba, state, 1);
+- lpfc_nlp_state_cleanup(phba, ndlp, old_state, state);
++ lpfc_nlp_counters(vport, state, 1);
++ lpfc_nlp_state_cleanup(vport, ndlp, old_state, state);
+ }
+
+ void
+-lpfc_dequeue_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_dequeue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp))
+- lpfc_nlp_counters(phba, ndlp->nlp_state, -1);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_counters(vport, ndlp->nlp_state, -1);
++ spin_lock_irq(shost->host_lock);
+ list_del_init(&ndlp->nlp_listp);
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_nlp_state_cleanup(phba, ndlp, ndlp->nlp_state, 0);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_nlp_state_cleanup(vport, ndlp, ndlp->nlp_state,
++ NLP_STE_UNUSED_NODE);
+ }
+
+ void
+-lpfc_drop_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp))
+- lpfc_nlp_counters(phba, ndlp->nlp_state, -1);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_counters(vport, ndlp->nlp_state, -1);
++ spin_lock_irq(shost->host_lock);
+ list_del_init(&ndlp->nlp_listp);
+- spin_unlock_irq(phba->host->host_lock);
++ ndlp->nlp_flag &= ~NLP_TARGET_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ lpfc_nlp_put(ndlp);
+ }
+
+@@ -1342,11 +1681,13 @@
+ * Start / ReStart rescue timer for Discovery / RSCN handling
+ */
+ void
+-lpfc_set_disctmo(struct lpfc_hba * phba)
++lpfc_set_disctmo(struct lpfc_vport *vport)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t tmo;
+
+- if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++ if (vport->port_state == LPFC_LOCAL_CFG_LINK) {
+ /* For FAN, timeout should be greater than edtov */
+ tmo = (((phba->fc_edtov + 999) / 1000) + 1);
+ } else {
+@@ -1356,18 +1697,25 @@
+ tmo = ((phba->fc_ratov * 3) + 3);
+ }
+
+- mod_timer(&phba->fc_disctmo, jiffies + HZ * tmo);
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_DISC_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++
++ if (!timer_pending(&vport->fc_disctmo)) {
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "set disc timer: tmo:x%x state:x%x flg:x%x",
++ tmo, vport->port_state, vport->fc_flag);
++ }
++
++ mod_timer(&vport->fc_disctmo, jiffies + HZ * tmo);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_DISC_TMO;
++ spin_unlock_irq(shost->host_lock);
+
+ /* Start Discovery Timer state <hba_state> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0247 Start Discovery Timer state x%x "
++ "%d (%d):0247 Start Discovery Timer state x%x "
+ "Data: x%x x%lx x%x x%x\n",
+- phba->brd_no,
+- phba->hba_state, tmo, (unsigned long)&phba->fc_disctmo,
+- phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++ phba->brd_no, vport->vpi, vport->port_state, tmo,
++ (unsigned long)&vport->fc_disctmo, vport->fc_plogi_cnt,
++ vport->fc_adisc_cnt);
+
+ return;
+ }
+@@ -1376,23 +1724,34 @@
+ * Cancel rescue timer for Discovery / RSCN handling
+ */
+ int
+-lpfc_can_disctmo(struct lpfc_hba * phba)
++lpfc_can_disctmo(struct lpfc_vport *vport)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ unsigned long iflags;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "can disc timer: state:x%x rtry:x%x flg:x%x",
++ vport->port_state, vport->fc_ns_retry, vport->fc_flag);
++
+ /* Turn off discovery timer if its running */
+- if (phba->fc_flag & FC_DISC_TMO) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_DISC_TMO;
+- spin_unlock_irq(phba->host->host_lock);
+- del_timer_sync(&phba->fc_disctmo);
+- phba->work_hba_events &= ~WORKER_DISC_TMO;
++ if (vport->fc_flag & FC_DISC_TMO) {
++ spin_lock_irqsave(shost->host_lock, iflags);
++ vport->fc_flag &= ~FC_DISC_TMO;
++ spin_unlock_irqrestore(shost->host_lock, iflags);
++ del_timer_sync(&vport->fc_disctmo);
++ spin_lock_irqsave(&vport->work_port_lock, iflags);
++ vport->work_port_events &= ~WORKER_DISC_TMO;
++ spin_unlock_irqrestore(&vport->work_port_lock, iflags);
+ }
+
+ /* Cancel Discovery Timer state <hba_state> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0248 Cancel Discovery Timer state x%x "
++ "%d (%d):0248 Cancel Discovery Timer state x%x "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->hba_state, phba->fc_flag,
+- phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++ phba->brd_no, vport->vpi, vport->port_state,
++ vport->fc_flag, vport->fc_plogi_cnt,
++ vport->fc_adisc_cnt);
+
+ return 0;
+ }
+@@ -1402,15 +1761,18 @@
+ * Return true if iocb matches the specified nport
+ */
+ int
+-lpfc_check_sli_ndlp(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring,
+- struct lpfc_iocbq * iocb, struct lpfc_nodelist * ndlp)
++lpfc_check_sli_ndlp(struct lpfc_hba *phba,
++ struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *iocb,
++ struct lpfc_nodelist *ndlp)
+ {
+- struct lpfc_sli *psli;
+- IOCB_t *icmd;
++ struct lpfc_sli *psli = &phba->sli;
++ IOCB_t *icmd = &iocb->iocb;
++ struct lpfc_vport *vport = ndlp->vport;
++
++ if (iocb->vport != vport)
++ return 0;
+
+- psli = &phba->sli;
+- icmd = &iocb->iocb;
+ if (pring->ringno == LPFC_ELS_RING) {
+ switch (icmd->ulpCommand) {
+ case CMD_GEN_REQUEST64_CR:
+@@ -1445,7 +1807,7 @@
+ * associated with nlp_rpi in the LPFC_NODELIST entry.
+ */
+ static int
+-lpfc_no_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ LIST_HEAD(completions);
+ struct lpfc_sli *psli;
+@@ -1454,6 +1816,8 @@
+ IOCB_t *icmd;
+ uint32_t rpi, i;
+
++ lpfc_fabric_abort_nport(ndlp);
++
+ /*
+ * Everything that matches on txcmplq will be returned
+ * by firmware with a no rpi error.
+@@ -1465,15 +1829,15 @@
+ for (i = 0; i < psli->num_rings; i++) {
+ pring = &psli->ring[i];
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txq,
+ list) {
+ /*
+ * Check to see if iocb matches the nport we are
+ * looking for
+ */
+- if ((lpfc_check_sli_ndlp
+- (phba, pring, iocb, ndlp))) {
++ if ((lpfc_check_sli_ndlp(phba, pring, iocb,
++ ndlp))) {
+ /* It matches, so deque and call compl
+ with an error */
+ list_move_tail(&iocb->list,
+@@ -1481,22 +1845,22 @@
+ pring->txq_cnt--;
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
+-
++ spin_unlock_irq(&phba->hbalock);
+ }
+ }
+
+ while (!list_empty(&completions)) {
+ iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+- list_del(&iocb->list);
++ list_del_init(&iocb->list);
+
+- if (iocb->iocb_cmpl) {
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
+ icmd = &iocb->iocb;
+ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+- (iocb->iocb_cmpl) (phba, iocb, iocb);
+- } else
+- lpfc_sli_release_iocbq(phba, iocb);
++ (iocb->iocb_cmpl)(phba, iocb, iocb);
++ }
+ }
+
+ return 0;
+@@ -1512,19 +1876,22 @@
+ * we are waiting to PLOGI back to the remote NPort.
+ */
+ int
+-lpfc_unreg_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ LPFC_MBOXQ_t *mbox;
+ int rc;
+
+ if (ndlp->nlp_rpi) {
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+- lpfc_unreg_login(phba, ndlp->nlp_rpi, mbox);
+- mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+- rc = lpfc_sli_issue_mbox
+- (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mbox) {
++ lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox);
++ mbox->vport = vport;
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ rc = lpfc_sli_issue_mbox(phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB));
+ if (rc == MBX_NOT_FINISHED)
+- mempool_free( mbox, phba->mbox_mem_pool);
++ mempool_free(mbox, phba->mbox_mem_pool);
+ }
+ lpfc_no_rpi(phba, ndlp);
+ ndlp->nlp_rpi = 0;
+@@ -1533,25 +1900,70 @@
+ return 0;
+ }
+
++void
++lpfc_unreg_all_rpis(struct lpfc_vport *vport)
++{
++ struct lpfc_hba *phba = vport->phba;
++ LPFC_MBOXQ_t *mbox;
++ int rc;
++
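++ /*
++ * Passing 0xffff as the rpi asks the firmware to unregister
++ * every RPI registered on this vpi (hence the function name).
++ */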
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mbox) {
++ lpfc_unreg_login(phba, vport->vpi, 0xffff, mbox);
++ mbox->vport = vport;
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ rc = lpfc_sli_issue_mbox(phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB));
++ if (rc == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ }
++ }
++}
++
++void
++lpfc_unreg_default_rpis(struct lpfc_vport *vport)
++{
++ struct lpfc_hba *phba = vport->phba;
++ LPFC_MBOXQ_t *mbox;
++ int rc;
++
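++ /*
++ * A DID of 0xffffffff is a wildcard covering the default RPIs
++ * for this vport (hence the function name).
++ */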
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (mbox) {
++ lpfc_unreg_did(phba, vport->vpi, 0xffffffff, mbox);
++ mbox->vport = vport;
++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ rc = lpfc_sli_issue_mbox(phba, mbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB));
++ if (rc == MBX_NOT_FINISHED) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
++ "%d (%d):1815 Could not issue "
++ "unreg_did (default rpis)\n",
++ phba->brd_no, vport->vpi);
++ mempool_free(mbox, phba->mbox_mem_pool);
++ }
++ }
++}
++
+ /*
+ * Free resources associated with LPFC_NODELIST entry
+ * so it can be freed.
+ */
+ static int
+-lpfc_cleanup_node(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
+- LPFC_MBOXQ_t *mb;
+- LPFC_MBOXQ_t *nextmb;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ LPFC_MBOXQ_t *mb, *nextmb;
+ struct lpfc_dmabuf *mp;
+
+ /* Cleanup node for NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+- "%d:0900 Cleanup node for NPort x%x "
++ "%d (%d):0900 Cleanup node for NPort x%x "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->nlp_state, ndlp->nlp_rpi);
+
+- lpfc_dequeue_node(phba, ndlp);
++ lpfc_dequeue_node(vport, ndlp);
+
+ /* cleanup any ndlp on mbox q waiting for reglogin cmpl */
+ if ((mb = phba->sli.mbox_active)) {
+@@ -1562,13 +1974,13 @@
+ }
+ }
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
+ if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
+ mp = (struct lpfc_dmabuf *) (mb->context1);
+ if (mp) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ __lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ }
+ list_del(&mb->list);
+@@ -1576,20 +1988,27 @@
+ lpfc_nlp_put(ndlp);
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ lpfc_els_abort(phba,ndlp);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+
+ ndlp->nlp_last_elscmd = 0;
+ del_timer_sync(&ndlp->nlp_delayfunc);
+
+ if (!list_empty(&ndlp->els_retry_evt.evt_listp))
+ list_del_init(&ndlp->els_retry_evt.evt_listp);
++ if (!list_empty(&ndlp->dev_loss_evt.evt_listp))
++ list_del_init(&ndlp->dev_loss_evt.evt_listp);
+
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+
+ return 0;
+ }
+@@ -1600,18 +2019,22 @@
+ * machine, defer the free till we reach the end of the state machine.
+ */
+ static void
+-lpfc_nlp_remove(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
+ struct lpfc_rport_data *rdata;
+
+ if (ndlp->nlp_flag & NLP_DELAY_TMO) {
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ }
+
+- lpfc_cleanup_node(phba, ndlp);
++ lpfc_cleanup_node(vport, ndlp);
+
+- if ((ndlp->rport) && !(phba->fc_flag & FC_UNLOADING)) {
+- put_device(&ndlp->rport->dev);
++ /*
++ * We can get here with a non-NULL ndlp->rport because when we
++ * unregister a rport we don't break the rport/node linkage. So if we
++ * do, make sure we don't leave any dangling pointers behind.
++ */
++ if (ndlp->rport) {
+ rdata = ndlp->rport->dd_data;
+ rdata->pnode = NULL;
+ ndlp->rport = NULL;
+@@ -1619,11 +2042,10 @@
+ }
+
+ static int
+-lpfc_matchdid(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did)
++lpfc_matchdid(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ uint32_t did)
+ {
+- D_ID mydid;
+- D_ID ndlpdid;
+- D_ID matchdid;
++ D_ID mydid, ndlpdid, matchdid;
+
+ if (did == Bcast_DID)
+ return 0;
+@@ -1637,7 +2059,7 @@
+ return 1;
+
+ /* Next check for area/domain identically equals 0 match */
+- mydid.un.word = phba->fc_myDID;
++ mydid.un.word = vport->fc_myDID;
+ if ((mydid.un.b.domain == 0) && (mydid.un.b.area == 0)) {
+ return 0;
+ }
+@@ -1669,101 +2091,116 @@
+ }
+
+ /* Search for a nodelist entry */
+-struct lpfc_nodelist *
+-lpfc_findnode_did(struct lpfc_hba *phba, uint32_t did)
++static struct lpfc_nodelist *
++__lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_nodelist *ndlp;
+ uint32_t data1;
+
+- spin_lock_irq(phba->host->host_lock);
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
+- if (lpfc_matchdid(phba, ndlp, did)) {
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++ if (lpfc_matchdid(vport, ndlp, did)) {
+ data1 = (((uint32_t) ndlp->nlp_state << 24) |
+ ((uint32_t) ndlp->nlp_xri << 16) |
+ ((uint32_t) ndlp->nlp_type << 8) |
+ ((uint32_t) ndlp->nlp_rpi & 0xff));
+ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+- "%d:0929 FIND node DID "
++ "%d (%d):0929 FIND node DID "
+ " Data: x%p x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ ndlp, ndlp->nlp_DID,
+ ndlp->nlp_flag, data1);
+- spin_unlock_irq(phba->host->host_lock);
+ return ndlp;
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
+
+ /* FIND node did <did> NOT FOUND */
+ lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+- "%d:0932 FIND node did x%x NOT FOUND.\n",
+- phba->brd_no, did);
++ "%d (%d):0932 FIND node did x%x NOT FOUND.\n",
++ phba->brd_no, vport->vpi, did);
+ return NULL;
+ }
+
+ struct lpfc_nodelist *
+-lpfc_setup_disc_node(struct lpfc_hba * phba, uint32_t did)
++lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_nodelist *ndlp;
++
++ spin_lock_irq(shost->host_lock);
++ ndlp = __lpfc_findnode_did(vport, did);
++ spin_unlock_irq(shost->host_lock);
++ return ndlp;
++}
++
++struct lpfc_nodelist *
++lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp;
+
+- ndlp = lpfc_findnode_did(phba, did);
++ ndlp = lpfc_findnode_did(vport, did);
+ if (!ndlp) {
+- if ((phba->fc_flag & FC_RSCN_MODE) &&
+- ((lpfc_rscn_payload_check(phba, did) == 0)))
++ if ((vport->fc_flag & FC_RSCN_MODE) != 0 &&
++ lpfc_rscn_payload_check(vport, did) == 0)
+ return NULL;
+ ndlp = (struct lpfc_nodelist *)
+- mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++ mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+ if (!ndlp)
+ return NULL;
+- lpfc_nlp_init(phba, ndlp, did);
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_init(vport, ndlp, did);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp;
+ }
+- if (phba->fc_flag & FC_RSCN_MODE) {
+- if (lpfc_rscn_payload_check(phba, did)) {
++ if (vport->fc_flag & FC_RSCN_MODE) {
++ if (lpfc_rscn_payload_check(vport, did)) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ spin_unlock_irq(shost->host_lock);
+
+ /* Since this node is marked for discovery,
+ * delay timeout is not needed.
+ */
+ if (ndlp->nlp_flag & NLP_DELAY_TMO)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ } else
+ ndlp = NULL;
+ } else {
+ if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE ||
+ ndlp->nlp_state == NLP_STE_PLOGI_ISSUE)
+ return NULL;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++ spin_unlock_irq(shost->host_lock);
+ }
+ return ndlp;
+ }
+
+ /* Build a list of nodes to discover based on the loopmap */
+ void
+-lpfc_disc_list_loopmap(struct lpfc_hba * phba)
++lpfc_disc_list_loopmap(struct lpfc_vport *vport)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ int j;
+ uint32_t alpa, index;
+
+- if (phba->hba_state <= LPFC_LINK_DOWN) {
++ if (!lpfc_is_link_up(phba))
+ return;
+- }
+- if (phba->fc_topology != TOPOLOGY_LOOP) {
++
++ if (phba->fc_topology != TOPOLOGY_LOOP)
+ return;
+- }
+
+ /* Check for loop map present or not */
+ if (phba->alpa_map[0]) {
+ for (j = 1; j <= phba->alpa_map[0]; j++) {
+ alpa = phba->alpa_map[j];
+-
+- if (((phba->fc_myDID & 0xff) == alpa) || (alpa == 0)) {
++ if (((vport->fc_myDID & 0xff) == alpa) || (alpa == 0))
+ continue;
+- }
+- lpfc_setup_disc_node(phba, alpa);
++ lpfc_setup_disc_node(vport, alpa);
+ }
+ } else {
+ /* No alpamap, so try all alpa's */
+@@ -1776,113 +2213,167 @@
+ else
+ index = FC_MAXLOOP - j - 1;
+ alpa = lpfcAlpaArray[index];
+- if ((phba->fc_myDID & 0xff) == alpa) {
++ if ((vport->fc_myDID & 0xff) == alpa)
+ continue;
+- }
+-
+- lpfc_setup_disc_node(phba, alpa);
++ lpfc_setup_disc_node(vport, alpa);
+ }
+ }
+ return;
+ }
+
+-/* Start Link up / RSCN discovery on NPR list */
+ void
+-lpfc_disc_start(struct lpfc_hba * phba)
++lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport)
+ {
+- struct lpfc_sli *psli;
+ LPFC_MBOXQ_t *mbox;
+- struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring];
++ struct lpfc_sli_ring *fcp_ring = &psli->ring[psli->fcp_ring];
++ struct lpfc_sli_ring *next_ring = &psli->ring[psli->next_ring];
++ int rc;
++
++ /*
++ * If this is not a physical port, or if we have already sent
++ * CLEAR_LA, then don't send it again.
++ */
++ if ((phba->link_state >= LPFC_CLEAR_LA) ||
++ (vport->port_type != LPFC_PHYSICAL_PORT))
++ return;
++
++ /* Link up discovery */
++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) {
++ phba->link_state = LPFC_CLEAR_LA;
++ lpfc_clear_la(phba, mbox);
++ mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++ mbox->vport = vport;
++ rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT |
++ MBX_STOP_IOCB));
++ if (rc == MBX_NOT_FINISHED) {
++ mempool_free(mbox, phba->mbox_mem_pool);
++ lpfc_disc_flush_list(vport);
++ extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++ fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++ next_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++ phba->link_state = LPFC_HBA_ERROR;
++ }
++ }
++}
++
++/* Reg_vpi to tell firmware to resume normal operations */
++void
++lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++ LPFC_MBOXQ_t *regvpimbox;
++
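++ /*
++ * REG_VPI is issued without waiting; its completion handler,
++ * lpfc_mbx_cmpl_reg_vpi(), resumes discovery for this vport.
++ */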
++ regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (regvpimbox) {
++ lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox);
++ regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi;
++ regvpimbox->vport = vport;
++ if (lpfc_sli_issue_mbox(phba, regvpimbox,
++ (MBX_NOWAIT | MBX_STOP_IOCB))
++ == MBX_NOT_FINISHED) {
++ mempool_free(regvpimbox, phba->mbox_mem_pool);
++ }
++ }
++}
++
++/* Start Link up / RSCN discovery on NPR nodes */
++void
++lpfc_disc_start(struct lpfc_vport *vport)
++{
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t num_sent;
+ uint32_t clear_la_pending;
+ int did_changed;
+- int rc;
+-
+- psli = &phba->sli;
+
+- if (phba->hba_state <= LPFC_LINK_DOWN) {
++ if (!lpfc_is_link_up(phba))
+ return;
+- }
+- if (phba->hba_state == LPFC_CLEAR_LA)
++
++ if (phba->link_state == LPFC_CLEAR_LA)
+ clear_la_pending = 1;
+ else
+ clear_la_pending = 0;
+
+- if (phba->hba_state < LPFC_HBA_READY) {
+- phba->hba_state = LPFC_DISC_AUTH;
+- }
+- lpfc_set_disctmo(phba);
++ if (vport->port_state < LPFC_VPORT_READY)
++ vport->port_state = LPFC_DISC_AUTH;
++
++ lpfc_set_disctmo(vport);
+
+- if (phba->fc_prevDID == phba->fc_myDID) {
++ if (vport->fc_prevDID == vport->fc_myDID)
+ did_changed = 0;
+- } else {
++ else
+ did_changed = 1;
+- }
+- phba->fc_prevDID = phba->fc_myDID;
+- phba->num_disc_nodes = 0;
++
++ vport->fc_prevDID = vport->fc_myDID;
++ vport->num_disc_nodes = 0;
+
+ /* Start Discovery state <hba_state> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0202 Start Discovery hba state x%x "
++ "%d (%d):0202 Start Discovery hba state x%x "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, phba->hba_state, phba->fc_flag,
+- phba->fc_plogi_cnt, phba->fc_adisc_cnt);
+-
+- /* If our did changed, we MUST do PLOGI */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
+- if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+- (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+- did_changed) {
+- spin_lock_irq(phba->host->host_lock);
+- ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
+- }
+- }
++ phba->brd_no, vport->vpi, vport->port_state,
++ vport->fc_flag, vport->fc_plogi_cnt,
++ vport->fc_adisc_cnt);
+
+ /* First do ADISCs - if any */
+- num_sent = lpfc_els_disc_adisc(phba);
++ num_sent = lpfc_els_disc_adisc(vport);
+
+ if (num_sent)
+ return;
+
+- if ((phba->hba_state < LPFC_HBA_READY) && (!clear_la_pending)) {
++ /*
++ * For SLI3, cmpl_reg_vpi will set port_state to READY, and
++ * continue discovery.
++ */
++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++ !(vport->fc_flag & FC_RSCN_MODE)) {
++ lpfc_issue_reg_vpi(phba, vport);
++ return;
++ }
++
++ /*
++ * For SLI2, we need to set port_state to READY and continue
++ * discovery.
++ */
++ if (vport->port_state < LPFC_VPORT_READY && !clear_la_pending) {
+ /* If we get here, there is nothing to ADISC */
+- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+- phba->hba_state = LPFC_CLEAR_LA;
+- lpfc_clear_la(phba, mbox);
+- mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox(phba, mbox,
+- (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free( mbox, phba->mbox_mem_pool);
+- lpfc_disc_flush_list(phba);
+- psli->ring[(psli->extra_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->fcp_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- psli->ring[(psli->next_ring)].flag &=
+- ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state = LPFC_HBA_READY;
++ if (vport->port_type == LPFC_PHYSICAL_PORT)
++ lpfc_issue_clear_la(phba, vport);
++
++ if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
++ vport->num_disc_nodes = 0;
++ /* go thru NPR nodes and issue ELS PLOGIs */
++ if (vport->fc_npr_cnt)
++ lpfc_els_disc_plogi(vport);
++
++ if (!vport->num_disc_nodes) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_NDISC_ACTIVE;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
+ }
+ }
++ vport->port_state = LPFC_VPORT_READY;
+ } else {
+ /* Next do PLOGIs - if any */
+- num_sent = lpfc_els_disc_plogi(phba);
++ num_sent = lpfc_els_disc_plogi(vport);
+
+ if (num_sent)
+ return;
+
+- if (phba->fc_flag & FC_RSCN_MODE) {
++ if (vport->fc_flag & FC_RSCN_MODE) {
+ /* Check to see if more RSCNs came in while we
+ * were processing this one.
+ */
+- if ((phba->fc_rscn_id_cnt == 0) &&
+- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_RSCN_MODE;
+- spin_unlock_irq(phba->host->host_lock);
++ if ((vport->fc_rscn_id_cnt == 0) &&
++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_RSCN_MODE;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_can_disctmo(vport);
+ } else
+- lpfc_els_handle_rscn(phba);
++ lpfc_els_handle_rscn(vport);
+ }
+ }
+ return;
+@@ -1893,7 +2384,7 @@
+ * ring the match the sppecified nodelist.
+ */
+ static void
+-lpfc_free_tx(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ LIST_HEAD(completions);
+ struct lpfc_sli *psli;
+@@ -1907,7 +2398,7 @@
+ /* Error matching iocb on txq or txcmplq
+ * First check the txq.
+ */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
+ if (iocb->context1 != ndlp) {
+ continue;
+@@ -1927,36 +2418,36 @@
+ continue;
+ }
+ icmd = &iocb->iocb;
+- if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) ||
+- (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) {
++ if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR ||
++ icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX) {
+ lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ while (!list_empty(&completions)) {
+ iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+- list_del(&iocb->list);
++ list_del_init(&iocb->list);
+
+- if (iocb->iocb_cmpl) {
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
+ icmd = &iocb->iocb;
+ icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ (iocb->iocb_cmpl) (phba, iocb, iocb);
+- } else
+- lpfc_sli_release_iocbq(phba, iocb);
+ }
+-
+- return;
++ }
+ }
+
+ void
+-lpfc_disc_flush_list(struct lpfc_hba * phba)
++lpfc_disc_flush_list(struct lpfc_vport *vport)
+ {
+ struct lpfc_nodelist *ndlp, *next_ndlp;
++ struct lpfc_hba *phba = vport->phba;
+
+- if (phba->fc_plogi_cnt || phba->fc_adisc_cnt) {
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++ if (vport->fc_plogi_cnt || vport->fc_adisc_cnt) {
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE ||
+ ndlp->nlp_state == NLP_STE_ADISC_ISSUE) {
+@@ -1967,6 +2458,14 @@
+ }
+ }
+
++void
++lpfc_cleanup_discovery_resources(struct lpfc_vport *vport)
++{
++ lpfc_els_flush_rscn(vport);
++ lpfc_els_flush_cmd(vport);
++ lpfc_disc_flush_list(vport);
++}
++
+ /*****************************************************************************/
+ /*
+ * NAME: lpfc_disc_timeout
+@@ -1985,158 +2484,154 @@
+ void
+ lpfc_disc_timeout(unsigned long ptr)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ struct lpfc_vport *vport = (struct lpfc_vport *) ptr;
++ struct lpfc_hba *phba = vport->phba;
+ unsigned long flags = 0;
+
+ if (unlikely(!phba))
+ return;
+
+- spin_lock_irqsave(phba->host->host_lock, flags);
+- if (!(phba->work_hba_events & WORKER_DISC_TMO)) {
+- phba->work_hba_events |= WORKER_DISC_TMO;
++ if ((vport->work_port_events & WORKER_DISC_TMO) == 0) {
++ spin_lock_irqsave(&vport->work_port_lock, flags);
++ vport->work_port_events |= WORKER_DISC_TMO;
++ spin_unlock_irqrestore(&vport->work_port_lock, flags);
++
++ spin_lock_irqsave(&phba->hbalock, flags);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
+ }
+- spin_unlock_irqrestore(phba->host->host_lock, flags);
+ return;
+ }
+
+ static void
+-lpfc_disc_timeout_handler(struct lpfc_hba *phba)
++lpfc_disc_timeout_handler(struct lpfc_vport *vport)
+ {
+- struct lpfc_sli *psli;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_nodelist *ndlp, *next_ndlp;
+- LPFC_MBOXQ_t *clearlambox, *initlinkmbox;
++ LPFC_MBOXQ_t *initlinkmbox;
+ int rc, clrlaerr = 0;
+
+- if (unlikely(!phba))
+- return;
+-
+- if (!(phba->fc_flag & FC_DISC_TMO))
++ if (!(vport->fc_flag & FC_DISC_TMO))
+ return;
+
+- psli = &phba->sli;
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_DISC_TMO;
++ spin_unlock_irq(shost->host_lock);
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_DISC_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "disc timeout: state:x%x rtry:x%x flg:x%x",
++ vport->port_state, vport->fc_ns_retry, vport->fc_flag);
+
+- switch (phba->hba_state) {
++ switch (vport->port_state) {
+
+ case LPFC_LOCAL_CFG_LINK:
+- /* hba_state is identically LPFC_LOCAL_CFG_LINK while waiting for FAN */
++ /* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for
++ * FAN
++ */
+ /* FAN timeout */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_DISCOVERY,
+- "%d:0221 FAN timeout\n",
+- phba->brd_no);
++ lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY,
++ "%d (%d):0221 FAN timeout\n",
++ phba->brd_no, vport->vpi);
+
+ /* Start discovery by sending FLOGI, clean up old rpis */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ nlp_listp) {
+ if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ continue;
+ if (ndlp->nlp_type & NLP_FABRIC) {
+ /* Clean up the ndlp on Fabric connections */
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+ /* Fail outstanding IO now since device
+ * is marked for PLOGI.
+ */
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ }
+ }
+- phba->hba_state = LPFC_FLOGI;
+- lpfc_set_disctmo(phba);
+- lpfc_initial_flogi(phba);
++ if (vport->port_state != LPFC_FLOGI) {
++ vport->port_state = LPFC_FLOGI;
++ lpfc_set_disctmo(vport);
++ lpfc_initial_flogi(vport);
++ }
+ break;
+
++ case LPFC_FDISC:
+ case LPFC_FLOGI:
+- /* hba_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */
++ /* port_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */
+ /* Initial FLOGI timeout */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0222 Initial FLOGI timeout\n",
+- phba->brd_no);
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0222 Initial %s timeout\n",
++ phba->brd_no, vport->vpi,
++ vport->vpi ? "FLOGI" : "FDISC");
+
+ /* Assume no Fabric and go on with discovery.
+ * Check for outstanding ELS FLOGI to abort.
+ */
+
+ /* FLOGI failed, so just use loop map to make discovery list */
+- lpfc_disc_list_loopmap(phba);
++ lpfc_disc_list_loopmap(vport);
+
+ /* Start discovery */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ break;
+
+ case LPFC_FABRIC_CFG_LINK:
+ /* hba_state is identically LPFC_FABRIC_CFG_LINK while waiting for
+ NameServer login */
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0223 Timeout while waiting for NameServer "
+- "login\n", phba->brd_no);
++ "%d (%d):0223 Timeout while waiting for "
++ "NameServer login\n",
++ phba->brd_no, vport->vpi);
+
+ /* Next look for NameServer ndlp */
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
++ ndlp = lpfc_findnode_did(vport, NameServer_DID);
+ if (ndlp)
+ lpfc_nlp_put(ndlp);
+ /* Start discovery */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ break;
+
+ case LPFC_NS_QRY:
+ /* Check for wait for NameServer Rsp timeout */
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0224 NameServer Query timeout "
++ "%d (%d):0224 NameServer Query timeout "
+ "Data: x%x x%x\n",
+- phba->brd_no,
+- phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
++ phba->brd_no, vport->vpi,
++ vport->fc_ns_retry, LPFC_MAX_NS_RETRY);
+
+- ndlp = lpfc_findnode_did(phba, NameServer_DID);
+- if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+- if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++ if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) {
+ /* Try it one more time */
+- rc = lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT);
++ vport->fc_ns_retry++;
++ rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT,
++ vport->fc_ns_retry, 0);
+ if (rc == 0)
+ break;
+ }
+- phba->fc_ns_retry = 0;
+- }
+-
+- /* Nothing to authenticate, so CLEAR_LA right now */
+- clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (!clearlambox) {
+- clrlaerr = 1;
+- lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0226 Device Discovery "
+- "completion error\n",
+- phba->brd_no);
+- phba->hba_state = LPFC_HBA_ERROR;
+- break;
+- }
++ vport->fc_ns_retry = 0;
+
+- phba->hba_state = LPFC_CLEAR_LA;
+- lpfc_clear_la(phba, clearlambox);
+- clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox(phba, clearlambox,
+- (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free(clearlambox, phba->mbox_mem_pool);
+- clrlaerr = 1;
+- break;
++ /*
++ * Discovery is over.
++ * Set port_state to LPFC_VPORT_READY if SLI2.
++ * cmpl_reg_vpi will set port_state to READY for SLI3.
++ */
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++ lpfc_issue_reg_vpi(phba, vport);
++ else { /* NPIV Not enabled */
++ lpfc_issue_clear_la(phba, vport);
++ vport->port_state = LPFC_VPORT_READY;
+ }
+
+ /* Setup and issue mailbox INITIALIZE LINK command */
+ initlinkmbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!initlinkmbox) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0206 Device Discovery "
++ "%d (%d):0206 Device Discovery "
+ "completion error\n",
+- phba->brd_no);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->brd_no, vport->vpi);
++ phba->link_state = LPFC_HBA_ERROR;
+ break;
+ }
+
+@@ -2144,6 +2639,8 @@
+ lpfc_init_link(phba, initlinkmbox, phba->cfg_topology,
+ phba->cfg_link_speed);
+ initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
++ initlinkmbox->vport = vport;
++ initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ rc = lpfc_sli_issue_mbox(phba, initlinkmbox,
+ (MBX_NOWAIT | MBX_STOP_IOCB));
+ lpfc_set_loopback_flag(phba);
+@@ -2154,67 +2651,81 @@
+
+ case LPFC_DISC_AUTH:
+ /* Node Authentication timeout */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0227 Node Authentication timeout\n",
+- phba->brd_no);
+- lpfc_disc_flush_list(phba);
+- clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (!clearlambox) {
+- clrlaerr = 1;
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0207 Device Discovery "
+- "completion error\n",
+- phba->brd_no);
+- phba->hba_state = LPFC_HBA_ERROR;
+- break;
++ "%d (%d):0227 Node Authentication timeout\n",
++ phba->brd_no, vport->vpi);
++ lpfc_disc_flush_list(vport);
++
++ /*
++ * Set port_state to LPFC_VPORT_READY if SLI2.
++ * cmpl_reg_vpi will set port_state to READY for SLI3.
++ */
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++ lpfc_issue_reg_vpi(phba, vport);
++ else { /* NPIV Not enabled */
++ lpfc_issue_clear_la(phba, vport);
++ vport->port_state = LPFC_VPORT_READY;
+ }
+- phba->hba_state = LPFC_CLEAR_LA;
+- lpfc_clear_la(phba, clearlambox);
+- clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+- rc = lpfc_sli_issue_mbox(phba, clearlambox,
+- (MBX_NOWAIT | MBX_STOP_IOCB));
+- if (rc == MBX_NOT_FINISHED) {
+- mempool_free(clearlambox, phba->mbox_mem_pool);
+- clrlaerr = 1;
++ break;
++
++ case LPFC_VPORT_READY:
++ if (vport->fc_flag & FC_RSCN_MODE) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0231 RSCN timeout Data: x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi,
++ vport->fc_ns_retry, LPFC_MAX_NS_RETRY);
++
++ /* Cleanup any outstanding ELS commands */
++ lpfc_els_flush_cmd(vport);
++
++ lpfc_els_flush_rscn(vport);
++ lpfc_disc_flush_list(vport);
+ }
+ break;
+
++ default:
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0229 Unexpected discovery timeout, "
++ "vport State x%x\n",
++ phba->brd_no, vport->vpi, vport->port_state);
++
++ break;
++ }
++
++ switch (phba->link_state) {
+ case LPFC_CLEAR_LA:
+ /* CLEAR LA timeout */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0228 CLEAR LA timeout\n",
+- phba->brd_no);
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0228 CLEAR LA timeout\n",
++ phba->brd_no, vport->vpi);
+ clrlaerr = 1;
+ break;
+
+- case LPFC_HBA_READY:
+- if (phba->fc_flag & FC_RSCN_MODE) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0231 RSCN timeout Data: x%x x%x\n",
+- phba->brd_no,
+- phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
+-
+- /* Cleanup any outstanding ELS commands */
+- lpfc_els_flush_cmd(phba);
++ case LPFC_LINK_UNKNOWN:
++ case LPFC_WARM_START:
++ case LPFC_INIT_START:
++ case LPFC_INIT_MBX_CMDS:
++ case LPFC_LINK_DOWN:
++ case LPFC_LINK_UP:
++ case LPFC_HBA_ERROR:
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0230 Unexpected timeout, hba link "
++ "state x%x\n",
++ phba->brd_no, vport->vpi, phba->link_state);
++ clrlaerr = 1;
++ break;
+
+- lpfc_els_flush_rscn(phba);
+- lpfc_disc_flush_list(phba);
+- }
++ case LPFC_HBA_READY:
+ break;
+ }
+
+ if (clrlaerr) {
+- lpfc_disc_flush_list(phba);
++ lpfc_disc_flush_list(vport);
+ psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state = LPFC_HBA_READY;
++ vport->port_state = LPFC_VPORT_READY;
+ }
+
+ return;
+@@ -2227,37 +2738,29 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
+- struct lpfc_dmabuf *mp;
+- struct lpfc_nodelist *ndlp;
+-
+- psli = &phba->sli;
+- mb = &pmb->mb;
+-
+- ndlp = (struct lpfc_nodelist *) pmb->context2;
+- mp = (struct lpfc_dmabuf *) (pmb->context1);
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++ struct lpfc_vport *vport = pmb->vport;
+
+ pmb->context1 = NULL;
+
+ ndlp->nlp_rpi = mb->un.varWords[0];
+ ndlp->nlp_type |= NLP_FABRIC;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+
+- /* Start issuing Fabric-Device Management Interface (FDMI)
+- * command to 0xfffffa (FDMI well known port)
+- */
+- if (phba->cfg_fdmi_on == 1) {
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
+- } else {
+ /*
+- * Delay issuing FDMI command if fdmi-on=2
+- * (supporting RPA/hostnmae)
++ * Start issuing Fabric-Device Management Interface (FDMI) command to
++ * 0xfffffa (FDMI well known port), or delay issuing the FDMI command
++ * if fdmi-on=2 (supporting RPA/hostname)
+ */
+- mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
+- }
++
++ if (phba->cfg_fdmi_on == 1)
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
++ else
++ mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
+
+ /* Mailbox took a reference to the node */
+ lpfc_nlp_put(ndlp);
+@@ -2283,16 +2786,12 @@
+ sizeof(ndlp->nlp_portname)) == 0;
+ }
+
+-/*
+- * Search node lists for a remote port matching filter criteria
+- * Caller needs to hold host_lock before calling this routine.
+- */
+ struct lpfc_nodelist *
+-__lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param)
++__lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
+ {
+ struct lpfc_nodelist *ndlp;
+
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state != NLP_STE_UNUSED_NODE &&
+ filter(ndlp, param))
+ return ndlp;
+@@ -2302,68 +2801,104 @@
+
+ /*
+ * Search node lists for a remote port matching filter criteria
+- * This routine is used when the caller does NOT have host_lock.
++ * Caller needs to hold host_lock before calling this routine.
+ */
+ struct lpfc_nodelist *
+-lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param)
++lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp;
+
+- spin_lock_irq(phba->host->host_lock);
+- ndlp = __lpfc_find_node(phba, filter, param);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ ndlp = __lpfc_find_node(vport, filter, param);
++ spin_unlock_irq(shost->host_lock);
+ return ndlp;
+ }
+
+ /*
+ * This routine looks up the ndlp lists for the given RPI. If rpi found it
+- * returns the node list pointer else return NULL.
++ * returns the node list element pointer else return NULL.
+ */
+ struct lpfc_nodelist *
+-__lpfc_findnode_rpi(struct lpfc_hba *phba, uint16_t rpi)
++__lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi)
+ {
+- return __lpfc_find_node(phba, lpfc_filter_by_rpi, &rpi);
++ return __lpfc_find_node(vport, lpfc_filter_by_rpi, &rpi);
+ }
+
+ struct lpfc_nodelist *
+-lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi)
++lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp;
+
+- spin_lock_irq(phba->host->host_lock);
+- ndlp = __lpfc_findnode_rpi(phba, rpi);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ ndlp = __lpfc_findnode_rpi(vport, rpi);
++ spin_unlock_irq(shost->host_lock);
+ return ndlp;
+ }
+
+ /*
+ * This routine looks up the ndlp lists for the given WWPN. If WWPN found it
+- * returns the node list pointer else return NULL.
++ * returns the node list element pointer else return NULL.
+ */
+ struct lpfc_nodelist *
+-lpfc_findnode_wwpn(struct lpfc_hba *phba, struct lpfc_name *wwpn)
++lpfc_findnode_wwpn(struct lpfc_vport *vport, struct lpfc_name *wwpn)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_nodelist *ndlp;
+
+- spin_lock_irq(phba->host->host_lock);
+- ndlp = __lpfc_find_node(phba, lpfc_filter_by_wwpn, wwpn);
+- spin_unlock_irq(phba->host->host_lock);
+- return NULL;
++ spin_lock_irq(shost->host_lock);
++ ndlp = __lpfc_find_node(vport, lpfc_filter_by_wwpn, wwpn);
++ spin_unlock_irq(shost->host_lock);
++ return ndlp;
+ }
+
+ void
+-lpfc_nlp_init(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did)
++lpfc_dev_loss_delay(unsigned long ptr)
++{
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr;
++ struct lpfc_vport *vport = ndlp->vport;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_work_evt *evtp = &ndlp->dev_loss_evt;
++ unsigned long flags;
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ if (!list_empty(&evtp->evt_listp)) {
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++ return;
++ }
++
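++ /* Queue a DEV_LOSS_DELAY event for the worker thread and wake it */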
++ evtp->evt_arg1 = ndlp;
++ evtp->evt = LPFC_EVT_DEV_LOSS_DELAY;
++ list_add_tail(&evtp->evt_listp, &phba->work_list);
++ if (phba->work_wait)
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++ return;
++}
++
++void
++lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ uint32_t did)
+ {
+ memset(ndlp, 0, sizeof (struct lpfc_nodelist));
+ INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
++ INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
+ init_timer(&ndlp->nlp_delayfunc);
+ ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
+ ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+ ndlp->nlp_DID = did;
+- ndlp->nlp_phba = phba;
++ ndlp->vport = vport;
+ ndlp->nlp_sid = NLP_NO_SID;
+ INIT_LIST_HEAD(&ndlp->nlp_listp);
+ kref_init(&ndlp->kref);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE,
++ "node init: did:x%x",
++ ndlp->nlp_DID, 0, 0);
++
+ return;
+ }
+
+@@ -2372,8 +2907,13 @@
+ {
+ struct lpfc_nodelist *ndlp = container_of(kref, struct lpfc_nodelist,
+ kref);
+- lpfc_nlp_remove(ndlp->nlp_phba, ndlp);
+- mempool_free(ndlp, ndlp->nlp_phba->nlp_mem_pool);
++
++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
++ "node release: did:x%x flg:x%x type:x%x",
++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
++
++ lpfc_nlp_remove(ndlp->vport, ndlp);
++ mempool_free(ndlp, ndlp->vport->phba->nlp_mem_pool);
+ }
+
+ struct lpfc_nodelist *
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h 2007-12-21 15:36:12.000000000 -0500
+@@ -59,6 +59,12 @@
+ #define SLI2_IOCB_CMD_R3XTRA_ENTRIES 24
+ #define SLI2_IOCB_RSP_R3XTRA_ENTRIES 32
+
++#define SLI2_IOCB_CMD_SIZE 32
++#define SLI2_IOCB_RSP_SIZE 32
++#define SLI3_IOCB_CMD_SIZE 128
++#define SLI3_IOCB_RSP_SIZE 64
++
++
+ /* Common Transport structures and definitions */
+
+ union CtRevisionId {
+@@ -79,6 +85,9 @@
+ uint32_t word;
+ };
+
++#define FC4_FEATURE_INIT 0x2
++#define FC4_FEATURE_TARGET 0x1
++
+ struct lpfc_sli_ct_request {
+ /* Structure is in Big Endian format */
+ union CtRevisionId RevisionId;
+@@ -121,20 +130,6 @@
+
+ uint32_t rsvd[7];
+ } rft;
+- struct rff {
+- uint32_t PortId;
+- uint8_t reserved[2];
+-#ifdef __BIG_ENDIAN_BITFIELD
+- uint8_t feature_res:6;
+- uint8_t feature_init:1;
+- uint8_t feature_tgt:1;
+-#else /* __LITTLE_ENDIAN_BITFIELD */
+- uint8_t feature_tgt:1;
+- uint8_t feature_init:1;
+- uint8_t feature_res:6;
+-#endif
+- uint8_t type_code; /* type=8 for FCP */
+- } rff;
+ struct rnn {
+ uint32_t PortId; /* For RNN_ID requests */
+ uint8_t wwnn[8];
+@@ -144,15 +139,42 @@
+ uint8_t len;
+ uint8_t symbname[255];
+ } rsnn;
++ struct rspn { /* For RSPN_ID requests */
++ uint32_t PortId;
++ uint8_t len;
++ uint8_t symbname[255];
++ } rspn;
++ struct gff {
++ uint32_t PortId;
++ } gff;
++ struct gff_acc {
++ uint8_t fbits[128];
++ } gff_acc;
++#define FCP_TYPE_FEATURE_OFFSET 4
++ struct rff {
++ uint32_t PortId;
++ uint8_t reserved[2];
++ uint8_t fbits;
++ uint8_t type_code; /* type=8 for FCP */
++ } rff;
+ } un;
+ };
+
+ #define SLI_CT_REVISION 1
+-#define GID_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 260)
+-#define RFT_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 228)
+-#define RFF_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 235)
+-#define RNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 252)
+-#define RSNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request))
++#define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct gid))
++#define GFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct gff))
++#define RFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct rft))
++#define RFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct rff))
++#define RNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct rnn))
++#define RSNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct rsnn))
++#define RSPN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \
++ sizeof(struct rspn))
+
+ /*
+ * FsType Definitions
+@@ -227,6 +249,7 @@
+ #define SLI_CTNS_GFT_ID 0x0117
+ #define SLI_CTNS_GSPN_ID 0x0118
+ #define SLI_CTNS_GPT_ID 0x011A
++#define SLI_CTNS_GFF_ID 0x011F
+ #define SLI_CTNS_GID_PN 0x0121
+ #define SLI_CTNS_GID_NN 0x0131
+ #define SLI_CTNS_GIP_NN 0x0135
+@@ -240,9 +263,9 @@
+ #define SLI_CTNS_RNN_ID 0x0213
+ #define SLI_CTNS_RCS_ID 0x0214
+ #define SLI_CTNS_RFT_ID 0x0217
+-#define SLI_CTNS_RFF_ID 0x021F
+ #define SLI_CTNS_RSPN_ID 0x0218
+ #define SLI_CTNS_RPT_ID 0x021A
++#define SLI_CTNS_RFF_ID 0x021F
+ #define SLI_CTNS_RIP_NN 0x0235
+ #define SLI_CTNS_RIPA_NN 0x0236
+ #define SLI_CTNS_RSNN_NN 0x0239
+@@ -311,9 +334,9 @@
+ uint8_t bbCreditlsb; /* FC Word 0, byte 3 */
+
+ #ifdef __BIG_ENDIAN_BITFIELD
+- uint16_t increasingOffset:1; /* FC Word 1, bit 31 */
++ uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */
+ uint16_t randomOffset:1; /* FC Word 1, bit 30 */
+- uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */
++ uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */
+ uint16_t fPort:1; /* FC Word 1, bit 28 */
+ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */
+ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */
+@@ -332,9 +355,9 @@
+ uint16_t edtovResolution:1; /* FC Word 1, bit 26 */
+ uint16_t altBbCredit:1; /* FC Word 1, bit 27 */
+ uint16_t fPort:1; /* FC Word 1, bit 28 */
+- uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */
++ uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */
+ uint16_t randomOffset:1; /* FC Word 1, bit 30 */
+- uint16_t increasingOffset:1; /* FC Word 1, bit 31 */
++ uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */
+
+ uint16_t payloadlength:1; /* FC Word 1, bit 16 */
+ uint16_t contIncSeqCnt:1; /* FC Word 1, bit 17 */
+@@ -1255,7 +1278,9 @@
+ #define MBX_KILL_BOARD 0x24
+ #define MBX_CONFIG_FARP 0x25
+ #define MBX_BEACON 0x2A
++#define MBX_HEARTBEAT 0x31
+
++#define MBX_CONFIG_HBQ 0x7C
+ #define MBX_LOAD_AREA 0x81
+ #define MBX_RUN_BIU_DIAG64 0x84
+ #define MBX_CONFIG_PORT 0x88
+@@ -1263,6 +1288,10 @@
+ #define MBX_READ_RPI64 0x8F
+ #define MBX_REG_LOGIN64 0x93
+ #define MBX_READ_LA64 0x95
++#define MBX_REG_VPI 0x96
++#define MBX_UNREG_VPI 0x97
++#define MBX_REG_VNPID 0x96
++#define MBX_UNREG_VNPID 0x97
+
+ #define MBX_FLASH_WR_ULA 0x98
+ #define MBX_SET_DEBUG 0x99
+@@ -1335,6 +1364,10 @@
+ #define CMD_FCP_TRECEIVE64_CX 0xA1
+ #define CMD_FCP_TRSP64_CX 0xA3
+
++#define CMD_IOCB_RCV_SEQ64_CX 0xB5
++#define CMD_IOCB_RCV_ELS64_CX 0xB7
++#define CMD_IOCB_RCV_CONT64_CX 0xBB
++
+ #define CMD_GEN_REQUEST64_CR 0xC2
+ #define CMD_GEN_REQUEST64_CX 0xC3
+
+@@ -1561,6 +1594,7 @@
+ #define FLAGS_TOPOLOGY_MODE_PT_PT 0x02 /* Attempt pt-pt only */
+ #define FLAGS_TOPOLOGY_MODE_LOOP 0x04 /* Attempt loop only */
+ #define FLAGS_TOPOLOGY_MODE_PT_LOOP 0x06 /* Attempt pt-pt then loop */
++#define FLAGS_UNREG_LOGIN_ALL 0x08 /* UNREG_LOGIN all on link down */
+ #define FLAGS_LIRP_LILP 0x80 /* LIRP / LILP is disabled */
+
+ #define FLAGS_TOPOLOGY_FAILOVER 0x0400 /* Bit 10 */
+@@ -1744,8 +1778,6 @@
+ #define LMT_4Gb 0x040
+ #define LMT_8Gb 0x080
+ #define LMT_10Gb 0x100
+-
+-
+ uint32_t rsvd2;
+ uint32_t rsvd3;
+ uint32_t max_xri;
+@@ -1754,7 +1786,10 @@
+ uint32_t avail_xri;
+ uint32_t avail_iocb;
+ uint32_t avail_rpi;
+- uint32_t default_rpi;
++ uint32_t max_vpi;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++ uint32_t avail_vpi;
+ } READ_CONFIG_VAR;
+
+ /* Structure for MB Command READ_RCONFIG (12) */
+@@ -1818,6 +1853,13 @@
+ structure */
+ struct ulp_bde64 sp64;
+ } un;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd3;
++ uint16_t vpi;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t vpi;
++ uint16_t rsvd3;
++#endif
+ } READ_SPARM_VAR;
+
+ /* Structure for MB Command READ_STATUS (14) */
+@@ -1918,11 +1960,17 @@
+ #ifdef __BIG_ENDIAN_BITFIELD
+ uint32_t cv:1;
+ uint32_t rr:1;
+- uint32_t rsvd1:29;
++ uint32_t rsvd2:2;
++ uint32_t v3req:1;
++ uint32_t v3rsp:1;
++ uint32_t rsvd1:25;
+ uint32_t rv:1;
+ #else /* __LITTLE_ENDIAN_BITFIELD */
+ uint32_t rv:1;
+- uint32_t rsvd1:29;
++ uint32_t rsvd1:25;
++ uint32_t v3rsp:1;
++ uint32_t v3req:1;
++ uint32_t rsvd2:2;
+ uint32_t rr:1;
+ uint32_t cv:1;
+ #endif
+@@ -1972,8 +2020,8 @@
+ uint8_t sli1FwName[16];
+ uint32_t sli2FwRev;
+ uint8_t sli2FwName[16];
+- uint32_t rsvd2;
+- uint32_t RandomData[7];
++ uint32_t sli3Feat;
++ uint32_t RandomData[6];
+ } READ_REV_VAR;
+
+ /* Structure for MB Command READ_LINK_STAT (18) */
+@@ -2013,6 +2061,14 @@
+ struct ulp_bde64 sp64;
+ } un;
+
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd6;
++ uint16_t vpi;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++ uint16_t vpi;
++ uint16_t rsvd6;
++#endif
++
+ } REG_LOGIN_VAR;
+
+ /* Word 30 contents for REG_LOGIN */
+@@ -2037,16 +2093,78 @@
+ #ifdef __BIG_ENDIAN_BITFIELD
+ uint16_t rsvd1;
+ uint16_t rpi;
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++ uint16_t rsvd6;
++ uint16_t vpi;
+ #else /* __LITTLE_ENDIAN_BITFIELD */
+ uint16_t rpi;
+ uint16_t rsvd1;
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++ uint16_t vpi;
++ uint16_t rsvd6;
+ #endif
+ } UNREG_LOGIN_VAR;
+
++/* Structure for MB Command REG_VPI (0x96) */
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1;
++ uint32_t rsvd2:8;
++ uint32_t sid:24;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++ uint16_t rsvd6;
++ uint16_t vpi;
++#else /* __LITTLE_ENDIAN */
++ uint32_t rsvd1;
++ uint32_t sid:24;
++ uint32_t rsvd2:8;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++ uint16_t vpi;
++ uint16_t rsvd6;
++#endif
++} REG_VPI_VAR;
++
++/* Structure for MB Command UNREG_VPI (0x97) */
++typedef struct {
++ uint32_t rsvd1;
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd6;
++ uint16_t vpi;
++#else /* __LITTLE_ENDIAN */
++ uint16_t vpi;
++ uint16_t rsvd6;
++#endif
++} UNREG_VPI_VAR;
++
+ /* Structure for MB Command UNREG_D_ID (0x23) */
+
+ typedef struct {
+ uint32_t did;
++ uint32_t rsvd2;
++ uint32_t rsvd3;
++ uint32_t rsvd4;
++ uint32_t rsvd5;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t rsvd6;
++ uint16_t vpi;
++#else
++ uint16_t vpi;
++ uint16_t rsvd6;
++#endif
+ } UNREG_D_ID_VAR;
+
+ /* Structure for MB Command READ_LA (21) */
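Throughout lpfc_hw.h, every word shared with the big-endian SLI interface is declared twice, once per host byte order, with the members reversed in the __LITTLE_ENDIAN_BITFIELD branch (compare the vpi/rsvd pairs in READ_SPARM_VAR and REG_LOGIN_VAR above, or the hbq_mask structure in the next hunk). A standalone sketch, with an illustrative struct modeled on the hbq_mask byte layout, of why the reversal keeps each field on the same wire byte on a little-endian host:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Little-endian declaration: byte offset 0 of the word is the least
     * significant, so the field the firmware carries in the high-order
     * byte must be listed last.  A big-endian host declares the same
     * struct in the opposite order, and both then agree on the layout.
     */
    struct mask_le {
    	uint8_t rctlmask;       /* wire byte 3 (LSB of the word) */
    	uint8_t rctlmatch;      /* wire byte 2 */
    	uint8_t tmask;          /* wire byte 1 */
    	uint8_t tmatch;         /* wire byte 0 (MSB of the word) */
    };

    int main(void)
    {
    	uint32_t word = 0x11223344;     /* the word as the host CPU sees it */
    	struct mask_le m;

    	memcpy(&m, &word, sizeof(m));   /* reinterpret the same 4 bytes */
    	printf("tmatch=%02x tmask=%02x rctlmatch=%02x rctlmask=%02x\n",
    	       m.tmatch, m.tmask, m.rctlmatch, m.rctlmask);
    	/* on a little-endian host: 11 22 33 44, matching the BE view */
    	return 0;
    }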
+@@ -2178,13 +2296,240 @@
+ #define DMP_RSP_OFFSET 0x14 /* word 5 contains first word of rsp */
+ #define DMP_RSP_SIZE 0x6C /* maximum of 27 words of rsp data */
+
+-/* Structure for MB Command CONFIG_PORT (0x88) */
++struct hbq_mask {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint8_t tmatch;
++ uint8_t tmask;
++ uint8_t rctlmatch;
++ uint8_t rctlmask;
++#else /* __LITTLE_ENDIAN */
++ uint8_t rctlmask;
++ uint8_t rctlmatch;
++ uint8_t tmask;
++ uint8_t tmatch;
++#endif
++};
+
++
++/* Structure for MB Command CONFIG_HBQ (7c) */
++
++struct config_hbq_var {
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1 :7;
++ uint32_t recvNotify :1; /* Receive Notification */
++ uint32_t numMask :8; /* # Mask Entries */
++ uint32_t profile :8; /* Selection Profile */
++ uint32_t rsvd2 :8;
++#else /* __LITTLE_ENDIAN */
++ uint32_t rsvd2 :8;
++ uint32_t profile :8; /* Selection Profile */
++ uint32_t numMask :8; /* # Mask Entries */
++ uint32_t recvNotify :1; /* Receive Notification */
++ uint32_t rsvd1 :7;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t hbqId :16;
++ uint32_t rsvd3 :12;
++ uint32_t ringMask :4;
++#else /* __LITTLE_ENDIAN */
++ uint32_t ringMask :4;
++ uint32_t rsvd3 :12;
++ uint32_t hbqId :16;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t entry_count :16;
++ uint32_t rsvd4 :8;
++ uint32_t headerLen :8;
++#else /* __LITTLE_ENDIAN */
++ uint32_t headerLen :8;
++ uint32_t rsvd4 :8;
++ uint32_t entry_count :16;
++#endif
++
++ uint32_t hbqaddrLow;
++ uint32_t hbqaddrHigh;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd5 :31;
++ uint32_t logEntry :1;
++#else /* __LITTLE_ENDIAN */
++ uint32_t logEntry :1;
++ uint32_t rsvd5 :31;
++#endif
++
++ uint32_t rsvd6; /* w7 */
++ uint32_t rsvd7; /* w8 */
++ uint32_t rsvd8; /* w9 */
++
++ struct hbq_mask hbqMasks[6];
++
++
++ union {
++ uint32_t allprofiles[12];
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1 :28;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :28;
++ #endif
++ uint32_t rsvd[10];
++ } profile2;
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cmdcodeoff :28;
++ uint32_t rsvd1 :12;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :12;
++ uint32_t cmdcodeoff :28;
++ #endif
++ uint32_t cmdmatch[8];
++
++ uint32_t rsvd[2];
++ } profile3;
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cmdcodeoff :28;
++ uint32_t rsvd1 :12;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :12;
++ uint32_t cmdcodeoff :28;
++ #endif
++ uint32_t cmdmatch[8];
++
++ uint32_t rsvd[2];
++ } profile5;
++
++ } profiles;
++
++};
++
++
++
++/* Structure for MB Command CONFIG_PORT (0x88) */
+ typedef struct {
+- uint32_t pcbLen;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cBE : 1;
++ uint32_t cET : 1;
++ uint32_t cHpcb : 1;
++ uint32_t cMA : 1;
++ uint32_t sli_mode : 4;
++ uint32_t pcbLen : 24; /* bit 23:0 of memory based port
++ * config block */
++#else /* __LITTLE_ENDIAN */
++ uint32_t pcbLen : 24; /* bit 23:0 of memory based port
++ * config block */
++ uint32_t sli_mode : 4;
++ uint32_t cMA : 1;
++ uint32_t cHpcb : 1;
++ uint32_t cET : 1;
++ uint32_t cBE : 1;
++#endif
++
+ uint32_t pcbLow; /* bit 31:0 of memory based port config block */
+ uint32_t pcbHigh; /* bit 63:32 of memory based port config block */
+- uint32_t hbainit[5];
++ uint32_t hbainit[6];
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd : 24; /* Reserved */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++#else /* __LITTLE_ENDIAN */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t rsvd : 24; /* Reserved */
++#endif
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd2 : 24; /* Reserved */
++ uint32_t gmv : 1; /* Grant Max VPIs */
++ uint32_t gcrp : 1; /* Grant Command Ring Polling */
++ uint32_t gsah : 1; /* Grant Synchronous Abort Handling */
++ uint32_t ghbs : 1; /* Grant Host Backing Store */
++ uint32_t ginb : 1; /* Grant Interrupt Notification Block */
++ uint32_t gerbm : 1; /* Grant ERBM Request */
++ uint32_t gmx : 1; /* Grant Max XRIs */
++ uint32_t gmr : 1; /* Grant Max RPIs */
++#else /* __LITTLE_ENDIAN */
++ uint32_t gmr : 1; /* Grant Max RPIs */
++ uint32_t gmx : 1; /* Grant Max XRIs */
++ uint32_t gerbm : 1; /* Grant ERBM Request */
++ uint32_t ginb : 1; /* Grant Interrupt Notification Block */
++ uint32_t ghbs : 1; /* Grant Host Backing Store */
++ uint32_t gsah : 1; /* Grant Synchronous Abort Handling */
++ uint32_t gcrp : 1; /* Grant Command Ring Polling */
++ uint32_t gmv : 1; /* Grant Max VPIs */
++ uint32_t rsvd2 : 24; /* Reserved */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t max_rpi : 16; /* Max RPIs Port should configure */
++ uint32_t max_xri : 16; /* Max XRIs Port should configure */
++#else /* __LITTLE_ENDIAN */
++ uint32_t max_xri : 16; /* Max XRIs Port should configure */
++ uint32_t max_rpi : 16; /* Max RPIs Port should configure */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t max_hbq : 16; /* Max HBQs Host expects to configure */
++ uint32_t rsvd3 : 16; /* Reserved */
++#else /* __LITTLE_ENDIAN */
++ uint32_t rsvd3 : 16; /* Reserved */
++ uint32_t max_hbq : 16; /* Max HBQs Host expects to configure */
++#endif
++
++ uint32_t rsvd4; /* Reserved */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd5 : 16; /* Reserved */
++ uint32_t max_vpi : 16; /* Max number of virt N-Ports */
++#else /* __LITTLE_ENDIAN */
++ uint32_t max_vpi : 16; /* Max number of virt N-Ports */
++ uint32_t rsvd5 : 16; /* Reserved */
++#endif
++
+ } CONFIG_PORT_VAR;
+
+ /* SLI-2 Port Control Block */
+@@ -2262,7 +2607,9 @@
+ #define MAILBOX_CMD_SIZE (MAILBOX_CMD_WSIZE * sizeof(uint32_t))
+
+ typedef union {
+- uint32_t varWords[MAILBOX_CMD_WSIZE - 1];
++ uint32_t varWords[MAILBOX_CMD_WSIZE - 1]; /* first word is type/
++ * feature/max ring number
++ */
+ LOAD_SM_VAR varLdSM; /* cmd = 1 (LOAD_SM) */
+ READ_NV_VAR varRDnvp; /* cmd = 2 (READ_NVPARMS) */
+ WRITE_NV_VAR varWTnvp; /* cmd = 3 (WRITE_NVPARMS) */
+@@ -2287,8 +2634,13 @@
+ CLEAR_LA_VAR varClearLA; /* cmd = 22 (CLEAR_LA) */
+ DUMP_VAR varDmp; /* Warm Start DUMP mbx cmd */
+ UNREG_D_ID_VAR varUnregDID; /* cmd = 0x23 (UNREG_D_ID) */
+- CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP) NEW_FEATURE */
++ CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP)
++ * NEW_FEATURE
++ */
++ struct config_hbq_var varCfgHbq;/* cmd = 0x7c (CONFIG_HBQ) */
+ CONFIG_PORT_VAR varCfgPort; /* cmd = 0x88 (CONFIG_PORT) */
++ REG_VPI_VAR varRegVpi; /* cmd = 0x96 (REG_VPI) */
++ UNREG_VPI_VAR varUnregVpi; /* cmd = 0x97 (UNREG_VPI) */
+ } MAILVARIANTS;
+
+ /*
+@@ -2305,14 +2657,27 @@
+ __le32 rspPutInx;
+ };
+
+-typedef struct _SLI2_DESC {
+- struct lpfc_hgp host[MAX_RINGS];
++struct sli2_desc {
+ uint32_t unused1[16];
++ struct lpfc_hgp host[MAX_RINGS];
+ struct lpfc_pgp port[MAX_RINGS];
+-} SLI2_DESC;
++};
++
++struct sli3_desc {
++ struct lpfc_hgp host[MAX_RINGS];
++ uint32_t reserved[8];
++ uint32_t hbq_put[16];
++};
++
++struct sli3_pgp {
++ struct lpfc_pgp port[MAX_RINGS];
++ uint32_t hbq_get[16];
++};
+
+ typedef union {
+- SLI2_DESC s2;
++ struct sli2_desc s2;
++ struct sli3_desc s3;
++ struct sli3_pgp s3_pgp;
+ } SLI_VAR;
+
+ typedef struct {
+@@ -2618,6 +2983,25 @@
+ uint32_t fcpt_Length; /* transfer ready for IWRITE */
+ } FCPT_FIELDS64;
+
++/* IOCB Command template for CMD_IOCB_RCV_ELS64_CX (0xB7)
++ or CMD_IOCB_RCV_SEQ64_CX (0xB5) */
++
++struct rcv_sli3 {
++ uint32_t word8Rsvd;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint16_t vpi;
++ uint16_t word9Rsvd;
++#else /* __LITTLE_ENDIAN */
++ uint16_t word9Rsvd;
++ uint16_t vpi;
++#endif
++ uint32_t word10Rsvd;
++ uint32_t acc_len; /* accumulated length */
++ struct ulp_bde64 bde2;
++};
++
++
++
+ typedef struct _IOCB { /* IOCB structure */
+ union {
+ GENERIC_RSP grsp; /* Generic response */
+@@ -2633,7 +3017,7 @@
+ /* SLI-2 structures */
+
+ struct ulp_bde64 cont64[2]; /* up to 2 64 bit continuation
+- bde_64s */
++ * bde_64s */
+ ELS_REQUEST64 elsreq64; /* ELS_REQUEST template */
+ GEN_REQUEST64 genreq64; /* GEN_REQUEST template */
+ RCV_ELS_REQ64 rcvels64; /* RCV_ELS_REQ template */
+@@ -2695,9 +3079,20 @@
+ uint32_t ulpTimeout:8;
+ #endif
+
++ union {
++ struct rcv_sli3 rcvsli3; /* words 8 - 15 */
++ uint32_t sli3Words[24]; /* 96 extra bytes for SLI-3 */
++ } unsli3;
++
++#define ulpCt_h ulpXS
++#define ulpCt_l ulpFCP2Rcvy
++
++#define IOCB_FCP 1 /* IOCB is used for FCP ELS cmds-ulpRsvByte */
++#define IOCB_IP 2 /* IOCB is used for IP ELS cmds */
+ #define PARM_UNUSED 0 /* PU field (Word 4) not used */
+ #define PARM_REL_OFF 1 /* PU field (Word 4) = R. O. */
+ #define PARM_READ_CHECK 2 /* PU field (Word 4) = Data Transfer Length */
++#define PARM_NPIV_DID 3
+ #define CLASS1 0 /* Class 1 */
+ #define CLASS2 1 /* Class 2 */
+ #define CLASS3 2 /* Class 3 */
+@@ -2718,39 +3113,51 @@
+ #define IOSTAT_RSVD2 0xC
+ #define IOSTAT_RSVD3 0xD
+ #define IOSTAT_RSVD4 0xE
+-#define IOSTAT_RSVD5 0xF
++#define IOSTAT_NEED_BUFFER 0xF
+ #define IOSTAT_DRIVER_REJECT 0x10 /* ulpStatus - Driver defined */
+ #define IOSTAT_DEFAULT 0xF /* Same as rsvd5 for now */
+ #define IOSTAT_CNT 0x11
+
+ } IOCB_t;
+
++/* Structure used for a single HBQ entry */
++struct lpfc_hbq_entry {
++ struct ulp_bde64 bde;
++ uint32_t buffer_tag;
++};
++
+
+ #define SLI1_SLIM_SIZE (4 * 1024)
+
+ /* Up to 498 IOCBs will fit into 16k
+ * 256 (MAILBOX_t) + 140 (PCB_t) + ( 32 (IOCB_t) * 498 ) = < 16384
+ */
+-#define SLI2_SLIM_SIZE (16 * 1024)
++#define SLI2_SLIM_SIZE (64 * 1024)
+
+ /* Maximum IOCBs that will fit in SLI2 slim */
+ #define MAX_SLI2_IOCB 498
++#define MAX_SLIM_IOCB_SIZE (SLI2_SLIM_SIZE - \
++ (sizeof(MAILBOX_t) + sizeof(PCB_t)))
++
++/* HBQ entries are 4 words each = 4k */
++#define LPFC_TOTAL_HBQ_SIZE (sizeof(struct lpfc_hbq_entry) * \
++ lpfc_sli_hbq_count())
+
+ struct lpfc_sli2_slim {
+ MAILBOX_t mbx;
+ PCB_t pcb;
+- IOCB_t IOCBs[MAX_SLI2_IOCB];
++ IOCB_t IOCBs[MAX_SLIM_IOCB_SIZE];
+ };
+
+-/*******************************************************************
+-This macro check PCI device to allow special handling for LC HBAs.
+-
+-Parameters:
+-device : struct pci_dev 's device field
+-
+-return 1 => TRUE
+- 0 => FALSE
+- *******************************************************************/
++/*
++ * This function checks PCI device to allow special handling for LC HBAs.
++ *
++ * Parameters:
++ * device : struct pci_dev's device field
++ *
++ * return 1 => TRUE
++ * 0 => FALSE
++ */
+ static inline int
+ lpfc_is_LC_HBA(unsigned short device)
+ {
+@@ -2766,3 +3173,16 @@
+ else
+ return 0;
+ }
++
++/*
++ * Determine if an IOCB failed because of a link event or firmware reset.
++ */
++
++static inline int
++lpfc_error_lost_link(IOCB_t *iocbp)
++{
++ return (iocbp->ulpStatus == IOSTAT_LOCAL_REJECT &&
++ (iocbp->un.ulpWord[4] == IOERR_SLI_ABORTED ||
++ iocbp->un.ulpWord[4] == IOERR_LINK_DOWN ||
++ iocbp->un.ulpWord[4] == IOERR_SLI_DOWN));
++}
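The *_REQUEST_SZ macros above are rewritten from hand-counted "sizeof(whole struct) minus N" constants to offsetof(header, un) plus the size of the one union member actually transmitted, so adding members such as rspn and gff cannot silently skew every other size. A standalone sketch of the idiom; ct_request and its members are illustrative stand-ins, not the driver's definitions:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct ct_request {
    	uint32_t header[4];             /* stand-in for the CT header */
    	union {
    		struct { uint32_t PortId; } gff;
    		struct {
    			uint32_t PortId;
    			uint8_t  len;
    			uint8_t  symbname[255];
    		} rspn;
    	} un;
    };

    /* Header bytes up to the union, plus only the member being sent. */
    #define CT_REQUEST_SZ(member) \
    	(offsetof(struct ct_request, un) + \
    	 sizeof(((struct ct_request *)0)->un.member))

    int main(void)
    {
    	printf("GFF request:  %zu bytes\n", CT_REQUEST_SZ(gff));
    	printf("RSPN request: %zu bytes\n", CT_REQUEST_SZ(rspn));
    	printf("whole struct: %zu bytes\n", sizeof(struct ct_request));
    	return 0;
    }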
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c 2007-12-21 15:36:14.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/kthread.h>
+ #include <linux/pci.h>
+ #include <linux/spinlock.h>
++#include <linux/ctype.h>
+
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_device.h>
+@@ -40,15 +41,20 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+ #include "lpfc_version.h"
++#include "lpfc_vport.h"
+
+ static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
+ static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
+ static int lpfc_post_rcv_buf(struct lpfc_hba *);
+
+ static struct scsi_transport_template *lpfc_transport_template = NULL;
++static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
+ static DEFINE_IDR(lpfc_hba_index);
+
++
++
+ /************************************************************************/
+ /* */
+ /* lpfc_config_port_prep */
+@@ -61,7 +67,7 @@
+ /* */
+ /************************************************************************/
+ int
+-lpfc_config_port_prep(struct lpfc_hba * phba)
++lpfc_config_port_prep(struct lpfc_hba *phba)
+ {
+ lpfc_vpd_t *vp = &phba->vpd;
+ int i = 0, rc;
+@@ -75,12 +81,12 @@
+
+ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!pmb) {
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ return -ENOMEM;
+ }
+
+ mb = &pmb->mb;
+- phba->hba_state = LPFC_INIT_MBX_CMDS;
++ phba->link_state = LPFC_INIT_MBX_CMDS;
+
+ if (lpfc_is_LC_HBA(phba->pcidev->device)) {
+ if (init_key) {
+@@ -100,9 +106,7 @@
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+
+ if (rc != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_MBOX,
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ "%d:0324 Config Port initialization "
+ "error, mbxCmd x%x READ_NVPARM, "
+ "mbxStatus x%x\n",
+@@ -112,16 +116,18 @@
+ return -ERESTART;
+ }
+ memcpy(phba->wwnn, (char *)mb->un.varRDnvp.nodename,
+- sizeof (mb->un.varRDnvp.nodename));
++ sizeof(phba->wwnn));
++ memcpy(phba->wwpn, (char *)mb->un.varRDnvp.portname,
++ sizeof(phba->wwpn));
+ }
+
++ phba->sli3_options = 0x0;
++
+ /* Setup and issue mailbox READ REV command */
+ lpfc_read_rev(phba, pmb);
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+ if (rc != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0439 Adapter failed to init, mbxCmd x%x "
+ "READ_REV, mbxStatus x%x\n",
+ phba->brd_no,
+@@ -130,6 +136,7 @@
+ return -ERESTART;
+ }
+
++
+ /*
+ * The value of rr must be 1 since the driver set the cv field to 1.
+ * This setting requires the FW to set all revision fields.
+@@ -144,8 +151,12 @@
+ return -ERESTART;
+ }
+
++ if (phba->sli_rev == 3 && !mb->un.varRdRev.v3rsp) {
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -EINVAL;
++ }
++
+ /* Save information as VPD data */
+ vp->rev.rBit = 1;
++ memcpy(&vp->sli3Feat, &mb->un.varRdRev.sli3Feat, sizeof(uint32_t));
+ vp->rev.sli1FwRev = mb->un.varRdRev.sli1FwRev;
+ memcpy(vp->rev.sli1FwName, (char*) mb->un.varRdRev.sli1FwName, 16);
+ vp->rev.sli2FwRev = mb->un.varRdRev.sli2FwRev;
+@@ -161,6 +172,13 @@
+ vp->rev.postKernRev = mb->un.varRdRev.postKernRev;
+ vp->rev.opFwRev = mb->un.varRdRev.opFwRev;
+
++ /* If the sli feature level is less than 9, we must
++ * tear down all RPIs and VPIs on link down if NPIV
++ * is enabled.
++ */
++ if (vp->rev.feaLevelHigh < 9)
++ phba->sli3_options |= LPFC_SLI3_VPORT_TEARDOWN;
++
+ if (lpfc_is_LC_HBA(phba->pcidev->device))
+ memcpy(phba->RandomData, (char *)&mb->un.varWords[24],
+ sizeof (phba->RandomData));
+@@ -212,48 +230,34 @@
+ /* */
+ /************************************************************************/
+ int
+-lpfc_config_port_post(struct lpfc_hba * phba)
++lpfc_config_port_post(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ LPFC_MBOXQ_t *pmb;
+ MAILBOX_t *mb;
+ struct lpfc_dmabuf *mp;
+ struct lpfc_sli *psli = &phba->sli;
+ uint32_t status, timeout;
+- int i, j, rc;
++ int i, j;
++ int rc;
+
+ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!pmb) {
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ return -ENOMEM;
+ }
+ mb = &pmb->mb;
+
+- lpfc_config_link(phba, pmb);
+- rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+- if (rc != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
+- "%d:0447 Adapter failed init, mbxCmd x%x "
+- "CONFIG_LINK mbxStatus x%x\n",
+- phba->brd_no,
+- mb->mbxCommand, mb->mbxStatus);
+- phba->hba_state = LPFC_HBA_ERROR;
+- mempool_free( pmb, phba->mbox_mem_pool);
+- return -EIO;
+- }
+-
+ /* Get login parameters for NID. */
+- lpfc_read_sparam(phba, pmb);
++ lpfc_read_sparam(phba, pmb, 0);
++ pmb->vport = vport;
+ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0448 Adapter failed init, mbxCmd x%x "
+ "READ_SPARM mbxStatus x%x\n",
+ phba->brd_no,
+ mb->mbxCommand, mb->mbxStatus);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ mp = (struct lpfc_dmabuf *) pmb->context1;
+ mempool_free( pmb, phba->mbox_mem_pool);
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -263,25 +267,27 @@
+
+ mp = (struct lpfc_dmabuf *) pmb->context1;
+
+- memcpy(&phba->fc_sparam, mp->virt, sizeof (struct serv_parm));
++ memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ pmb->context1 = NULL;
+
+ if (phba->cfg_soft_wwnn)
+- u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn);
++ u64_to_wwn(phba->cfg_soft_wwnn,
++ vport->fc_sparam.nodeName.u.wwn);
+ if (phba->cfg_soft_wwpn)
+- u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
+- memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName,
++ u64_to_wwn(phba->cfg_soft_wwpn,
++ vport->fc_sparam.portName.u.wwn);
++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
+ sizeof (struct lpfc_name));
+- memcpy(&phba->fc_portname, &phba->fc_sparam.portName,
++ memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
+ sizeof (struct lpfc_name));
+ /* If no serial number in VPD data, use low 6 bytes of WWNN */
+ /* This should be consolidated into parse_vpd ? - mr */
+ if (phba->SerialNumber[0] == 0) {
+ uint8_t *outptr;
+
+- outptr = &phba->fc_nodename.u.s.IEEE[0];
++ outptr = &vport->fc_nodename.u.s.IEEE[0];
+ for (i = 0; i < 12; i++) {
+ status = *outptr++;
+ j = ((status & 0xf0) >> 4);
+@@ -303,15 +309,14 @@
+ }
+
+ lpfc_read_config(phba, pmb);
++ pmb->vport = vport;
+ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0453 Adapter failed to init, mbxCmd x%x "
+ "READ_CONFIG, mbxStatus x%x\n",
+ phba->brd_no,
+ mb->mbxCommand, mb->mbxStatus);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ mempool_free( pmb, phba->mbox_mem_pool);
+ return -EIO;
+ }
+@@ -338,9 +343,7 @@
+ || ((phba->cfg_link_speed == LINK_SPEED_10G)
+ && !(phba->lmt & LMT_10Gb))) {
+ /* Reset link speed to auto */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_LINK_EVENT,
++ lpfc_printf_log(phba, KERN_WARNING, LOG_LINK_EVENT,
+ "%d:1302 Invalid speed for this board: "
+ "Reset link speed to auto: x%x\n",
+ phba->brd_no,
+@@ -348,7 +351,7 @@
+ phba->cfg_link_speed = LINK_SPEED_AUTO;
+ }
+
+- phba->hba_state = LPFC_LINK_DOWN;
++ phba->link_state = LPFC_LINK_DOWN;
+
+ /* Only process IOCBs on ring 0 till hba_state is READY */
+ if (psli->ring[psli->extra_ring].cmdringaddr)
+@@ -359,10 +362,11 @@
+ psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT;
+
+ /* Post receive buffers for desired rings */
++ if (phba->sli_rev != 3)
+ lpfc_post_rcv_buf(phba);
+
+ /* Enable appropriate host interrupts */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ status = readl(phba->HCregaddr);
+ status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA;
+ if (psli->num_rings > 0)
+@@ -380,22 +384,24 @@
+
+ writel(status, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ /*
+ * Setup the ring 0 (els) timeout handler
+ */
+ timeout = phba->fc_ratov << 1;
+- mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout);
++ mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
++ mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++ phba->hb_outstanding = 0;
++ phba->last_completion_time = jiffies;
+
+ lpfc_init_link(phba, pmb, phba->cfg_topology, phba->cfg_link_speed);
+ pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ pmb->vport = vport;
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ lpfc_set_loopback_flag(phba);
+ if (rc != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0454 Adapter failed to init, mbxCmd x%x "
+ "INIT_LINK, mbxStatus x%x\n",
+ phba->brd_no,
+@@ -408,7 +414,7 @@
+ writel(0xffffffff, phba->HAregaddr);
+ readl(phba->HAregaddr); /* flush */
+
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ if (rc != MBX_BUSY)
+ mempool_free(pmb, phba->mbox_mem_pool);
+ return -EIO;
+@@ -429,18 +435,19 @@
+ /* */
+ /************************************************************************/
+ int
+-lpfc_hba_down_prep(struct lpfc_hba * phba)
++lpfc_hba_down_prep(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
++
+ /* Disable interrupts */
+ writel(0, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+
+- /* Cleanup potential discovery resources */
+- lpfc_els_flush_rscn(phba);
+- lpfc_els_flush_cmd(phba);
+- lpfc_disc_flush_list(phba);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ lpfc_cleanup_discovery_resources(vport);
++ }
+
+- return (0);
++ return 0;
+ }
+
+ /************************************************************************/
+@@ -453,13 +460,16 @@
+ /* */
+ /************************************************************************/
+ int
+-lpfc_hba_down_post(struct lpfc_hba * phba)
++lpfc_hba_down_post(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_dmabuf *mp, *next_mp;
+ int i;
+
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
++ lpfc_sli_hbqbuf_free_all(phba);
++ else {
+ /* Cleanup preposted buffers on the ELS ring */
+ pring = &psli->ring[LPFC_ELS_RING];
+ list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
+@@ -468,6 +478,7 @@
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ }
++ }
+
+ for (i = 0; i < psli->num_rings; i++) {
+ pring = &psli->ring[i];
+@@ -477,6 +488,119 @@
+ return 0;
+ }
+
++/* HBA heart beat timeout handler */
++void
++lpfc_hb_timeout(unsigned long ptr)
++{
++ struct lpfc_hba *phba;
++ unsigned long iflag;
++
++ phba = (struct lpfc_hba *)ptr;
++ spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
++ if (!(phba->pport->work_port_events & WORKER_HB_TMO))
++ phba->pport->work_port_events |= WORKER_HB_TMO;
++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
++
++ if (phba->work_wait)
++ wake_up(phba->work_wait);
++ return;
++}
++
++static void
++lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
++{
++ unsigned long drvr_flag;
++
++ spin_lock_irqsave(&phba->hbalock, drvr_flag);
++ phba->hb_outstanding = 0;
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++
++ mempool_free(pmboxq, phba->mbox_mem_pool);
++ if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) &&
++ !(phba->link_state == LPFC_HBA_ERROR) &&
++ !(phba->pport->fc_flag & FC_UNLOADING))
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++ return;
++}
++
++void
++lpfc_hb_timeout_handler(struct lpfc_hba *phba)
++{
++ LPFC_MBOXQ_t *pmboxq;
++ int retval;
++ struct lpfc_sli *psli = &phba->sli;
++
++ if ((phba->link_state == LPFC_HBA_ERROR) ||
++ (phba->pport->fc_flag & FC_UNLOADING) ||
++ (phba->pport->fc_flag & FC_OFFLINE_MODE))
++ return;
++
++ spin_lock_irq(&phba->pport->work_port_lock);
++ /* If the timer is already canceled do nothing */
++ if (!(phba->pport->work_port_events & WORKER_HB_TMO)) {
++ spin_unlock_irq(&phba->pport->work_port_lock);
++ return;
++ }
++
++ if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ,
++ jiffies)) {
++ spin_unlock_irq(&phba->pport->work_port_lock);
++ if (!phba->hb_outstanding)
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++ else
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
++ return;
++ }
++ spin_unlock_irq(&phba->pport->work_port_lock);
++
++ /* If there is no heart beat outstanding, issue a heartbeat command */
++ if (!phba->hb_outstanding) {
++ pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
++ if (!pmboxq) {
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++ return;
++ }
++
++ lpfc_heart_beat(phba, pmboxq);
++ pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl;
++ pmboxq->vport = phba->pport;
++ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
++
++ if (retval != MBX_BUSY && retval != MBX_SUCCESS) {
++ mempool_free(pmboxq, phba->mbox_mem_pool);
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++ return;
++ }
++ mod_timer(&phba->hb_tmofunc,
++ jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
++ phba->hb_outstanding = 1;
++ return;
++ } else {
++ /*
++ * If heart beat timeout called with hb_outstanding set we
++ * need to take the HBA offline.
++ */
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0459 Adapter heartbeat failure, taking "
++ "this port offline.\n", phba->brd_no);
++
++ spin_lock_irq(&phba->hbalock);
++ psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
++ spin_unlock_irq(&phba->hbalock);
++
++ lpfc_offline_prep(phba);
++ lpfc_offline(phba);
++ lpfc_unblock_mgmt_io(phba);
++ phba->link_state = LPFC_HBA_ERROR;
++ lpfc_hba_down_post(phba);
++ }
++}
++
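lpfc_hb_timeout_handler() above only issues a heartbeat mailbox command when no I/O has completed within the last interval, using the kernel's wraparound-safe time_after() comparison on jiffies. A standalone model of that test; HB_INTERVAL and the 32-bit "jiffies" here are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define HB_INTERVAL 5u  /* ticks; stands in for LPFC_HB_MBOX_INTERVAL * HZ */

    /* Same idea as the kernel's time_after(a, b): subtract in unsigned
     * arithmetic and inspect the sign of the result, so the comparison
     * stays correct across counter wraparound.
     */
    static int time_after(uint32_t a, uint32_t b)
    {
    	return (int32_t)(b - a) < 0;
    }

    static void check(uint32_t last_completion, uint32_t jiffies)
    {
    	if (time_after(last_completion + HB_INTERVAL, jiffies))
    		printf("recent I/O seen: just re-arm the timer\n");
    	else
    		printf("idle: issue a heartbeat mailbox command\n");
    }

    int main(void)
    {
    	uint32_t jiffies = 0xFFFFFFFEu;         /* counter about to wrap */

    	check(jiffies - 2, jiffies);            /* completion 2 ticks ago */
    	check(jiffies - 2, jiffies + 10u);      /* 12 ticks idle, wrapped */
    	return 0;
    }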
+ /************************************************************************/
+ /* */
+ /* lpfc_handle_eratt */
+@@ -486,11 +610,15 @@
+ /* */
+ /************************************************************************/
+ void
+-lpfc_handle_eratt(struct lpfc_hba * phba)
++lpfc_handle_eratt(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
++ struct lpfc_vport *port_iterator;
+ uint32_t event_data;
++ struct Scsi_Host *shost;
++
+ /* If the pci channel is offline, ignore possible errors,
+ * since we cannot communicate with the pci card anyway. */
+ if (pci_channel_offline(phba->pcidev))
+@@ -504,10 +632,17 @@
+ "Data: x%x x%x x%x\n",
+ phba->brd_no, phba->work_hs,
+ phba->work_status[0], phba->work_status[1]);
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_ESTABLISH_LINK;
++ list_for_each_entry(port_iterator, &phba->port_list,
++ listentry) {
++ shost = lpfc_shost_from_vport(port_iterator);
++
++ spin_lock_irq(shost->host_lock);
++ port_iterator->fc_flag |= FC_ESTABLISH_LINK;
++ spin_unlock_irq(shost->host_lock);
++ }
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ /*
+ * Firmware stops when it triggers erratt with HS_FFER6.
+@@ -544,15 +679,18 @@
+ phba->work_status[0], phba->work_status[1]);
+
+ event_data = FC_REG_DUMP_EVENT;
+- fc_host_post_vendor_event(phba->host, fc_get_event_number(),
++ shost = lpfc_shost_from_vport(vport);
++ fc_host_post_vendor_event(shost, fc_get_event_number(),
+ sizeof(event_data), (char *) &event_data,
+ SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
++ spin_unlock_irq(&phba->hbalock);
+ lpfc_offline_prep(phba);
+ lpfc_offline(phba);
+ lpfc_unblock_mgmt_io(phba);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ lpfc_hba_down_post(phba);
+ }
+ }
+@@ -566,9 +704,11 @@
+ /* */
+ /************************************************************************/
+ void
+-lpfc_handle_latt(struct lpfc_hba * phba)
++lpfc_handle_latt(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_vport *port_iterator;
+ LPFC_MBOXQ_t *pmb;
+ volatile uint32_t control;
+ struct lpfc_dmabuf *mp;
+@@ -589,20 +729,22 @@
+ rc = -EIO;
+
+ /* Cleanup any outstanding ELS commands */
+- lpfc_els_flush_cmd(phba);
++ list_for_each_entry(port_iterator, &phba->port_list, listentry)
++ lpfc_els_flush_cmd(port_iterator);
+
+ psli->slistat.link_event++;
+ lpfc_read_la(phba, pmb, mp);
+ pmb->mbox_cmpl = lpfc_mbx_cmpl_read_la;
++ pmb->vport = vport;
+ rc = lpfc_sli_issue_mbox (phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB));
+ if (rc == MBX_NOT_FINISHED)
+ goto lpfc_handle_latt_free_mbuf;
+
+ /* Clear Link Attention in HA REG */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ writel(HA_LATT, phba->HAregaddr);
+ readl(phba->HAregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return;
+
+@@ -614,7 +756,7 @@
+ mempool_free(pmb, phba->mbox_mem_pool);
+ lpfc_handle_latt_err_exit:
+ /* Enable Link attention interrupts */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag |= LPFC_PROCESS_LA;
+ control = readl(phba->HCregaddr);
+ control |= HC_LAINT_ENA;
+@@ -624,15 +766,13 @@
+ /* Clear Link Attention in HA REG */
+ writel(HA_LATT, phba->HAregaddr);
+ readl(phba->HAregaddr); /* flush */
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ lpfc_linkdown(phba);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+
+ /* The other case is an error from issue_mbox */
+ if (rc == -ENOMEM)
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_MBOX,
++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+ "%d:0300 READ_LA: no buffers\n",
+ phba->brd_no);
+
+@@ -646,7 +786,7 @@
+ /* */
+ /************************************************************************/
+ static int
+-lpfc_parse_vpd(struct lpfc_hba * phba, uint8_t * vpd, int len)
++lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len)
+ {
+ uint8_t lenlo, lenhi;
+ int Length;
+@@ -658,9 +798,7 @@
+ return 0;
+
+ /* Vital Product */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "%d:0455 Vital Product Data: x%x x%x x%x x%x\n",
+ phba->brd_no,
+ (uint32_t) vpd[0], (uint32_t) vpd[1], (uint32_t) vpd[2],
+@@ -785,7 +923,7 @@
+ }
+
+ static void
+-lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp)
++lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
+ {
+ lpfc_vpd_t *vp;
+ uint16_t dev_id = phba->pcidev->device;
+@@ -943,7 +1081,7 @@
+ /* Returns the number of buffers NOT posted. */
+ /**************************************************/
+ int
+-lpfc_post_buffer(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, int cnt,
++lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt,
+ int type)
+ {
+ IOCB_t *icmd;
+@@ -955,9 +1093,7 @@
+ /* While there are buffers to post */
+ while (cnt > 0) {
+ /* Allocate buffer for command iocb */
+- spin_lock_irq(phba->host->host_lock);
+ iocb = lpfc_sli_get_iocbq(phba);
+- spin_unlock_irq(phba->host->host_lock);
+ if (iocb == NULL) {
+ pring->missbufcnt = cnt;
+ return cnt;
+@@ -972,9 +1108,7 @@
+ &mp1->phys);
+ if (mp1 == 0 || mp1->virt == 0) {
+ kfree(mp1);
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, iocb);
+- spin_unlock_irq(phba->host->host_lock);
+ pring->missbufcnt = cnt;
+ return cnt;
+ }
+@@ -990,9 +1124,7 @@
+ kfree(mp2);
+ lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
+ kfree(mp1);
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_release_iocbq(phba, iocb);
+- spin_unlock_irq(phba->host->host_lock);
+ pring->missbufcnt = cnt;
+ return cnt;
+ }
+@@ -1018,7 +1150,6 @@
+ icmd->ulpCommand = CMD_QUE_RING_BUF64_CN;
+ icmd->ulpLe = 1;
+
+- spin_lock_irq(phba->host->host_lock);
+ if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) {
+ lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
+ kfree(mp1);
+@@ -1030,15 +1161,12 @@
+ }
+ lpfc_sli_release_iocbq(phba, iocb);
+ pring->missbufcnt = cnt;
+- spin_unlock_irq(phba->host->host_lock);
+ return cnt;
+ }
+- spin_unlock_irq(phba->host->host_lock);
+ lpfc_sli_ringpostbuf_put(phba, pring, mp1);
+- if (mp2) {
++ if (mp2)
+ lpfc_sli_ringpostbuf_put(phba, pring, mp2);
+ }
+- }
+ pring->missbufcnt = 0;
+ return 0;
+ }
+@@ -1050,7 +1178,7 @@
+ /* */
+ /************************************************************************/
+ static int
+-lpfc_post_rcv_buf(struct lpfc_hba * phba)
++lpfc_post_rcv_buf(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli = &phba->sli;
+
+@@ -1151,7 +1279,7 @@
+ {
+ int t;
+ uint32_t *HashWorking;
+- uint32_t *pwwnn = phba->wwnn;
++ uint32_t *pwwnn = (uint32_t *) phba->wwnn;
+
+ HashWorking = kmalloc(80 * sizeof(uint32_t), GFP_KERNEL);
+ if (!HashWorking)
+@@ -1170,64 +1298,76 @@
+ }
+
+ static void
+-lpfc_cleanup(struct lpfc_hba * phba)
++lpfc_cleanup(struct lpfc_vport *vport)
+ {
+ struct lpfc_nodelist *ndlp, *next_ndlp;
+
+ /* clean up phba - lpfc specific */
+- lpfc_can_disctmo(phba);
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp)
++ lpfc_can_disctmo(vport);
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
+ lpfc_nlp_put(ndlp);
+-
+- INIT_LIST_HEAD(&phba->fc_nodes);
+-
+ return;
+ }
+
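lpfc_cleanup() above walks vport->fc_nodes with list_for_each_entry_safe() because lpfc_nlp_put() can drop the last kref and free the node, so the next pointer has to be fetched before the current element is released. A standalone model of the idiom using a plain singly linked list; node and destroy() are illustrative stand-ins:

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
    	struct node *next;
    	int id;
    };

    static void destroy(struct node *n)     /* stands in for lpfc_nlp_put() */
    {
    	printf("freeing node %d\n", n->id);
    	free(n);                            /* after this, n->next is gone */
    }

    int main(void)
    {
    	struct node *head = NULL, *n, *next;
    	int i;

    	for (i = 0; i < 3; i++) {           /* build a small list */
    		n = malloc(sizeof(*n));
    		if (!n)
    			return 1;
    		n->id = i;
    		n->next = head;
    		head = n;
    	}

    	for (n = head; n; n = next) {       /* the "_safe" iteration idiom */
    		next = n->next;             /* cache before destroy() frees n */
    		destroy(n);
    	}
    	return 0;
    }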
+ static void
+ lpfc_establish_link_tmo(unsigned long ptr)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr;
++ struct lpfc_vport *vport = phba->pport;
+ unsigned long iflag;
+
+-
+ /* Re-establishing Link, timer expired */
+ lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
+ "%d:1300 Re-establishing Link, timer expired "
+ "Data: x%x x%x\n",
+- phba->brd_no, phba->fc_flag, phba->hba_state);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- phba->fc_flag &= ~FC_ESTABLISH_LINK;
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ phba->brd_no, vport->fc_flag,
++ vport->port_state);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ spin_lock_irqsave(shost->host_lock, iflag);
++ vport->fc_flag &= ~FC_ESTABLISH_LINK;
++ spin_unlock_irqrestore(shost->host_lock, iflag);
++ }
+ }
+
+-static int
+-lpfc_stop_timer(struct lpfc_hba * phba)
++void
++lpfc_stop_vport_timers(struct lpfc_vport *vport)
+ {
+- struct lpfc_sli *psli = &phba->sli;
++ del_timer_sync(&vport->els_tmofunc);
++ del_timer_sync(&vport->fc_fdmitmo);
++ lpfc_can_disctmo(vport);
++ return;
++}
++
++static void
++lpfc_stop_phba_timers(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
+
+ del_timer_sync(&phba->fcp_poll_timer);
+ del_timer_sync(&phba->fc_estabtmo);
+- del_timer_sync(&phba->fc_disctmo);
+- del_timer_sync(&phba->fc_fdmitmo);
+- del_timer_sync(&phba->els_tmofunc);
+- psli = &phba->sli;
+- del_timer_sync(&psli->mbox_tmo);
+- return(1);
++ list_for_each_entry(vport, &phba->port_list, listentry)
++ lpfc_stop_vport_timers(vport);
++ del_timer_sync(&phba->sli.mbox_tmo);
++ del_timer_sync(&phba->fabric_block_timer);
++ phba->hb_outstanding = 0;
++ del_timer_sync(&phba->hb_tmofunc);
++ return;
+ }
+
+ int
+-lpfc_online(struct lpfc_hba * phba)
++lpfc_online(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport;
++
+ if (!phba)
+ return 0;
+
++ vport = phba->pport;
++
+- if (!(phba->fc_flag & FC_OFFLINE_MODE))
++ if (!(vport->fc_flag & FC_OFFLINE_MODE))
+ return 0;
+
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ "%d:0458 Bring Adapter online\n",
+ phba->brd_no);
+
+@@ -1243,9 +1383,14 @@
+ return 1;
+ }
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_OFFLINE_MODE;
+- spin_unlock_irq(phba->host->host_lock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~FC_OFFLINE_MODE;
++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++ spin_unlock_irq(shost->host_lock);
++ }
+
+ lpfc_unblock_mgmt_io(phba);
+ return 0;
+@@ -1256,9 +1401,9 @@
+ {
+ unsigned long iflag;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- phba->fc_flag |= FC_BLOCK_MGMT_IO;
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ phba->sli.sli_flag |= LPFC_BLOCK_MGMT_IO;
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+
+ void
+@@ -1266,17 +1411,18 @@
+ {
+ unsigned long iflag;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- phba->fc_flag &= ~FC_BLOCK_MGMT_IO;
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ phba->sli.sli_flag &= ~LPFC_BLOCK_MGMT_IO;
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+
+ void
+ lpfc_offline_prep(struct lpfc_hba * phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ struct lpfc_nodelist *ndlp, *next_ndlp;
+
+- if (phba->fc_flag & FC_OFFLINE_MODE)
++ if (vport->fc_flag & FC_OFFLINE_MODE)
+ return;
+
+ lpfc_block_mgmt_io(phba);
+@@ -1284,39 +1430,49 @@
+ lpfc_linkdown(phba);
+
+ /* Issue an unreg_login to all nodes */
+- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp)
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
+ if (ndlp->nlp_state != NLP_STE_UNUSED_NODE)
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+
+ lpfc_sli_flush_mbox_queue(phba);
+ }
+
+ void
+-lpfc_offline(struct lpfc_hba * phba)
++lpfc_offline(struct lpfc_hba *phba)
+ {
+- unsigned long iflag;
++ struct lpfc_vport *vport = phba->pport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_vport *port_iterator;
+
+- if (phba->fc_flag & FC_OFFLINE_MODE)
++ if (vport->fc_flag & FC_OFFLINE_MODE)
+ return;
+
+ /* stop all timers associated with this hba */
+- lpfc_stop_timer(phba);
++ lpfc_stop_phba_timers(phba);
++ list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++ port_iterator->work_port_events = 0;
++ }
+
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ "%d:0460 Bring Adapter offline\n",
+ phba->brd_no);
+
+ /* Bring down the SLI Layer and cleanup. The HBA is offline
+ now. */
+ lpfc_sli_hba_down(phba);
+- lpfc_cleanup(phba);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- phba->work_hba_events = 0;
++ spin_lock_irq(&phba->hbalock);
+ phba->work_ha = 0;
+- phba->fc_flag |= FC_OFFLINE_MODE;
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ vport->fc_flag |= FC_OFFLINE_MODE;
++ spin_unlock_irq(&phba->hbalock);
++ list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++ shost = lpfc_shost_from_vport(port_iterator);
++
++ lpfc_cleanup(port_iterator);
++ spin_lock_irq(shost->host_lock);
++ port_iterator->work_port_events = 0;
++ port_iterator->fc_flag |= FC_OFFLINE_MODE;
++ spin_unlock_irq(shost->host_lock);
++ }
+ }
+
+ /******************************************************************************
+@@ -1326,12 +1482,12 @@
+ *
+ ******************************************************************************/
+ static int
+-lpfc_scsi_free(struct lpfc_hba * phba)
++lpfc_scsi_free(struct lpfc_hba *phba)
+ {
+ struct lpfc_scsi_buf *sb, *sb_next;
+ struct lpfc_iocbq *io, *io_next;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ /* Release all the lpfc_scsi_bufs maintained by this host. */
+ list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list, list) {
+ list_del(&sb->list);
+@@ -1348,126 +1504,174 @@
+ phba->total_iocbq_bufs--;
+ }
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ return 0;
+ }
+
+-void lpfc_remove_device(struct lpfc_hba *phba)
+-{
+- unsigned long iflag;
+-
+- lpfc_free_sysfs_attr(phba);
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- phba->fc_flag |= FC_UNLOADING;
++struct lpfc_vport *
++lpfc_create_port(struct lpfc_hba *phba, int instance, struct fc_vport *fc_vport)
++{
++ struct lpfc_vport *vport;
++ struct Scsi_Host *shost;
++ int error = 0;
+
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport));
++ if (!shost)
++ goto out;
+
+- fc_remove_host(phba->host);
+- scsi_remove_host(phba->host);
++ vport = (struct lpfc_vport *) shost->hostdata;
++ vport->phba = phba;
+
+- kthread_stop(phba->worker_thread);
++ vport->load_flag |= FC_LOADING;
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+
++ shost->unique_id = instance;
++ shost->max_id = LPFC_MAX_TARGET;
++ shost->max_lun = phba->cfg_max_luns;
++ shost->this_id = -1;
++ shost->max_cmd_len = 16;
+ /*
+- * Bring down the SLI Layer. This step disable all interrupts,
+- * clears the rings, discards all mailbox commands, and resets
+- * the HBA.
++ * Set initial can_queue value since 0 is no longer supported and
++ * scsi_add_host will fail. This will be adjusted later based on the
++ * max xri value determined in hba setup.
+ */
+- lpfc_sli_hba_down(phba);
+- lpfc_sli_brdrestart(phba);
++ shost->can_queue = phba->cfg_hba_queue_depth - 10;
++ if (fc_vport != NULL) {
++ shost->transportt = lpfc_vport_transport_template;
++ vport->port_type = LPFC_NPIV_PORT;
++ } else {
++ shost->transportt = lpfc_transport_template;
++ vport->port_type = LPFC_PHYSICAL_PORT;
++ }
+
+- /* Release the irq reservation */
+- free_irq(phba->pcidev->irq, phba);
+- pci_disable_msi(phba->pcidev);
++ /* Initialize all internally managed lists. */
++ INIT_LIST_HEAD(&vport->fc_nodes);
++ spin_lock_init(&vport->work_port_lock);
+
+- lpfc_cleanup(phba);
+- lpfc_stop_timer(phba);
+- phba->work_hba_events = 0;
++ init_timer(&vport->fc_disctmo);
++ vport->fc_disctmo.function = lpfc_disc_timeout;
++ vport->fc_disctmo.data = (unsigned long)vport;
+
+- /*
+- * Call scsi_free before mem_free since scsi bufs are released to their
+- * corresponding pools here.
+- */
+- lpfc_scsi_free(phba);
+- lpfc_mem_free(phba);
++ init_timer(&vport->fc_fdmitmo);
++ vport->fc_fdmitmo.function = lpfc_fdmi_tmo;
++ vport->fc_fdmitmo.data = (unsigned long)vport;
+
+- /* Free resources associated with SLI2 interface */
+- dma_free_coherent(&phba->pcidev->dev, SLI2_SLIM_SIZE,
+- phba->slim2p, phba->slim2p_mapping);
++ init_timer(&vport->els_tmofunc);
++ vport->els_tmofunc.function = lpfc_els_timeout;
++ vport->els_tmofunc.data = (unsigned long)vport;
+
+- /* unmap adapter SLIM and Control Registers */
+- iounmap(phba->ctrl_regs_memmap_p);
+- iounmap(phba->slim_memmap_p);
++ if (fc_vport != NULL) {
++ error = scsi_add_host(shost, &fc_vport->dev);
++ } else {
++ error = scsi_add_host(shost, &phba->pcidev->dev);
++ }
++ if (error)
++ goto out_put_shost;
+
+- pci_release_regions(phba->pcidev);
+- pci_disable_device(phba->pcidev);
++ list_add_tail(&vport->listentry, &phba->port_list);
++ return vport;
+
+- idr_remove(&lpfc_hba_index, phba->brd_no);
+- scsi_host_put(phba->host);
++out_put_shost:
++ scsi_host_put(shost);
++out:
++ return NULL;
+ }
+
+-void lpfc_scan_start(struct Scsi_Host *host)
++void
++destroy_port(struct lpfc_vport *vport)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+
+- if (lpfc_alloc_sysfs_attr(phba))
+- goto error;
++ kfree(vport->vname);
+
+- phba->MBslimaddr = phba->slim_memmap_p;
+- phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
+- phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
+- phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
+- phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
++ lpfc_debugfs_terminate(vport);
++ fc_remove_host(shost);
++ scsi_remove_host(shost);
+
+- if (lpfc_sli_hba_setup(phba))
+- goto error;
++ spin_lock_irq(&phba->hbalock);
++ list_del_init(&vport->listentry);
++ spin_unlock_irq(&phba->hbalock);
+
+- /*
+- * hba setup may have changed the hba_queue_depth so we need to adjust
+- * the value of can_queue.
+- */
+- host->can_queue = phba->cfg_hba_queue_depth - 10;
++ lpfc_cleanup(vport);
+ return;
++}
+
+-error:
+- lpfc_remove_device(phba);
++int
++lpfc_get_instance(void)
++{
++ int instance = 0;
++
++ /* Assign an unused number */
++ if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
++ return -1;
++ if (idr_get_new(&lpfc_hba_index, NULL, &instance))
++ return -1;
++ return instance;
+ }
+
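lpfc_get_instance() above wraps the 2.6.22-era idr allocation pair (idr_pre_get() followed by idr_get_new()). A hedged kernel-style sketch of how a caller pairs it with the idr_remove() seen in the old lpfc_remove_device() path; example_attach/example_detach and the -ENOMEM mapping are illustrative, not from the patch:

    /* Sketch only: assumes the surrounding lpfc definitions are in scope. */
    static int example_attach(struct lpfc_hba *phba)
    {
    	phba->brd_no = lpfc_get_instance();
    	if (phba->brd_no < 0)       /* idr_pre_get()/idr_get_new() failed */
    		return -ENOMEM;
    	return 0;
    }

    static void example_detach(struct lpfc_hba *phba)
    {
    	/* give the board number back so it can be reused */
    	idr_remove(&lpfc_hba_index, phba->brd_no);
    }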
++/*
++ * Note: there is no scan_start function as adapter initialization
++ * will have asynchronously kicked off the link initialization.
++ */
++
+ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ int stat = 0;
+
+- if (!phba->host)
+- return 1;
+- if (time >= 30 * HZ)
++ spin_lock_irq(shost->host_lock);
++
++ if (vport->fc_flag & FC_UNLOADING) {
++ stat = 1;
+ goto finished;
++ }
++ if (time >= 30 * HZ) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++ "%d:0461 Scanning longer than 30 "
++ "seconds. Continuing initialization\n",
++ phba->brd_no);
++ stat = 1;
++ goto finished;
++ }
++ if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) {
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++ "%d:0465 Link down longer than 15 "
++ "seconds. Continuing initialization\n",
++ phba->brd_no);
++ stat = 1;
++ goto finished;
++ }
+
+- if (phba->hba_state != LPFC_HBA_READY)
+- return 0;
+- if (phba->num_disc_nodes || phba->fc_prli_sent)
+- return 0;
+- if ((phba->fc_map_cnt == 0) && (time < 2 * HZ))
+- return 0;
+- if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE)
+- return 0;
+- if ((phba->hba_state > LPFC_LINK_DOWN) || (time < 15 * HZ))
+- return 0;
++ if (vport->port_state != LPFC_VPORT_READY)
++ goto finished;
++ if (vport->num_disc_nodes || vport->fc_prli_sent)
++ goto finished;
++ if (vport->fc_map_cnt == 0 && time < 2 * HZ)
++ goto finished;
++ if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0)
++ goto finished;
++
++ stat = 1;
+
+ finished:
+- if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
+- spin_lock_irq(shost->host_lock);
+- lpfc_poll_start_timer(phba);
+ spin_unlock_irq(shost->host_lock);
+- }
++ return stat;
++}
+
++void lpfc_host_attrib_init(struct Scsi_Host *shost)
++{
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ /*
+- * set fixed host attributes
+- * Must done after lpfc_sli_hba_setup()
++ * Set fixed host attributes. Must done after lpfc_sli_hba_setup().
+ */
+
+- fc_host_node_name(shost) = wwn_to_u64(phba->fc_nodename.u.wwn);
+- fc_host_port_name(shost) = wwn_to_u64(phba->fc_portname.u.wwn);
++ fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
++ fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+ fc_host_supported_classes(shost) = FC_COS_CLASS3;
+
+ memset(fc_host_supported_fc4s(shost), 0,
+@@ -1475,7 +1679,8 @@
+ fc_host_supported_fc4s(shost)[2] = 1;
+ fc_host_supported_fc4s(shost)[7] = 1;
+
+- lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
++ lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost),
++ sizeof fc_host_symbolic_name(shost));
+
+ fc_host_supported_speeds(shost) = 0;
+ if (phba->lmt & LMT_10Gb)
+@@ -1488,8 +1693,8 @@
+ fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT;
+
+ fc_host_maxframe_size(shost) =
+- ((((uint32_t) phba->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) |
+- (uint32_t) phba->fc_sparam.cmn.bbRcvSizeLsb);
++ (((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) |
++ (uint32_t) vport->fc_sparam.cmn.bbRcvSizeLsb;
+
+ /* This value is also unchanging */
+ memset(fc_host_active_fc4s(shost), 0,
+@@ -1497,20 +1702,20 @@
+ fc_host_active_fc4s(shost)[2] = 1;
+ fc_host_active_fc4s(shost)[7] = 1;
+
++ fc_host_max_npiv_vports(shost) = phba->max_vpi;
+ spin_lock_irq(shost->host_lock);
+- phba->fc_flag &= ~FC_LOADING;
++ vport->fc_flag &= ~FC_LOADING;
+ spin_unlock_irq(shost->host_lock);
+-
+- return 1;
+ }
+
+ static int __devinit
+ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+ {
+- struct Scsi_Host *host;
++ struct lpfc_vport *vport = NULL;
+ struct lpfc_hba *phba;
+ struct lpfc_sli *psli;
+ struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
++ struct Scsi_Host *shost = NULL;
+ unsigned long bar0map_len, bar2map_len;
+ int error = -ENODEV, retval;
+ int i;
+@@ -1521,61 +1726,46 @@
+ if (pci_request_regions(pdev, LPFC_DRIVER_NAME))
+ goto out_disable_device;
+
+- host = scsi_host_alloc(&lpfc_template, sizeof (struct lpfc_hba));
+- if (!host)
++ phba = kzalloc(sizeof (struct lpfc_hba), GFP_KERNEL);
++ if (!phba)
+ goto out_release_regions;
+
+- phba = (struct lpfc_hba*)host->hostdata;
+- memset(phba, 0, sizeof (struct lpfc_hba));
+- phba->host = host;
++ spin_lock_init(&phba->hbalock);
+
+- phba->fc_flag |= FC_LOADING;
+ phba->pcidev = pdev;
+
+ /* Assign an unused board number */
+- if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
+- goto out_put_host;
++ if ((phba->brd_no = lpfc_get_instance()) < 0)
++ goto out_free_phba;
+
+- error = idr_get_new(&lpfc_hba_index, NULL, &phba->brd_no);
+- if (error)
+- goto out_put_host;
+-
+- host->unique_id = phba->brd_no;
++ INIT_LIST_HEAD(&phba->port_list);
++ INIT_LIST_HEAD(&phba->hbq_buffer_list);
++ /*
++ * Get all the module params for configuring this host and then
++ * establish the host.
++ */
++ lpfc_get_cfgparam(phba);
++ phba->max_vpi = LPFC_MAX_VPI;
+
+ /* Initialize timers used by driver */
+ init_timer(&phba->fc_estabtmo);
+ phba->fc_estabtmo.function = lpfc_establish_link_tmo;
+ phba->fc_estabtmo.data = (unsigned long)phba;
+- init_timer(&phba->fc_disctmo);
+- phba->fc_disctmo.function = lpfc_disc_timeout;
+- phba->fc_disctmo.data = (unsigned long)phba;
+-
+- init_timer(&phba->fc_fdmitmo);
+- phba->fc_fdmitmo.function = lpfc_fdmi_tmo;
+- phba->fc_fdmitmo.data = (unsigned long)phba;
+- init_timer(&phba->els_tmofunc);
+- phba->els_tmofunc.function = lpfc_els_timeout;
+- phba->els_tmofunc.data = (unsigned long)phba;
++
++ init_timer(&phba->hb_tmofunc);
++ phba->hb_tmofunc.function = lpfc_hb_timeout;
++ phba->hb_tmofunc.data = (unsigned long)phba;
++
+ psli = &phba->sli;
+ init_timer(&psli->mbox_tmo);
+ psli->mbox_tmo.function = lpfc_mbox_timeout;
+- psli->mbox_tmo.data = (unsigned long)phba;
+-
++ psli->mbox_tmo.data = (unsigned long) phba;
+ init_timer(&phba->fcp_poll_timer);
+ phba->fcp_poll_timer.function = lpfc_poll_timeout;
+- phba->fcp_poll_timer.data = (unsigned long)phba;
+-
+- /*
+- * Get all the module params for configuring this host and then
+- * establish the host parameters.
+- */
+- lpfc_get_cfgparam(phba);
+-
+- host->max_id = LPFC_MAX_TARGET;
+- host->max_lun = phba->cfg_max_luns;
+- host->this_id = -1;
+-
+- INIT_LIST_HEAD(&phba->fc_nodes);
++ phba->fcp_poll_timer.data = (unsigned long) phba;
++ init_timer(&phba->fabric_block_timer);
++ phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
++ phba->fabric_block_timer.data = (unsigned long) phba;
+
+ pci_set_master(pdev);
+ retval = pci_set_mwi(pdev);
+@@ -1623,13 +1813,22 @@
+
+ memset(phba->slim2p, 0, SLI2_SLIM_SIZE);
+
++ phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev,
++ lpfc_sli_hbq_size(),
++ &phba->hbqslimp.phys,
++ GFP_KERNEL);
++ if (!phba->hbqslimp.virt)
++ goto out_free_slim;
++
++ memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
++
+ /* Initialize the SLI Layer to run with lpfc HBAs. */
+ lpfc_sli_setup(phba);
+ lpfc_sli_queue_setup(phba);
+
+ error = lpfc_mem_alloc(phba);
+ if (error)
+- goto out_free_slim;
++ goto out_free_hbqslimp;
+
+ /* Initialize and populate the iocb list per host. */
+ INIT_LIST_HEAD(&phba->lpfc_iocb_list);
+@@ -1653,10 +1852,11 @@
+ error = -ENOMEM;
+ goto out_free_iocbq;
+ }
+- spin_lock_irq(phba->host->host_lock);
++
++ spin_lock_irq(&phba->hbalock);
+ list_add(&iocbq_entry->list, &phba->lpfc_iocb_list);
+ phba->total_iocbq_bufs++;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ }
+
+ /* Initialize HBA structure */
+@@ -1677,22 +1877,22 @@
+ goto out_free_iocbq;
+ }
+
+- /*
+- * Set initial can_queue value since 0 is no longer supported and
+- * scsi_add_host will fail. This will be adjusted later based on the
+- * max xri value determined in hba setup.
+- */
+- host->can_queue = phba->cfg_hba_queue_depth - 10;
+-
+- /* Tell the midlayer we support 16 byte commands */
+- host->max_cmd_len = 16;
+-
+ /* Initialize the list of scsi buffers used by driver for scsi IO. */
+ spin_lock_init(&phba->scsi_buf_list_lock);
+ INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+
+- host->transportt = lpfc_transport_template;
+- pci_set_drvdata(pdev, host);
++ /* Initialize list of fabric iocbs */
++ INIT_LIST_HEAD(&phba->fabric_iocb_list);
++
++ vport = lpfc_create_port(phba, phba->brd_no, NULL);
++ if (!vport)
++ goto out_kthread_stop;
++
++ shost = lpfc_shost_from_vport(vport);
++ phba->pport = vport;
++ lpfc_debugfs_initialize(vport);
++
++ pci_set_drvdata(pdev, shost);
+
+ if (phba->cfg_use_msi) {
+ error = pci_enable_msi(phba->pcidev);
+@@ -1708,33 +1908,63 @@
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0451 Enable interrupt handler failed\n",
+ phba->brd_no);
+- goto out_kthread_stop;
++ goto out_disable_msi;
+ }
+
+- error = scsi_add_host(host, &pdev->dev);
+- if (error)
++ phba->MBslimaddr = phba->slim_memmap_p;
++ phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
++ phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
++ phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
++ phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
++
++ if (lpfc_alloc_sysfs_attr(vport))
+ goto out_free_irq;
+
+- scsi_scan_host(host);
++ if (lpfc_sli_hba_setup(phba))
++ goto out_remove_device;
++
++ /*
++ * hba setup may have changed the hba_queue_depth so we need to adjust
++ * the value of can_queue.
++ */
++ shost->can_queue = phba->cfg_hba_queue_depth - 10;
++
++ lpfc_host_attrib_init(shost);
++
++ if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
++ spin_lock_irq(shost->host_lock);
++ lpfc_poll_start_timer(phba);
++ spin_unlock_irq(shost->host_lock);
++ }
++
++ scsi_scan_host(shost);
+
+ return 0;
+
++out_remove_device:
++ lpfc_free_sysfs_attr(vport);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_UNLOADING;
++ spin_unlock_irq(shost->host_lock);
+ out_free_irq:
+- lpfc_stop_timer(phba);
+- phba->work_hba_events = 0;
++ lpfc_stop_phba_timers(phba);
++ phba->pport->work_port_events = 0;
+ free_irq(phba->pcidev->irq, phba);
++out_disable_msi:
+ pci_disable_msi(phba->pcidev);
++ destroy_port(vport);
+ out_kthread_stop:
+ kthread_stop(phba->worker_thread);
+ out_free_iocbq:
+ list_for_each_entry_safe(iocbq_entry, iocbq_next,
+ &phba->lpfc_iocb_list, list) {
+- spin_lock_irq(phba->host->host_lock);
+ kfree(iocbq_entry);
+ phba->total_iocbq_bufs--;
+- spin_unlock_irq(phba->host->host_lock);
+ }
+ lpfc_mem_free(phba);
++out_free_hbqslimp:
++ dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt,
++ phba->hbqslimp.phys);
+ out_free_slim:
+ dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, phba->slim2p,
+ phba->slim2p_mapping);
+@@ -1744,27 +1974,85 @@
+ iounmap(phba->slim_memmap_p);
+ out_idr_remove:
+ idr_remove(&lpfc_hba_index, phba->brd_no);
+-out_put_host:
+- phba->host = NULL;
+- scsi_host_put(host);
++out_free_phba:
++ kfree(phba);
+ out_release_regions:
+ pci_release_regions(pdev);
+ out_disable_device:
+ pci_disable_device(pdev);
+ out:
+ pci_set_drvdata(pdev, NULL);
++ if (shost)
++ scsi_host_put(shost);
+ return error;
+ }
+
+ static void __devexit
+ lpfc_pci_remove_one(struct pci_dev *pdev)
+ {
+- struct Scsi_Host *host = pci_get_drvdata(pdev);
+- struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata;
++ struct Scsi_Host *shost = pci_get_drvdata(pdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_vport *port_iterator;
++ list_for_each_entry(port_iterator, &phba->port_list, listentry)
++ port_iterator->load_flag |= FC_UNLOADING;
++
++ kfree(vport->vname);
++ lpfc_free_sysfs_attr(vport);
+
+- lpfc_remove_device(phba);
++ fc_remove_host(shost);
++ scsi_remove_host(shost);
++
++ /*
++ * Bring down the SLI Layer. This step disables all interrupts,
++ * clears the rings, discards all mailbox commands, and resets
++ * the HBA.
++ */
++ lpfc_sli_hba_down(phba);
++ lpfc_sli_brdrestart(phba);
++
++ lpfc_stop_phba_timers(phba);
++ spin_lock_irq(&phba->hbalock);
++ list_del_init(&vport->listentry);
++ spin_unlock_irq(&phba->hbalock);
++
++
++ lpfc_debugfs_terminate(vport);
++ lpfc_cleanup(vport);
++
++ kthread_stop(phba->worker_thread);
++
++ /* Release the irq reservation */
++ free_irq(phba->pcidev->irq, phba);
++ pci_disable_msi(phba->pcidev);
+
+ pci_set_drvdata(pdev, NULL);
++ scsi_host_put(shost);
++
++ /*
++ * Call scsi_free before mem_free since scsi bufs are released to their
++ * corresponding pools here.
++ */
++ lpfc_scsi_free(phba);
++ lpfc_mem_free(phba);
++
++ dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt,
++ phba->hbqslimp.phys);
++
++ /* Free resources associated with SLI2 interface */
++ dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
++ phba->slim2p, phba->slim2p_mapping);
++
++ /* unmap adapter SLIM and Control Registers */
++ iounmap(phba->ctrl_regs_memmap_p);
++ iounmap(phba->slim_memmap_p);
++
++ idr_remove(&lpfc_hba_index, phba->brd_no);
++
++ kfree(phba);
++
++ pci_release_regions(pdev);
++ pci_disable_device(pdev);
+ }
+
+ /**
+@@ -1822,10 +2110,13 @@
+ pci_set_master(pdev);
+
+ /* Re-establishing Link */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_ESTABLISH_LINK;
++ spin_lock_irq(host->host_lock);
++ phba->pport->fc_flag |= FC_ESTABLISH_LINK;
++ spin_unlock_irq(host->host_lock);
++
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+
+ /* Take device offline; this will perform cleanup */
+@@ -1948,11 +2239,15 @@
+
+ lpfc_transport_template =
+ fc_attach_transport(&lpfc_transport_functions);
+- if (!lpfc_transport_template)
++ lpfc_vport_transport_template =
++ fc_attach_transport(&lpfc_vport_transport_functions);
++ if (!lpfc_transport_template || !lpfc_vport_transport_template)
+ return -ENOMEM;
+ error = pci_register_driver(&lpfc_driver);
+- if (error)
++ if (error) {
+ fc_release_transport(lpfc_transport_template);
++ fc_release_transport(lpfc_vport_transport_template);
++ }
+
+ return error;
+ }
+@@ -1962,6 +2257,7 @@
+ {
+ pci_unregister_driver(&lpfc_driver);
+ fc_release_transport(lpfc_transport_template);
++ fc_release_transport(lpfc_vport_transport_template);
+ }
+
+ module_init(lpfc_init);
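+
+/*
+ * A note on the board-number allocation above: lpfc_get_instance() wraps
+ * the 2.6-era idr API, which splits allocation into a GFP_KERNEL
+ * preallocation step and the id assignment proper. A minimal sketch of
+ * the full pattern (only lpfc_hba_index and the idr calls shown in this
+ * patch are taken from the driver; the surrounding shape is
+ * illustrative):
+ *
+ *	#include <linux/idr.h>
+ *
+ *	static DEFINE_IDR(lpfc_hba_index);
+ *
+ *	static int lpfc_get_instance(void)
+ *	{
+ *		int instance = -1;
+ *
+ *		if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
+ *			return -1;
+ *		if (idr_get_new(&lpfc_hba_index, NULL, &instance))
+ *			return -1;
+ *		return instance;
+ *	}
+ *
+ * idr_pre_get() returns 0 on failure; idr_get_new() returns 0 on success
+ * and stores the new id through its third argument. The id is released
+ * with idr_remove(&lpfc_hba_index, phba->brd_no), as both the probe
+ * error path and lpfc_pci_remove_one() do.
+ */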
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h 2007-12-21 15:36:12.000000000 -0500
+@@ -30,6 +30,7 @@
+ #define LOG_SLI 0x800 /* SLI events */
+ #define LOG_FCP_ERROR 0x1000 /* log errors, not underruns */
+ #define LOG_LIBDFC 0x2000 /* Libdfc events */
++#define LOG_VPORT 0x4000 /* NPIV events */
+ #define LOG_ALL_MSG 0xffff /* LOG all messages */
+
+ #define lpfc_printf_log(phba, level, mask, fmt, arg...) \
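+
+/*
+ * The new LOG_VPORT bit slots into the existing verbose-logging bitmask,
+ * so NPIV messages can be enabled selectively rather than via
+ * LOG_ALL_MSG. A hypothetical call site follows; the message number and
+ * text are invented for illustration, but the format mirrors the
+ * "%d (%d):NNNN" convention this patch uses elsewhere:
+ *
+ *	lpfc_printf_log(phba, KERN_INFO, LOG_VPORT,
+ *			"%d (%d):1800 vport state change\n",
+ *			phba->brd_no, vport->vpi);
+ *
+ * Only messages whose mask bit is set in the configured verbosity are
+ * actually emitted.
+ */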
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c 2007-12-21 15:36:12.000000000 -0500
+@@ -82,6 +82,22 @@
+ }
+
+ /**********************************************/
++/* lpfc_heart_beat Issue a HEART_BEAT */
++/* mailbox command */
++/**********************************************/
++void
++lpfc_heart_beat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++ MAILBOX_t *mb;
++
++ mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++ mb->mbxCommand = MBX_HEARTBEAT;
++ mb->mbxOwner = OWN_HOST;
++ return;
++}
++
++/**********************************************/
+ /* lpfc_read_la Issue a READ LA */
+ /* mailbox command */
+ /**********************************************/
+@@ -134,6 +150,7 @@
+ void
+ lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ MAILBOX_t *mb = &pmb->mb;
+ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+
+@@ -147,7 +164,7 @@
+ mb->un.varCfgLnk.cr_count = phba->cfg_cr_count;
+ }
+
+- mb->un.varCfgLnk.myId = phba->fc_myDID;
++ mb->un.varCfgLnk.myId = vport->fc_myDID;
+ mb->un.varCfgLnk.edtov = phba->fc_edtov;
+ mb->un.varCfgLnk.arbtov = phba->fc_arbtov;
+ mb->un.varCfgLnk.ratov = phba->fc_ratov;
+@@ -239,7 +256,7 @@
+ /* mailbox command */
+ /**********************************************/
+ int
+-lpfc_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi)
+ {
+ struct lpfc_dmabuf *mp;
+ MAILBOX_t *mb;
+@@ -270,6 +287,7 @@
+ mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
+ mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys);
+ mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys);
++ mb->un.varRdSparm.vpi = vpi;
+
+ /* save address for completion */
+ pmb->context1 = mp;
+@@ -282,7 +300,8 @@
+ /* mailbox command */
+ /********************************************/
+ void
+-lpfc_unreg_did(struct lpfc_hba * phba, uint32_t did, LPFC_MBOXQ_t * pmb)
++lpfc_unreg_did(struct lpfc_hba * phba, uint16_t vpi, uint32_t did,
++ LPFC_MBOXQ_t * pmb)
+ {
+ MAILBOX_t *mb;
+
+@@ -290,6 +309,7 @@
+ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+
+ mb->un.varUnregDID.did = did;
++ mb->un.varUnregDID.vpi = vpi;
+
+ mb->mbxCommand = MBX_UNREG_D_ID;
+ mb->mbxOwner = OWN_HOST;
+@@ -335,19 +355,17 @@
+ /* mailbox command */
+ /********************************************/
+ int
+-lpfc_reg_login(struct lpfc_hba * phba,
+- uint32_t did, uint8_t * param, LPFC_MBOXQ_t * pmb, uint32_t flag)
++lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
++ uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag)
+ {
++ MAILBOX_t *mb = &pmb->mb;
+ uint8_t *sparam;
+ struct lpfc_dmabuf *mp;
+- MAILBOX_t *mb;
+- struct lpfc_sli *psli;
+
+- psli = &phba->sli;
+- mb = &pmb->mb;
+ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+
+ mb->un.varRegLogin.rpi = 0;
++ mb->un.varRegLogin.vpi = vpi;
+ mb->un.varRegLogin.did = did;
+ mb->un.varWords[30] = flag; /* Set flag to issue action on cmpl */
+
+@@ -359,12 +377,10 @@
+ kfree(mp);
+ mb->mbxCommand = MBX_REG_LOGIN64;
+ /* REG_LOGIN: no buffers */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_MBOX,
+- "%d:0302 REG_LOGIN: no buffers Data x%x x%x\n",
+- phba->brd_no,
+- (uint32_t) did, (uint32_t) flag);
++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
++ "%d (%d):0302 REG_LOGIN: no buffers, DID x%x, "
++ "flag x%x\n",
++ phba->brd_no, vpi, did, flag);
+ return (1);
+ }
+ INIT_LIST_HEAD(&mp->list);
+@@ -389,7 +405,8 @@
+ /* mailbox command */
+ /**********************************************/
+ void
+-lpfc_unreg_login(struct lpfc_hba * phba, uint32_t rpi, LPFC_MBOXQ_t * pmb)
++lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi,
++ LPFC_MBOXQ_t * pmb)
+ {
+ MAILBOX_t *mb;
+
+@@ -398,12 +415,52 @@
+
+ mb->un.varUnregLogin.rpi = (uint16_t) rpi;
+ mb->un.varUnregLogin.rsvd1 = 0;
++ mb->un.varUnregLogin.vpi = vpi;
+
+ mb->mbxCommand = MBX_UNREG_LOGIN;
+ mb->mbxOwner = OWN_HOST;
+ return;
+ }
+
++/**************************************************/
++/* lpfc_reg_vpi Issue a REG_VPI */
++/* mailbox command */
++/**************************************************/
++void
++lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
++ LPFC_MBOXQ_t *pmb)
++{
++ MAILBOX_t *mb = &pmb->mb;
++
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varRegVpi.vpi = vpi;
++ mb->un.varRegVpi.sid = sid;
++
++ mb->mbxCommand = MBX_REG_VPI;
++ mb->mbxOwner = OWN_HOST;
++ return;
++
++}
++
++/**************************************************/
++/* lpfc_unreg_vpi Issue a UNREG_VPI */
++/* mailbox command */
++/**************************************************/
++void
++lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb)
++{
++ MAILBOX_t *mb = &pmb->mb;
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++ mb->un.varUnregVpi.vpi = vpi;
++
++ mb->mbxCommand = MBX_UNREG_VPI;
++ mb->mbxOwner = OWN_HOST;
++ return;
++
++}
++
+ static void
+ lpfc_config_pcb_setup(struct lpfc_hba * phba)
+ {
+@@ -412,14 +469,18 @@
+ PCB_t *pcbp = &phba->slim2p->pcb;
+ dma_addr_t pdma_addr;
+ uint32_t offset;
+- uint32_t iocbCnt;
++ uint32_t iocbCnt = 0;
+ int i;
+
+ pcbp->maxRing = (psli->num_rings - 1);
+
+- iocbCnt = 0;
+ for (i = 0; i < psli->num_rings; i++) {
+ pring = &psli->ring[i];
++
++ pring->sizeCiocb = phba->sli_rev == 3 ? SLI3_IOCB_CMD_SIZE:
++ SLI2_IOCB_CMD_SIZE;
++ pring->sizeRiocb = phba->sli_rev == 3 ? SLI3_IOCB_RSP_SIZE:
++ SLI2_IOCB_RSP_SIZE;
+ /* A ring MUST have both cmd and rsp entries defined to be
+ valid */
+ if ((pring->numCiocb == 0) || (pring->numRiocb == 0)) {
+@@ -434,20 +495,18 @@
+ continue;
+ }
+ /* Command ring setup for ring */
+- pring->cmdringaddr =
+- (void *)&phba->slim2p->IOCBs[iocbCnt];
++ pring->cmdringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt];
+ pcbp->rdsc[i].cmdEntries = pring->numCiocb;
+
+- offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
+- (uint8_t *)phba->slim2p;
++ offset = (uint8_t *) &phba->slim2p->IOCBs[iocbCnt] -
++ (uint8_t *) phba->slim2p;
+ pdma_addr = phba->slim2p_mapping + offset;
+ pcbp->rdsc[i].cmdAddrHigh = putPaddrHigh(pdma_addr);
+ pcbp->rdsc[i].cmdAddrLow = putPaddrLow(pdma_addr);
+ iocbCnt += pring->numCiocb;
+
+ /* Response ring setup for ring */
+- pring->rspringaddr =
+- (void *)&phba->slim2p->IOCBs[iocbCnt];
++ pring->rspringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt];
+
+ pcbp->rdsc[i].rspEntries = pring->numRiocb;
+ offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
+@@ -462,16 +521,108 @@
+ void
+ lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
+ {
+- MAILBOX_t *mb;
+-
+- mb = &pmb->mb;
++ MAILBOX_t *mb = &pmb->mb;
+ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+ mb->un.varRdRev.cv = 1;
++ mb->un.varRdRev.v3req = 1; /* Request SLI3 info */
+ mb->mbxCommand = MBX_READ_REV;
+ mb->mbxOwner = OWN_HOST;
+ return;
+ }
+
++static void
++lpfc_build_hbq_profile2(struct config_hbq_var *hbqmb,
++ struct lpfc_hbq_init *hbq_desc)
++{
++ hbqmb->profiles.profile2.seqlenbcnt = hbq_desc->seqlenbcnt;
++ hbqmb->profiles.profile2.maxlen = hbq_desc->maxlen;
++ hbqmb->profiles.profile2.seqlenoff = hbq_desc->seqlenoff;
++}
++
++static void
++lpfc_build_hbq_profile3(struct config_hbq_var *hbqmb,
++ struct lpfc_hbq_init *hbq_desc)
++{
++ hbqmb->profiles.profile3.seqlenbcnt = hbq_desc->seqlenbcnt;
++ hbqmb->profiles.profile3.maxlen = hbq_desc->maxlen;
++ hbqmb->profiles.profile3.cmdcodeoff = hbq_desc->cmdcodeoff;
++ hbqmb->profiles.profile3.seqlenoff = hbq_desc->seqlenoff;
++ memcpy(&hbqmb->profiles.profile3.cmdmatch, hbq_desc->cmdmatch,
++ sizeof(hbqmb->profiles.profile3.cmdmatch));
++}
++
++static void
++lpfc_build_hbq_profile5(struct config_hbq_var *hbqmb,
++ struct lpfc_hbq_init *hbq_desc)
++{
++ hbqmb->profiles.profile5.seqlenbcnt = hbq_desc->seqlenbcnt;
++ hbqmb->profiles.profile5.maxlen = hbq_desc->maxlen;
++ hbqmb->profiles.profile5.cmdcodeoff = hbq_desc->cmdcodeoff;
++ hbqmb->profiles.profile5.seqlenoff = hbq_desc->seqlenoff;
++ memcpy(&hbqmb->profiles.profile5.cmdmatch, hbq_desc->cmdmatch,
++ sizeof(hbqmb->profiles.profile5.cmdmatch));
++}
++
++void
++lpfc_config_hbq(struct lpfc_hba *phba, struct lpfc_hbq_init *hbq_desc,
++ uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb)
++{
++ int i;
++ MAILBOX_t *mb = &pmb->mb;
++ struct config_hbq_var *hbqmb = &mb->un.varCfgHbq;
++
++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++ hbqmb->entry_count = hbq_desc->entry_count; /* # entries in HBQ */
++ hbqmb->recvNotify = hbq_desc->rn; /* Receive
++ * Notification */
++ hbqmb->numMask = hbq_desc->mask_count; /* # R_CTL/TYPE masks
++ * # in words 0-19 */
++ hbqmb->profile = hbq_desc->profile; /* Selection profile:
++ * 0 = all,
++ * 7 = logentry */
++ hbqmb->ringMask = hbq_desc->ring_mask; /* Binds HBQ to a ring
++ * e.g. Ring0=b0001,
++ * ring2=b0100 */
++ hbqmb->headerLen = hbq_desc->headerLen; /* 0 if not profile 4
++ * or 5 */
++ hbqmb->logEntry = hbq_desc->logEntry; /* Set to 1 if this
++ * HBQ will be used
++ * for LogEntry
++ * buffers */
++ hbqmb->hbqaddrLow = putPaddrLow(phba->hbqslimp.phys) +
++ hbq_entry_index * sizeof(struct lpfc_hbq_entry);
++ hbqmb->hbqaddrHigh = putPaddrHigh(phba->hbqslimp.phys);
++
++ mb->mbxCommand = MBX_CONFIG_HBQ;
++ mb->mbxOwner = OWN_HOST;
++
++ /* Copy info for profiles 2,3,5. For other
++ * profiles this area is reserved
++ */
++ if (hbq_desc->profile == 2)
++ lpfc_build_hbq_profile2(hbqmb, hbq_desc);
++ else if (hbq_desc->profile == 3)
++ lpfc_build_hbq_profile3(hbqmb, hbq_desc);
++ else if (hbq_desc->profile == 5)
++ lpfc_build_hbq_profile5(hbqmb, hbq_desc);
++
++ /* Return if no rctl / type masks for this HBQ */
++ if (!hbq_desc->mask_count)
++ return;
++
++ /* Otherwise we set up specific rctl / type masks for this HBQ */
++ for (i = 0; i < hbq_desc->mask_count; i++) {
++ hbqmb->hbqMasks[i].tmatch = hbq_desc->hbqMasks[i].tmatch;
++ hbqmb->hbqMasks[i].tmask = hbq_desc->hbqMasks[i].tmask;
++ hbqmb->hbqMasks[i].rctlmatch = hbq_desc->hbqMasks[i].rctlmatch;
++ hbqmb->hbqMasks[i].rctlmask = hbq_desc->hbqMasks[i].rctlmask;
++ }
++
++ return;
++}
++
++
++
+ void
+ lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb)
+ {
+@@ -514,15 +665,16 @@
+ }
+
+ void
+-lpfc_config_port(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
++ MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr;
+ MAILBOX_t *mb = &pmb->mb;
+ dma_addr_t pdma_addr;
+ uint32_t bar_low, bar_high;
+ size_t offset;
+ struct lpfc_hgp hgp;
+- void __iomem *to_slim;
+ int i;
++ uint32_t pgp_offset;
+
+ memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
+ mb->mbxCommand = MBX_CONFIG_PORT;
+@@ -535,12 +687,29 @@
+ mb->un.varCfgPort.pcbLow = putPaddrLow(pdma_addr);
+ mb->un.varCfgPort.pcbHigh = putPaddrHigh(pdma_addr);
+
++ /* If HBA supports SLI=3 ask for it */
++
++ if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) {
++ mb->un.varCfgPort.cerbm = 1; /* Request HBQs */
++ mb->un.varCfgPort.max_hbq = 1; /* Requesting 2 HBQs */
++ if (phba->max_vpi && phba->cfg_npiv_enable &&
++ phba->vpd.sli3Feat.cmv) {
++ mb->un.varCfgPort.max_vpi = phba->max_vpi;
++ mb->un.varCfgPort.cmv = 1;
++ phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
++ } else
++ mb->un.varCfgPort.max_vpi = phba->max_vpi = 0;
++ } else
++ phba->sli_rev = 2;
++ mb->un.varCfgPort.sli_mode = phba->sli_rev;
++
+ /* Now setup pcb */
+ phba->slim2p->pcb.type = TYPE_NATIVE_SLI2;
+ phba->slim2p->pcb.feature = FEATURE_INITIAL_SLI2;
+
+ /* Setup Mailbox pointers */
+- phba->slim2p->pcb.mailBoxSize = sizeof(MAILBOX_t);
++ phba->slim2p->pcb.mailBoxSize = offsetof(MAILBOX_t, us) +
++ sizeof(struct sli2_desc);
+ offset = (uint8_t *)&phba->slim2p->mbx - (uint8_t *)phba->slim2p;
+ pdma_addr = phba->slim2p_mapping + offset;
+ phba->slim2p->pcb.mbAddrHigh = putPaddrHigh(pdma_addr);
+@@ -568,29 +737,70 @@
+ pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_0, &bar_low);
+ pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_1, &bar_high);
+
++ /*
++ * Set up HGP - Port Memory
++ *
++ * The port expects the host get/put pointers to reside in memory
++ * following the "non-diagnostic" mode mailbox (32 words, 0x80 bytes)
++ * area of SLIM. In SLI-2 mode, there's an additional 16 reserved
++ * words (0x40 bytes). This area is not reserved if HBQs are
++ * configured in SLI-3.
++ *
++ * CR0Put - SLI2(no HBQs) = 0xc0, With HBQs = 0x80
++ * RR0Get 0xc4 0x84
++ * CR1Put 0xc8 0x88
++ * RR1Get 0xcc 0x8c
++ * CR2Put 0xd0 0x90
++ * RR2Get 0xd4 0x94
++ * CR3Put 0xd8 0x98
++ * RR3Get 0xdc 0x9c
++ *
++ * Reserved 0xa0-0xbf
++ * If HBQs configured:
++ * HBQ 0 Put ptr 0xc0
++ * HBQ 1 Put ptr 0xc4
++ * HBQ 2 Put ptr 0xc8
++ * ......
++ * HBQ(M-1)Put Pointer 0xc0+(M-1)*4
++ *
++ */
++
++ if (phba->sli_rev == 3) {
++ phba->host_gp = &mb_slim->us.s3.host[0];
++ phba->hbq_put = &mb_slim->us.s3.hbq_put[0];
++ } else {
++ phba->host_gp = &mb_slim->us.s2.host[0];
++ phba->hbq_put = NULL;
++ }
+
+ /* mask off BAR0's flag bits 0 - 3 */
+ phba->slim2p->pcb.hgpAddrLow = (bar_low & PCI_BASE_ADDRESS_MEM_MASK) +
+- (SLIMOFF*sizeof(uint32_t));
++ (void __iomem *) phba->host_gp -
++ (void __iomem *)phba->MBslimaddr;
+ if (bar_low & PCI_BASE_ADDRESS_MEM_TYPE_64)
+ phba->slim2p->pcb.hgpAddrHigh = bar_high;
+ else
+ phba->slim2p->pcb.hgpAddrHigh = 0;
+ /* write HGP data to SLIM at the required longword offset */
+ memset(&hgp, 0, sizeof(struct lpfc_hgp));
+- to_slim = phba->MBslimaddr + (SLIMOFF*sizeof (uint32_t));
+
+ for (i=0; i < phba->sli.num_rings; i++) {
+- lpfc_memcpy_to_slim(to_slim, &hgp, sizeof(struct lpfc_hgp));
+- to_slim += sizeof (struct lpfc_hgp);
++ lpfc_memcpy_to_slim(phba->host_gp + i, &hgp,
++ sizeof(*phba->host_gp));
+ }
+
+ /* Setup Port Group ring pointer */
+- offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++ if (phba->sli_rev == 3)
++ pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s3_pgp.port -
+ (uint8_t *)phba->slim2p;
+- pdma_addr = phba->slim2p_mapping + offset;
++ else
++ pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++ (uint8_t *)phba->slim2p;
++
++ pdma_addr = phba->slim2p_mapping + pgp_offset;
+ phba->slim2p->pcb.pgpAddrHigh = putPaddrHigh(pdma_addr);
+ phba->slim2p->pcb.pgpAddrLow = putPaddrLow(pdma_addr);
++ phba->hbq_get = &phba->slim2p->mbx.us.s3_pgp.hbq_get[0];
+
+ /* Use callback routine to setp rings in the pcb */
+ lpfc_config_pcb_setup(phba);
+@@ -606,11 +816,7 @@
+
+ /* Swap PCB if needed */
+ lpfc_sli_pcimem_bcopy(&phba->slim2p->pcb, &phba->slim2p->pcb,
+- sizeof (PCB_t));
+-
+- lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+- "%d:0405 Service Level Interface (SLI) 2 selected\n",
+- phba->brd_no);
++ sizeof(PCB_t));
+ }
+
+ void
+@@ -644,15 +850,23 @@
+ LPFC_MBOXQ_t *mbq = NULL;
+ struct lpfc_sli *psli = &phba->sli;
+
+- list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t,
+- list);
+- if (mbq) {
++ list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t, list);
++ if (mbq)
+ psli->mboxq_cnt--;
+- }
+
+ return mbq;
+ }
+
++void
++lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
++{
++ /* This function expects to be called from interrupt context */
++ spin_lock(&phba->hbalock);
++ list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
++ spin_unlock(&phba->hbalock);
++ return;
++}
++
+ int
+ lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd)
+ {
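+
+/*
+ * Every builder in this file follows the same shape: zero the
+ * LPFC_MBOXQ_t, fill in the command-specific union members, set
+ * mbxCommand, and mark the mailbox OWN_HOST; a caller then submits the
+ * prepared command through the SLI layer. A simplified sketch for the
+ * new HEARTBEAT command, using only calls that appear elsewhere in this
+ * patch (error handling trimmed, completion routine chosen for
+ * illustration):
+ *
+ *	LPFC_MBOXQ_t *pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ *
+ *	if (!pmb)
+ *		return -ENOMEM;
+ *	lpfc_heart_beat(phba, pmb);
+ *	pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ *	if (lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT) == MBX_NOT_FINISHED)
+ *		mempool_free(pmb, phba->mbox_mem_pool);
+ *
+ * On successful issue, ownership of pmb passes to the SLI layer and it
+ * is returned through the completion routine, so the caller frees it
+ * only on the MBX_NOT_FINISHED path.
+ */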
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,7 @@
+ /*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for *
+ * Fibre Channel Host Bus Adapters. *
+- * Copyright (C) 2004-2005 Emulex. All rights reserved. *
++ * Copyright (C) 2004-2006 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * Portions Copyright (C) 2004-2005 Christoph Hellwig *
+@@ -38,10 +38,13 @@
+ #define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */
+ #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */
+
++
++
+ int
+ lpfc_mem_alloc(struct lpfc_hba * phba)
+ {
+ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++ int longs;
+ int i;
+
+ phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool",
+@@ -80,10 +83,27 @@
+ if (!phba->nlp_mem_pool)
+ goto fail_free_mbox_pool;
+
++ phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev,
++ LPFC_BPL_SIZE, 8, 0);
++ if (!phba->lpfc_hbq_pool)
++ goto fail_free_nlp_mem_pool;
++
++ /* vpi zero is reserved for the physical port so add 1 to max */
++ longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG;
++ phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL);
++ if (!phba->vpi_bmask)
++ goto fail_free_hbq_pool;
++
+ return 0;
+
++ fail_free_hbq_pool:
++ lpfc_sli_hbqbuf_free_all(phba);
++ fail_free_nlp_mem_pool:
++ mempool_destroy(phba->nlp_mem_pool);
++ phba->nlp_mem_pool = NULL;
+ fail_free_mbox_pool:
+ mempool_destroy(phba->mbox_mem_pool);
++ phba->mbox_mem_pool = NULL;
+ fail_free_mbuf_pool:
+ while (i--)
+ pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+@@ -91,8 +111,10 @@
+ kfree(pool->elements);
+ fail_free_lpfc_mbuf_pool:
+ pci_pool_destroy(phba->lpfc_mbuf_pool);
++ phba->lpfc_mbuf_pool = NULL;
+ fail_free_dma_buf_pool:
+ pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
++ phba->lpfc_scsi_dma_buf_pool = NULL;
+ fail:
+ return -ENOMEM;
+ }
+@@ -106,6 +128,9 @@
+ struct lpfc_dmabuf *mp;
+ int i;
+
++ kfree(phba->vpi_bmask);
++ lpfc_sli_hbqbuf_free_all(phba);
++
+ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
+ mp = (struct lpfc_dmabuf *) (mbox->context1);
+ if (mp) {
+@@ -115,6 +140,15 @@
+ list_del(&mbox->list);
+ mempool_free(mbox, phba->mbox_mem_pool);
+ }
++ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
++ mp = (struct lpfc_dmabuf *) (mbox->context1);
++ if (mp) {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ list_del(&mbox->list);
++ mempool_free(mbox, phba->mbox_mem_pool);
++ }
+
+ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+ if (psli->mbox_active) {
+@@ -132,12 +166,20 @@
+ pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+ pool->elements[i].phys);
+ kfree(pool->elements);
++
++ pci_pool_destroy(phba->lpfc_hbq_pool);
+ mempool_destroy(phba->nlp_mem_pool);
+ mempool_destroy(phba->mbox_mem_pool);
+
+ pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
+ pci_pool_destroy(phba->lpfc_mbuf_pool);
+
++ phba->lpfc_hbq_pool = NULL;
++ phba->nlp_mem_pool = NULL;
++ phba->mbox_mem_pool = NULL;
++ phba->lpfc_scsi_dma_buf_pool = NULL;
++ phba->lpfc_mbuf_pool = NULL;
++
+ /* Free the iocb lookup array */
+ kfree(psli->iocbq_lookup);
+ psli->iocbq_lookup = NULL;
+@@ -148,20 +190,23 @@
+ lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
+ {
+ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++ unsigned long iflags;
+ void *ret;
+
+ ret = pci_pool_alloc(phba->lpfc_mbuf_pool, GFP_KERNEL, handle);
+
+- if (!ret && ( mem_flags & MEM_PRI) && pool->current_count) {
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ if (!ret && (mem_flags & MEM_PRI) && pool->current_count) {
+ pool->current_count--;
+ ret = pool->elements[pool->current_count].virt;
+ *handle = pool->elements[pool->current_count].phys;
+ }
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ return ret;
+ }
+
+ void
+-lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
++__lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
+ {
+ struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
+
+@@ -174,3 +219,51 @@
+ }
+ return;
+ }
++
++void
++lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
++{
++ unsigned long iflags;
++
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ __lpfc_mbuf_free(phba, virt, dma);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ return;
++}
++
++void *
++lpfc_hbq_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
++{
++ void *ret;
++ ret = pci_pool_alloc(phba->lpfc_hbq_pool, GFP_ATOMIC, handle);
++ return ret;
++}
++
++void
++lpfc_hbq_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma)
++{
++ pci_pool_free(phba->lpfc_hbq_pool, virt, dma);
++ return;
++}
++
++void
++lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
++{
++ struct hbq_dmabuf *hbq_entry;
++
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++ hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf);
++ if (hbq_entry->tag == -1) {
++ lpfc_hbq_free(phba, hbq_entry->dbuf.virt,
++ hbq_entry->dbuf.phys);
++ kfree(hbq_entry);
++ } else {
++ lpfc_sli_free_hbq(phba, hbq_entry);
++ }
++ } else {
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ }
++ return;
++}
++
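+
+/*
+ * lpfc_in_buf_free() above recovers the HBQ wrapper from its embedded
+ * struct lpfc_dmabuf with container_of(). Assuming a wrapper laid out
+ * roughly as below (only the dbuf and tag members are visible in this
+ * patch; anything beyond them is hypothetical):
+ *
+ *	struct hbq_dmabuf {
+ *		struct lpfc_dmabuf dbuf;
+ *		uint32_t tag;
+ *	};
+ *
+ *	struct hbq_dmabuf *hbq_entry =
+ *		container_of(mp, struct hbq_dmabuf, dbuf);
+ *
+ * container_of() subtracts offsetof(struct hbq_dmabuf, dbuf) from mp,
+ * so the conversion is valid only when mp really is embedded in an
+ * hbq_dmabuf, which is what the LPFC_SLI3_HBQ_ENABLED test guarantees
+ * here. A tag of -1 marks a buffer that was never posted to the
+ * hardware, so it is freed directly instead of via lpfc_sli_free_hbq().
+ */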
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,4 +1,4 @@
+-/*******************************************************************
++/*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for *
+ * Fibre Channel Host Bus Adapters. *
+ * Copyright (C) 2004-2007 Emulex. All rights reserved. *
+@@ -35,20 +35,22 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+
+
+ /* Called to verify a rcv'ed ADISC was intended for us. */
+ static int
+-lpfc_check_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
+- struct lpfc_name * nn, struct lpfc_name * pn)
++lpfc_check_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ struct lpfc_name *nn, struct lpfc_name *pn)
+ {
+ /* Compare the ADISC rsp WWNN / WWPN matches our internal node
+ * table entry for that node.
+ */
+- if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)) != 0)
++ if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)))
+ return 0;
+
+- if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)) != 0)
++ if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)))
+ return 0;
+
+ /* we match, return success */
+@@ -56,11 +58,10 @@
+ }
+
+ int
+-lpfc_check_sparm(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, struct serv_parm * sp,
+- uint32_t class)
++lpfc_check_sparm(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ struct serv_parm * sp, uint32_t class)
+ {
+- volatile struct serv_parm *hsp = &phba->fc_sparam;
++ volatile struct serv_parm *hsp = &vport->fc_sparam;
+ uint16_t hsp_value, ssp_value = 0;
+
+ /*
+@@ -75,12 +76,14 @@
+ hsp->cls1.rcvDataSizeLsb;
+ ssp_value = (sp->cls1.rcvDataSizeMsb << 8) |
+ sp->cls1.rcvDataSizeLsb;
++ if (!ssp_value)
++ goto bad_service_param;
+ if (ssp_value > hsp_value) {
+ sp->cls1.rcvDataSizeLsb = hsp->cls1.rcvDataSizeLsb;
+ sp->cls1.rcvDataSizeMsb = hsp->cls1.rcvDataSizeMsb;
+ }
+ } else if (class == CLASS1) {
+- return 0;
++ goto bad_service_param;
+ }
+
+ if (sp->cls2.classValid) {
+@@ -88,12 +91,14 @@
+ hsp->cls2.rcvDataSizeLsb;
+ ssp_value = (sp->cls2.rcvDataSizeMsb << 8) |
+ sp->cls2.rcvDataSizeLsb;
++ if (!ssp_value)
++ goto bad_service_param;
+ if (ssp_value > hsp_value) {
+ sp->cls2.rcvDataSizeLsb = hsp->cls2.rcvDataSizeLsb;
+ sp->cls2.rcvDataSizeMsb = hsp->cls2.rcvDataSizeMsb;
+ }
+ } else if (class == CLASS2) {
+- return 0;
++ goto bad_service_param;
+ }
+
+ if (sp->cls3.classValid) {
+@@ -101,12 +106,14 @@
+ hsp->cls3.rcvDataSizeLsb;
+ ssp_value = (sp->cls3.rcvDataSizeMsb << 8) |
+ sp->cls3.rcvDataSizeLsb;
++ if (!ssp_value)
++ goto bad_service_param;
+ if (ssp_value > hsp_value) {
+ sp->cls3.rcvDataSizeLsb = hsp->cls3.rcvDataSizeLsb;
+ sp->cls3.rcvDataSizeMsb = hsp->cls3.rcvDataSizeMsb;
+ }
+ } else if (class == CLASS3) {
+- return 0;
++ goto bad_service_param;
+ }
+
+ /*
+@@ -125,11 +132,21 @@
+ memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name));
+ memcpy(&ndlp->nlp_portname, &sp->portName, sizeof (struct lpfc_name));
+ return 1;
++bad_service_param:
++ lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0207 Device %x "
++ "(%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x) sent "
++ "invalid service parameters. Ignoring device.\n",
++ vport->phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID,
++ sp->nodeName.u.wwn[0], sp->nodeName.u.wwn[1],
++ sp->nodeName.u.wwn[2], sp->nodeName.u.wwn[3],
++ sp->nodeName.u.wwn[4], sp->nodeName.u.wwn[5],
++ sp->nodeName.u.wwn[6], sp->nodeName.u.wwn[7]);
++ return 0;
+ }
+
+ static void *
+-lpfc_check_elscmpl_iocb(struct lpfc_hba * phba,
+- struct lpfc_iocbq *cmdiocb,
++lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb)
+ {
+ struct lpfc_dmabuf *pcmd, *prsp;
+@@ -168,32 +185,29 @@
+ * routine effectively results in a "software abort".
+ */
+ int
+-lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ LIST_HEAD(completions);
+- struct lpfc_sli *psli;
+- struct lpfc_sli_ring *pring;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ struct lpfc_iocbq *iocb, *next_iocb;
+ IOCB_t *cmd;
+
+ /* Abort outstanding I/O on NPort <nlp_DID> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0205 Abort outstanding I/O on NPort x%x "
++ "%d (%d):0205 Abort outstanding I/O on NPort x%x "
+ "Data: x%x x%x x%x\n",
+- phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
+- ndlp->nlp_state, ndlp->nlp_rpi);
++ phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID,
++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+
+- psli = &phba->sli;
+- pring = &psli->ring[LPFC_ELS_RING];
++ lpfc_fabric_abort_nport(ndlp);
+
+ /* First check the txq */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
+- /* Check to see if iocb matches the nport we are looking
+- for */
++ /* Check to see if iocb matches the nport we are looking for */
+ if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+- /* It matches, so deque and call compl with an
+- error */
++ /* It matches, so dequeue and call compl with an error */
+ list_move_tail(&iocb->list, &completions);
+ pring->txq_cnt--;
+ }
+@@ -201,37 +215,39 @@
+
+ /* Next check the txcmplq */
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
+- /* Check to see if iocb matches the nport we are looking
+- for */
+- if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp))
++ /* Check to see if iocb matches the nport we are looking for */
++ if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+ lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ }
++ spin_unlock_irq(&phba->hbalock);
+
+ while (!list_empty(&completions)) {
+ iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ cmd = &iocb->iocb;
+- list_del(&iocb->list);
++ list_del_init(&iocb->list);
+
+- if (iocb->iocb_cmpl) {
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
+ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ (iocb->iocb_cmpl) (phba, iocb, iocb);
+- } else
+- lpfc_sli_release_iocbq(phba, iocb);
++ }
+ }
+
+ /* If we are delaying issuing an ELS command, cancel it */
+ if (ndlp->nlp_flag & NLP_DELAY_TMO)
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(phba->pport, ndlp);
+ return 0;
+ }
+
+ static int
+-lpfc_rcv_plogi(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp,
++lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct lpfc_iocbq *cmdiocb)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_dmabuf *pcmd;
+ uint32_t *lp;
+ IOCB_t *icmd;
+@@ -241,14 +257,14 @@
+ int rc;
+
+ memset(&stat, 0, sizeof (struct ls_rjt));
+- if (phba->hba_state <= LPFC_FLOGI) {
++ if (vport->port_state <= LPFC_FLOGI) {
+ /* Before responding to PLOGI, check for pt2pt mode.
+ * If we are pt2pt, with an outstanding FLOGI, abort
+ * the FLOGI and resend it first.
+ */
+- if (phba->fc_flag & FC_PT2PT) {
++ if (vport->fc_flag & FC_PT2PT) {
+ lpfc_els_abort_flogi(phba);
+- if (!(phba->fc_flag & FC_PT2PT_PLOGI)) {
++ if (!(vport->fc_flag & FC_PT2PT_PLOGI)) {
+ /* If the other side is supposed to initiate
+ * the PLOGI anyway, just ACC it now and
+ * move on with discovery.
+@@ -257,45 +273,42 @@
+ phba->fc_ratov = FF_DEF_RATOV;
+ /* Start discovery - this should just do
+ CLEAR_LA */
+- lpfc_disc_start(phba);
+- } else {
+- lpfc_initial_flogi(phba);
+- }
++ lpfc_disc_start(vport);
++ } else
++ lpfc_initial_flogi(vport);
+ } else {
+ stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb,
+- ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb,
++ ndlp, NULL);
+ return 0;
+ }
+ }
+ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ lp = (uint32_t *) pcmd->virt;
+ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+- if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3) == 0)) {
++ if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3) == 0)) {
+ /* Reject this request because invalid parameters */
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ return 0;
+ }
+ icmd = &cmdiocb->iocb;
+
+ /* PLOGI chkparm OK */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
+ ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag,
+ ndlp->nlp_rpi);
+
+- if ((phba->cfg_fcp_class == 2) &&
+- (sp->cls2.classValid)) {
++ if (phba->cfg_fcp_class == 2 && sp->cls2.classValid)
+ ndlp->nlp_fcp_info |= CLASS2;
+- } else {
++ else
+ ndlp->nlp_fcp_info |= CLASS3;
+- }
++
+ ndlp->nlp_class_sup = 0;
+ if (sp->cls1.classValid)
+ ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -317,35 +330,37 @@
+ case NLP_STE_PRLI_ISSUE:
+ case NLP_STE_UNMAPPED_NODE:
+ case NLP_STE_MAPPED_NODE:
+- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0);
+ return 1;
+ }
+
+- if ((phba->fc_flag & FC_PT2PT)
+- && !(phba->fc_flag & FC_PT2PT_PLOGI)) {
++ if ((vport->fc_flag & FC_PT2PT) &&
++ !(vport->fc_flag & FC_PT2PT_PLOGI)) {
+ /* rcv'ed PLOGI decides what our NPortId will be */
+- phba->fc_myDID = icmd->un.rcvels.parmRo;
++ vport->fc_myDID = icmd->un.rcvels.parmRo;
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (mbox == NULL)
+ goto out;
+ lpfc_config_link(phba, mbox);
+ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ mbox->vport = vport;
+ rc = lpfc_sli_issue_mbox
+ (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
+ if (rc == MBX_NOT_FINISHED) {
+- mempool_free( mbox, phba->mbox_mem_pool);
++ mempool_free(mbox, phba->mbox_mem_pool);
+ goto out;
+ }
+
+- lpfc_can_disctmo(phba);
++ lpfc_can_disctmo(vport);
+ }
+ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (mbox == NULL)
++ if (!mbox)
+ goto out;
+
+- if (lpfc_reg_login(phba, icmd->un.rcvels.remoteID,
+- (uint8_t *) sp, mbox, 0)) {
+- mempool_free( mbox, phba->mbox_mem_pool);
++ rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID,
++ (uint8_t *) sp, mbox, 0);
++ if (rc) {
++ mempool_free(mbox, phba->mbox_mem_pool);
+ goto out;
+ }
+
+@@ -357,7 +372,10 @@
+ * mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox
+ * command issued in lpfc_cmpl_els_acc().
+ */
++ mbox->vport = vport;
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
++ spin_unlock_irq(shost->host_lock);
+
+ /*
+ * If there is an outstanding PLOGI issued, abort it before
+@@ -373,21 +391,38 @@
+ lpfc_els_abort(phba, ndlp);
+ }
+
+- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
++ if ((vport->port_type == LPFC_NPIV_PORT &&
++ phba->cfg_vport_restrict_login)) {
++
++ /* In order to preserve RPIs, we want to cleanup
++ * the default RPI the firmware created to rcv
++ * this ELS request. The only way to do this is
++ * to register, then unregister the RPI.
++ */
++ spin_lock_irq(shost->host_lock);
++ ndlp->nlp_flag |= NLP_RM_DFLT_RPI;
++ spin_unlock_irq(shost->host_lock);
++ stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb,
++ ndlp, mbox);
++ return 1;
++ }
++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
+ return 1;
+
+ out:
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ return 0;
+ }
+
+ static int
+-lpfc_rcv_padisc(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp,
++lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct lpfc_iocbq *cmdiocb)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_dmabuf *pcmd;
+ struct serv_parm *sp;
+ struct lpfc_name *pnn, *ppn;
+@@ -412,12 +447,11 @@
+ }
+
+ icmd = &cmdiocb->iocb;
+- if ((icmd->ulpStatus == 0) &&
+- (lpfc_check_adisc(phba, ndlp, pnn, ppn))) {
++ if (icmd->ulpStatus == 0 && lpfc_check_adisc(vport, ndlp, pnn, ppn)) {
+ if (cmd == ELS_CMD_ADISC) {
+- lpfc_els_rsp_adisc_acc(phba, cmdiocb, ndlp);
++ lpfc_els_rsp_adisc_acc(vport, cmdiocb, ndlp);
+ } else {
+- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp,
++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp,
+ NULL, 0);
+ }
+ return 1;
+@@ -427,55 +461,57 @@
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+ stat.un.b.vendorUnique = 0;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+
+ /* 1 sec timeout */
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ return 0;
+ }
+
+ static int
+-lpfc_rcv_logo(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp,
+- struct lpfc_iocbq *cmdiocb,
+- uint32_t els_cmd)
++lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ struct lpfc_iocbq *cmdiocb, uint32_t els_cmd)
+ {
+- /* Put ndlp on NPR list with 1 sec timeout for plogi, ACC logo */
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ /* Put ndlp in NPR state with 1 sec timeout for plogi, ACC logo */
+ /* Only call LOGO ACC for first LOGO, this avoids sending unnecessary
+ * PLOGIs during LOGO storms from a device.
+ */
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_LOGO_ACC;
++ spin_unlock_irq(shost->host_lock);
+ if (els_cmd == ELS_CMD_PRLO)
+- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ else
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+
+ if (!(ndlp->nlp_type & NLP_FABRIC) ||
+ (ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
+ /* Only try to re-login if this is NOT a Fabric Node */
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ } else {
+ ndlp->nlp_prev_state = ndlp->nlp_state;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ }
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ /* The driver has to wait until the ACC completes before it continues
+ * processing the LOGO. The action will resume in
+ * lpfc_cmpl_els_logo_acc routine. Since part of processing includes an
+@@ -485,8 +521,7 @@
+ }
+
+ static void
+-lpfc_rcv_prli(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp,
++lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct lpfc_iocbq *cmdiocb)
+ {
+ struct lpfc_dmabuf *pcmd;
+@@ -501,8 +536,7 @@
+
+ ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR);
+ ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
+- if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) &&
+- (npr->prliType == PRLI_FCP_TYPE)) {
++ if (npr->prliType == PRLI_FCP_TYPE) {
+ if (npr->initiatorFunc)
+ ndlp->nlp_type |= NLP_FCP_INITIATOR;
+ if (npr->targetFunc)
+@@ -517,36 +551,42 @@
+ roles |= FC_RPORT_ROLE_FCP_INITIATOR;
+ if (ndlp->nlp_type & NLP_FCP_TARGET)
+ roles |= FC_RPORT_ROLE_FCP_TARGET;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport rolechg: role:x%x did:x%x flg:x%x",
++ roles, ndlp->nlp_DID, ndlp->nlp_flag);
++
+ fc_remote_port_rolechg(rport, roles);
+ }
+ }
+
+ static uint32_t
+-lpfc_disc_set_adisc(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp)
++lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++
+ /* Check config parameter use-adisc or FCP-2 */
+- if ((phba->cfg_use_adisc == 0) &&
+- !(phba->fc_flag & FC_RSCN_MODE)) {
+- if (!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE))
+- return 0;
+- }
+- spin_lock_irq(phba->host->host_lock);
++ if ((phba->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) ||
++ ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ return 1;
++ }
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++ lpfc_unreg_rpi(vport, ndlp);
++ return 0;
+ }
+
+ static uint32_t
+-lpfc_disc_illegal(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0253 Illegal State Transition: node x%x event x%x, "
+- "state x%x Data: x%x x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0253 Illegal State Transition: node x%x "
++ "event x%x, state x%x Data: x%x x%x\n",
++ vport->phba->brd_no, vport->vpi,
+ ndlp->nlp_DID, evt, ndlp->nlp_state, ndlp->nlp_rpi,
+ ndlp->nlp_flag);
+ return ndlp->nlp_state;
+@@ -555,150 +595,161 @@
+ /* Start of Discovery State Machine routines */
+
+ static uint32_t
+-lpfc_rcv_plogi_unused_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+
+- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) {
+ ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ return ndlp->nlp_state;
+ }
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+
+ static uint32_t
+-lpfc_rcv_els_unused_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_els_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- lpfc_issue_els_logo(phba, ndlp, 0);
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_issue_els_logo(vport, ndlp, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_unused_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_LOGO_ACC;
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_logo_unused_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+
+ static uint32_t
+-lpfc_device_rm_unused_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_plogi_issue(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ void *arg, uint32_t evt)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *cmdiocb = arg;
+- struct lpfc_dmabuf *pcmd;
+- struct serv_parm *sp;
+- uint32_t *lp;
++ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++ uint32_t *lp = (uint32_t *) pcmd->virt;
++ struct serv_parm *sp = (struct serv_parm *) (lp + 1);
+ struct ls_rjt stat;
+ int port_cmp;
+
+- pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+- lp = (uint32_t *) pcmd->virt;
+- sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+-
+ memset(&stat, 0, sizeof (struct ls_rjt));
+
+ /* For a PLOGI, we only accept if our portname is less
+ * than the remote portname.
+ */
+ phba->fc_stat.elsLogiCol++;
+- port_cmp = memcmp(&phba->fc_portname, &sp->portName,
+- sizeof (struct lpfc_name));
++ port_cmp = memcmp(&vport->fc_portname, &sp->portName,
++ sizeof(struct lpfc_name));
+
+ if (port_cmp >= 0) {
+ /* Reject this request because the remote node will accept
+ ours */
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++ NULL);
+ } else {
+- lpfc_rcv_plogi(phba, ndlp, cmdiocb);
+- } /* if our portname was less */
++ lpfc_rcv_plogi(vport, ndlp, cmdiocb);
++ } /* If our portname was less */
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_plogi_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
++ struct ls_rjt stat;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ memset(&stat, 0, sizeof (struct ls_rjt));
++ stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY;
++ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
++ return ndlp->nlp_state;
++}
++
++static uint32_t
++lpfc_rcv_logo_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
++{
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* software abort outstanding PLOGI */
+- lpfc_els_abort(phba, ndlp);
++ lpfc_els_abort(vport->phba, ndlp);
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_els_plogi_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_els_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* software abort outstanding PLOGI */
+ lpfc_els_abort(phba, ndlp);
+
+ if (evt == NLP_EVT_RCV_LOGO) {
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+ } else {
+- lpfc_issue_els_logo(phba, ndlp, 0);
++ lpfc_issue_els_logo(vport, ndlp, 0);
+ }
+
+- /* Put ndlp in npr list set plogi timer for 1 sec */
++ /* Put ndlp in npr state; set plogi timer for 1 sec */
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+
+ return ndlp->nlp_state;
+ }
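
lpfc_rcv_els_plogi_issue() parks the node in NPR and arms nlp_delayfunc for jiffies + HZ * 1 before retrying PLOGI. The deadline arithmetic can be modeled in user space; HZ = 250 is assumed purely for illustration, the real value being a kernel configuration choice:

#include <stdio.h>

#define HZ 250                          /* assumed tick rate */

static unsigned long jiffies;           /* monotonically ticking counter */

/* Signed-difference comparison in the style of time_after_eq(), so the
 * test stays correct across counter wraparound. */
static int deadline_passed(unsigned long deadline)
{
        return (long)(jiffies - deadline) >= 0;
}

int main(void)
{
        unsigned long deadline = jiffies + HZ * 1;      /* one second out */

        while (!deadline_passed(deadline))
                jiffies++;              /* wait out the back-off */
        printf("retry PLOGI after %lu ticks\n", jiffies);       /* 250 */
        return 0;
}
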
+
+ static uint32_t
+-lpfc_cmpl_plogi_plogi_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
+ struct lpfc_dmabuf *pcmd, *prsp, *mp;
+ uint32_t *lp;
+@@ -721,31 +772,26 @@
+
+ pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+
+- prsp = list_get_first(&pcmd->list,
+- struct lpfc_dmabuf,
+- list);
+- lp = (uint32_t *) prsp->virt;
++ prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list);
+
++ lp = (uint32_t *) prsp->virt;
+ sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+- if (!lpfc_check_sparm(phba, ndlp, sp, CLASS3))
++ if (!lpfc_check_sparm(vport, ndlp, sp, CLASS3))
+ goto out;
+
+ /* PLOGI chkparm OK */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0121 PLOGI chkparm OK "
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0121 PLOGI chkparm OK "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ ndlp->nlp_DID, ndlp->nlp_state,
+ ndlp->nlp_flag, ndlp->nlp_rpi);
+
+- if ((phba->cfg_fcp_class == 2) &&
+- (sp->cls2.classValid)) {
++ if (phba->cfg_fcp_class == 2 && (sp->cls2.classValid))
+ ndlp->nlp_fcp_info |= CLASS2;
+- } else {
++ else
+ ndlp->nlp_fcp_info |= CLASS3;
+- }
++
+ ndlp->nlp_class_sup = 0;
+ if (sp->cls1.classValid)
+ ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -756,16 +802,23 @@
+ if (sp->cls4.classValid)
+ ndlp->nlp_class_sup |= FC_COS_CLASS4;
+ ndlp->nlp_maxframe =
+- ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) |
+- sp->cmn.bbRcvSizeLsb;
++ ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb;
+
+- if (!(mbox = mempool_alloc(phba->mbox_mem_pool,
+- GFP_KERNEL)))
++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!mbox) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0133 PLOGI: no memory for reg_login "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
++ ndlp->nlp_DID, ndlp->nlp_state,
++ ndlp->nlp_flag, ndlp->nlp_rpi);
+ goto out;
++ }
++
++ lpfc_unreg_rpi(vport, ndlp);
+
+- lpfc_unreg_rpi(phba, ndlp);
+- if (lpfc_reg_login(phba, irsp->un.elsreq64.remoteID, (uint8_t *) sp,
+- mbox, 0) == 0) {
++ if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID,
++ (uint8_t *) sp, mbox, 0) == 0) {
+ switch (ndlp->nlp_DID) {
+ case NameServer_DID:
+ mbox->mbox_cmpl = lpfc_mbx_cmpl_ns_reg_login;
+@@ -777,68 +830,104 @@
+ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
+ }
+ mbox->context2 = lpfc_nlp_get(ndlp);
++ mbox->vport = vport;
+ if (lpfc_sli_issue_mbox(phba, mbox,
+ (MBX_NOWAIT | MBX_STOP_IOCB))
+ != MBX_NOT_FINISHED) {
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE);
++ lpfc_nlp_set_state(vport, ndlp,
++ NLP_STE_REG_LOGIN_ISSUE);
+ return ndlp->nlp_state;
+ }
+ lpfc_nlp_put(ndlp);
+- mp = (struct lpfc_dmabuf *)mbox->context1;
++ mp = (struct lpfc_dmabuf *) mbox->context1;
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ mempool_free(mbox, phba->mbox_mem_pool);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0134 PLOGI: cannot issue reg_login "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
++ ndlp->nlp_DID, ndlp->nlp_state,
++ ndlp->nlp_flag, ndlp->nlp_rpi);
+ } else {
+ mempool_free(mbox, phba->mbox_mem_pool);
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0135 PLOGI: cannot format reg_login "
++ "Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vport->vpi,
++ ndlp->nlp_DID, ndlp->nlp_state,
++ ndlp->nlp_flag, ndlp->nlp_rpi);
+ }
+
+
+- out:
++out:
++ if (ndlp->nlp_DID == NameServer_DID) {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0261 Cannot Register NameServer login\n",
++ phba->brd_no, vport->vpi);
++ }
++
+ /* Free this node since the driver cannot login or has the wrong
+ sparm */
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
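
The nlp_maxframe computation above reassembles a 12-bit buffer-to-buffer receive size that the FC service parameters split across two bytes. A small sketch of the arithmetic, with example byte values assumed:

#include <stdio.h>
#include <stdint.h>

/* The low nibble of the MSB byte carries bits 8..11, the LSB byte
 * carries bits 0..7, exactly as in the nlp_maxframe assignment. */
static uint16_t bb_rcv_size(uint8_t msb, uint8_t lsb)
{
        return (uint16_t)(((msb & 0x0F) << 8) | lsb);
}

int main(void)
{
        /* 0x08/0x00 encodes a 2048-byte receive size. */
        printf("maxframe = %u\n", bb_rcv_size(0x08, 0x00));     /* 2048 */
        /* The upper nibble of the MSB byte holds unrelated flag bits;
         * the & 0x0F mask keeps them out of the size. */
        printf("maxframe = %u\n", bb_rcv_size(0xF8, 0x00));     /* 2048 */
        return 0;
}
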
+
+ static uint32_t
+-lpfc_device_rm_plogi_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+- }
+- else {
++ } else {
+ /* software abort outstanding PLOGI */
+- lpfc_els_abort(phba, ndlp);
++ lpfc_els_abort(vport->phba, ndlp);
+
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ }
+
+ static uint32_t
+-lpfc_device_recov_plogi_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_plogi_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++
++ /* Don't do anything that will mess up processing of the
++ * previous RSCN.
++ */
++ if (vport->fc_flag & FC_RSCN_DEFERRED)
++ return ndlp->nlp_state;
++
+ /* software abort outstanding PLOGI */
+ lpfc_els_abort(phba, ndlp);
+
+ ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_plogi_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *cmdiocb;
+
+ /* software abort outstanding ADISC */
+@@ -846,34 +935,31 @@
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+
+- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb))
+ return ndlp->nlp_state;
+- }
++
+ ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_prli_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_logo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+@@ -881,42 +967,43 @@
+ /* software abort outstanding ADISC */
+ lpfc_els_abort(phba, ndlp);
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_padisc_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_padisc_adisc_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prlo_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_prlo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* Treat like rcv logo */
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_adisc_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
+ IOCB_t *irsp;
+ ADISC *ap;
+@@ -928,101 +1015,112 @@
+ irsp = &rspiocb->iocb;
+
+ if ((irsp->ulpStatus) ||
+- (!lpfc_check_adisc(phba, ndlp, &ap->nodeName, &ap->portName))) {
++ (!lpfc_check_adisc(vport, ndlp, &ap->nodeName, &ap->portName))) {
+ /* 1 sec timeout */
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+
+- memset(&ndlp->nlp_nodename, 0, sizeof (struct lpfc_name));
+- memset(&ndlp->nlp_portname, 0, sizeof (struct lpfc_name));
++ memset(&ndlp->nlp_nodename, 0, sizeof(struct lpfc_name));
++ memset(&ndlp->nlp_portname, 0, sizeof(struct lpfc_name));
+
+ ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ lpfc_unreg_rpi(vport, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ if (ndlp->nlp_type & NLP_FCP_TARGET) {
+ ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
+ } else {
+ ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_device_rm_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_device_rm_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+- }
+- else {
++ } else {
+ /* software abort outstanding ADISC */
+- lpfc_els_abort(phba, ndlp);
++ lpfc_els_abort(vport->phba, ndlp);
+
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ }
+
+ static uint32_t
+-lpfc_device_recov_adisc_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_adisc_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++
++ /* Don't do anything that will mess up processing of the
++ * previous RSCN.
++ */
++ if (vport->fc_flag & FC_RSCN_DEFERRED)
++ return ndlp->nlp_state;
++
+ /* software abort outstanding ADISC */
+ lpfc_els_abort(phba, ndlp);
+
+ ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- ndlp->nlp_flag |= NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
+-
++ spin_unlock_irq(shost->host_lock);
++ lpfc_disc_set_adisc(vport, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_plogi_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ LPFC_MBOXQ_t *mb;
+ LPFC_MBOXQ_t *nextmb;
+ struct lpfc_dmabuf *mp;
+@@ -1033,12 +1131,13 @@
+ if ((mb = phba->sli.mbox_active)) {
+ if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
++ lpfc_nlp_put(ndlp);
+ mb->context2 = NULL;
+ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ }
+ }
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
+ if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ (ndlp == (struct lpfc_nodelist *) mb->context2)) {
+@@ -1047,61 +1146,61 @@
+ lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ kfree(mp);
+ }
++ lpfc_nlp_put(ndlp);
+ list_del(&mb->list);
+ mempool_free(mb, phba->mbox_mem_pool);
+ }
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
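
lpfc_rcv_logo_reglogin_issue() now drops the node reference held by each pending REG_LOGIN mailbox while unlinking it under hbalock. The safe-unlink-plus-refcount pattern, modeled in user space with invented names (the driver additionally frees the mailbox's DMA buffer):

#include <stdio.h>
#include <stdlib.h>

struct node { int refcnt; };

struct mbox {
        struct node *ctx;       /* node pinned by this pending command */
        struct mbox *next;
};

static void node_put(struct node *n) { n->refcnt--; }

/* Walk the pending queue; for every entry referencing the departing
 * node, drop the reference the queue held, unlink, and free. */
static void cancel_reglogin(struct mbox **head, struct node *n)
{
        struct mbox **pp = head, *mb;

        while ((mb = *pp) != NULL) {
                if (mb->ctx == n) {
                        node_put(n);      /* matches the lpfc_nlp_put() */
                        *pp = mb->next;   /* unlink before freeing */
                        free(mb);
                        continue;         /* *pp already advanced */
                }
                pp = &mb->next;
        }
}

int main(void)
{
        struct node n = { .refcnt = 2 };
        struct mbox *m2 = malloc(sizeof *m2), *m1 = malloc(sizeof *m1);

        *m2 = (struct mbox){ .ctx = &n, .next = NULL };
        *m1 = (struct mbox){ .ctx = &n, .next = m2 };
        cancel_reglogin(&m1, &n);
        printf("refcnt after cancel: %d, queue empty: %d\n",
               n.refcnt, m1 == NULL);
        return 0;
}
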
+
+ static uint32_t
+-lpfc_rcv_padisc_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_padisc_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prlo_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_prlo_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp,
+- void *arg, uint32_t evt)
++lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
++ uint32_t evt)
+ {
+- LPFC_MBOXQ_t *pmb;
+- MAILBOX_t *mb;
+- uint32_t did;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++ LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
++ MAILBOX_t *mb = &pmb->mb;
++ uint32_t did = mb->un.varWords[1];
+
+- pmb = (LPFC_MBOXQ_t *) arg;
+- mb = &pmb->mb;
+- did = mb->un.varWords[1];
+ if (mb->mbxStatus) {
+ /* RegLogin failed */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_DISCOVERY,
+- "%d:0246 RegLogin failed Data: x%x x%x x%x\n",
+- phba->brd_no,
+- did, mb->mbxStatus, phba->hba_state);
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0246 RegLogin failed Data: x%x x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi,
++ did, mb->mbxStatus, vport->port_state);
+
+ /*
+ * If RegLogin failed due to lack of HBA resources do not
+@@ -1109,20 +1208,20 @@
+ */
+ if (mb->mbxStatus == MBXERR_RPI_FULL) {
+ ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ return ndlp->nlp_state;
+ }
+
+- /* Put ndlp in npr list set plogi timer for 1 sec */
++ /* Put ndlp in npr state; set plogi timer for 1 sec */
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+
+- lpfc_issue_els_logo(phba, ndlp, 0);
++ lpfc_issue_els_logo(vport, ndlp, 0);
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ return ndlp->nlp_state;
+ }
+
+@@ -1131,91 +1230,99 @@
+ /* Only if we are not a fabric nport do we issue PRLI */
+ if (!(ndlp->nlp_type & NLP_FABRIC)) {
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
+- lpfc_issue_els_prli(phba, ndlp, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
++ lpfc_issue_els_prli(vport, ndlp, 0);
+ } else {
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_device_rm_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_rm_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
+- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+- }
+- else {
+- lpfc_drop_node(phba, ndlp);
++ } else {
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ }
+
+ static uint32_t
+-lpfc_device_recov_reglogin_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ /* Don't do anything that will mess up processing of the
++ * previous RSCN.
++ */
++ if (vport->fc_flag & FC_RSCN_DEFERRED)
++ return ndlp->nlp_state;
++
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_disc_set_adisc(vport, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb;
+
+ cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* Software abort outstanding PRLI before sending acc */
+- lpfc_els_abort(phba, ndlp);
++ lpfc_els_abort(vport->phba, ndlp);
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_padisc_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_padisc_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+@@ -1225,21 +1332,22 @@
+ * NEXT STATE = PRLI_ISSUE
+ */
+ static uint32_t
+-lpfc_rcv_prlo_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_prli_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
++ struct lpfc_hba *phba = vport->phba;
+ IOCB_t *irsp;
+ PRLI *npr;
+
+@@ -1249,8 +1357,12 @@
+
+ irsp = &rspiocb->iocb;
+ if (irsp->ulpStatus) {
++ if ((vport->port_type == LPFC_NPIV_PORT) &&
++ phba->cfg_vport_restrict_login) {
++ goto out;
++ }
+ ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ return ndlp->nlp_state;
+ }
+
+@@ -1266,9 +1378,25 @@
+ if (npr->Retry)
+ ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE;
+ }
++ if (!(ndlp->nlp_type & NLP_FCP_TARGET) &&
++ (vport->port_type == LPFC_NPIV_PORT) &&
++ phba->cfg_vport_restrict_login) {
++out:
++ spin_lock_irq(shost->host_lock);
++ ndlp->nlp_flag |= NLP_TARGET_REMOVE;
++ spin_unlock_irq(shost->host_lock);
++ lpfc_issue_els_logo(vport, ndlp, 0);
++
++ ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
++ return ndlp->nlp_state;
++ }
+
+ ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE);
++ if (ndlp->nlp_type & NLP_FCP_TARGET)
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
++ else
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ return ndlp->nlp_state;
+ }
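
The new NPIV branch above (note the goto out that jumps into the if body) decides a node's post-PRLI state from three inputs. A compact model of that decision, with local stand-ins for the NLP_STE_* constants:

#include <stdio.h>

enum node_state { ST_UNUSED, ST_UNMAPPED, ST_MAPPED };

/* Where a node lands after the PRLI completes, given success, target
 * capability, and whether this is an NPIV vport with
 * cfg_vport_restrict_login set. */
static enum node_state prli_next_state(int prli_ok, int is_fcp_target,
                                       int npiv_restrict_login)
{
        if (!prli_ok)                   /* failed PRLI */
                return npiv_restrict_login ? ST_UNUSED : ST_UNMAPPED;
        if (!is_fcp_target && npiv_restrict_login)
                return ST_UNUSED;       /* LOGO and park the node */
        return is_fcp_target ? ST_MAPPED : ST_UNMAPPED;
}

int main(void)
{
        printf("target on NPIV:         %d\n", prli_next_state(1, 1, 1)); /* 2 */
        printf("non-target, restricted: %d\n", prli_next_state(1, 0, 1)); /* 0 */
        printf("failed, unrestricted:   %d\n", prli_next_state(0, 0, 0)); /* 1 */
        return 0;
}
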
+
+@@ -1289,19 +1417,23 @@
+ * on plogi list so it can be freed when LOGO completes.
+ *
+ */
++
+ static uint32_t
+-lpfc_device_rm_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+- }
+- else {
++ } else {
+ /* software abort outstanding PRLI */
+- lpfc_els_abort(phba, ndlp);
++ lpfc_els_abort(vport->phba, ndlp);
+
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ }
+@@ -1324,261 +1456,251 @@
+ * outstanding PRLI command, then free the node entry.
+ */
+ static uint32_t
+-lpfc_device_recov_prli_issue(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_recov_prli_issue(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
++ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_hba *phba = vport->phba;
++
++ /* Don't do anything that will mess up processing of the
++ * previous RSCN.
++ */
++ if (vport->fc_flag & FC_RSCN_DEFERRED)
++ return ndlp->nlp_state;
++
+ /* software abort outstanding PRLI */
+ lpfc_els_abort(phba, ndlp);
+
+ ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_disc_set_adisc(vport, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_rcv_prli(phba, ndlp, cmdiocb);
+- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ lpfc_rcv_prli(vport, ndlp, cmdiocb);
++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_padisc_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_padisc_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prlo_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_device_recov_unmap_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_recov_unmap_node(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
++ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ ndlp->nlp_prev_state = NLP_STE_UNMAPPED_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- lpfc_disc_set_adisc(phba, ndlp);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_disc_set_adisc(vport, ndlp);
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++ lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_padisc_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_padisc_mapped_node(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prlo_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* flush the target */
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+- spin_unlock_irq(phba->host->host_lock);
+
+ /* Treat like rcv logo */
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_device_recov_mapped_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_mapped_node(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg,
+ uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ ndlp->nlp_prev_state = NLP_STE_MAPPED_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+- spin_lock_irq(phba->host->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
+- lpfc_disc_set_adisc(phba, ndlp);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_disc_set_adisc(vport, ndlp);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_plogi_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+ /* Ignore PLOGI if we have an outstanding LOGO */
+- if (ndlp->nlp_flag & NLP_LOGO_SND) {
++ if (ndlp->nlp_flag & (NLP_LOGO_SND | NLP_LOGO_ACC)) {
+ return ndlp->nlp_state;
+ }
+
+- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
+- spin_lock_irq(phba->host->host_lock);
++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+ }
+
+ /* send PLOGI immediately, move to PLOGI issue state */
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ }
+
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prli_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ struct ls_rjt stat;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+ memset(&stat, 0, sizeof (struct ls_rjt));
+ stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+ if (ndlp->nlp_flag & NLP_NPR_ADISC) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+- lpfc_issue_els_adisc(phba, ndlp, 0);
++ spin_unlock_irq(shost->host_lock);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++ lpfc_issue_els_adisc(vport, ndlp, 0);
+ } else {
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ }
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_logo_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_padisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
+-
+- cmdiocb = (struct lpfc_iocbq *) arg;
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++ lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+
+ /*
+ * Do not start discovery if discovery is about to start
+@@ -1586,53 +1708,52 @@
+ * here will affect the counting of discovery threads.
+ */
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+- !(ndlp->nlp_flag & NLP_NPR_2B_DISC)){
++ !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
+ if (ndlp->nlp_flag & NLP_NPR_ADISC) {
++ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+- lpfc_issue_els_adisc(phba, ndlp, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++ lpfc_issue_els_adisc(vport, ndlp, 0);
+ } else {
+ ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ }
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_rcv_prlo_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_rcv_prlo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- struct lpfc_iocbq *cmdiocb;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+
+- cmdiocb = (struct lpfc_iocbq *) arg;
+-
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_LOGO_ACC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+
+- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+
+- if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++ if ((ndlp->nlp_flag & NLP_DELAY_TMO) == 0) {
+ mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_DELAY_TMO;
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ } else {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_plogi_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
+ IOCB_t *irsp;
+@@ -1642,15 +1763,15 @@
+
+ irsp = &rspiocb->iocb;
+ if (irsp->ulpStatus) {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_prli_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
+ IOCB_t *irsp;
+@@ -1660,25 +1781,24 @@
+
+ irsp = &rspiocb->iocb;
+ if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_logo_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- lpfc_unreg_rpi(phba, ndlp);
++ lpfc_unreg_rpi(vport, ndlp);
+ /* This routine does nothing, just return the current state */
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_adisc_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_cmpl_adisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+ struct lpfc_iocbq *cmdiocb, *rspiocb;
+ IOCB_t *irsp;
+@@ -1688,28 +1808,25 @@
+
+ irsp = &rspiocb->iocb;
+ if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ return ndlp->nlp_state;
+ }
+
+ static uint32_t
+-lpfc_cmpl_reglogin_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_cmpl_reglogin_npr_node(struct lpfc_vport *vport,
++ struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- LPFC_MBOXQ_t *pmb;
+- MAILBOX_t *mb;
+-
+- pmb = (LPFC_MBOXQ_t *) arg;
+- mb = &pmb->mb;
++ LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
++ MAILBOX_t *mb = &pmb->mb;
+
+ if (!mb->mbxStatus)
+ ndlp->nlp_rpi = mb->un.varWords[0];
+ else {
+ if (ndlp->nlp_flag & NLP_NODEV_REMOVE) {
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+ }
+@@ -1717,28 +1834,38 @@
+ }
+
+ static uint32_t
+-lpfc_device_rm_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_device_rm_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++ spin_unlock_irq(shost->host_lock);
+ return ndlp->nlp_state;
+ }
+- lpfc_drop_node(phba, ndlp);
++ lpfc_drop_node(vport, ndlp);
+ return NLP_STE_FREED_NODE;
+ }
+
+ static uint32_t
+-lpfc_device_recov_npr_node(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg,
+- uint32_t evt)
++lpfc_device_recov_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
+- spin_lock_irq(phba->host->host_lock);
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++ /* Don't do anything that will mess up processing of the
++ * previous RSCN.
++ */
++ if (vport->fc_flag & FC_RSCN_DEFERRED)
++ return ndlp->nlp_state;
++
++ spin_lock_irq(shost->host_lock);
+ ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(shost->host_lock);
+ if (ndlp->nlp_flag & NLP_DELAY_TMO) {
+- lpfc_cancel_retry_delay_tmo(phba, ndlp);
++ lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ }
+ return ndlp->nlp_state;
+ }
+@@ -1801,7 +1928,7 @@
+ */
+
+ static uint32_t (*lpfc_disc_action[NLP_STE_MAX_STATE * NLP_EVT_MAX_EVENT])
+- (struct lpfc_hba *, struct lpfc_nodelist *, void *, uint32_t) = {
++ (struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t) = {
+ /* Action routine Event Current State */
+ lpfc_rcv_plogi_unused_node, /* RCV_PLOGI UNUSED_NODE */
+ lpfc_rcv_els_unused_node, /* RCV_PRLI */
+@@ -1818,7 +1945,7 @@
+ lpfc_disc_illegal, /* DEVICE_RECOVERY */
+
+ lpfc_rcv_plogi_plogi_issue, /* RCV_PLOGI PLOGI_ISSUE */
+- lpfc_rcv_els_plogi_issue, /* RCV_PRLI */
++ lpfc_rcv_prli_plogi_issue, /* RCV_PRLI */
+ lpfc_rcv_logo_plogi_issue, /* RCV_LOGO */
+ lpfc_rcv_els_plogi_issue, /* RCV_ADISC */
+ lpfc_rcv_els_plogi_issue, /* RCV_PDISC */
+@@ -1917,34 +2044,40 @@
+ };
+
+ int
+-lpfc_disc_state_machine(struct lpfc_hba * phba,
+- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++ void *arg, uint32_t evt)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t cur_state, rc;
+- uint32_t(*func) (struct lpfc_hba *, struct lpfc_nodelist *, void *,
++ uint32_t(*func) (struct lpfc_vport *, struct lpfc_nodelist *, void *,
+ uint32_t);
+
+ lpfc_nlp_get(ndlp);
+ cur_state = ndlp->nlp_state;
+
+ /* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0211 DSM in event x%x on NPort x%x in state %d "
+- "Data: x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0211 DSM in event x%x on NPort x%x in "
++ "state %d Data: x%x\n",
++ phba->brd_no, vport->vpi,
+ evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag);
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
++ "DSM in: evt:%d ste:%d did:x%x",
++ evt, cur_state, ndlp->nlp_DID);
++
+ func = lpfc_disc_action[(cur_state * NLP_EVT_MAX_EVENT) + evt];
+- rc = (func) (phba, ndlp, arg, evt);
++ rc = (func) (vport, ndlp, arg, evt);
+
+ /* DSM out state <rc> on NPort <nlp_DID> */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_DISCOVERY,
+- "%d:0212 DSM out state %d on NPort x%x Data: x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++ "%d (%d):0212 DSM out state %d on NPort x%x "
++ "Data: x%x\n",
++ phba->brd_no, vport->vpi,
++ rc, ndlp->nlp_DID, ndlp->nlp_flag);
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
++ "DSM out: ste:%d did:x%x flg:x%x",
+ rc, ndlp->nlp_DID, ndlp->nlp_flag);
+
+ lpfc_nlp_put(ndlp);
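
lpfc_disc_state_machine() dispatches through lpfc_disc_action[], a two-dimensional state/event table flattened into one array and indexed as cur_state * NLP_EVT_MAX_EVENT + evt. A toy version of the same dispatch scheme:

#include <stdio.h>
#include <stdint.h>

enum { NUM_STATES = 2, NUM_EVENTS = 2 };

typedef uint32_t (*action_fn)(void *arg);

static uint32_t act_keep(void *arg) { (void)arg; return 0; }
static uint32_t act_free(void *arg) { (void)arg; return 1; }

/* One handler per (state, event) pair, stored row-major in a flat
 * array, just like lpfc_disc_action[]. */
static const action_fn actions[NUM_STATES * NUM_EVENTS] = {
        act_keep, act_free,     /* events 0,1 in state 0 */
        act_free, act_keep,     /* events 0,1 in state 1 */
};

int main(void)
{
        uint32_t state = 1, evt = 0;
        action_fn fn = actions[state * NUM_EVENTS + evt];

        printf("handler returned %u\n", fn(NULL));      /* act_free -> 1 */
        return 0;
}

Keeping the table flat lets one multiply-and-add replace a nested lookup, at the cost of the table rows having to stay in exact state order.
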
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -37,10 +37,158 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+
+ #define LPFC_RESET_WAIT 2
+ #define LPFC_ABORT_WAIT 2
+
++/*
++ * This function is called with no lock held when there is a resource
++ * error in driver or in firmware.
++ */
++void
++lpfc_adjust_queue_depth(struct lpfc_hba *phba)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ atomic_inc(&phba->num_rsrc_err);
++ phba->last_rsrc_error_time = jiffies;
++
++ if ((phba->last_ramp_down_time + QUEUE_RAMP_DOWN_INTERVAL) > jiffies) {
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++ return;
++ }
++
++ phba->last_ramp_down_time = jiffies;
++
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++
++ spin_lock_irqsave(&phba->pport->work_port_lock, flags);
++ if ((phba->pport->work_port_events &
++ WORKER_RAMP_DOWN_QUEUE) == 0) {
++ phba->pport->work_port_events |= WORKER_RAMP_DOWN_QUEUE;
++ }
++ spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ if (phba->work_wait)
++ wake_up(phba->work_wait);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++
++ return;
++}
++
++/*
++ * This function is called with no lock held when there is a successful
++ * SCSI command completion.
++ */
++static inline void
++lpfc_rampup_queue_depth(struct lpfc_hba *phba,
++ struct scsi_device *sdev)
++{
++ unsigned long flags;
++ atomic_inc(&phba->num_cmd_success);
++
++ if (phba->cfg_lun_queue_depth <= sdev->queue_depth)
++ return;
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ if (((phba->last_ramp_up_time + QUEUE_RAMP_UP_INTERVAL) > jiffies) ||
++ ((phba->last_rsrc_error_time + QUEUE_RAMP_UP_INTERVAL ) > jiffies)) {
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++ return;
++ }
++
++ phba->last_ramp_up_time = jiffies;
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++
++ spin_lock_irqsave(&phba->pport->work_port_lock, flags);
++ if ((phba->pport->work_port_events &
++ WORKER_RAMP_UP_QUEUE) == 0) {
++ phba->pport->work_port_events |= WORKER_RAMP_UP_QUEUE;
++ }
++ spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ if (phba->work_wait)
++ wake_up(phba->work_wait);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++}
++
++void
++lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
++ struct Scsi_Host *host;
++ struct scsi_device *sdev;
++ unsigned long new_queue_depth;
++ unsigned long num_rsrc_err, num_cmd_success;
++
++ num_rsrc_err = atomic_read(&phba->num_rsrc_err);
++ num_cmd_success = atomic_read(&phba->num_cmd_success);
++
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ host = lpfc_shost_from_vport(vport);
++ if (!scsi_host_get(host))
++ continue;
++
++ spin_unlock_irq(&phba->hbalock);
++
++ shost_for_each_device(sdev, host) {
++ new_queue_depth = sdev->queue_depth * num_rsrc_err /
++ (num_rsrc_err + num_cmd_success);
++ if (!new_queue_depth)
++ new_queue_depth = sdev->queue_depth - 1;
++ else
++ new_queue_depth =
++ sdev->queue_depth - new_queue_depth;
++
++ if (sdev->ordered_tags)
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG,
++ new_queue_depth);
++ else
++ scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG,
++ new_queue_depth);
++ }
++ spin_lock_irq(&phba->hbalock);
++ scsi_host_put(host);
++ }
++ spin_unlock_irq(&phba->hbalock);
++ atomic_set(&phba->num_rsrc_err, 0);
++ atomic_set(&phba->num_cmd_success, 0);
++}
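
lpfc_ramp_down_queue_handler() shrinks each device's queue depth by the fraction of recent commands that failed on resources, stepping down by at least one slot. The arithmetic in isolation, with illustrative counts (the real values come from the num_rsrc_err/num_cmd_success atomics and are never both zero when the handler runs):

#include <stdio.h>

static unsigned long ramp_down(unsigned long depth,
                               unsigned long rsrc_err,
                               unsigned long cmd_success)
{
        /* Remove the share of the depth matching the error share. */
        unsigned long cut = depth * rsrc_err / (rsrc_err + cmd_success);

        return cut ? depth - cut : depth - 1;
}

int main(void)
{
        /* 25% of recent commands saw resource errors: 32 -> 24. */
        printf("%lu\n", ramp_down(32, 25, 75));
        /* Error share rounds to zero slots: still back off by one. */
        printf("%lu\n", ramp_down(32, 1, 999));
        return 0;
}
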
++
++void
++lpfc_ramp_up_queue_handler(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
++ struct Scsi_Host *host;
++ struct scsi_device *sdev;
++
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ host = lpfc_shost_from_vport(vport);
++ if (!scsi_host_get(host))
++ continue;
++
++ spin_unlock_irq(&phba->hbalock);
++ shost_for_each_device(sdev, host) {
++ if (sdev->ordered_tags)
++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG,
++ sdev->queue_depth+1);
++ else
++ scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG,
++ sdev->queue_depth+1);
++ }
++ spin_lock_irq(&phba->hbalock);
++ scsi_host_put(host);
++ }
++ spin_unlock_irq(&phba->hbalock);
++ atomic_set(&phba->num_rsrc_err, 0);
++ atomic_set(&phba->num_cmd_success, 0);
++}
+
+ /*
+ * This routine allocates a scsi buffer, which contains all the necessary
+@@ -51,8 +199,9 @@
+ * and the BPL BDE is setup in the IOCB.
+ */
+ static struct lpfc_scsi_buf *
+-lpfc_new_scsi_buf(struct lpfc_hba * phba)
++lpfc_new_scsi_buf(struct lpfc_vport *vport)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_scsi_buf *psb;
+ struct ulp_bde64 *bpl;
+ IOCB_t *iocb;
+@@ -63,7 +212,6 @@
+ if (!psb)
+ return NULL;
+ memset(psb, 0, sizeof (struct lpfc_scsi_buf));
+- psb->scsi_hba = phba;
+
+ /*
+ * Get memory from the pci pool to map the virt space to pci bus space
+@@ -155,7 +303,7 @@
+ }
+
+ static void
+-lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
++lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+ {
+ unsigned long iflag = 0;
+
+@@ -166,7 +314,7 @@
+ }
+
+ static int
+-lpfc_scsi_prep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd)
++lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+ {
+ struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+ struct scatterlist *sgel = NULL;
+@@ -175,8 +323,7 @@
+ IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+ dma_addr_t physaddr;
+ uint32_t i, num_bde = 0;
+- int datadir = scsi_cmnd->sc_data_direction;
+- int dma_error;
++ int nseg, datadir = scsi_cmnd->sc_data_direction;
+
+ /*
+ * There are three possibilities here - use scatter-gather segment, use
+@@ -185,26 +332,22 @@
+ * data bde entry.
+ */
+ bpl += 2;
+- if (scsi_cmnd->use_sg) {
++ nseg = scsi_dma_map(scsi_cmnd);
++ if (nseg > 0) {
+ /*
+ * The driver stores the segment count returned from pci_map_sg
+ * because this is a count of dma-mappings used to map the use_sg
+ * pages. They are not guaranteed to be the same for those
+ * architectures that implement an IOMMU.
+ */
+- sgel = (struct scatterlist *)scsi_cmnd->request_buffer;
+- lpfc_cmd->seg_cnt = dma_map_sg(&phba->pcidev->dev, sgel,
+- scsi_cmnd->use_sg, datadir);
+- if (lpfc_cmd->seg_cnt == 0)
+- return 1;
+
++ lpfc_cmd->seg_cnt = nseg;
+ if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+ printk(KERN_ERR "%s: Too many sg segments from "
+ "dma_map_sg. Config %d, seg_cnt %d",
+ __FUNCTION__, phba->cfg_sg_seg_cnt,
+ lpfc_cmd->seg_cnt);
+- dma_unmap_sg(&phba->pcidev->dev, sgel,
+- lpfc_cmd->seg_cnt, datadir);
++ scsi_dma_unmap(scsi_cmnd);
+ return 1;
+ }
+
+@@ -214,7 +357,7 @@
+ * single scsi command. Just run through the seg_cnt and format
+ * the bde's.
+ */
+- for (i = 0; i < lpfc_cmd->seg_cnt; i++) {
++ scsi_for_each_sg(scsi_cmnd, sgel, nseg, i) {
+ physaddr = sg_dma_address(sgel);
+ bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
+ bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
+@@ -225,35 +368,10 @@
+ bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+ bpl->tus.w = le32_to_cpu(bpl->tus.w);
+ bpl++;
+- sgel++;
+ num_bde++;
+ }
+- } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) {
+- physaddr = dma_map_single(&phba->pcidev->dev,
+- scsi_cmnd->request_buffer,
+- scsi_cmnd->request_bufflen,
+- datadir);
+- dma_error = dma_mapping_error(physaddr);
+- if (dma_error) {
+- lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0718 Unable to dma_map_single "
+- "request_buffer: x%x\n",
+- phba->brd_no, dma_error);
++ } else if (nseg < 0)
+ return 1;
+- }
+-
+- lpfc_cmd->nonsg_phys = physaddr;
+- bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
+- bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
+- bpl->tus.f.bdeSize = scsi_cmnd->request_bufflen;
+- if (datadir == DMA_TO_DEVICE)
+- bpl->tus.f.bdeFlags = 0;
+- else
+- bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+- bpl->tus.w = le32_to_cpu(bpl->tus.w);
+- num_bde = 1;
+- bpl++;
+- }
+
+ /*
+ * Finish initializing those IOCB fields that are dependent on the
+@@ -266,7 +384,7 @@
+ (num_bde * sizeof (struct ulp_bde64));
+ iocb_cmd->ulpBdeCount = 1;
+ iocb_cmd->ulpLe = 1;
+- fcp_cmnd->fcpDl = be32_to_cpu(scsi_cmnd->request_bufflen);
++ fcp_cmnd->fcpDl = be32_to_cpu(scsi_bufflen(scsi_cmnd));
+ return 0;
+ }
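
This hunk converts lpfc_scsi_prep_dma_buf() from open-coded dma_map_sg()/dma_map_single() handling to the midlayer's scsi_dma_map() and scsi_for_each_sg() accessors, which return and iterate the mapped segment count directly. A user-space model of the pattern; the structures are stand-ins, not the kernel's:

#include <stdio.h>

struct sg_entry { unsigned long dma_addr; unsigned int len; };

struct cmd {
        struct sg_entry sg[4];
        int nents;              /* what scsi_dma_map() would return */
};

/* scsi_dma_map() returns <0 on mapping failure, 0 for no data, or the
 * number of DMA segments; this mock just hands back a canned count. */
static int fake_scsi_dma_map(struct cmd *c)
{
        return c->nents;
}

int main(void)
{
        struct cmd c = {
                .sg = { { 0x1000, 512 }, { 0x2000, 1024 } },
                .nents = 2,
        };
        int i, nseg = fake_scsi_dma_map(&c);

        if (nseg < 0)
                return 1;       /* mapping failed; bail as the driver does */
        for (i = 0; i < nseg; i++)      /* the scsi_for_each_sg() walk */
                printf("bde %d: addr 0x%lx len %u\n",
                       i, c.sg[i].dma_addr, c.sg[i].len);
        return 0;
}
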
+
+@@ -279,26 +397,20 @@
+ * a request buffer, but did not request use_sg. There is a third
+ * case, but it does not require resource deallocation.
+ */
+- if ((psb->seg_cnt > 0) && (psb->pCmd->use_sg)) {
+- dma_unmap_sg(&phba->pcidev->dev, psb->pCmd->request_buffer,
+- psb->seg_cnt, psb->pCmd->sc_data_direction);
+- } else {
+- if ((psb->nonsg_phys) && (psb->pCmd->request_bufflen)) {
+- dma_unmap_single(&phba->pcidev->dev, psb->nonsg_phys,
+- psb->pCmd->request_bufflen,
+- psb->pCmd->sc_data_direction);
+- }
+- }
++ if (psb->seg_cnt > 0)
++ scsi_dma_unmap(psb->pCmd);
+ }
+
+ static void
+-lpfc_handle_fcp_err(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb)
++lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
++ struct lpfc_iocbq *rsp_iocb)
+ {
+ struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
+ struct fcp_cmnd *fcpcmd = lpfc_cmd->fcp_cmnd;
+ struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp;
+- struct lpfc_hba *phba = lpfc_cmd->scsi_hba;
++ struct lpfc_hba *phba = vport->phba;
+ uint32_t fcpi_parm = rsp_iocb->iocb.un.fcpi.fcpi_parm;
++ uint32_t vpi = vport->vpi;
+ uint32_t resp_info = fcprsp->rspStatus2;
+ uint32_t scsi_status = fcprsp->rspStatus3;
+ uint32_t *lp;
+@@ -331,9 +443,9 @@
+ logit = LOG_FCP;
+
+ lpfc_printf_log(phba, KERN_WARNING, logit,
+- "%d:0730 FCP command x%x failed: x%x SNS x%x x%x "
++ "%d (%d):0730 FCP command x%x failed: x%x SNS x%x x%x "
+ "Data: x%x x%x x%x x%x x%x\n",
+- phba->brd_no, cmnd->cmnd[0], scsi_status,
++ phba->brd_no, vpi, cmnd->cmnd[0], scsi_status,
+ be32_to_cpu(*lp), be32_to_cpu(*(lp + 3)), resp_info,
+ be32_to_cpu(fcprsp->rspResId),
+ be32_to_cpu(fcprsp->rspSnsLen),
+@@ -349,15 +461,16 @@
+ }
+ }
+
+- cmnd->resid = 0;
++ scsi_set_resid(cmnd, 0);
+ if (resp_info & RESID_UNDER) {
+- cmnd->resid = be32_to_cpu(fcprsp->rspResId);
++ scsi_set_resid(cmnd, be32_to_cpu(fcprsp->rspResId));
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0716 FCP Read Underrun, expected %d, "
+- "residual %d Data: x%x x%x x%x\n", phba->brd_no,
+- be32_to_cpu(fcpcmd->fcpDl), cmnd->resid,
+- fcpi_parm, cmnd->cmnd[0], cmnd->underflow);
++ "%d (%d):0716 FCP Read Underrun, expected %d, "
++ "residual %d Data: x%x x%x x%x\n",
++ phba->brd_no, vpi, be32_to_cpu(fcpcmd->fcpDl),
++ scsi_get_resid(cmnd), fcpi_parm, cmnd->cmnd[0],
++ cmnd->underflow);
+
+ /*
+ * If there is an under run check if under run reported by
+@@ -366,15 +479,16 @@
+ */
+ if ((cmnd->sc_data_direction == DMA_FROM_DEVICE) &&
+ fcpi_parm &&
+- (cmnd->resid != fcpi_parm)) {
++ (scsi_get_resid(cmnd) != fcpi_parm)) {
+ lpfc_printf_log(phba, KERN_WARNING,
+ LOG_FCP | LOG_FCP_ERROR,
+- "%d:0735 FCP Read Check Error and Underrun "
+- "Data: x%x x%x x%x x%x\n", phba->brd_no,
++ "%d (%d):0735 FCP Read Check Error "
++ "and Underrun Data: x%x x%x x%x x%x\n",
++ phba->brd_no, vpi,
+ be32_to_cpu(fcpcmd->fcpDl),
+- cmnd->resid,
+- fcpi_parm, cmnd->cmnd[0]);
+- cmnd->resid = cmnd->request_bufflen;
++ scsi_get_resid(cmnd), fcpi_parm,
++ cmnd->cmnd[0]);
++ scsi_set_resid(cmnd, scsi_bufflen(cmnd));
+ host_status = DID_ERROR;
+ }
+ /*
+@@ -385,22 +499,23 @@
+ */
+ if (!(resp_info & SNS_LEN_VALID) &&
+ (scsi_status == SAM_STAT_GOOD) &&
+- (cmnd->request_bufflen - cmnd->resid) < cmnd->underflow) {
++ (scsi_bufflen(cmnd) - scsi_get_resid(cmnd)
++ < cmnd->underflow)) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0717 FCP command x%x residual "
++ "%d (%d):0717 FCP command x%x residual "
+ "underrun converted to error "
+- "Data: x%x x%x x%x\n", phba->brd_no,
+- cmnd->cmnd[0], cmnd->request_bufflen,
+- cmnd->resid, cmnd->underflow);
+-
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, vpi, cmnd->cmnd[0],
++ cmnd->request_bufflen,
++ scsi_get_resid(cmnd), cmnd->underflow);
+ host_status = DID_ERROR;
+ }
+ } else if (resp_info & RESID_OVER) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0720 FCP command x%x residual "
++ "%d (%d):0720 FCP command x%x residual "
+ "overrun error. Data: x%x x%x \n",
+- phba->brd_no, cmnd->cmnd[0],
+- cmnd->request_bufflen, cmnd->resid);
++ phba->brd_no, vpi, cmnd->cmnd[0],
++ scsi_bufflen(cmnd), scsi_get_resid(cmnd));
+ host_status = DID_ERROR;
+
+ /*
+@@ -410,13 +525,14 @@
+ } else if ((scsi_status == SAM_STAT_GOOD) && fcpi_parm &&
+ (cmnd->sc_data_direction == DMA_FROM_DEVICE)) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR,
+- "%d:0734 FCP Read Check Error Data: "
+- "x%x x%x x%x x%x\n", phba->brd_no,
++ "%d (%d):0734 FCP Read Check Error Data: "
++ "x%x x%x x%x x%x\n",
++ phba->brd_no, vpi,
+ be32_to_cpu(fcpcmd->fcpDl),
+ be32_to_cpu(fcprsp->rspResId),
+ fcpi_parm, cmnd->cmnd[0]);
+ host_status = DID_ERROR;
+- cmnd->resid = cmnd->request_bufflen;
++ scsi_set_resid(cmnd, scsi_bufflen(cmnd));
+ }
+
+ out:
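Residual bookkeeping in this function now goes through scsi_set_resid()/scsi_get_resid() instead of writing cmnd->resid directly. A minimal sketch of the reporting idiom, where 'got' is an illustrative byte count actually transferred:

    #include <scsi/scsi_cmnd.h>

    static void example_report_short_read(struct scsi_cmnd *cmd, unsigned int got)
    {
            /* replaces the old cmd->resid = cmd->request_bufflen - got */
            scsi_set_resid(cmd, scsi_bufflen(cmd) - got);
    }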
+@@ -429,9 +545,13 @@
+ {
+ struct lpfc_scsi_buf *lpfc_cmd =
+ (struct lpfc_scsi_buf *) pIocbIn->context1;
++ struct lpfc_vport *vport = pIocbIn->vport;
+ struct lpfc_rport_data *rdata = lpfc_cmd->rdata;
+ struct lpfc_nodelist *pnode = rdata->pnode;
+ struct scsi_cmnd *cmd = lpfc_cmd->pCmd;
++ uint32_t vpi = (lpfc_cmd->cur_iocbq.vport
++ ? lpfc_cmd->cur_iocbq.vport->vpi
++ : 0);
+ int result;
+ struct scsi_device *sdev, *tmp_sdev;
+ int depth = 0;
+@@ -447,22 +567,31 @@
+ lpfc_cmd->status = IOSTAT_DEFAULT;
+
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0729 FCP cmd x%x failed <%d/%d> status: "
+- "x%x result: x%x Data: x%x x%x\n",
+- phba->brd_no, cmd->cmnd[0], cmd->device->id,
+- cmd->device->lun, lpfc_cmd->status,
+- lpfc_cmd->result, pIocbOut->iocb.ulpContext,
++ "%d (%d):0729 FCP cmd x%x failed <%d/%d> "
++ "status: x%x result: x%x Data: x%x x%x\n",
++ phba->brd_no, vpi, cmd->cmnd[0],
++ cmd->device ? cmd->device->id : 0xffff,
++ cmd->device ? cmd->device->lun : 0xffff,
++ lpfc_cmd->status, lpfc_cmd->result,
++ pIocbOut->iocb.ulpContext,
+ lpfc_cmd->cur_iocbq.iocb.ulpIoTag);
+
+ switch (lpfc_cmd->status) {
+ case IOSTAT_FCP_RSP_ERROR:
+ /* Call FCP RSP handler to determine result */
+- lpfc_handle_fcp_err(lpfc_cmd,pIocbOut);
++ lpfc_handle_fcp_err(vport, lpfc_cmd, pIocbOut);
+ break;
+ case IOSTAT_NPORT_BSY:
+ case IOSTAT_FABRIC_BSY:
+ cmd->result = ScsiResult(DID_BUS_BUSY, 0);
+ break;
++ case IOSTAT_LOCAL_REJECT:
++ if (lpfc_cmd->result == RJT_UNAVAIL_PERM ||
++ lpfc_cmd->result == IOERR_NO_RESOURCES ||
++ lpfc_cmd->result == RJT_LOGIN_REQUIRED) {
++ cmd->result = ScsiResult(DID_REQUEUE, 0);
++ break;
++ } /* else: fall through */
+ default:
+ cmd->result = ScsiResult(DID_ERROR, 0);
+ break;
+@@ -479,11 +608,12 @@
+ uint32_t *lp = (uint32_t *)cmd->sense_buffer;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0710 Iodone <%d/%d> cmd %p, error x%x "
+- "SNS x%x x%x Data: x%x x%x\n",
+- phba->brd_no, cmd->device->id,
++ "%d (%d):0710 Iodone <%d/%d> cmd %p, error "
++ "x%x SNS x%x x%x Data: x%x x%x\n",
++ phba->brd_no, vpi, cmd->device->id,
+ cmd->device->lun, cmd, cmd->result,
+- *lp, *(lp + 3), cmd->retries, cmd->resid);
++ *lp, *(lp + 3), cmd->retries,
++ scsi_get_resid(cmd));
+ }
+
+ result = cmd->result;
+@@ -496,6 +626,10 @@
+ return;
+ }
+
++
++ if (!result)
++ lpfc_rampup_queue_depth(phba, sdev);
++
+ if (!result && pnode != NULL &&
+ ((jiffies - pnode->last_ramp_up_time) >
+ LPFC_Q_RAMP_UP_INTERVAL * HZ) &&
+@@ -544,8 +678,9 @@
+
+ if (depth) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0711 detected queue full - lun queue depth "
+- " adjusted to %d.\n", phba->brd_no, depth);
++ "%d (%d):0711 detected queue full - "
++ "lun queue depth adjusted to %d.\n",
++ phba->brd_no, vpi, depth);
+ }
+ }
+
+@@ -553,9 +688,10 @@
+ }
+
+ static void
+-lpfc_scsi_prep_cmnd(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd,
++lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+ struct lpfc_nodelist *pnode)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+ struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
+ IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+@@ -592,22 +728,7 @@
+ * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first
+ * data bde entry.
+ */
+- if (scsi_cmnd->use_sg) {
+- if (datadir == DMA_TO_DEVICE) {
+- iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
+- iocb_cmd->un.fcpi.fcpi_parm = 0;
+- iocb_cmd->ulpPU = 0;
+- fcp_cmnd->fcpCntl3 = WRITE_DATA;
+- phba->fc4OutputRequests++;
+- } else {
+- iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
+- iocb_cmd->ulpPU = PARM_READ_CHECK;
+- iocb_cmd->un.fcpi.fcpi_parm =
+- scsi_cmnd->request_bufflen;
+- fcp_cmnd->fcpCntl3 = READ_DATA;
+- phba->fc4InputRequests++;
+- }
+- } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) {
++ if (scsi_sg_count(scsi_cmnd)) {
+ if (datadir == DMA_TO_DEVICE) {
+ iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
+ iocb_cmd->un.fcpi.fcpi_parm = 0;
+@@ -617,8 +738,7 @@
+ } else {
+ iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
+ iocb_cmd->ulpPU = PARM_READ_CHECK;
+- iocb_cmd->un.fcpi.fcpi_parm =
+- scsi_cmnd->request_bufflen;
++ iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd);
+ fcp_cmnd->fcpCntl3 = READ_DATA;
+ phba->fc4InputRequests++;
+ }
+@@ -642,15 +762,15 @@
+ piocbq->context1 = lpfc_cmd;
+ piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl;
+ piocbq->iocb.ulpTimeout = lpfc_cmd->timeout;
++ piocbq->vport = vport;
+ }
+
+ static int
+-lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_hba *phba,
++lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+ struct lpfc_scsi_buf *lpfc_cmd,
+ unsigned int lun,
+ uint8_t task_mgmt_cmd)
+ {
+- struct lpfc_sli *psli;
+ struct lpfc_iocbq *piocbq;
+ IOCB_t *piocb;
+ struct fcp_cmnd *fcp_cmnd;
+@@ -661,8 +781,9 @@
+ return 0;
+ }
+
+- psli = &phba->sli;
+ piocbq = &(lpfc_cmd->cur_iocbq);
++ piocbq->vport = vport;
++
+ piocb = &piocbq->iocb;
+
+ fcp_cmnd = lpfc_cmd->fcp_cmnd;
+@@ -688,7 +809,7 @@
+ piocb->ulpTimeout = lpfc_cmd->timeout;
+ }
+
+- return (1);
++ return 1;
+ }
+
+ static void
+@@ -704,10 +825,11 @@
+ }
+
+ static int
+-lpfc_scsi_tgt_reset(struct lpfc_scsi_buf * lpfc_cmd, struct lpfc_hba * phba,
++lpfc_scsi_tgt_reset(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_vport *vport,
+ unsigned tgt_id, unsigned int lun,
+ struct lpfc_rport_data *rdata)
+ {
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_iocbq *iocbq;
+ struct lpfc_iocbq *iocbqrsp;
+ int ret;
+@@ -716,12 +838,11 @@
+ return FAILED;
+
+ lpfc_cmd->rdata = rdata;
+- ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, lun,
++ ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun,
+ FCP_TARGET_RESET);
+ if (!ret)
+ return FAILED;
+
+- lpfc_cmd->scsi_hba = phba;
+ iocbq = &lpfc_cmd->cur_iocbq;
+ iocbqrsp = lpfc_sli_get_iocbq(phba);
+
+@@ -730,10 +851,10 @@
+
+ /* Issue Target Reset to TGT <num> */
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0702 Issue Target Reset to TGT %d "
++ "%d (%d):0702 Issue Target Reset to TGT %d "
+ "Data: x%x x%x\n",
+- phba->brd_no, tgt_id, rdata->pnode->nlp_rpi,
+- rdata->pnode->nlp_flag);
++ phba->brd_no, vport->vpi, tgt_id,
++ rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag);
+
+ ret = lpfc_sli_issue_iocb_wait(phba,
+ &phba->sli.ring[phba->sli.fcp_ring],
+@@ -758,7 +879,8 @@
+ const char *
+ lpfc_info(struct Scsi_Host *host)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ int len;
+ static char lpfcinfobuf[384];
+
+@@ -800,25 +922,21 @@
+
+ void lpfc_poll_timeout(unsigned long ptr)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
+- unsigned long iflag;
+-
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr;
+
+ if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
+ lpfc_sli_poll_fcp_ring (phba);
+ if (phba->cfg_poll & DISABLE_FCP_RING_INT)
+ lpfc_poll_rearm_timer(phba);
+ }
+-
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ }
+
+ static int
+ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
+ {
+- struct lpfc_hba *phba =
+- (struct lpfc_hba *) cmnd->device->host->hostdata;
++ struct Scsi_Host *shost = cmnd->device->host;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_rport_data *rdata = cmnd->device->hostdata;
+ struct lpfc_nodelist *ndlp = rdata->pnode;
+@@ -840,11 +958,14 @@
+ cmnd->result = ScsiResult(DID_BUS_BUSY, 0);
+ goto out_fail_command;
+ }
+- lpfc_cmd = lpfc_get_scsi_buf (phba);
++ lpfc_cmd = lpfc_get_scsi_buf(phba);
+ if (lpfc_cmd == NULL) {
++ lpfc_adjust_queue_depth(phba);
++
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0707 driver's buffer pool is empty, "
+- "IO busied\n", phba->brd_no);
++ "%d (%d):0707 driver's buffer pool is empty, "
++ "IO busied\n",
++ phba->brd_no, vport->vpi);
+ goto out_host_busy;
+ }
+
+@@ -862,7 +983,7 @@
+ if (err)
+ goto out_host_busy_free_buf;
+
+- lpfc_scsi_prep_cmnd(phba, lpfc_cmd, ndlp);
++ lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
+
+ err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring],
+ &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
+@@ -908,7 +1029,8 @@
+ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ {
+ struct Scsi_Host *shost = cmnd->device->host;
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring];
+ struct lpfc_iocbq *iocb;
+ struct lpfc_iocbq *abtsiocb;
+@@ -918,8 +1040,6 @@
+ int ret = SUCCESS;
+
+ lpfc_block_error_handler(cmnd);
+- spin_lock_irq(shost->host_lock);
+-
+ lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
+ BUG_ON(!lpfc_cmd);
+
+@@ -956,12 +1076,13 @@
+
+ icmd->ulpLe = 1;
+ icmd->ulpClass = cmd->ulpClass;
+- if (phba->hba_state >= LPFC_LINK_UP)
++ if (lpfc_is_link_up(phba))
+ icmd->ulpCommand = CMD_ABORT_XRI_CN;
+ else
+ icmd->ulpCommand = CMD_CLOSE_XRI_CN;
+
+ abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++ abtsiocb->vport = vport;
+ if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) {
+ lpfc_sli_release_iocbq(phba, abtsiocb);
+ ret = FAILED;
+@@ -977,9 +1098,7 @@
+ if (phba->cfg_poll & DISABLE_FCP_RING_INT)
+ lpfc_sli_poll_fcp_ring (phba);
+
+- spin_unlock_irq(phba->host->host_lock);
+- schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ);
+- spin_lock_irq(phba->host->host_lock);
++ schedule_timeout_uninterruptible(LPFC_ABORT_WAIT * HZ);
+ if (++loop_count
+ > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT)
+ break;
+@@ -988,22 +1107,21 @@
+ if (lpfc_cmd->pCmd == cmnd) {
+ ret = FAILED;
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0748 abort handler timed out waiting for "
+- "abort to complete: ret %#x, ID %d, LUN %d, "
+- "snum %#lx\n",
+- phba->brd_no, ret, cmnd->device->id,
+- cmnd->device->lun, cmnd->serial_number);
++ "%d (%d):0748 abort handler timed out waiting "
++ "for abort to complete: ret %#x, ID %d, "
++ "LUN %d, snum %#lx\n",
++ phba->brd_no, vport->vpi, ret,
++ cmnd->device->id, cmnd->device->lun,
++ cmnd->serial_number);
+ }
+
+ out:
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0749 SCSI Layer I/O Abort Request "
++ "%d (%d):0749 SCSI Layer I/O Abort Request "
+ "Status x%x ID %d LUN %d snum %#lx\n",
+- phba->brd_no, ret, cmnd->device->id,
++ phba->brd_no, vport->vpi, ret, cmnd->device->id,
+ cmnd->device->lun, cmnd->serial_number);
+
+- spin_unlock_irq(shost->host_lock);
+-
+ return ret;
+ }
+
+@@ -1011,7 +1129,8 @@
+ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
+ {
+ struct Scsi_Host *shost = cmnd->device->host;
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_iocbq *iocbq, *iocbqrsp;
+ struct lpfc_rport_data *rdata = cmnd->device->hostdata;
+@@ -1022,28 +1141,26 @@
+ int cnt, loopcnt;
+
+ lpfc_block_error_handler(cmnd);
+- spin_lock_irq(shost->host_lock);
+ loopcnt = 0;
+ /*
+ * If target is not in a MAPPED state, delay the reset until
+ * target is rediscovered or devloss timeout expires.
+ */
+- while ( 1 ) {
++ while (1) {
+ if (!pnode)
+ goto out;
+
+ if (pnode->nlp_state != NLP_STE_MAPPED_NODE) {
+- spin_unlock_irq(phba->host->host_lock);
+ schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+- spin_lock_irq(phba->host->host_lock);
+ loopcnt++;
+ rdata = cmnd->device->hostdata;
+ if (!rdata ||
+ (loopcnt > ((phba->cfg_devloss_tmo * 2) + 1))) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0721 LUN Reset rport failure:"
+- " cnt x%x rdata x%p\n",
+- phba->brd_no, loopcnt, rdata);
++ "%d (%d):0721 LUN Reset rport "
++ "failure: cnt x%x rdata x%p\n",
++ phba->brd_no, vport->vpi,
++ loopcnt, rdata);
+ goto out;
+ }
+ pnode = rdata->pnode;
+@@ -1054,15 +1171,14 @@
+ break;
+ }
+
+- lpfc_cmd = lpfc_get_scsi_buf (phba);
++ lpfc_cmd = lpfc_get_scsi_buf(phba);
+ if (lpfc_cmd == NULL)
+ goto out;
+
+ lpfc_cmd->timeout = 60;
+- lpfc_cmd->scsi_hba = phba;
+ lpfc_cmd->rdata = rdata;
+
+- ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, cmnd->device->lun,
++ ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, cmnd->device->lun,
+ FCP_TARGET_RESET);
+ if (!ret)
+ goto out_free_scsi_buf;
+@@ -1075,8 +1191,9 @@
+ goto out_free_scsi_buf;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+- "%d:0703 Issue target reset to TGT %d LUN %d rpi x%x "
+- "nlp_flag x%x\n", phba->brd_no, cmnd->device->id,
++ "%d (%d):0703 Issue target reset to TGT %d LUN %d "
++ "rpi x%x nlp_flag x%x\n",
++ phba->brd_no, vport->vpi, cmnd->device->id,
+ cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag);
+
+ iocb_status = lpfc_sli_issue_iocb_wait(phba,
+@@ -1111,9 +1228,7 @@
+ 0, LPFC_CTX_LUN);
+ loopcnt = 0;
+ while(cnt) {
+- spin_unlock_irq(phba->host->host_lock);
+ schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ);
+- spin_lock_irq(phba->host->host_lock);
+
+ if (++loopcnt
+ > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
+@@ -1127,8 +1242,9 @@
+
+ if (cnt) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0719 device reset I/O flush failure: cnt x%x\n",
+- phba->brd_no, cnt);
++ "%d (%d):0719 device reset I/O flush failure: "
++ "cnt x%x\n",
++ phba->brd_no, vport->vpi, cnt);
+ ret = FAILED;
+ }
+
+@@ -1137,13 +1253,12 @@
+ lpfc_release_scsi_buf(phba, lpfc_cmd);
+ }
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0713 SCSI layer issued device reset (%d, %d) "
++ "%d (%d):0713 SCSI layer issued device reset (%d, %d) "
+ "return x%x status x%x result x%x\n",
+- phba->brd_no, cmnd->device->id, cmnd->device->lun,
+- ret, cmd_status, cmd_result);
++ phba->brd_no, vport->vpi, cmnd->device->id,
++ cmnd->device->lun, ret, cmd_status, cmd_result);
+
+ out:
+- spin_unlock_irq(shost->host_lock);
+ return ret;
+ }
+
+@@ -1151,7 +1266,8 @@
+ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd)
+ {
+ struct Scsi_Host *shost = cmnd->device->host;
+- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_nodelist *ndlp = NULL;
+ int match;
+ int ret = FAILED, i, err_count = 0;
+@@ -1159,7 +1275,6 @@
+ struct lpfc_scsi_buf * lpfc_cmd;
+
+ lpfc_block_error_handler(cmnd);
+- spin_lock_irq(shost->host_lock);
+
+ lpfc_cmd = lpfc_get_scsi_buf(phba);
+ if (lpfc_cmd == NULL)
+@@ -1167,7 +1282,6 @@
+
+ /* The lpfc_cmd storage is reused. Set all loop invariants. */
+ lpfc_cmd->timeout = 60;
+- lpfc_cmd->scsi_hba = phba;
+
+ /*
+ * Since the driver manages a single bus device, reset all
+@@ -1177,7 +1291,8 @@
+ for (i = 0; i < LPFC_MAX_TARGET; i++) {
+ /* Search for mapped node by target ID */
+ match = 0;
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++ spin_lock_irq(shost->host_lock);
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state == NLP_STE_MAPPED_NODE &&
+ i == ndlp->nlp_sid &&
+ ndlp->rport) {
+@@ -1185,15 +1300,18 @@
+ break;
+ }
+ }
++ spin_unlock_irq(shost->host_lock);
+ if (!match)
+ continue;
+
+- ret = lpfc_scsi_tgt_reset(lpfc_cmd, phba, i, cmnd->device->lun,
++ ret = lpfc_scsi_tgt_reset(lpfc_cmd, vport, i,
++ cmnd->device->lun,
+ ndlp->rport->dd_data);
+ if (ret != SUCCESS) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0700 Bus Reset on target %d failed\n",
+- phba->brd_no, i);
++ "%d (%d):0700 Bus Reset on target %d "
++ "failed\n",
++ phba->brd_no, vport->vpi, i);
+ err_count++;
+ break;
+ }
+@@ -1219,9 +1337,7 @@
+ 0, 0, 0, LPFC_CTX_HOST);
+ loopcnt = 0;
+ while(cnt) {
+- spin_unlock_irq(phba->host->host_lock);
+ schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ);
+- spin_lock_irq(phba->host->host_lock);
+
+ if (++loopcnt
+ > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
+@@ -1234,25 +1350,24 @@
+
+ if (cnt) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0715 Bus Reset I/O flush failure: cnt x%x left x%x\n",
+- phba->brd_no, cnt, i);
++ "%d (%d):0715 Bus Reset I/O flush failure: "
++ "cnt x%x left x%x\n",
++ phba->brd_no, vport->vpi, cnt, i);
+ ret = FAILED;
+ }
+
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_FCP,
+- "%d:0714 SCSI layer issued Bus Reset Data: x%x\n",
+- phba->brd_no, ret);
++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++ "%d (%d):0714 SCSI layer issued Bus Reset Data: x%x\n",
++ phba->brd_no, vport->vpi, ret);
+ out:
+- spin_unlock_irq(shost->host_lock);
+ return ret;
+ }
+
+ static int
+ lpfc_slave_alloc(struct scsi_device *sdev)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *)sdev->host->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_scsi_buf *scsi_buf = NULL;
+ struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+ uint32_t total = 0, i;
+@@ -1273,27 +1388,35 @@
+ */
+ total = phba->total_scsi_bufs;
+ num_to_alloc = phba->cfg_lun_queue_depth + 2;
+- if (total >= phba->cfg_hba_queue_depth) {
++
++ /* Always keep some exchanges available so discovery can complete */
++ if (total >= phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0704 At limitation of %d preallocated "
+- "command buffers\n", phba->brd_no, total);
++ "%d (%d):0704 At limitation of %d "
++ "preallocated command buffers\n",
++ phba->brd_no, vport->vpi, total);
+ return 0;
+- } else if (total + num_to_alloc > phba->cfg_hba_queue_depth) {
++
++ /* Always keep some exchanges available so discovery can complete */
++ } else if (total + num_to_alloc >
++ phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT) {
+ lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+- "%d:0705 Allocation request of %d command "
+- "buffers will exceed max of %d. Reducing "
+- "allocation request to %d.\n", phba->brd_no,
+- num_to_alloc, phba->cfg_hba_queue_depth,
++ "%d (%d):0705 Allocation request of %d "
++ "command buffers will exceed max of %d. "
++ "Reducing allocation request to %d.\n",
++ phba->brd_no, vport->vpi, num_to_alloc,
++ phba->cfg_hba_queue_depth,
+ (phba->cfg_hba_queue_depth - total));
+ num_to_alloc = phba->cfg_hba_queue_depth - total;
+ }
+
+ for (i = 0; i < num_to_alloc; i++) {
+- scsi_buf = lpfc_new_scsi_buf(phba);
++ scsi_buf = lpfc_new_scsi_buf(vport);
+ if (!scsi_buf) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+- "%d:0706 Failed to allocate command "
+- "buffer\n", phba->brd_no);
++ "%d (%d):0706 Failed to allocate "
++ "command buffer\n",
++ phba->brd_no, vport->vpi);
+ break;
+ }
+
+@@ -1308,7 +1431,8 @@
+ static int
+ lpfc_slave_configure(struct scsi_device *sdev)
+ {
+- struct lpfc_hba *phba = (struct lpfc_hba *) sdev->host->hostdata;
++ struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct fc_rport *rport = starget_to_rport(sdev->sdev_target);
+
+ if (sdev->tagged_supported)
+@@ -1340,6 +1464,7 @@
+ return;
+ }
+
++
+ struct scsi_host_template lpfc_template = {
+ .module = THIS_MODULE,
+ .name = LPFC_DRIVER_NAME,
+@@ -1352,11 +1477,10 @@
+ .slave_configure = lpfc_slave_configure,
+ .slave_destroy = lpfc_slave_destroy,
+ .scan_finished = lpfc_scan_finished,
+- .scan_start = lpfc_scan_start,
+ .this_id = -1,
+ .sg_tablesize = LPFC_SG_SEG_CNT,
+ .cmd_per_lun = LPFC_CMD_PER_LUN,
+ .use_clustering = ENABLE_CLUSTERING,
+- .shost_attrs = lpfc_host_attrs,
++ .shost_attrs = lpfc_hba_attrs,
+ .max_sectors = 0xFFFF,
+ };
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,7 @@
+ /*******************************************************************
+ * This file is part of the Emulex Linux Device Driver for *
+ * Fibre Channel Host Bus Adapters. *
+- * Copyright (C) 2004-2005 Emulex. All rights reserved. *
++ * Copyright (C) 2004-2006 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+@@ -110,7 +110,6 @@
+ struct lpfc_scsi_buf {
+ struct list_head list;
+ struct scsi_cmnd *pCmd;
+- struct lpfc_hba *scsi_hba;
+ struct lpfc_rport_data *rdata;
+
+ uint32_t timeout;
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c 2007-12-21 15:36:12.000000000 -0500
+@@ -38,23 +38,25 @@
+ #include "lpfc_crtn.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_compat.h"
++#include "lpfc_debugfs.h"
+
+ /*
+ * Define macro to log: Mailbox command x%x cannot issue Data
+ * This allows multiple uses of lpfc_msgBlk0311
+ * w/o perturbing log msg utility.
+ */
+-#define LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) \
++#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \
+ lpfc_printf_log(phba, \
+ KERN_INFO, \
+ LOG_MBOX | LOG_SLI, \
+- "%d:0311 Mailbox command x%x cannot issue " \
+- "Data: x%x x%x x%x\n", \
++ "%d (%d):0311 Mailbox command x%x cannot " \
++ "issue Data: x%x x%x x%x\n", \
+ phba->brd_no, \
+- mb->mbxCommand, \
+- phba->hba_state, \
++ pmbox->vport ? pmbox->vport->vpi : 0, \
++ pmbox->mb.mbxCommand, \
++ phba->pport->port_state, \
+ psli->sli_flag, \
+- flag);
++ flag)
+
+
+ /* There are only four IOCB completion types. */
+@@ -65,8 +67,26 @@
+ LPFC_ABORT_IOCB
+ } lpfc_iocb_type;
+
+-struct lpfc_iocbq *
+-lpfc_sli_get_iocbq(struct lpfc_hba * phba)
++ /* SLI-2/SLI-3 provide different sized iocbs. Given a pointer
++ * to the start of the ring, and the slot number of the
++ * desired iocb entry, calc a pointer to that entry.
++ */
++static inline IOCB_t *
++lpfc_cmd_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
++{
++ return (IOCB_t *) (((char *) pring->cmdringaddr) +
++ pring->cmdidx * phba->iocb_cmd_size);
++}
++
++static inline IOCB_t *
++lpfc_resp_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
++{
++ return (IOCB_t *) (((char *) pring->rspringaddr) +
++ pring->rspidx * phba->iocb_rsp_size);
++}
++
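The two inline helpers above retire the fixed-size IOCB_ENTRY macro: SLI-3 iocb entries are larger than the SLI-2 sizeof(IOCB_t), so the slot size must come from the hba at runtime. The arithmetic they encode, names invented for illustration:

    static inline void *ring_entry(void *base, unsigned int idx, size_t entry_size)
    {
            /* entry_size plays the role of phba->iocb_cmd_size / iocb_rsp_size */
            return (char *)base + idx * entry_size;
    }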
++static struct lpfc_iocbq *
++__lpfc_sli_get_iocbq(struct lpfc_hba *phba)
+ {
+ struct list_head *lpfc_iocb_list = &phba->lpfc_iocb_list;
+ struct lpfc_iocbq * iocbq = NULL;
+@@ -75,10 +95,22 @@
+ return iocbq;
+ }
+
++struct lpfc_iocbq *
++lpfc_sli_get_iocbq(struct lpfc_hba *phba)
++{
++ struct lpfc_iocbq * iocbq = NULL;
++ unsigned long iflags;
++
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ iocbq = __lpfc_sli_get_iocbq(phba);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++ return iocbq;
++}
++
+ void
+-lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq)
++__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+ {
+- size_t start_clean = (size_t)(&((struct lpfc_iocbq *)NULL)->iocb);
++ size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
+
+ /*
+ * Clean all volatile data fields, preserve iotag and node struct.
+@@ -87,6 +119,19 @@
+ list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+ }
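The offsetof() line above replaces a hand-rolled NULL-pointer cast; the memset consuming start_clean is an unchanged line outside the visible context. A sketch of the partial-clear idiom, assuming the iocb member follows the fields that must survive reuse:

    size_t start_clean = offsetof(struct lpfc_iocbq, iocb);

    /* Zero from 'iocb' onward; fields declared before it (list, iotag) survive. */
    memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);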
+
++void
++lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
++{
++ unsigned long iflags;
++
++ /*
++ * Clean all volatile data fields, preserve iotag and node struct.
++ */
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ __lpfc_sli_release_iocbq(phba, iocbq);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++}
++
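The iocbq get/release paths are now split into a double-underscore core that assumes the caller already holds hbalock and a thin wrapper that takes it, so code paths already inside the lock can call the core without re-acquiring. A generic sketch of the pairing; pool and item names are invented:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct item { struct list_head list; };
    struct pool { spinlock_t lock; struct list_head free_list; };

    /* Core: caller must hold p->lock. */
    static struct item *__pool_get(struct pool *p)
    {
            struct item *it = NULL;

            if (!list_empty(&p->free_list)) {
                    it = list_first_entry(&p->free_list, struct item, list);
                    list_del(&it->list);
            }
            return it;
    }

    /* Wrapper: usable from contexts that do not yet hold the lock. */
    static struct item *pool_get(struct pool *p)
    {
            struct item *it;
            unsigned long flags;

            spin_lock_irqsave(&p->lock, flags);
            it = __pool_get(p);
            spin_unlock_irqrestore(&p->lock, flags);
            return it;
    }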
+ /*
+ * Translate the iocb command to an iocb command type used to decide the final
+ * disposition of each completed IOCB.
+@@ -155,6 +200,9 @@
+ case CMD_RCV_ELS_REQ_CX:
+ case CMD_RCV_SEQUENCE64_CX:
+ case CMD_RCV_ELS_REQ64_CX:
++ case CMD_IOCB_RCV_SEQ64_CX:
++ case CMD_IOCB_RCV_ELS64_CX:
++ case CMD_IOCB_RCV_CONT64_CX:
+ type = LPFC_UNSOL_IOCB;
+ break;
+ default:
+@@ -166,73 +214,77 @@
+ }
+
+ static int
+-lpfc_sli_ring_map(struct lpfc_hba * phba, LPFC_MBOXQ_t *pmb)
++lpfc_sli_ring_map(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli = &phba->sli;
+- MAILBOX_t *pmbox = &pmb->mb;
+- int i, rc;
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *pmbox;
++ int i, rc, ret = 0;
+
++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!pmb)
++ return -ENOMEM;
++ pmbox = &pmb->mb;
++ phba->link_state = LPFC_INIT_MBX_CMDS;
+ for (i = 0; i < psli->num_rings; i++) {
+- phba->hba_state = LPFC_INIT_MBX_CMDS;
+ lpfc_config_ring(phba, i, pmb);
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+ if (rc != MBX_SUCCESS) {
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
+- "%d:0446 Adapter failed to init, "
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++ "%d:0446 Adapter failed to init (%d), "
+ "mbxCmd x%x CFG_RING, mbxStatus x%x, "
+ "ring %d\n",
+- phba->brd_no,
++ phba->brd_no, rc,
+ pmbox->mbxCommand,
+ pmbox->mbxStatus,
+ i);
+- phba->hba_state = LPFC_HBA_ERROR;
+- return -ENXIO;
++ phba->link_state = LPFC_HBA_ERROR;
++ ret = -ENXIO;
++ break;
+ }
+ }
+- return 0;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return ret;
+ }
+
+ static int
+-lpfc_sli_ringtxcmpl_put(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocb)
++lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocb)
+ {
+ list_add_tail(&piocb->list, &pring->txcmplq);
+ pring->txcmplq_cnt++;
+- if (unlikely(pring->ringno == LPFC_ELS_RING))
+- mod_timer(&phba->els_tmofunc,
++ if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
++ (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
++ (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) {
++ if (!piocb->vport)
++ BUG();
++ else
++ mod_timer(&piocb->vport->els_tmofunc,
+ jiffies + HZ * (phba->fc_ratov << 1));
++ }
+
+- return (0);
++
++ return 0;
+ }
+
+ static struct lpfc_iocbq *
+-lpfc_sli_ringtx_get(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+- struct list_head *dlp;
+ struct lpfc_iocbq *cmd_iocb;
+
+- dlp = &pring->txq;
+- cmd_iocb = NULL;
+- list_remove_head((&pring->txq), cmd_iocb,
+- struct lpfc_iocbq,
+- list);
+- if (cmd_iocb) {
+- /* If the first ptr is not equal to the list header,
+- * deque the IOCBQ_t and return it.
+- */
++ list_remove_head((&pring->txq), cmd_iocb, struct lpfc_iocbq, list);
++ if (cmd_iocb != NULL)
+ pring->txq_cnt--;
+- }
+- return (cmd_iocb);
++ return cmd_iocb;
+ }
+
+ static IOCB_t *
+ lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++ &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ uint32_t max_cmd_idx = pring->numCiocb;
+- IOCB_t *iocb = NULL;
+
+ if ((pring->next_cmdidx == pring->cmdidx) &&
+ (++pring->next_cmdidx >= max_cmd_idx))
+@@ -249,15 +301,17 @@
+ phba->brd_no, pring->ringno,
+ pring->local_getidx, max_cmd_idx);
+
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ /*
+ * All error attention handlers are posted to
+ * worker thread
+ */
+ phba->work_ha |= HA_ERATT;
+ phba->work_hs = HS_FFER3;
++
++ /* hbalock should already be held */
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
+
+ return NULL;
+ }
+@@ -266,39 +320,34 @@
+ return NULL;
+ }
+
+- iocb = IOCB_ENTRY(pring->cmdringaddr, pring->cmdidx);
+-
+- return iocb;
++ return lpfc_cmd_iocb(phba, pring);
+ }
+
+ uint16_t
+-lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq)
++lpfc_sli_next_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+ {
+- struct lpfc_iocbq ** new_arr;
+- struct lpfc_iocbq ** old_arr;
++ struct lpfc_iocbq **new_arr;
++ struct lpfc_iocbq **old_arr;
+ size_t new_len;
+ struct lpfc_sli *psli = &phba->sli;
+ uint16_t iotag;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ iotag = psli->last_iotag;
+ if(++iotag < psli->iocbq_lookup_len) {
+ psli->last_iotag = iotag;
+ psli->iocbq_lookup[iotag] = iocbq;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ iocbq->iotag = iotag;
+ return iotag;
+- }
+- else if (psli->iocbq_lookup_len < (0xffff
++ } else if (psli->iocbq_lookup_len < (0xffff
+ - LPFC_IOCBQ_LOOKUP_INCREMENT)) {
+ new_len = psli->iocbq_lookup_len + LPFC_IOCBQ_LOOKUP_INCREMENT;
+- spin_unlock_irq(phba->host->host_lock);
+- new_arr = kmalloc(new_len * sizeof (struct lpfc_iocbq *),
++ spin_unlock_irq(&phba->hbalock);
++ new_arr = kzalloc(new_len * sizeof (struct lpfc_iocbq *),
+ GFP_KERNEL);
+ if (new_arr) {
+- memset((char *)new_arr, 0,
+- new_len * sizeof (struct lpfc_iocbq *));
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ old_arr = psli->iocbq_lookup;
+ if (new_len <= psli->iocbq_lookup_len) {
+ /* highly improbable case */
+@@ -307,11 +356,11 @@
+ if(++iotag < psli->iocbq_lookup_len) {
+ psli->last_iotag = iotag;
+ psli->iocbq_lookup[iotag] = iocbq;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ iocbq->iotag = iotag;
+ return iotag;
+ }
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ return 0;
+ }
+ if (psli->iocbq_lookup)
+@@ -322,13 +371,13 @@
+ psli->iocbq_lookup_len = new_len;
+ psli->last_iotag = iotag;
+ psli->iocbq_lookup[iotag] = iocbq;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ iocbq->iotag = iotag;
+ kfree(old_arr);
+ return iotag;
+ }
+ } else
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "%d:0318 Failed to allocate IOTAG.last IOTAG is %d\n",
+@@ -349,7 +398,7 @@
+ /*
+ * Issue iocb command to adapter
+ */
+- lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, sizeof (IOCB_t));
++ lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, phba->iocb_cmd_size);
+ wmb();
+ pring->stats.iocb_cmd++;
+
+@@ -361,20 +410,18 @@
+ if (nextiocb->iocb_cmpl)
+ lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb);
+ else
+- lpfc_sli_release_iocbq(phba, nextiocb);
++ __lpfc_sli_release_iocbq(phba, nextiocb);
+
+ /*
+ * Let the HBA know what IOCB slot will be the next one the
+ * driver will put a command into.
+ */
+ pring->cmdidx = pring->next_cmdidx;
+- writel(pring->cmdidx, phba->MBslimaddr
+- + (SLIMOFF + (pring->ringno * 2)) * 4);
++ writel(pring->cmdidx, &phba->host_gp[pring->ringno].cmdPutInx);
+ }
+
+ static void
+-lpfc_sli_update_full_ring(struct lpfc_hba * phba,
+- struct lpfc_sli_ring *pring)
++lpfc_sli_update_full_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ int ringno = pring->ringno;
+
+@@ -393,8 +440,7 @@
+ }
+
+ static void
+-lpfc_sli_update_ring(struct lpfc_hba * phba,
+- struct lpfc_sli_ring *pring)
++lpfc_sli_update_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ int ringno = pring->ringno;
+
+@@ -407,7 +453,7 @@
+ }
+
+ static void
+-lpfc_sli_resume_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ IOCB_t *iocb;
+ struct lpfc_iocbq *nextiocb;
+@@ -420,7 +466,7 @@
+ * (d) IOCB processing is not blocked by the outstanding mbox command.
+ */
+ if (pring->txq_cnt &&
+- (phba->hba_state > LPFC_LINK_DOWN) &&
++ lpfc_is_link_up(phba) &&
+ (pring->ringno != phba->sli.fcp_ring ||
+ phba->sli.sli_flag & LPFC_PROCESS_LA) &&
+ !(pring->flag & LPFC_STOP_IOCB_MBX)) {
+@@ -440,11 +486,15 @@
+
+ /* lpfc_sli_turn_on_ring is only called by lpfc_sli_handle_mb_event below */
+ static void
+-lpfc_sli_turn_on_ring(struct lpfc_hba * phba, int ringno)
++lpfc_sli_turn_on_ring(struct lpfc_hba *phba, int ringno)
+ {
+- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[ringno];
++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[ringno] :
++ &phba->slim2p->mbx.us.s2.port[ringno];
++ unsigned long iflags;
+
+ /* If the ring is active, flag it */
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ if (phba->sli.ring[ringno].cmdringaddr) {
+ if (phba->sli.ring[ringno].flag & LPFC_STOP_IOCB_MBX) {
+ phba->sli.ring[ringno].flag &= ~LPFC_STOP_IOCB_MBX;
+@@ -453,11 +503,176 @@
+ */
+ phba->sli.ring[ringno].local_getidx
+ = le32_to_cpu(pgp->cmdGetInx);
+- spin_lock_irq(phba->host->host_lock);
+ lpfc_sli_resume_iocb(phba, &phba->sli.ring[ringno]);
+- spin_unlock_irq(phba->host->host_lock);
+ }
+ }
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++}
++
++struct lpfc_hbq_entry *
++lpfc_sli_next_hbq_slot(struct lpfc_hba *phba, uint32_t hbqno)
++{
++ struct hbq_s *hbqp = &phba->hbqs[hbqno];
++
++ if (hbqp->next_hbqPutIdx == hbqp->hbqPutIdx &&
++ ++hbqp->next_hbqPutIdx >= hbqp->entry_count)
++ hbqp->next_hbqPutIdx = 0;
++
++ if (unlikely(hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)) {
++ uint32_t raw_index = phba->hbq_get[hbqno];
++ uint32_t getidx = le32_to_cpu(raw_index);
++
++ hbqp->local_hbqGetIdx = getidx;
++
++ if (unlikely(hbqp->local_hbqGetIdx >= hbqp->entry_count)) {
++ lpfc_printf_log(phba, KERN_ERR,
++ LOG_SLI | LOG_VPORT,
++ "%d:1802 HBQ %d: local_hbqGetIdx "
++ "%u is > than hbqp->entry_count %u\n",
++ phba->brd_no, hbqno,
++ hbqp->local_hbqGetIdx,
++ hbqp->entry_count);
++
++ phba->link_state = LPFC_HBA_ERROR;
++ return NULL;
++ }
++
++ if (hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)
++ return NULL;
++ }
++
++ return (struct lpfc_hbq_entry *) phba->hbqslimp.virt + hbqp->hbqPutIdx;
++}
++
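lpfc_sli_next_hbq_slot above is a producer-side ring check: advance the put index modulo entry_count, re-read the hardware's get index, and refuse the slot when the two would collide. The same test in isolation, all names illustrative:

    /* Returns nonzero if one more entry can be produced. */
    static int ring_can_put(unsigned int put, unsigned int get, unsigned int count)
    {
            unsigned int next = (put + 1 >= count) ? 0 : put + 1;

            return next != get;     /* full when the next put would land on get */
    }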
++void
++lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba)
++{
++ struct lpfc_dmabuf *dmabuf, *next_dmabuf;
++ struct hbq_dmabuf *hbq_buf;
++
++ /* Return all memory used by all HBQs */
++ list_for_each_entry_safe(dmabuf, next_dmabuf,
++ &phba->hbq_buffer_list, list) {
++ hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf);
++ list_del(&hbq_buf->dbuf.list);
++ lpfc_hbq_free(phba, hbq_buf->dbuf.virt, hbq_buf->dbuf.phys);
++ kfree(hbq_buf);
++ }
++}
++
++static void
++lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno,
++ struct hbq_dmabuf *hbq_buf)
++{
++ struct lpfc_hbq_entry *hbqe;
++ dma_addr_t physaddr = hbq_buf->dbuf.phys;
++
++ /* Get next HBQ entry slot to use */
++ hbqe = lpfc_sli_next_hbq_slot(phba, hbqno);
++ if (hbqe) {
++ struct hbq_s *hbqp = &phba->hbqs[hbqno];
++
++ hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
++ hbqe->bde.addrLow = le32_to_cpu(putPaddrLow(physaddr));
++ hbqe->bde.tus.f.bdeSize = FCELSSIZE;
++ hbqe->bde.tus.f.bdeFlags = 0;
++ hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w);
++ hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag);
++ /* Sync SLIM */
++ hbqp->hbqPutIdx = hbqp->next_hbqPutIdx;
++ writel(hbqp->hbqPutIdx, phba->hbq_put + hbqno);
++ /* flush */
++ readl(phba->hbq_put + hbqno);
++ list_add_tail(&hbq_buf->dbuf.list, &phba->hbq_buffer_list);
++ }
++}
++
++static struct lpfc_hbq_init lpfc_els_hbq = {
++ .rn = 1,
++ .entry_count = 200,
++ .mask_count = 0,
++ .profile = 0,
++ .ring_mask = 1 << LPFC_ELS_RING,
++ .buffer_count = 0,
++ .init_count = 20,
++ .add_count = 5,
++};
++
++static struct lpfc_hbq_init *lpfc_hbq_defs[] = {
++ &lpfc_els_hbq,
++};
++
++int
++lpfc_sli_hbqbuf_fill_hbqs(struct lpfc_hba *phba, uint32_t hbqno, uint32_t count)
++{
++ uint32_t i, start, end;
++ struct hbq_dmabuf *hbq_buffer;
++
++ start = lpfc_hbq_defs[hbqno]->buffer_count;
++ end = count + lpfc_hbq_defs[hbqno]->buffer_count;
++ if (end > lpfc_hbq_defs[hbqno]->entry_count) {
++ end = lpfc_hbq_defs[hbqno]->entry_count;
++ }
++
++ /* Populate HBQ entries */
++ for (i = start; i < end; i++) {
++ hbq_buffer = kmalloc(sizeof(struct hbq_dmabuf),
++ GFP_KERNEL);
++ if (!hbq_buffer)
++ return 1;
++ hbq_buffer->dbuf.virt = lpfc_hbq_alloc(phba, MEM_PRI,
++ &hbq_buffer->dbuf.phys);
++ if (hbq_buffer->dbuf.virt == NULL)
++ return 1;
++ hbq_buffer->tag = (i | (hbqno << 16));
++ lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer);
++ lpfc_hbq_defs[hbqno]->buffer_count++;
++ }
++ return 0;
++}
++
++int
++lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *phba, uint32_t qno)
++{
++ return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno,
++ lpfc_hbq_defs[qno]->add_count));
++}
++
++int
++lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *phba, uint32_t qno)
++{
++ return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno,
++ lpfc_hbq_defs[qno]->init_count));
++}
++
++struct hbq_dmabuf *
++lpfc_sli_hbqbuf_find(struct lpfc_hba *phba, uint32_t tag)
++{
++ struct lpfc_dmabuf *d_buf;
++ struct hbq_dmabuf *hbq_buf;
++
++ list_for_each_entry(d_buf, &phba->hbq_buffer_list, list) {
++ hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf);
++ if ((hbq_buf->tag & 0xffff) == tag) {
++ return hbq_buf;
++ }
++ }
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT,
++ "%d:1803 Bad hbq tag. Data: x%x x%x\n",
++ phba->brd_no, tag,
++ lpfc_hbq_defs[tag >> 16]->buffer_count);
++ return NULL;
++}
++
++void
++lpfc_sli_free_hbq(struct lpfc_hba *phba, struct hbq_dmabuf *sp)
++{
++ uint32_t hbqno;
++
++ if (sp) {
++ hbqno = sp->tag >> 16;
++ lpfc_sli_hbq_to_firmware(phba, hbqno, sp);
++ }
+ }
+
+ static int
+@@ -511,32 +726,38 @@
+ case MBX_FLASH_WR_ULA:
+ case MBX_SET_DEBUG:
+ case MBX_LOAD_EXP_ROM:
++ case MBX_REG_VPI:
++ case MBX_UNREG_VPI:
++ case MBX_HEARTBEAT:
+ ret = mbxCommand;
+ break;
+ default:
+ ret = MBX_SHUTDOWN;
+ break;
+ }
+- return (ret);
++ return ret;
+ }
+ static void
+-lpfc_sli_wake_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
++lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
+ {
+ wait_queue_head_t *pdone_q;
++ unsigned long drvr_flag;
+
+ /*
+ * If pdone_q is empty, the driver thread gave up waiting and
+ * continued running.
+ */
+ pmboxq->mbox_flag |= LPFC_MBX_WAKE;
++ spin_lock_irqsave(&phba->hbalock, drvr_flag);
+ pdone_q = (wait_queue_head_t *) pmboxq->context1;
+ if (pdone_q)
+ wake_up_interruptible(pdone_q);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ return;
+ }
+
+ void
+-lpfc_sli_def_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+ struct lpfc_dmabuf *mp;
+ uint16_t rpi;
+@@ -553,78 +774,63 @@
+ * If a REG_LOGIN succeeded after the node is destroyed or the node
+ * is in re-discovery, the driver needs to clean up the RPI.
+ */
+- if (!(phba->fc_flag & FC_UNLOADING) &&
+- (pmb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+- (!pmb->mb.mbxStatus)) {
++ if (!(phba->pport->load_flag & FC_UNLOADING) &&
++ pmb->mb.mbxCommand == MBX_REG_LOGIN64 &&
++ !pmb->mb.mbxStatus) {
+
+ rpi = pmb->mb.un.varWords[0];
+- lpfc_unreg_login(phba, rpi, pmb);
+- pmb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++ lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb);
++ pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ if (rc != MBX_NOT_FINISHED)
+ return;
+ }
+
+- mempool_free( pmb, phba->mbox_mem_pool);
++ mempool_free(pmb, phba->mbox_mem_pool);
+ return;
+ }
+
+ int
+-lpfc_sli_handle_mb_event(struct lpfc_hba * phba)
++lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
+ {
+- MAILBOX_t *mbox;
+ MAILBOX_t *pmbox;
+ LPFC_MBOXQ_t *pmb;
+- struct lpfc_sli *psli;
+- int i, rc;
+- uint32_t process_next;
+-
+- psli = &phba->sli;
+- /* We should only get here if we are in SLI2 mode */
+- if (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE)) {
+- return (1);
+- }
++ int rc;
++ LIST_HEAD(cmplq);
+
+ phba->sli.slistat.mbox_event++;
+
++ /* Get all completed mailbox buffers into the cmplq */
++ spin_lock_irq(&phba->hbalock);
++ list_splice_init(&phba->sli.mboxq_cmpl, &cmplq);
++ spin_unlock_irq(&phba->hbalock);
++
+ /* Get a Mailbox buffer to setup mailbox commands for callback */
+- if ((pmb = phba->sli.mbox_active)) {
+- pmbox = &pmb->mb;
+- mbox = &phba->slim2p->mbx;
++ do {
++ list_remove_head(&cmplq, pmb, LPFC_MBOXQ_t, list);
++ if (pmb == NULL)
++ break;
+
+- /* First check out the status word */
+- lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof (uint32_t));
++ pmbox = &pmb->mb;
+
+- /* Sanity check to ensure the host owns the mailbox */
+- if (pmbox->mbxOwner != OWN_HOST) {
+- /* Lets try for a while */
+- for (i = 0; i < 10240; i++) {
+- /* First copy command data */
+- lpfc_sli_pcimem_bcopy(mbox, pmbox,
+- sizeof (uint32_t));
+- if (pmbox->mbxOwner == OWN_HOST)
+- goto mbout;
++ if (pmbox->mbxCommand != MBX_HEARTBEAT) {
++ if (pmb->vport) {
++ lpfc_debugfs_disc_trc(pmb->vport,
++ LPFC_DISC_TRC_MBOX_VPORT,
++ "MBOX cmpl vport: cmd:x%x mb:x%x x%x",
++ (uint32_t)pmbox->mbxCommand,
++ pmbox->un.varWords[0],
++ pmbox->un.varWords[1]);
++ } else {
++ lpfc_debugfs_disc_trc(phba->pport,
++ LPFC_DISC_TRC_MBOX,
++ "MBOX cmpl: cmd:x%x mb:x%x x%x",
++ (uint32_t)pmbox->mbxCommand,
++ pmbox->un.varWords[0],
++ pmbox->un.varWords[1]);
+ }
+- /* Stray Mailbox Interrupt, mbxCommand <cmd> mbxStatus
+- <status> */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_MBOX | LOG_SLI,
+- "%d:0304 Stray Mailbox Interrupt "
+- "mbxCommand x%x mbxStatus x%x\n",
+- phba->brd_no,
+- pmbox->mbxCommand,
+- pmbox->mbxStatus);
+-
+- spin_lock_irq(phba->host->host_lock);
+- phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
+- return (1);
+ }
+-
+- mbout:
+- del_timer_sync(&phba->sli.mbox_tmo);
+- phba->work_hba_events &= ~WORKER_MBOX_TMO;
+
+ /*
+ * It is a fatal error if an unknown mbox command completes.
+@@ -633,51 +839,50 @@
+ MBX_SHUTDOWN) {
+
+ /* Unknown mailbox command compl */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_MBOX | LOG_SLI,
+- "%d:0323 Unknown Mailbox command %x Cmpl\n",
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
++ "%d (%d):0323 Unknown Mailbox command "
++ "%x Cmpl\n",
+ phba->brd_no,
++ pmb->vport ? pmb->vport->vpi : 0,
+ pmbox->mbxCommand);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ phba->work_hs = HS_FFER3;
+ lpfc_handle_eratt(phba);
+- return (0);
++ continue;
+ }
+
+- phba->sli.mbox_active = NULL;
+ if (pmbox->mbxStatus) {
+ phba->sli.slistat.mbox_stat_err++;
+ if (pmbox->mbxStatus == MBXERR_NO_RESOURCES) {
+ /* Mbox cmd cmpl error - RETRYing */
+- lpfc_printf_log(phba,
+- KERN_INFO,
++ lpfc_printf_log(phba, KERN_INFO,
+ LOG_MBOX | LOG_SLI,
+- "%d:0305 Mbox cmd cmpl error - "
+- "RETRYing Data: x%x x%x x%x x%x\n",
++ "%d (%d):0305 Mbox cmd cmpl "
++ "error - RETRYing Data: x%x "
++ "x%x x%x x%x\n",
+ phba->brd_no,
++ pmb->vport ? pmb->vport->vpi :0,
+ pmbox->mbxCommand,
+ pmbox->mbxStatus,
+ pmbox->un.varWords[0],
+- phba->hba_state);
++ pmb->vport->port_state);
+ pmbox->mbxStatus = 0;
+ pmbox->mbxOwner = OWN_HOST;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ if (rc == MBX_SUCCESS)
+- return (0);
++ continue;
+ }
+ }
+
+ /* Mailbox cmd <cmd> Cmpl <cmpl> */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_MBOX | LOG_SLI,
+- "%d:0307 Mailbox cmd x%x Cmpl x%p "
++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++ "%d (%d):0307 Mailbox cmd x%x Cmpl x%p "
+ "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n",
+ phba->brd_no,
++ pmb->vport ? pmb->vport->vpi : 0,
+ pmbox->mbxCommand,
+ pmb->mbox_cmpl,
+ *((uint32_t *) pmbox),
+@@ -690,39 +895,35 @@
+ pmbox->un.varWords[6],
+ pmbox->un.varWords[7]);
+
+- if (pmb->mbox_cmpl) {
+- lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE);
++ if (pmb->mbox_cmpl)
+ pmb->mbox_cmpl(phba,pmb);
+- }
+- }
+-
++ } while (1);
++ return 0;
++}
+
+- do {
+- process_next = 0; /* by default don't loop */
+- spin_lock_irq(phba->host->host_lock);
+- phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++static struct lpfc_dmabuf *
++lpfc_sli_replace_hbqbuff(struct lpfc_hba *phba, uint32_t tag)
++{
++ struct hbq_dmabuf *hbq_entry, *new_hbq_entry;
+
+- /* Process next mailbox command if there is one */
+- if ((pmb = lpfc_mbox_get(phba))) {
+- spin_unlock_irq(phba->host->host_lock);
+- rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+- if (rc == MBX_NOT_FINISHED) {
+- pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+- pmb->mbox_cmpl(phba,pmb);
+- process_next = 1;
+- continue; /* loop back */
+- }
+- } else {
+- spin_unlock_irq(phba->host->host_lock);
+- /* Turn on IOCB processing */
+- for (i = 0; i < phba->sli.num_rings; i++)
+- lpfc_sli_turn_on_ring(phba, i);
++ hbq_entry = lpfc_sli_hbqbuf_find(phba, tag);
++ if (hbq_entry == NULL)
++ return NULL;
++ list_del(&hbq_entry->dbuf.list);
++ new_hbq_entry = kmalloc(sizeof(struct hbq_dmabuf), GFP_ATOMIC);
++ if (new_hbq_entry == NULL)
++ return &hbq_entry->dbuf;
++ new_hbq_entry->dbuf = hbq_entry->dbuf;
++ new_hbq_entry->tag = -1;
++ hbq_entry->dbuf.virt = lpfc_hbq_alloc(phba, 0, &hbq_entry->dbuf.phys);
++ if (hbq_entry->dbuf.virt == NULL) {
++ kfree(new_hbq_entry);
++ return &hbq_entry->dbuf;
+ }
+-
+- } while (process_next);
+-
+- return (0);
++ lpfc_sli_free_hbq(phba, hbq_entry);
++ return &new_hbq_entry->dbuf;
+ }
++
+ static int
+ lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ struct lpfc_iocbq *saveq)
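The reworked lpfc_sli_handle_mb_event above drops the stray-interrupt polling loop for a drain pattern: completed mailboxes are spliced from the shared mboxq_cmpl list to a private list under hbalock, then processed with no lock held. The generic shape, assuming only <linux/list.h> and <linux/spinlock.h>; struct work and process() are illustrative:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct work { struct list_head list; };
    static void process(struct work *w);    /* illustrative consumer */

    static void drain(spinlock_t *lock, struct list_head *shared)
    {
            struct work *w;
            LIST_HEAD(local);

            spin_lock_irq(lock);
            list_splice_init(shared, &local);       /* O(1) handoff */
            spin_unlock_irq(lock);

            while (!list_empty(&local)) {
                    w = list_first_entry(&local, struct work, list);
                    list_del(&w->list);
                    process(w);                     /* runs unlocked */
            }
    }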
+@@ -735,7 +936,9 @@
+ match = 0;
+ irsp = &(saveq->iocb);
+ if ((irsp->ulpCommand == CMD_RCV_ELS_REQ64_CX)
+- || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)) {
++ || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)
++ || (irsp->ulpCommand == CMD_IOCB_RCV_ELS64_CX)
++ || (irsp->ulpCommand == CMD_IOCB_RCV_CONT64_CX)) {
+ Rctl = FC_ELS_REQ;
+ Type = FC_ELS_DATA;
+ } else {
+@@ -747,13 +950,24 @@
+
+ /* Firmware Workaround */
+ if ((Rctl == 0) && (pring->ringno == LPFC_ELS_RING) &&
+- (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX)) {
++ (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX ||
++ irsp->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
+ Rctl = FC_ELS_REQ;
+ Type = FC_ELS_DATA;
+ w5p->hcsw.Rctl = Rctl;
+ w5p->hcsw.Type = Type;
+ }
+ }
++
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++ if (irsp->ulpBdeCount != 0)
++ saveq->context2 = lpfc_sli_replace_hbqbuff(phba,
++ irsp->un.ulpWord[3]);
++ if (irsp->ulpBdeCount == 2)
++ saveq->context3 = lpfc_sli_replace_hbqbuff(phba,
++ irsp->un.ulpWord[15]);
++ }
++
+ /* unSolicited Responses */
+ if (pring->prt[0].profile) {
+ if (pring->prt[0].lpfc_sli_rcv_unsol_event)
+@@ -781,23 +995,21 @@
+ /* Unexpected Rctl / Type received */
+ /* Ring <ringno> handler: unexpected
+ Rctl <Rctl> Type <Type> received */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_SLI,
++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+ "%d:0313 Ring %d handler: unexpected Rctl x%x "
+- "Type x%x received \n",
++ "Type x%x received\n",
+ phba->brd_no,
+ pring->ringno,
+ Rctl,
+ Type);
+ }
+- return(1);
++ return 1;
+ }
+
+ static struct lpfc_iocbq *
+-lpfc_sli_iocbq_lookup(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring,
+- struct lpfc_iocbq * prspiocb)
++lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
++ struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *prspiocb)
+ {
+ struct lpfc_iocbq *cmd_iocb = NULL;
+ uint16_t iotag;
+@@ -806,7 +1018,7 @@
+
+ if (iotag != 0 && iotag <= phba->sli.last_iotag) {
+ cmd_iocb = phba->sli.iocbq_lookup[iotag];
+- list_del(&cmd_iocb->list);
++ list_del_init(&cmd_iocb->list);
+ pring->txcmplq_cnt--;
+ return cmd_iocb;
+ }
+@@ -821,16 +1033,18 @@
+ }
+
+ static int
+-lpfc_sli_process_sol_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ struct lpfc_iocbq *saveq)
+ {
+- struct lpfc_iocbq * cmdiocbp;
++ struct lpfc_iocbq *cmdiocbp;
+ int rc = 1;
+ unsigned long iflag;
+
+ /* Based on the iotag field, get the cmd IOCB from the txcmplq */
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
++
+ if (cmdiocbp) {
+ if (cmdiocbp->iocb_cmpl) {
+ /*
+@@ -846,17 +1060,8 @@
+ saveq->iocb.un.ulpWord[4] =
+ IOERR_SLI_ABORTED;
+ }
+- spin_unlock_irqrestore(phba->host->host_lock,
+- iflag);
+- (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+ }
+- else {
+- spin_unlock_irqrestore(phba->host->host_lock,
+- iflag);
+ (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- }
+ } else
+ lpfc_sli_release_iocbq(phba, cmdiocbp);
+ } else {
+@@ -870,12 +1075,12 @@
+ * Ring <ringno> handler: unexpected completion IoTag
+ * <IoTag>
+ */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_SLI,
+- "%d:0322 Ring %d handler: unexpected "
+- "completion IoTag x%x Data: x%x x%x x%x x%x\n",
++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
++ "%d (%d):0322 Ring %d handler: "
++ "unexpected completion IoTag x%x "
++ "Data: x%x x%x x%x x%x\n",
+ phba->brd_no,
++ cmdiocbp->vport->vpi,
+ pring->ringno,
+ saveq->iocb.ulpIoTag,
+ saveq->iocb.ulpStatus,
+@@ -885,14 +1090,15 @@
+ }
+ }
+
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ return rc;
+ }
+
+-static void lpfc_sli_rsp_pointers_error(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring)
++static void
++lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++ &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ /*
+ * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
+ * rsp ring <portRspMax>
+@@ -904,7 +1110,7 @@
+ le32_to_cpu(pgp->rspPutInx),
+ pring->numRiocb);
+
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+
+ /*
+ * All error attention handlers are posted to
+@@ -912,16 +1118,18 @@
+ */
+ phba->work_ha |= HA_ERATT;
+ phba->work_hs = HS_FFER3;
++
++ /* hbalock should already be held */
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
+
+ return;
+ }
+
+-void lpfc_sli_poll_fcp_ring(struct lpfc_hba * phba)
++void lpfc_sli_poll_fcp_ring(struct lpfc_hba *phba)
+ {
+- struct lpfc_sli * psli = &phba->sli;
+- struct lpfc_sli_ring * pring = &psli->ring[LPFC_FCP_RING];
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_FCP_RING];
+ IOCB_t *irsp = NULL;
+ IOCB_t *entry = NULL;
+ struct lpfc_iocbq *cmdiocbq = NULL;
+@@ -931,13 +1139,15 @@
+ uint32_t portRspPut, portRspMax;
+ int type;
+ uint32_t rsp_cmpl = 0;
+- void __iomem *to_slim;
+ uint32_t ha_copy;
++ unsigned long iflags;
+
+ pring->stats.iocb_event++;
+
+- /* The driver assumes SLI-2 mode */
+- pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++ pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++ &phba->slim2p->mbx.us.s2.port[pring->ringno];
++
+
+ /*
+ * The next available response entry should never exceed the maximum
+@@ -952,15 +1162,13 @@
+
+ rmb();
+ while (pring->rspidx != portRspPut) {
+-
+- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
+-
++ entry = lpfc_resp_iocb(phba, pring);
+ if (++pring->rspidx >= portRspMax)
+ pring->rspidx = 0;
+
+ lpfc_sli_pcimem_bcopy((uint32_t *) entry,
+ (uint32_t *) &rspiocbq.iocb,
+- sizeof (IOCB_t));
++ phba->iocb_rsp_size);
+ irsp = &rspiocbq.iocb;
+ type = lpfc_sli_iocb_cmd_type(irsp->ulpCommand & CMD_IOCB_MASK);
+ pring->stats.iocb_rsp++;
+@@ -998,8 +1206,10 @@
+ break;
+ }
+
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring,
+ &rspiocbq);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ if ((cmdiocbq) && (cmdiocbq->iocb_cmpl)) {
+ (cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ &rspiocbq);
+@@ -1033,9 +1243,7 @@
+ * been updated, sync the pgp->rspPutInx and fetch the new port
+ * response put pointer.
+ */
+- to_slim = phba->MBslimaddr +
+- (SLIMOFF + (pring->ringno * 2) + 1) * 4;
+- writeb(pring->rspidx, to_slim);
++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+
+ if (pring->rspidx == portRspPut)
+ portRspPut = le32_to_cpu(pgp->rspPutInx);
+@@ -1045,13 +1253,16 @@
+ ha_copy >>= (LPFC_FCP_RING * 4);
+
+ if ((rsp_cmpl > 0) && (ha_copy & HA_R0RE_REQ)) {
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ pring->stats.iocb_rsp_full++;
+ status = ((CA_R0ATT | CA_R0RE_RSP) << (LPFC_FCP_RING * 4));
+ writel(status, phba->CAregaddr);
+ readl(phba->CAregaddr);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ }
+ if ((ha_copy & HA_R0CE_RSP) &&
+ (pring->flag & LPFC_CALL_RING_AVAILABLE)) {
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ pring->flag &= ~LPFC_CALL_RING_AVAILABLE;
+ pring->stats.iocb_cmd_empty++;
+
+@@ -1062,6 +1273,7 @@
+ if ((pring->lpfc_sli_cmd_available))
+ (pring->lpfc_sli_cmd_available) (phba, pring);
+
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ }
+
+ return;
+@@ -1072,10 +1284,12 @@
+ * to check it explicitly.
+ */
+ static int
+-lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring, uint32_t mask)
++lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
++ struct lpfc_sli_ring *pring, uint32_t mask)
+ {
+- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++ &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ IOCB_t *irsp = NULL;
+ IOCB_t *entry = NULL;
+ struct lpfc_iocbq *cmdiocbq = NULL;
+@@ -1086,9 +1300,8 @@
+ lpfc_iocb_type type;
+ unsigned long iflag;
+ uint32_t rsp_cmpl = 0;
+- void __iomem *to_slim;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ pring->stats.iocb_event++;
+
+ /*
+@@ -1099,7 +1312,7 @@
+ portRspPut = le32_to_cpu(pgp->rspPutInx);
+ if (unlikely(portRspPut >= portRspMax)) {
+ lpfc_sli_rsp_pointers_error(phba, pring);
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ return 1;
+ }
+
+@@ -1110,14 +1323,15 @@
+ * structure. The copy involves a byte-swap since the
+ * network byte order and pci byte orders are different.
+ */
+- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
++ entry = lpfc_resp_iocb(phba, pring);
++ phba->last_completion_time = jiffies;
+
+ if (++pring->rspidx >= portRspMax)
+ pring->rspidx = 0;
+
+ lpfc_sli_pcimem_bcopy((uint32_t *) entry,
+ (uint32_t *) &rspiocbq.iocb,
+- sizeof (IOCB_t));
++ phba->iocb_rsp_size);
+ INIT_LIST_HEAD(&(rspiocbq.list));
+ irsp = &rspiocbq.iocb;
+
+@@ -1126,14 +1340,28 @@
+ rsp_cmpl++;
+
+ if (unlikely(irsp->ulpStatus)) {
++ /*
++ * If resource errors are reported from the HBA, reduce the
++ * queue depth of the SCSI devices.
++ */
++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
++ lpfc_adjust_queue_depth(phba);
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ }
++
+ /* Rsp ring <ringno> error: IOCB */
+ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+ "%d:0336 Rsp Ring %d error: IOCB Data: "
+ "x%x x%x x%x x%x x%x x%x x%x x%x\n",
+ phba->brd_no, pring->ringno,
+- irsp->un.ulpWord[0], irsp->un.ulpWord[1],
+- irsp->un.ulpWord[2], irsp->un.ulpWord[3],
+- irsp->un.ulpWord[4], irsp->un.ulpWord[5],
++ irsp->un.ulpWord[0],
++ irsp->un.ulpWord[1],
++ irsp->un.ulpWord[2],
++ irsp->un.ulpWord[3],
++ irsp->un.ulpWord[4],
++ irsp->un.ulpWord[5],
+ *(((uint32_t *) irsp) + 6),
+ *(((uint32_t *) irsp) + 7));
+ }
+@@ -1149,7 +1377,8 @@
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "%d:0333 IOCB cmd 0x%x"
+ " processed. Skipping"
+- " completion\n", phba->brd_no,
++ " completion\n",
++ phba->brd_no,
+ irsp->ulpCommand);
+ break;
+ }
+@@ -1161,19 +1390,19 @@
+ (cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ &rspiocbq);
+ } else {
+- spin_unlock_irqrestore(
+- phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock,
++ iflag);
+ (cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ &rspiocbq);
+- spin_lock_irqsave(phba->host->host_lock,
++ spin_lock_irqsave(&phba->hbalock,
+ iflag);
+ }
+ }
+ break;
+ case LPFC_UNSOL_IOCB:
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ lpfc_sli_process_unsol_iocb(phba, pring, &rspiocbq);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ break;
+ default:
+ if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
+@@ -1188,8 +1417,10 @@
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "%d:0334 Unknown IOCB command "
+ "Data: x%x, x%x x%x x%x x%x\n",
+- phba->brd_no, type, irsp->ulpCommand,
+- irsp->ulpStatus, irsp->ulpIoTag,
++ phba->brd_no, type,
++ irsp->ulpCommand,
++ irsp->ulpStatus,
++ irsp->ulpIoTag,
+ irsp->ulpContext);
+ }
+ break;
+@@ -1201,9 +1432,7 @@
+ * been updated, sync the pgp->rspPutInx and fetch the new port
+ * response put pointer.
+ */
+- to_slim = phba->MBslimaddr +
+- (SLIMOFF + (pring->ringno * 2) + 1) * 4;
+- writel(pring->rspidx, to_slim);
++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+
+ if (pring->rspidx == portRspPut)
+ portRspPut = le32_to_cpu(pgp->rspPutInx);
+@@ -1228,31 +1457,31 @@
+
+ }
+
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ return rc;
+ }
+
+-
+ int
+-lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring, uint32_t mask)
++lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
++ struct lpfc_sli_ring *pring, uint32_t mask)
+ {
++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++ &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ IOCB_t *entry;
+ IOCB_t *irsp = NULL;
+ struct lpfc_iocbq *rspiocbp = NULL;
+ struct lpfc_iocbq *next_iocb;
+ struct lpfc_iocbq *cmdiocbp;
+ struct lpfc_iocbq *saveq;
+- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ uint8_t iocb_cmd_type;
+ lpfc_iocb_type type;
+ uint32_t status, free_saveq;
+ uint32_t portRspPut, portRspMax;
+ int rc = 1;
+ unsigned long iflag;
+- void __iomem *to_slim;
+
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ pring->stats.iocb_event++;
+
+ /*
+@@ -1266,16 +1495,14 @@
+ * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
+ * rsp ring <portRspMax>
+ */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_SLI,
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "%d:0303 Ring %d handler: portRspPut %d "
+ "is bigger then rsp ring %d\n",
+- phba->brd_no,
+- pring->ringno, portRspPut, portRspMax);
++ phba->brd_no, pring->ringno, portRspPut,
++ portRspMax);
+
+- phba->hba_state = LPFC_HBA_ERROR;
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ phba->link_state = LPFC_HBA_ERROR;
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+
+ phba->work_hs = HS_FFER3;
+ lpfc_handle_eratt(phba);
+@@ -1298,23 +1525,24 @@
+ * the ulpLe field is set, the entire Command has been
+ * received.
+ */
+- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
+- rspiocbp = lpfc_sli_get_iocbq(phba);
++ entry = lpfc_resp_iocb(phba, pring);
++
++ phba->last_completion_time = jiffies;
++ rspiocbp = __lpfc_sli_get_iocbq(phba);
+ if (rspiocbp == NULL) {
+ printk(KERN_ERR "%s: out of buffers! Failing "
+ "completion.\n", __FUNCTION__);
+ break;
+ }
+
+- lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, sizeof (IOCB_t));
++ lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb,
++ phba->iocb_rsp_size);
+ irsp = &rspiocbp->iocb;
+
+ if (++pring->rspidx >= portRspMax)
+ pring->rspidx = 0;
+
+- to_slim = phba->MBslimaddr + (SLIMOFF + (pring->ringno * 2)
+- + 1) * 4;
+- writel(pring->rspidx, to_slim);
++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+
+ if (list_empty(&(pring->iocb_continueq))) {
+ list_add(&rspiocbp->list, &(pring->iocb_continueq));
+@@ -1338,13 +1566,26 @@
+
+ pring->stats.iocb_rsp++;
+
++ /*
++ * If resource errors reported from HBA, reduce
++ * queuedepths of the SCSI device.
++ */
++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++ (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
++ lpfc_adjust_queue_depth(phba);
++ spin_lock_irqsave(&phba->hbalock, iflag);
++ }
++
+ if (irsp->ulpStatus) {
+ /* Rsp ring <ringno> error: IOCB */
+- lpfc_printf_log(phba,
+- KERN_WARNING,
+- LOG_SLI,
+- "%d:0328 Rsp Ring %d error: IOCB Data: "
+- "x%x x%x x%x x%x x%x x%x x%x x%x\n",
++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
++ "%d:0328 Rsp Ring %d error: "
++ "IOCB Data: "
++ "x%x x%x x%x x%x "
++ "x%x x%x x%x x%x "
++ "x%x x%x x%x x%x "
++ "x%x x%x x%x x%x\n",
+ phba->brd_no,
+ pring->ringno,
+ irsp->un.ulpWord[0],
+@@ -1354,7 +1595,15 @@
+ irsp->un.ulpWord[4],
+ irsp->un.ulpWord[5],
+ *(((uint32_t *) irsp) + 6),
+- *(((uint32_t *) irsp) + 7));
++ *(((uint32_t *) irsp) + 7),
++ *(((uint32_t *) irsp) + 8),
++ *(((uint32_t *) irsp) + 9),
++ *(((uint32_t *) irsp) + 10),
++ *(((uint32_t *) irsp) + 11),
++ *(((uint32_t *) irsp) + 12),
++ *(((uint32_t *) irsp) + 13),
++ *(((uint32_t *) irsp) + 14),
++ *(((uint32_t *) irsp) + 15));
+ }
+
+ /*
+@@ -1366,17 +1615,17 @@
+ iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
+ type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
+ if (type == LPFC_SOL_IOCB) {
+- spin_unlock_irqrestore(phba->host->host_lock,
++ spin_unlock_irqrestore(&phba->hbalock,
+ iflag);
+ rc = lpfc_sli_process_sol_iocb(phba, pring,
+ saveq);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ } else if (type == LPFC_UNSOL_IOCB) {
+- spin_unlock_irqrestore(phba->host->host_lock,
++ spin_unlock_irqrestore(&phba->hbalock,
+ iflag);
+ rc = lpfc_sli_process_unsol_iocb(phba, pring,
+ saveq);
+- spin_lock_irqsave(phba->host->host_lock, iflag);
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ } else if (type == LPFC_ABORT_IOCB) {
+ if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) &&
+ ((cmdiocbp =
+@@ -1386,15 +1635,15 @@
+ routine */
+ if (cmdiocbp->iocb_cmpl) {
+ spin_unlock_irqrestore(
+- phba->host->host_lock,
++ &phba->hbalock,
+ iflag);
+ (cmdiocbp->iocb_cmpl) (phba,
+ cmdiocbp, saveq);
+ spin_lock_irqsave(
+- phba->host->host_lock,
++ &phba->hbalock,
+ iflag);
+ } else
+- lpfc_sli_release_iocbq(phba,
++ __lpfc_sli_release_iocbq(phba,
+ cmdiocbp);
+ }
+ } else if (type == LPFC_UNKNOWN_IOCB) {
+@@ -1411,11 +1660,10 @@
+ phba->brd_no, adaptermsg);
+ } else {
+ /* Unknown IOCB command */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_SLI,
+- "%d:0335 Unknown IOCB command "
+- "Data: x%x x%x x%x x%x\n",
++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
++ "%d:0335 Unknown IOCB "
++ "command Data: x%x "
++ "x%x x%x x%x\n",
+ phba->brd_no,
+ irsp->ulpCommand,
+ irsp->ulpStatus,
+@@ -1425,18 +1673,15 @@
+ }
+
+ if (free_saveq) {
+- if (!list_empty(&saveq->list)) {
+- list_for_each_entry_safe(rspiocbp,
+- next_iocb,
+- &saveq->list,
+- list) {
++ list_for_each_entry_safe(rspiocbp, next_iocb,
++ &saveq->list, list) {
+ list_del(&rspiocbp->list);
+- lpfc_sli_release_iocbq(phba,
++ __lpfc_sli_release_iocbq(phba,
+ rspiocbp);
+ }
++ __lpfc_sli_release_iocbq(phba, saveq);
+ }
+- lpfc_sli_release_iocbq(phba, saveq);
+- }
++ rspiocbp = NULL;
+ }
+
+ /*
+@@ -1449,7 +1694,7 @@
+ }
+ } /* while (pring->rspidx != portRspPut) */
+
+- if ((rspiocbp != 0) && (mask & HA_R0RE_REQ)) {
++ if ((rspiocbp != NULL) && (mask & HA_R0RE_REQ)) {
+ /* At least one response entry has been freed */
+ pring->stats.iocb_rsp_full++;
+ /* SET RxRE_RSP in Chip Att register */
+@@ -1470,24 +1715,25 @@
+
+ }
+
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ return rc;
+ }
+
+-int
++void
+ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ LIST_HEAD(completions);
+ struct lpfc_iocbq *iocb, *next_iocb;
+ IOCB_t *cmd = NULL;
+- int errcnt;
+
+- errcnt = 0;
++ if (pring->ringno == LPFC_ELS_RING)
++ lpfc_fabric_abort_hba(phba);
+
+ /* Error everything on txq and txcmplq
+ * First do the txq.
+ */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ list_splice_init(&pring->txq, &completions);
+ pring->txq_cnt = 0;
+
+@@ -1495,26 +1741,25 @@
+ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+ lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ while (!list_empty(&completions)) {
+ iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ cmd = &iocb->iocb;
+- list_del(&iocb->list);
++ list_del_init(&iocb->list);
+
+- if (iocb->iocb_cmpl) {
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
+ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ (iocb->iocb_cmpl) (phba, iocb, iocb);
+- } else
+- lpfc_sli_release_iocbq(phba, iocb);
+ }
+-
+- return errcnt;
++ }
+ }
+
+ int
+-lpfc_sli_brdready(struct lpfc_hba * phba, uint32_t mask)
++lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+ {
+ uint32_t status;
+ int i = 0;
+@@ -1541,7 +1786,8 @@
+ msleep(2500);
+
+ if (i == 15) {
+- phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
++ /* Do post */
++ phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ lpfc_sli_brdrestart(phba);
+ }
+ /* Read the HBA Host Status Register */
+@@ -1550,7 +1796,7 @@
+
+ /* Check to see if any errors occurred during init */
+ if ((status & HS_FFERM) || (i >= 20)) {
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ retval = 1;
+ }
+
+@@ -1559,7 +1805,7 @@
+
+ #define BARRIER_TEST_PATTERN (0xdeadbeef)
+
+-void lpfc_reset_barrier(struct lpfc_hba * phba)
++void lpfc_reset_barrier(struct lpfc_hba *phba)
+ {
+ uint32_t __iomem *resp_buf;
+ uint32_t __iomem *mbox_buf;
+@@ -1584,12 +1830,12 @@
+ hc_copy = readl(phba->HCregaddr);
+ writel((hc_copy & ~HC_ERINT_ENA), phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- phba->fc_flag |= FC_IGNORE_ERATT;
++ phba->link_flag |= LS_IGNORE_ERATT;
+
+ if (readl(phba->HAregaddr) & HA_ERATT) {
+ /* Clear Chip error bit */
+ writel(HA_ERATT, phba->HAregaddr);
+- phba->stopped = 1;
++ phba->pport->stopped = 1;
+ }
+
+ mbox = 0;
+@@ -1606,7 +1852,7 @@
+
+ if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) {
+ if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE ||
+- phba->stopped)
++ phba->pport->stopped)
+ goto restore_hc;
+ else
+ goto clear_errat;
+@@ -1623,17 +1869,17 @@
+
+ if (readl(phba->HAregaddr) & HA_ERATT) {
+ writel(HA_ERATT, phba->HAregaddr);
+- phba->stopped = 1;
++ phba->pport->stopped = 1;
+ }
+
+ restore_hc:
+- phba->fc_flag &= ~FC_IGNORE_ERATT;
++ phba->link_flag &= ~LS_IGNORE_ERATT;
+ writel(hc_copy, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+ }
+
+ int
+-lpfc_sli_brdkill(struct lpfc_hba * phba)
++lpfc_sli_brdkill(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli;
+ LPFC_MBOXQ_t *pmb;
+@@ -1645,26 +1891,22 @@
+ psli = &phba->sli;
+
+ /* Kill HBA */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_SLI,
++ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "%d:0329 Kill HBA Data: x%x x%x\n",
+- phba->brd_no,
+- phba->hba_state,
+- psli->sli_flag);
++ phba->brd_no, phba->pport->port_state, psli->sli_flag);
+
+ if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+ GFP_KERNEL)) == 0)
+ return 1;
+
+ /* Disable the error attention */
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ status = readl(phba->HCregaddr);
+ status &= ~HC_ERINT_ENA;
+ writel(status, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- phba->fc_flag |= FC_IGNORE_ERATT;
+- spin_unlock_irq(phba->host->host_lock);
++ phba->link_flag |= LS_IGNORE_ERATT;
++ spin_unlock_irq(&phba->hbalock);
+
+ lpfc_kill_board(phba, pmb);
+ pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+@@ -1673,9 +1915,9 @@
+ if (retval != MBX_SUCCESS) {
+ if (retval != MBX_BUSY)
+ mempool_free(pmb, phba->mbox_mem_pool);
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~FC_IGNORE_ERATT;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
++ phba->link_flag &= ~LS_IGNORE_ERATT;
++ spin_unlock_irq(&phba->hbalock);
+ return 1;
+ }
+
+@@ -1698,22 +1940,22 @@
+ del_timer_sync(&psli->mbox_tmo);
+ if (ha_copy & HA_ERATT) {
+ writel(HA_ERATT, phba->HAregaddr);
+- phba->stopped = 1;
++ phba->pport->stopped = 1;
+ }
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+- phba->fc_flag &= ~FC_IGNORE_ERATT;
+- spin_unlock_irq(phba->host->host_lock);
++ phba->link_flag &= ~LS_IGNORE_ERATT;
++ spin_unlock_irq(&phba->hbalock);
+
+ psli->mbox_active = NULL;
+ lpfc_hba_down_post(phba);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+
+- return (ha_copy & HA_ERATT ? 0 : 1);
++ return ha_copy & HA_ERATT ? 0 : 1;
+ }
+
+ int
+-lpfc_sli_brdreset(struct lpfc_hba * phba)
++lpfc_sli_brdreset(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli;
+ struct lpfc_sli_ring *pring;
+@@ -1725,12 +1967,12 @@
+ /* Reset HBA */
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "%d:0325 Reset HBA Data: x%x x%x\n", phba->brd_no,
+- phba->hba_state, psli->sli_flag);
++ phba->pport->port_state, psli->sli_flag);
+
+ /* perform board reset */
+ phba->fc_eventTag = 0;
+- phba->fc_myDID = 0;
+- phba->fc_prevDID = 0;
++ phba->pport->fc_myDID = 0;
++ phba->pport->fc_prevDID = 0;
+
+ /* Turn off parity checking and serr during the physical reset */
+ pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
+@@ -1760,12 +2002,12 @@
+ pring->missbufcnt = 0;
+ }
+
+- phba->hba_state = LPFC_WARM_START;
++ phba->link_state = LPFC_WARM_START;
+ return 0;
+ }
+
+ int
+-lpfc_sli_brdrestart(struct lpfc_hba * phba)
++lpfc_sli_brdrestart(struct lpfc_hba *phba)
+ {
+ MAILBOX_t *mb;
+ struct lpfc_sli *psli;
+@@ -1773,14 +2015,14 @@
+ volatile uint32_t word0;
+ void __iomem *to_slim;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+
+ psli = &phba->sli;
+
+ /* Restart HBA */
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "%d:0337 Restart HBA Data: x%x x%x\n", phba->brd_no,
+- phba->hba_state, psli->sli_flag);
++ phba->pport->port_state, psli->sli_flag);
+
+ word0 = 0;
+ mb = (MAILBOX_t *) &word0;
+@@ -1794,7 +2036,7 @@
+ readl(to_slim); /* flush */
+
+ /* Only skip post after fc_ffinit is completed */
+- if (phba->hba_state) {
++ if (phba->pport->port_state) {
+ skip_post = 1;
+ word0 = 1; /* This is really setting up word1 */
+ } else {
+@@ -1806,10 +2048,10 @@
+ readl(to_slim); /* flush */
+
+ lpfc_sli_brdreset(phba);
+- phba->stopped = 0;
+- phba->hba_state = LPFC_INIT_START;
++ phba->pport->stopped = 0;
++ phba->link_state = LPFC_INIT_START;
+
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
+ psli->stats_start = get_seconds();
+@@ -1843,14 +2085,11 @@
+ if (i++ >= 20) {
+ /* Adapter failed to init, timeout, status reg
+ <status> */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0436 Adapter failed to init, "
+ "timeout, status reg x%x\n",
+- phba->brd_no,
+- status);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->brd_no, status);
++ phba->link_state = LPFC_HBA_ERROR;
+ return -ETIMEDOUT;
+ }
+
+@@ -1859,14 +2098,12 @@
+ /* ERROR: During chipset initialization */
+ /* Adapter failed to init, chipset, status reg
+ <status> */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0437 Adapter failed to init, "
+ "chipset, status reg x%x\n",
+ phba->brd_no,
+ status);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ return -EIO;
+ }
+
+@@ -1879,7 +2116,8 @@
+ }
+
+ if (i == 15) {
+- phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
++ /* Do post */
++ phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ lpfc_sli_brdrestart(phba);
+ }
+ /* Read the HBA Host Status Register */
+@@ -1890,14 +2128,12 @@
+ if (status & HS_FFERM) {
+ /* ERROR: During chipset initialization */
+ /* Adapter failed to init, chipset, status reg <status> */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_INIT,
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0438 Adapter failed to init, chipset, "
+ "status reg x%x\n",
+ phba->brd_no,
+ status);
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ return -EIO;
+ }
+
+@@ -1911,68 +2147,239 @@
+ return 0;
+ }
+
+-int
+-lpfc_sli_hba_setup(struct lpfc_hba * phba)
++static int
++lpfc_sli_hbq_count(void)
+ {
+- LPFC_MBOXQ_t *pmb;
+- uint32_t resetcount = 0, rc = 0, done = 0;
+-
+- pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+- if (!pmb) {
+- phba->hba_state = LPFC_HBA_ERROR;
+- return -ENOMEM;
+- }
++ return ARRAY_SIZE(lpfc_hbq_defs);
++}
+
++static int
++lpfc_sli_hbq_entry_count(void)
++{
++ int hbq_count = lpfc_sli_hbq_count();
++ int count = 0;
++ int i;
++
++ for (i = 0; i < hbq_count; ++i)
++ count += lpfc_hbq_defs[i]->entry_count;
++ return count;
++}
++
++int
++lpfc_sli_hbq_size(void)
++{
++ return lpfc_sli_hbq_entry_count() * sizeof(struct lpfc_hbq_entry);
++}
++
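++/*
++ * Set up each HBQ with a mailbox command built by lpfc_config_hbq()
++ * and then post the initial buffers via lpfc_sli_hbqbuf_init_hbqs().
++ */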
++static int
++lpfc_sli_hbq_setup(struct lpfc_hba *phba)
++{
++ int hbq_count = lpfc_sli_hbq_count();
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *pmbox;
++ uint32_t hbqno;
++ uint32_t hbq_entry_index;
++
++ /* Get a Mailbox buffer to set up mailbox
++ * commands for HBA initialization
++ */
++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++
++ if (!pmb)
++ return -ENOMEM;
++
++ pmbox = &pmb->mb;
++
++ /* Initialize the struct lpfc_sli_hbq structure for each hbq */
++ phba->link_state = LPFC_INIT_MBX_CMDS;
++
++ hbq_entry_index = 0;
++ for (hbqno = 0; hbqno < hbq_count; ++hbqno) {
++ phba->hbqs[hbqno].next_hbqPutIdx = 0;
++ phba->hbqs[hbqno].hbqPutIdx = 0;
++ phba->hbqs[hbqno].local_hbqGetIdx = 0;
++ phba->hbqs[hbqno].entry_count =
++ lpfc_hbq_defs[hbqno]->entry_count;
++ lpfc_config_hbq(phba, lpfc_hbq_defs[hbqno], hbq_entry_index,
++ pmb);
++ hbq_entry_index += phba->hbqs[hbqno].entry_count;
++
++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++ /* Adapter failed to init, mbxCmd <cmd> CFG_RING,
++ mbxStatus <status>, ring <num> */
++
++ lpfc_printf_log(phba, KERN_ERR,
++ LOG_SLI | LOG_VPORT,
++ "%d:1805 Adapter failed to init. "
++ "Data: x%x x%x x%x\n",
++ phba->brd_no, pmbox->mbxCommand,
++ pmbox->mbxStatus, hbqno);
++
++ phba->link_state = LPFC_HBA_ERROR;
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -ENXIO;
++ }
++ }
++ phba->hbq_count = hbq_count;
++
++ mempool_free(pmb, phba->mbox_mem_pool);
++
++ /* Initially populate or replenish the HBQs */
++ for (hbqno = 0; hbqno < hbq_count; ++hbqno) {
++ if (lpfc_sli_hbqbuf_init_hbqs(phba, hbqno))
++ return -ENOMEM;
++ }
++ return 0;
++}
++
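++/*
++ * Bring the port up in the requested SLI mode: restart the board and
++ * issue CONFIG_PORT, retrying once more before giving up.
++ */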
++static int
++lpfc_do_config_port(struct lpfc_hba *phba, int sli_mode)
++{
++ LPFC_MBOXQ_t *pmb;
++ uint32_t resetcount = 0, rc = 0, done = 0;
++
++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!pmb) {
++ phba->link_state = LPFC_HBA_ERROR;
++ return -ENOMEM;
++ }
++
++ phba->sli_rev = sli_mode;
+ while (resetcount < 2 && !done) {
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
+- phba->hba_state = LPFC_STATE_UNKNOWN;
++ spin_unlock_irq(&phba->hbalock);
++ phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ lpfc_sli_brdrestart(phba);
+ msleep(2500);
+ rc = lpfc_sli_chipset_init(phba);
+ if (rc)
+ break;
+
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+ resetcount++;
+
+- /* Call pre CONFIG_PORT mailbox command initialization. A value of 0
+- * means the call was successful. Any other nonzero value is a failure,
+- * but if ERESTART is returned, the driver may reset the HBA and try
+- * again.
++ /* Call pre CONFIG_PORT mailbox command initialization. A
++ * value of 0 means the call was successful. Any other
++ * nonzero value is a failure, but if ERESTART is returned,
++ * the driver may reset the HBA and try again.
+ */
+ rc = lpfc_config_port_prep(phba);
+ if (rc == -ERESTART) {
+- phba->hba_state = 0;
++ phba->link_state = LPFC_LINK_UNKNOWN;
+ continue;
+ } else if (rc) {
+ break;
+ }
+
+- phba->hba_state = LPFC_INIT_MBX_CMDS;
++ phba->link_state = LPFC_INIT_MBX_CMDS;
+ lpfc_config_port(phba, pmb);
+ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+- if (rc == MBX_SUCCESS)
+- done = 1;
+- else {
++ if (rc != MBX_SUCCESS) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0442 Adapter failed to init, mbxCmd x%x "
+ "CONFIG_PORT, mbxStatus x%x Data: x%x\n",
+ phba->brd_no, pmb->mb.mbxCommand,
+ pmb->mb.mbxStatus, 0);
++ spin_lock_irq(&phba->hbalock);
+ phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE;
++ spin_unlock_irq(&phba->hbalock);
++ rc = -ENXIO;
++ } else {
++ done = 1;
++ phba->max_vpi = (phba->max_vpi &&
++ pmb->mb.un.varCfgPort.gmv) != 0
++ ? pmb->mb.un.varCfgPort.max_vpi
++ : 0;
+ }
+ }
+- if (!done)
++
++ if (!done) {
++ rc = -EINVAL;
++ goto do_prep_failed;
++ }
++
++ if ((pmb->mb.un.varCfgPort.sli_mode == 3) &&
++ (!pmb->mb.un.varCfgPort.cMA)) {
++ rc = -ENXIO;
++ goto do_prep_failed;
++ }
++ return rc;
++
++do_prep_failed:
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return rc;
++}
++
++int
++lpfc_sli_hba_setup(struct lpfc_hba *phba)
++{
++ uint32_t rc;
++ int mode = 3;
++
++ switch (lpfc_sli_mode) {
++ case 2:
++ if (phba->cfg_npiv_enable) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++ "%d:1824 NPIV enabled: Override lpfc_sli_mode "
++ "parameter (%d) to auto (0).\n",
++ phba->brd_no, lpfc_sli_mode);
++ break;
++ }
++ mode = 2;
++ break;
++ case 0:
++ case 3:
++ break;
++ default:
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++ "%d:1819 Unrecognized lpfc_sli_mode "
++ "parameter: %d.\n",
++ phba->brd_no, lpfc_sli_mode);
++
++ break;
++ }
++
++ rc = lpfc_do_config_port(phba, mode);
++ if (rc && lpfc_sli_mode == 3)
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++ "%d:1820 Unable to select SLI-3. "
++ "Not supported by adapter.\n",
++ phba->brd_no);
++ if (rc && mode != 2)
++ rc = lpfc_do_config_port(phba, 2);
++ if (rc)
++ goto lpfc_sli_hba_setup_error;
++
++ if (phba->sli_rev == 3) {
++ phba->iocb_cmd_size = SLI3_IOCB_CMD_SIZE;
++ phba->iocb_rsp_size = SLI3_IOCB_RSP_SIZE;
++ phba->sli3_options |= LPFC_SLI3_ENABLED;
++ phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED;
++
++ } else {
++ phba->iocb_cmd_size = SLI2_IOCB_CMD_SIZE;
++ phba->iocb_rsp_size = SLI2_IOCB_RSP_SIZE;
++ phba->sli3_options = 0;
++ }
++
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++ "%d:0444 Firmware in SLI %x mode. Max_vpi %d\n",
++ phba->brd_no, phba->sli_rev, phba->max_vpi);
++ rc = lpfc_sli_ring_map(phba);
++
++ if (rc)
+ goto lpfc_sli_hba_setup_error;
+
+- rc = lpfc_sli_ring_map(phba, pmb);
++ /* Init HBQs */
+
++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++ rc = lpfc_sli_hbq_setup(phba);
+ if (rc)
+ goto lpfc_sli_hba_setup_error;
++ }
+
+ phba->sli.sli_flag |= LPFC_PROCESS_LA;
+
+@@ -1980,11 +2387,13 @@
+ if (rc)
+ goto lpfc_sli_hba_setup_error;
+
+- goto lpfc_sli_hba_setup_exit;
++ return rc;
++
+ lpfc_sli_hba_setup_error:
+- phba->hba_state = LPFC_HBA_ERROR;
+-lpfc_sli_hba_setup_exit:
+- mempool_free(pmb, phba->mbox_mem_pool);
++ phba->link_state = LPFC_HBA_ERROR;
++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++ "%d:0445 Firmware initialization failed\n",
++ phba->brd_no);
+ return rc;
+ }
+
+@@ -2004,44 +2413,43 @@
+ void
+ lpfc_mbox_timeout(unsigned long ptr)
+ {
+- struct lpfc_hba *phba;
++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr;
+ unsigned long iflag;
++ uint32_t tmo_posted;
++
++ spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
++ tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO;
++ if (!tmo_posted)
++ phba->pport->work_port_events |= WORKER_MBOX_TMO;
++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
+
+- phba = (struct lpfc_hba *)ptr;
+- spin_lock_irqsave(phba->host->host_lock, iflag);
+- if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
+- phba->work_hba_events |= WORKER_MBOX_TMO;
++ if (!tmo_posted) {
++ spin_lock_irqsave(&phba->hbalock, iflag);
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
++ lpfc_worker_wake_up(phba);
++ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+- spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ }
+
+ void
+ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
+ {
+- LPFC_MBOXQ_t *pmbox;
+- MAILBOX_t *mb;
++ LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
++ MAILBOX_t *mb = &pmbox->mb;
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
+
+- spin_lock_irq(phba->host->host_lock);
+- if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
+- spin_unlock_irq(phba->host->host_lock);
++ if (!(phba->pport->work_port_events & WORKER_MBOX_TMO)) {
+ return;
+ }
+
+- pmbox = phba->sli.mbox_active;
+- mb = &pmbox->mb;
+-
+ /* Mbox cmd <mbxCommand> timeout */
+- lpfc_printf_log(phba,
+- KERN_ERR,
+- LOG_MBOX | LOG_SLI,
+- "%d:0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
++ "%d:0310 Mailbox command x%x timeout Data: x%x x%x "
++ "x%p\n",
+ phba->brd_no,
+ mb->mbxCommand,
+- phba->hba_state,
++ phba->pport->port_state,
+ phba->sli.sli_flag,
+ phba->sli.mbox_active);
+
+@@ -2049,11 +2457,14 @@
+ * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
+ * it to fail all outstanding SCSI IO.
+ */
+- phba->hba_state = LPFC_STATE_UNKNOWN;
+- phba->work_hba_events &= ~WORKER_MBOX_TMO;
+- phba->fc_flag |= FC_ESTABLISH_LINK;
++ spin_lock_irq(&phba->pport->work_port_lock);
++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++ spin_unlock_irq(&phba->pport->work_port_lock);
++ spin_lock_irq(&phba->hbalock);
++ phba->link_state = LPFC_LINK_UNKNOWN;
++ phba->pport->fc_flag |= FC_ESTABLISH_LINK;
+ psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_unlock_irq(&phba->hbalock);
+
+ pring = &psli->ring[psli->fcp_ring];
+ lpfc_sli_abort_iocb_ring(phba, pring);
+@@ -2075,10 +2486,10 @@
+ }
+
+ int
+-lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
++lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+ {
+ MAILBOX_t *mb;
+- struct lpfc_sli *psli;
++ struct lpfc_sli *psli = &phba->sli;
+ uint32_t status, evtctr;
+ uint32_t ha_copy;
+ int i;
+@@ -2086,31 +2497,44 @@
+ volatile uint32_t word0, ldata;
+ void __iomem *to_slim;
+
++ if (pmbox->mbox_cmpl && pmbox->mbox_cmpl != lpfc_sli_def_mbox_cmpl &&
++ pmbox->mbox_cmpl != lpfc_sli_wake_mbox_wait) {
++ if (!pmbox->vport) {
++ lpfc_printf_log(phba, KERN_ERR,
++ LOG_MBOX | LOG_VPORT,
++ "%d:1806 Mbox x%x failed. No vport\n",
++ phba->brd_no,
++ pmbox->mb.mbxCommand);
++ dump_stack();
++ return MBXERR_ERROR;
++ }
++ }
++
+ /* If the PCI channel is in offline state, do not post mbox. */
+ if (unlikely(pci_channel_offline(phba->pcidev)))
+ return MBX_NOT_FINISHED;
+
++ spin_lock_irqsave(&phba->hbalock, drvr_flag);
+ psli = &phba->sli;
+
+- spin_lock_irqsave(phba->host->host_lock, drvr_flag);
+-
+
+ mb = &pmbox->mb;
+ status = MBX_SUCCESS;
+
+- if (phba->hba_state == LPFC_HBA_ERROR) {
+- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
++ if (phba->link_state == LPFC_HBA_ERROR) {
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+
+ /* Mbox command <mbxCommand> cannot issue */
+- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+- return (MBX_NOT_FINISHED);
++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++ return MBX_NOT_FINISHED;
+ }
+
+ if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT &&
+ !(readl(phba->HCregaddr) & HC_MBINT_ENA)) {
+- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
+- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+- return (MBX_NOT_FINISHED);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++ return MBX_NOT_FINISHED;
+ }
+
+ if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+@@ -2120,20 +2544,18 @@
+ */
+
+ if (flag & MBX_POLL) {
+- spin_unlock_irqrestore(phba->host->host_lock,
+- drvr_flag);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+
+ /* Mbox command <mbxCommand> cannot issue */
+- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+- return (MBX_NOT_FINISHED);
++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++ return MBX_NOT_FINISHED;
+ }
+
+ if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) {
+- spin_unlock_irqrestore(phba->host->host_lock,
+- drvr_flag);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ /* Mbox command <mbxCommand> cannot issue */
+- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+- return (MBX_NOT_FINISHED);
++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++ return MBX_NOT_FINISHED;
+ }
+
+ /* Handle STOP IOCB processing flag. This is only meaningful
+@@ -2157,21 +2579,33 @@
+ lpfc_mbox_put(phba, pmbox);
+
+ /* Mbox cmd issue - BUSY */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_MBOX | LOG_SLI,
+- "%d:0308 Mbox cmd issue - BUSY Data: x%x x%x x%x x%x\n",
++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++ "%d (%d):0308 Mbox cmd issue - BUSY Data: "
++ "x%x x%x x%x x%x\n",
+ phba->brd_no,
+- mb->mbxCommand,
+- phba->hba_state,
+- psli->sli_flag,
+- flag);
++ pmbox->vport ? pmbox->vport->vpi : 0xffffff,
++ mb->mbxCommand, phba->pport->port_state,
++ psli->sli_flag, flag);
+
+ psli->slistat.mbox_busy++;
+- spin_unlock_irqrestore(phba->host->host_lock,
+- drvr_flag);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+
+- return (MBX_BUSY);
++ if (pmbox->vport) {
++ lpfc_debugfs_disc_trc(pmbox->vport,
++ LPFC_DISC_TRC_MBOX_VPORT,
++ "MBOX Bsy vport: cmd:x%x mb:x%x x%x",
++ (uint32_t)mb->mbxCommand,
++ mb->un.varWords[0], mb->un.varWords[1]);
++ } else {
++ lpfc_debugfs_disc_trc(phba->pport,
++ LPFC_DISC_TRC_MBOX,
++ "MBOX Bsy: cmd:x%x mb:x%x x%x",
++ (uint32_t)mb->mbxCommand,
++ mb->un.varWords[0], mb->un.varWords[1]);
++ }
++
++ return MBX_BUSY;
+ }
+
+ /* Handle STOP IOCB processing flag. This is only meaningful
+@@ -2198,11 +2632,10 @@
+ if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) &&
+ (mb->mbxCommand != MBX_KILL_BOARD)) {
+ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irqrestore(phba->host->host_lock,
+- drvr_flag);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ /* Mbox command <mbxCommand> cannot issue */
+- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag);
+- return (MBX_NOT_FINISHED);
++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++ return MBX_NOT_FINISHED;
+ }
+ /* timeout active mbox command */
+ mod_timer(&psli->mbox_tmo, (jiffies +
+@@ -2210,15 +2643,29 @@
+ }
+
+ /* Mailbox cmd <cmd> issue */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_MBOX | LOG_SLI,
+- "%d:0309 Mailbox cmd x%x issue Data: x%x x%x x%x\n",
+- phba->brd_no,
+- mb->mbxCommand,
+- phba->hba_state,
+- psli->sli_flag,
+- flag);
++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++ "%d (%d):0309 Mailbox cmd x%x issue Data: x%x x%x "
++ "x%x\n",
++ phba->brd_no, pmbox->vport ? pmbox->vport->vpi : 0,
++ mb->mbxCommand, phba->pport->port_state,
++ psli->sli_flag, flag);
++
++ if (mb->mbxCommand != MBX_HEARTBEAT) {
++ if (pmbox->vport) {
++ lpfc_debugfs_disc_trc(pmbox->vport,
++ LPFC_DISC_TRC_MBOX_VPORT,
++ "MBOX Send vport: cmd:x%x mb:x%x x%x",
++ (uint32_t)mb->mbxCommand,
++ mb->un.varWords[0], mb->un.varWords[1]);
++ } else {
++ lpfc_debugfs_disc_trc(phba->pport,
++ LPFC_DISC_TRC_MBOX,
++ "MBOX Send: cmd:x%x mb:x%x x%x",
++ (uint32_t)mb->mbxCommand,
++ mb->un.varWords[0], mb->un.varWords[1]);
++ }
++ }
+
+ psli->slistat.mbox_cmd++;
+ evtctr = psli->slistat.mbox_event;
+@@ -2285,12 +2732,12 @@
+ /* Wait for command to complete */
+ while (((word0 & OWN_CHIP) == OWN_CHIP) ||
+ (!(ha_copy & HA_MBATT) &&
+- (phba->hba_state > LPFC_WARM_START))) {
++ (phba->link_state > LPFC_WARM_START))) {
+ if (i-- <= 0) {
+ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+- spin_unlock_irqrestore(phba->host->host_lock,
++ spin_unlock_irqrestore(&phba->hbalock,
+ drvr_flag);
+- return (MBX_NOT_FINISHED);
++ return MBX_NOT_FINISHED;
+ }
+
+ /* Check if we took a mbox interrupt while we were
+@@ -2299,12 +2746,12 @@
+ && (evtctr != psli->slistat.mbox_event))
+ break;
+
+- spin_unlock_irqrestore(phba->host->host_lock,
++ spin_unlock_irqrestore(&phba->hbalock,
+ drvr_flag);
+
+ msleep(1);
+
+- spin_lock_irqsave(phba->host->host_lock, drvr_flag);
++ spin_lock_irqsave(&phba->hbalock, drvr_flag);
+
+ if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+ /* First copy command data */
+@@ -2355,23 +2802,25 @@
+ status = mb->mbxStatus;
+ }
+
+- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
+- return (status);
++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++ return status;
+ }
+
+-static int
+-lpfc_sli_ringtx_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
+- struct lpfc_iocbq * piocb)
++/*
++ * Caller needs to hold the hbalock.
++ */
++static void
++__lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocb)
+ {
+ /* Insert the caller's iocb in the txq tail for later processing. */
+ list_add_tail(&piocb->list, &pring->txq);
+ pring->txq_cnt++;
+- return (0);
+ }
+
+ static struct lpfc_iocbq *
+ lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+- struct lpfc_iocbq ** piocb)
++ struct lpfc_iocbq **piocb)
+ {
+ struct lpfc_iocbq * nextiocb;
+
+@@ -2384,13 +2833,29 @@
+ return nextiocb;
+ }
+
++/*
++ * Lockless version of lpfc_sli_issue_iocb.
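++ * The caller is expected to hold the hbalock; the locked
++ * wrapper lpfc_sli_issue_iocb() below acquires it.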
++ */
+ int
+-lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ struct lpfc_iocbq *piocb, uint32_t flag)
+ {
+ struct lpfc_iocbq *nextiocb;
+ IOCB_t *iocb;
+
++ if (piocb->iocb_cmpl && (!piocb->vport) &&
++ (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
++ (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) {
++ lpfc_printf_log(phba, KERN_ERR,
++ LOG_SLI | LOG_VPORT,
++ "%d:1807 IOCB x%x failed. No vport\n",
++ phba->brd_no,
++ piocb->iocb.ulpCommand);
++ dump_stack();
++ return IOCB_ERROR;
++ }
++
+ /* If the PCI channel is in offline state, do not post iocbs. */
+ if (unlikely(pci_channel_offline(phba->pcidev)))
+ return IOCB_ERROR;
+@@ -2398,7 +2863,7 @@
+ /*
+ * We should never get an IOCB if we are in a < LINK_DOWN state
+ */
+- if (unlikely(phba->hba_state < LPFC_LINK_DOWN))
++ if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+ return IOCB_ERROR;
+
+ /*
+@@ -2408,7 +2873,7 @@
+ if (unlikely(pring->flag & LPFC_STOP_IOCB_MBX))
+ goto iocb_busy;
+
+- if (unlikely(phba->hba_state == LPFC_LINK_DOWN)) {
++ if (unlikely(phba->link_state == LPFC_LINK_DOWN)) {
+ /*
+ * Only CREATE_XRI, CLOSE_XRI, and QUE_RING_BUF
+ * can be issued if the link is not up.
+@@ -2436,8 +2901,9 @@
+ * attention events.
+ */
+ } else if (unlikely(pring->ringno == phba->sli.fcp_ring &&
+- !(phba->sli.sli_flag & LPFC_PROCESS_LA)))
++ !(phba->sli.sli_flag & LPFC_PROCESS_LA))) {
+ goto iocb_busy;
++ }
+
+ while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) &&
+ (nextiocb = lpfc_sli_next_iocb(phba, pring, &piocb)))
+@@ -2459,13 +2925,28 @@
+ out_busy:
+
+ if (!(flag & SLI_IOCB_RET_IOCB)) {
+- lpfc_sli_ringtx_put(phba, pring, piocb);
++ __lpfc_sli_ringtx_put(phba, pring, piocb);
+ return IOCB_SUCCESS;
+ }
+
+ return IOCB_BUSY;
+ }
+
++
++int
++lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocb, uint32_t flag)
++{
++ unsigned long iflags;
++ int rc;
++
++ spin_lock_irqsave(&phba->hbalock, iflags);
++ rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
++
++ return rc;
++}
++
+ static int
+ lpfc_extra_ring_setup( struct lpfc_hba *phba)
+ {
+@@ -2504,7 +2985,7 @@
+ int
+ lpfc_sli_setup(struct lpfc_hba *phba)
+ {
+- int i, totiocb = 0;
++ int i, totiocbsize = 0;
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
+
+@@ -2529,6 +3010,12 @@
+ pring->numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES;
+ pring->numCiocb += SLI2_IOCB_CMD_R3XTRA_ENTRIES;
+ pring->numRiocb += SLI2_IOCB_RSP_R3XTRA_ENTRIES;
++ pring->sizeCiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_CMD_SIZE :
++ SLI2_IOCB_CMD_SIZE;
++ pring->sizeRiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_RSP_SIZE :
++ SLI2_IOCB_RSP_SIZE;
+ pring->iotag_ctr = 0;
+ pring->iotag_max =
+ (phba->cfg_hba_queue_depth * 2);
+@@ -2539,12 +3026,25 @@
+ /* numCiocb and numRiocb are used in config_port */
+ pring->numCiocb = SLI2_IOCB_CMD_R1_ENTRIES;
+ pring->numRiocb = SLI2_IOCB_RSP_R1_ENTRIES;
++ pring->sizeCiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_CMD_SIZE :
++ SLI2_IOCB_CMD_SIZE;
++ pring->sizeRiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_RSP_SIZE :
++ SLI2_IOCB_RSP_SIZE;
++ pring->iotag_max = phba->cfg_hba_queue_depth;
+ pring->num_mask = 0;
+ break;
+ case LPFC_ELS_RING: /* ring 2 - ELS / CT */
+ /* numCiocb and numRiocb are used in config_port */
+ pring->numCiocb = SLI2_IOCB_CMD_R2_ENTRIES;
+ pring->numRiocb = SLI2_IOCB_RSP_R2_ENTRIES;
++ pring->sizeCiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_CMD_SIZE :
++ SLI2_IOCB_CMD_SIZE;
++ pring->sizeRiocb = (phba->sli_rev == 3) ?
++ SLI3_IOCB_RSP_SIZE :
++ SLI2_IOCB_RSP_SIZE;
+ pring->fast_iotag = 0;
+ pring->iotag_ctr = 0;
+ pring->iotag_max = 4096;
+@@ -2575,14 +3075,16 @@
+ lpfc_ct_unsol_event;
+ break;
+ }
+- totiocb += (pring->numCiocb + pring->numRiocb);
++ totiocbsize += (pring->numCiocb * pring->sizeCiocb) +
++ (pring->numRiocb * pring->sizeRiocb);
+ }
+- if (totiocb > MAX_SLI2_IOCB) {
++ if (totiocbsize > MAX_SLIM_IOCB_SIZE) {
+ /* Too many cmd / rsp ring entries in SLI2 SLIM */
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0462 Too many cmd / rsp ring entries in "
+- "SLI2 SLIM Data: x%x x%x\n",
+- phba->brd_no, totiocb, MAX_SLI2_IOCB);
++ "SLI2 SLIM Data: x%x x%lx\n",
++ phba->brd_no, totiocbsize,
++ (unsigned long) MAX_SLIM_IOCB_SIZE);
+ }
+ if (phba->cfg_multi_ring_support == 2)
+ lpfc_extra_ring_setup(phba);
+@@ -2591,15 +3093,16 @@
+ }
+
+ int
+-lpfc_sli_queue_setup(struct lpfc_hba * phba)
++lpfc_sli_queue_setup(struct lpfc_hba *phba)
+ {
+ struct lpfc_sli *psli;
+ struct lpfc_sli_ring *pring;
+ int i;
+
+ psli = &phba->sli;
+- spin_lock_irq(phba->host->host_lock);
++ spin_lock_irq(&phba->hbalock);
+ INIT_LIST_HEAD(&psli->mboxq);
++ INIT_LIST_HEAD(&psli->mboxq_cmpl);
+ /* Initialize list headers for txq and txcmplq as double linked lists */
+ for (i = 0; i < psli->num_rings; i++) {
+ pring = &psli->ring[i];
+@@ -2612,15 +3115,73 @@
+ INIT_LIST_HEAD(&pring->iocb_continueq);
+ INIT_LIST_HEAD(&pring->postbufq);
+ }
+- spin_unlock_irq(phba->host->host_lock);
+- return (1);
++ spin_unlock_irq(&phba->hbalock);
++ return 1;
+ }
+
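++/*
++ * Flush all iocbs owned by a vport: fail everything on the txq with
++ * IOERR_SLI_DOWN and issue aborts for iocbs still on the txcmplq.
++ */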
+ int
+-lpfc_sli_hba_down(struct lpfc_hba * phba)
++lpfc_sli_host_down(struct lpfc_vport *vport)
+ {
+ LIST_HEAD(completions);
+- struct lpfc_sli *psli;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_sli *psli = &phba->sli;
++ struct lpfc_sli_ring *pring;
++ struct lpfc_iocbq *iocb, *next_iocb;
++ int i;
++ unsigned long flags = 0;
++ uint16_t prev_pring_flag;
++
++ lpfc_cleanup_discovery_resources(vport);
++
++ spin_lock_irqsave(&phba->hbalock, flags);
++ for (i = 0; i < psli->num_rings; i++) {
++ pring = &psli->ring[i];
++ prev_pring_flag = pring->flag;
++ if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */
++ pring->flag |= LPFC_DEFERRED_RING_EVENT;
++ /*
++ * Error everything on the txq since these iocbs have not been
++ * given to the FW yet.
++ */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++ if (iocb->vport != vport)
++ continue;
++ list_move_tail(&iocb->list, &completions);
++ pring->txq_cnt--;
++ }
++
++ /* Next issue ABTS for everything on the txcmplq */
++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++ list) {
++ if (iocb->vport != vport)
++ continue;
++ lpfc_sli_issue_abort_iotag(phba, pring, iocb);
++ }
++
++ pring->flag = prev_pring_flag;
++ }
++
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++
++ while (!list_empty(&completions)) {
++ list_remove_head(&completions, iocb, struct lpfc_iocbq, list);
++
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
++ iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT;
++ iocb->iocb.un.ulpWord[4] = IOERR_SLI_DOWN;
++ (iocb->iocb_cmpl) (phba, iocb, iocb);
++ }
++ }
++ return 1;
++}
++
++int
++lpfc_sli_hba_down(struct lpfc_hba *phba)
++{
++ LIST_HEAD(completions);
++ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
+ LPFC_MBOXQ_t *pmb;
+ struct lpfc_iocbq *iocb;
+@@ -2628,12 +3189,14 @@
+ int i;
+ unsigned long flags = 0;
+
+- psli = &phba->sli;
+ lpfc_hba_down_prep(phba);
+
+- spin_lock_irqsave(phba->host->host_lock, flags);
++ lpfc_fabric_abort_hba(phba);
++
++ spin_lock_irqsave(&phba->hbalock, flags);
+ for (i = 0; i < psli->num_rings; i++) {
+ pring = &psli->ring[i];
++ if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */
+ pring->flag |= LPFC_DEFERRED_RING_EVENT;
+
+ /*
+@@ -2644,51 +3207,50 @@
+ pring->txq_cnt = 0;
+
+ }
+- spin_unlock_irqrestore(phba->host->host_lock, flags);
++ spin_unlock_irqrestore(&phba->hbalock, flags);
+
+ while (!list_empty(&completions)) {
+- iocb = list_get_first(&completions, struct lpfc_iocbq, list);
++ list_remove_head(&completions, iocb, struct lpfc_iocbq, list);
+ cmd = &iocb->iocb;
+- list_del(&iocb->list);
+
+- if (iocb->iocb_cmpl) {
++ if (!iocb->iocb_cmpl)
++ lpfc_sli_release_iocbq(phba, iocb);
++ else {
+ cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ cmd->un.ulpWord[4] = IOERR_SLI_DOWN;
+ (iocb->iocb_cmpl) (phba, iocb, iocb);
+- } else
+- lpfc_sli_release_iocbq(phba, iocb);
++ }
+ }
+
+ /* Return any active mbox cmds */
+ del_timer_sync(&psli->mbox_tmo);
+- spin_lock_irqsave(phba->host->host_lock, flags);
+- phba->work_hba_events &= ~WORKER_MBOX_TMO;
++ spin_lock_irqsave(&phba->hbalock, flags);
++
++ spin_lock(&phba->pport->work_port_lock);
++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++ spin_unlock(&phba->pport->work_port_lock);
++
+ if (psli->mbox_active) {
+- pmb = psli->mbox_active;
+- pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+- if (pmb->mbox_cmpl) {
+- spin_unlock_irqrestore(phba->host->host_lock, flags);
+- pmb->mbox_cmpl(phba,pmb);
+- spin_lock_irqsave(phba->host->host_lock, flags);
+- }
+- }
+- psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ list_add_tail(&psli->mbox_active->list, &completions);
+ psli->mbox_active = NULL;
++ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ }
+
+- /* Return any pending mbox cmds */
+- while ((pmb = lpfc_mbox_get(phba)) != NULL) {
++ /* Return any pending or completed mbox cmds */
++ list_splice_init(&phba->sli.mboxq, &completions);
++ list_splice_init(&phba->sli.mboxq_cmpl, &completions);
++ INIT_LIST_HEAD(&psli->mboxq);
++ INIT_LIST_HEAD(&psli->mboxq_cmpl);
++
++ spin_unlock_irqrestore(&phba->hbalock, flags);
++
++ while (!list_empty(&completions)) {
++ list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
+ pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+ if (pmb->mbox_cmpl) {
+- spin_unlock_irqrestore(phba->host->host_lock, flags);
+ pmb->mbox_cmpl(phba,pmb);
+- spin_lock_irqsave(phba->host->host_lock, flags);
+ }
+ }
+-
+- INIT_LIST_HEAD(&psli->mboxq);
+-
+- spin_unlock_irqrestore(phba->host->host_lock, flags);
+-
+ return 1;
+ }
+
+@@ -2710,14 +3272,15 @@
+ }
+
+ int
+-lpfc_sli_ringpostbuf_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
+- struct lpfc_dmabuf * mp)
++lpfc_sli_ringpostbuf_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_dmabuf *mp)
+ {
+ /* Stick struct lpfc_dmabuf at end of postbufq so driver can look it up
+ later */
++ spin_lock_irq(&phba->hbalock);
+ list_add_tail(&mp->list, &pring->postbufq);
+-
+ pring->postbufq_cnt++;
++ spin_unlock_irq(&phba->hbalock);
+ return 0;
+ }
+
+@@ -2730,14 +3293,17 @@
+ struct list_head *slp = &pring->postbufq;
+
+ /* Search postbufq, from the beginning, looking for a match on phys */
++ spin_lock_irq(&phba->hbalock);
+ list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
+ if (mp->phys == phys) {
+ list_del_init(&mp->list);
+ pring->postbufq_cnt--;
++ spin_unlock_irq(&phba->hbalock);
+ return mp;
+ }
+ }
+
++ spin_unlock_irq(&phba->hbalock);
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "%d:0410 Cannot find virtual addr for mapped buf on "
+ "ring %d Data x%llx x%p x%p x%x\n",
+@@ -2747,92 +3313,110 @@
+ }
+
+ static void
+-lpfc_sli_abort_els_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- IOCB_t *irsp;
++ IOCB_t *irsp = &rspiocb->iocb;
+ uint16_t abort_iotag, abort_context;
+- struct lpfc_iocbq *abort_iocb, *rsp_ab_iocb;
++ struct lpfc_iocbq *abort_iocb;
+ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+
+ abort_iocb = NULL;
+- irsp = &rspiocb->iocb;
+-
+- spin_lock_irq(phba->host->host_lock);
+
+ if (irsp->ulpStatus) {
+ abort_context = cmdiocb->iocb.un.acxri.abortContextTag;
+ abort_iotag = cmdiocb->iocb.un.acxri.abortIoTag;
+
++ spin_lock_irq(&phba->hbalock);
+ if (abort_iotag != 0 && abort_iotag <= phba->sli.last_iotag)
+ abort_iocb = phba->sli.iocbq_lookup[abort_iotag];
+
+- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+- "%d:0327 Cannot abort els iocb %p"
+- " with tag %x context %x\n",
+- phba->brd_no, abort_iocb,
+- abort_iotag, abort_context);
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_SLI,
++ "%d:0327 Cannot abort els iocb %p "
++ "with tag %x context %x, abort status %x, "
++ "abort code %x\n",
++ phba->brd_no, abort_iocb, abort_iotag,
++ abort_context, irsp->ulpStatus,
++ irsp->un.ulpWord[4]);
+
+ /*
+ * make sure we have the right iocbq before taking it
+ * off the txcmplq and try to call completion routine.
+ */
+- if (abort_iocb &&
+- abort_iocb->iocb.ulpContext == abort_context &&
+- abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
+- list_del(&abort_iocb->list);
++ if (!abort_iocb ||
++ abort_iocb->iocb.ulpContext != abort_context ||
++ (abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) == 0)
++ spin_unlock_irq(&phba->hbalock);
++ else {
++ list_del_init(&abort_iocb->list);
+ pring->txcmplq_cnt--;
++ spin_unlock_irq(&phba->hbalock);
+
+- rsp_ab_iocb = lpfc_sli_get_iocbq(phba);
+- if (rsp_ab_iocb == NULL)
+- lpfc_sli_release_iocbq(phba, abort_iocb);
+- else {
+- abort_iocb->iocb_flag &=
+- ~LPFC_DRIVER_ABORTED;
+- rsp_ab_iocb->iocb.ulpStatus =
+- IOSTAT_LOCAL_REJECT;
+- rsp_ab_iocb->iocb.un.ulpWord[4] =
+- IOERR_SLI_ABORTED;
+- spin_unlock_irq(phba->host->host_lock);
+- (abort_iocb->iocb_cmpl)
+- (phba, abort_iocb, rsp_ab_iocb);
+- spin_lock_irq(phba->host->host_lock);
+- lpfc_sli_release_iocbq(phba, rsp_ab_iocb);
+- }
++ abort_iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED;
++ abort_iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT;
++ abort_iocb->iocb.un.ulpWord[4] = IOERR_SLI_ABORTED;
++ (abort_iocb->iocb_cmpl)(phba, abort_iocb, abort_iocb);
+ }
+ }
+
+ lpfc_sli_release_iocbq(phba, cmdiocb);
+- spin_unlock_irq(phba->host->host_lock);
++ return;
++}
++
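++/*
++ * Completion handler used while unloading; it just logs the
++ * completion and frees the command iocb (CT or ELS).
++ */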
++static void
++lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
++{
++ IOCB_t *irsp = &rspiocb->iocb;
++
++ /* ELS cmd tag <ulpIoTag> completes */
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (X):0133 Ignoring ELS cmd tag x%x completion Data: "
++ "x%x x%x x%x\n",
++ phba->brd_no, irsp->ulpIoTag, irsp->ulpStatus,
++ irsp->un.ulpWord[4], irsp->ulpTimeout);
++ if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR)
++ lpfc_ct_free_iocb(phba, cmdiocb);
++ else
++ lpfc_els_free_iocb(phba, cmdiocb);
+ return;
+ }
+
+ int
+-lpfc_sli_issue_abort_iotag(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring,
+- struct lpfc_iocbq * cmdiocb)
++lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *cmdiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
+ struct lpfc_iocbq *abtsiocbp;
+ IOCB_t *icmd = NULL;
+ IOCB_t *iabt = NULL;
+ int retval = IOCB_ERROR;
+
+- /* There are certain command types we don't want
+- * to abort.
++ /*
++ * There are certain command types we don't want to abort. And we
++ * don't want to abort commands that are already in the process of
++ * being aborted.
+ */
+ icmd = &cmdiocb->iocb;
+- if ((icmd->ulpCommand == CMD_ABORT_XRI_CN) ||
+- (icmd->ulpCommand == CMD_CLOSE_XRI_CN))
++ if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
++ icmd->ulpCommand == CMD_CLOSE_XRI_CN ||
++ (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0)
+ return 0;
+
+- /* If we're unloading, interrupts are disabled so we
+- * need to cleanup the iocb here.
++ /* If we're unloading, don't abort iocb on the ELS ring, but change the
++ * callback so that nothing happens when it finishes.
+ */
+- if (phba->fc_flag & FC_UNLOADING)
++ if ((vport->load_flag & FC_UNLOADING) &&
++ (pring->ringno == LPFC_ELS_RING)) {
++ if (cmdiocb->iocb_flag & LPFC_IO_FABRIC)
++ cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl;
++ else
++ cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl;
+ goto abort_iotag_exit;
++ }
+
+ /* issue ABTS for this IOCB based on iotag */
+- abtsiocbp = lpfc_sli_get_iocbq(phba);
++ abtsiocbp = __lpfc_sli_get_iocbq(phba);
+ if (abtsiocbp == NULL)
+ return 0;
+
+@@ -2848,7 +3432,7 @@
+ iabt->ulpLe = 1;
+ iabt->ulpClass = icmd->ulpClass;
+
+- if (phba->hba_state >= LPFC_LINK_UP)
++ if (phba->link_state >= LPFC_LINK_UP)
+ iabt->ulpCommand = CMD_ABORT_XRI_CN;
+ else
+ iabt->ulpCommand = CMD_CLOSE_XRI_CN;
+@@ -2856,32 +3440,20 @@
+ abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+- "%d:0339 Abort xri x%x, original iotag x%x, abort "
+- "cmd iotag x%x\n",
+- phba->brd_no, iabt->un.acxri.abortContextTag,
++ "%d (%d):0339 Abort xri x%x, original iotag x%x, "
++ "abort cmd iotag x%x\n",
++ phba->brd_no, vport->vpi,
++ iabt->un.acxri.abortContextTag,
+ iabt->un.acxri.abortIoTag, abtsiocbp->iotag);
+- retval = lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
++ retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
+
+ abort_iotag_exit:
+-
+- /* If we could not issue an abort dequeue the iocb and handle
+- * the completion here.
++ /*
++ * The caller of this routine should check for IOCB_ERROR and
++ * handle it properly. On IOCB_ERROR this routine no longer
++ * removes the iocb from the txcmplq or calls its completion.
+ */
+- if (retval == IOCB_ERROR) {
+- list_del(&cmdiocb->list);
+- pring->txcmplq_cnt--;
+-
+- if (cmdiocb->iocb_cmpl) {
+- icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+- icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+- spin_unlock_irq(phba->host->host_lock);
+- (cmdiocb->iocb_cmpl) (phba, cmdiocb, cmdiocb);
+- spin_lock_irq(phba->host->host_lock);
+- } else
+- lpfc_sli_release_iocbq(phba, cmdiocb);
+- }
+-
+- return 1;
++ return retval;
+ }
+
+ static int
+@@ -2947,14 +3519,10 @@
+ }
+
+ void
+-lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+- struct lpfc_iocbq * rspiocb)
++lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++ struct lpfc_iocbq *rspiocb)
+ {
+- unsigned long iflags;
+-
+- spin_lock_irqsave(phba->host->host_lock, iflags);
+ lpfc_sli_release_iocbq(phba, cmdiocb);
+- spin_unlock_irqrestore(phba->host->host_lock, iflags);
+ return;
+ }
+
+@@ -2972,8 +3540,8 @@
+ for (i = 1; i <= phba->sli.last_iotag; i++) {
+ iocbq = phba->sli.iocbq_lookup[i];
+
+- if (lpfc_sli_validate_fcp_iocb (iocbq, tgt_id, lun_id,
+- 0, abort_cmd) != 0)
++ if (lpfc_sli_validate_fcp_iocb(iocbq, tgt_id, lun_id, 0,
++ abort_cmd) != 0)
+ continue;
+
+ /* issue ABTS for this IOCB based on iotag */
+@@ -2989,8 +3557,9 @@
+ abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
+ abtsiocb->iocb.ulpLe = 1;
+ abtsiocb->iocb.ulpClass = cmd->ulpClass;
++ abtsiocb->vport = phba->pport;
+
+- if (phba->hba_state >= LPFC_LINK_UP)
++ if (lpfc_is_link_up(phba))
+ abtsiocb->iocb.ulpCommand = CMD_ABORT_XRI_CN;
+ else
+ abtsiocb->iocb.ulpCommand = CMD_CLOSE_XRI_CN;
+@@ -3016,16 +3585,16 @@
+ wait_queue_head_t *pdone_q;
+ unsigned long iflags;
+
+- spin_lock_irqsave(phba->host->host_lock, iflags);
++ spin_lock_irqsave(&phba->hbalock, iflags);
+ cmdiocbq->iocb_flag |= LPFC_IO_WAKE;
+ if (cmdiocbq->context2 && rspiocbq)
+ memcpy(&((struct lpfc_iocbq *)cmdiocbq->context2)->iocb,
+ &rspiocbq->iocb, sizeof(IOCB_t));
+
+ pdone_q = cmdiocbq->context_un.wait_queue;
+- spin_unlock_irqrestore(phba->host->host_lock, iflags);
+ if (pdone_q)
+ wake_up(pdone_q);
++ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ return;
+ }
+
+@@ -3035,11 +3604,12 @@
+ * lpfc_sli_issue_call since the wake routine sets a unique value and by
+ * definition this is a wait function.
+ */
++
+ int
+-lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba,
+- struct lpfc_sli_ring * pring,
+- struct lpfc_iocbq * piocb,
+- struct lpfc_iocbq * prspiocbq,
++lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba,
++ struct lpfc_sli_ring *pring,
++ struct lpfc_iocbq *piocb,
++ struct lpfc_iocbq *prspiocbq,
+ uint32_t timeout)
+ {
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q);
+@@ -3071,11 +3641,9 @@
+ retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0);
+ if (retval == IOCB_SUCCESS) {
+ timeout_req = timeout * HZ;
+- spin_unlock_irq(phba->host->host_lock);
+ timeleft = wait_event_timeout(done_q,
+ piocb->iocb_flag & LPFC_IO_WAKE,
+ timeout_req);
+- spin_lock_irq(phba->host->host_lock);
+
+ if (piocb->iocb_flag & LPFC_IO_WAKE) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+@@ -3117,16 +3685,16 @@
+ }
+
+ int
+-lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
++lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
+ uint32_t timeout)
+ {
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q);
+ int retval;
++ unsigned long flag;
+
+ /* The caller must leave context1 empty. */
+- if (pmboxq->context1 != 0) {
+- return (MBX_NOT_FINISHED);
+- }
++ if (pmboxq->context1 != 0)
++ return MBX_NOT_FINISHED;
+
+ /* setup wake call as IOCB callback */
+ pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
+@@ -3141,6 +3709,7 @@
+ pmboxq->mbox_flag & LPFC_MBX_WAKE,
+ timeout * HZ);
+
++ spin_lock_irqsave(&phba->hbalock, flag);
+ pmboxq->context1 = NULL;
+ /*
+ * if LPFC_MBX_WAKE flag is set the mailbox is completed
+@@ -3148,8 +3717,11 @@
+ */
+ if (pmboxq->mbox_flag & LPFC_MBX_WAKE)
+ retval = MBX_SUCCESS;
+- else
++ else {
+ retval = MBX_TIMEOUT;
++ pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++ }
++ spin_unlock_irqrestore(&phba->hbalock, flag);
+ }
+
+ return retval;
+@@ -3158,12 +3730,25 @@
+ int
+ lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba)
+ {
++ struct lpfc_vport *vport = phba->pport;
+ int i = 0;
++ uint32_t ha_copy;
+
+- while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !phba->stopped) {
++ while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) {
+ if (i++ > LPFC_MBOX_TMO * 1000)
+ return 1;
+
++ /*
++ * Call lpfc_sli_handle_mb_event only if a mailbox cmd
++ * did finish. This way we won't get the misleading
++ * "Stray Mailbox Interrupt" message.
++ */
++ spin_lock_irq(&phba->hbalock);
++ ha_copy = phba->work_ha;
++ phba->work_ha &= ~HA_MBATT;
++ spin_unlock_irq(&phba->hbalock);
++
++ if (ha_copy & HA_MBATT)
+ if (lpfc_sli_handle_mb_event(phba) == 0)
+ i = 0;
+
+@@ -3183,6 +3768,13 @@
+ int i;
+ uint32_t control;
+
++ MAILBOX_t *mbox, *pmbox;
++ struct lpfc_vport *vport;
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_dmabuf *mp;
++ LPFC_MBOXQ_t *pmb;
++ int rc;
++
+ /*
+ * Get the driver's phba structure from the dev_id and
+ * assume the HBA is not interrupting.
+@@ -3204,7 +3796,7 @@
+ */
+
+ /* Ignore all interrupts during initialization. */
+- if (unlikely(phba->hba_state < LPFC_LINK_DOWN))
++ if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+ return IRQ_NONE;
+
+ /*
+@@ -3212,16 +3804,16 @@
+ * Clear Attention Sources, except Error Attention (to
+ * preserve status) and Link Attention
+ */
+- spin_lock(phba->host->host_lock);
++ spin_lock(&phba->hbalock);
+ ha_copy = readl(phba->HAregaddr);
+ /* If somebody is waiting to handle an eratt don't process it
+ * here. The brdkill function will do this.
+ */
+- if (phba->fc_flag & FC_IGNORE_ERATT)
++ if (phba->link_flag & LS_IGNORE_ERATT)
+ ha_copy &= ~HA_ERATT;
+ writel((ha_copy & ~(HA_LATT | HA_ERATT)), phba->HAregaddr);
+ readl(phba->HAregaddr); /* flush */
+- spin_unlock(phba->host->host_lock);
++ spin_unlock(&phba->hbalock);
+
+ if (unlikely(!ha_copy))
+ return IRQ_NONE;
+@@ -3235,36 +3827,41 @@
+ * Turn off Link Attention interrupts
+ * until CLEAR_LA done
+ */
+- spin_lock(phba->host->host_lock);
++ spin_lock(&phba->hbalock);
+ phba->sli.sli_flag &= ~LPFC_PROCESS_LA;
+ control = readl(phba->HCregaddr);
+ control &= ~HC_LAINT_ENA;
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock(phba->host->host_lock);
++ spin_unlock(&phba->hbalock);
+ }
+ else
+ work_ha_copy &= ~HA_LATT;
+ }
+
+ if (work_ha_copy & ~(HA_ERATT|HA_MBATT|HA_LATT)) {
+- for (i = 0; i < phba->sli.num_rings; i++) {
+- if (work_ha_copy & (HA_RXATT << (4*i))) {
+ /*
+- * Turn off Slow Rings interrupts
++ * Turn off Slow Rings interrupts, LPFC_ELS_RING is
++ * the only slow ring.
+ */
+- spin_lock(phba->host->host_lock);
++ status = (work_ha_copy &
++ (HA_RXMASK << (4*LPFC_ELS_RING)));
++ status >>= (4*LPFC_ELS_RING);
++ if (status & HA_RXMASK) {
++ spin_lock(&phba->hbalock);
+ control = readl(phba->HCregaddr);
+- control &= ~(HC_R0INT_ENA << i);
++ if (control & (HC_R0INT_ENA << LPFC_ELS_RING)) {
++ control &=
++ ~(HC_R0INT_ENA << LPFC_ELS_RING);
+ writel(control, phba->HCregaddr);
+ readl(phba->HCregaddr); /* flush */
+- spin_unlock(phba->host->host_lock);
+ }
++ spin_unlock(&phba->hbalock);
+ }
+ }
+
+ if (work_ha_copy & HA_ERATT) {
+- phba->hba_state = LPFC_HBA_ERROR;
++ phba->link_state = LPFC_HBA_ERROR;
+ /*
+ * There was a link/board error. Read the
+ * status register to retrieve the error event
+@@ -3279,14 +3876,108 @@
+ /* Clear Chip error bit */
+ writel(HA_ERATT, phba->HAregaddr);
+ readl(phba->HAregaddr); /* flush */
+- phba->stopped = 1;
++ phba->pport->stopped = 1;
++ }
++
++ if ((work_ha_copy & HA_MBATT) &&
++ (phba->sli.mbox_active)) {
++ pmb = phba->sli.mbox_active;
++ pmbox = &pmb->mb;
++ mbox = &phba->slim2p->mbx;
++ vport = pmb->vport;
++
++ /* First check out the status word */
++ lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof(uint32_t));
++ if (pmbox->mbxOwner != OWN_HOST) {
++ /*
++ * Stray Mailbox Interrupt, mbxCommand <cmd>
++ * mbxStatus <status>
++ */
++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX |
++ LOG_SLI,
++ "%d (%d):0304 Stray Mailbox "
++ "Interrupt mbxCommand x%x "
++ "mbxStatus x%x\n",
++ phba->brd_no,
++ (vport
++ ? vport->vpi : 0),
++ pmbox->mbxCommand,
++ pmbox->mbxStatus);
++ }
++ phba->last_completion_time = jiffies;
++ del_timer_sync(&phba->sli.mbox_tmo);
++
++ phba->sli.mbox_active = NULL;
++ if (pmb->mbox_cmpl) {
++ lpfc_sli_pcimem_bcopy(mbox, pmbox,
++ MAILBOX_CMD_SIZE);
++ }
++ if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
++ pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG;
++
++ lpfc_debugfs_disc_trc(vport,
++ LPFC_DISC_TRC_MBOX_VPORT,
++ "MBOX dflt rpi: : status:x%x rpi:x%x",
++ (uint32_t)pmbox->mbxStatus,
++ pmbox->un.varWords[0], 0);
++
++ if (!pmbox->mbxStatus) {
++ mp = (struct lpfc_dmabuf *)
++ (pmb->context1);
++ ndlp = (struct lpfc_nodelist *)
++ pmb->context2;
++
++ /* Reg_LOGIN of dflt RPI was successful.
++ * Now let's get rid of the RPI using the
++ * same mbox buffer.
++ */
++ lpfc_unreg_login(phba, vport->vpi,
++ pmbox->un.varWords[0], pmb);
++ pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
++ pmb->context1 = mp;
++ pmb->context2 = ndlp;
++ pmb->vport = vport;
++ spin_lock(&phba->hbalock);
++ phba->sli.sli_flag &=
++ ~LPFC_SLI_MBOX_ACTIVE;
++ spin_unlock(&phba->hbalock);
++ goto send_current_mbox;
++ }
++ }
++ spin_lock(&phba->pport->work_port_lock);
++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++ spin_unlock(&phba->pport->work_port_lock);
++ lpfc_mbox_cmpl_put(phba, pmb);
++ }
++ if ((work_ha_copy & HA_MBATT) &&
++ (phba->sli.mbox_active == NULL)) {
++send_next_mbox:
++ spin_lock(&phba->hbalock);
++ phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++ pmb = lpfc_mbox_get(phba);
++ spin_unlock(&phba->hbalock);
++send_current_mbox:
++ /* Process next mailbox command if there is one */
++ if (pmb != NULL) {
++ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
++ if (rc == MBX_NOT_FINISHED) {
++ pmb->mb.mbxStatus = MBX_NOT_FINISHED;
++ lpfc_mbox_cmpl_put(phba, pmb);
++ goto send_next_mbox;
++ }
++ } else {
++ /* Turn on IOCB processing */
++ for (i = 0; i < phba->sli.num_rings; i++)
++ lpfc_sli_turn_on_ring(phba, i);
++ }
++
+ }
+
+- spin_lock(phba->host->host_lock);
++ spin_lock(&phba->hbalock);
+ phba->work_ha |= work_ha_copy;
+ if (phba->work_wait)
+- wake_up(phba->work_wait);
+- spin_unlock(phba->host->host_lock);
++ lpfc_worker_wake_up(phba);
++ spin_unlock(&phba->hbalock);
+ }
+
+ ha_copy &= ~(phba->work_ha_mask);
+@@ -3298,7 +3989,7 @@
+ */
+ status = (ha_copy & (HA_RXMASK << (4*LPFC_FCP_RING)));
+ status >>= (4*LPFC_FCP_RING);
+- if (status & HA_RXATT)
++ if (status & HA_RXMASK)
+ lpfc_sli_handle_fast_ring_event(phba,
+ &phba->sli.ring[LPFC_FCP_RING],
+ status);
+@@ -3311,7 +4002,7 @@
+ */
+ status = (ha_copy & (HA_RXMASK << (4*LPFC_EXTRA_RING)));
+ status >>= (4*LPFC_EXTRA_RING);
+- if (status & HA_RXATT) {
++ if (status & HA_RXMASK) {
+ lpfc_sli_handle_fast_ring_event(phba,
+ &phba->sli.ring[LPFC_EXTRA_RING],
+ status);
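
The lpfc_sli.c hunks above repeatedly trade the Scsi_Host-wide
phba->host->host_lock for the driver-private phba->hbalock, and route
worker wake-ups through lpfc_worker_wake_up(). A minimal C sketch of the
resulting interrupt-side pattern, with illustrative struct and function
names (only the hbalock and work_ha members are taken from the patch):

    #include <linux/spinlock.h>

    struct hba_like {
            spinlock_t hbalock;    /* private lock replacing shost->host_lock */
            unsigned int work_ha;  /* attention bits deferred to the worker */
    };

    static void note_attention(struct hba_like *hba, unsigned int ha_bits)
    {
            unsigned long flags;

            /* Interrupt-side bookkeeping now serializes on the private
             * lock, so it no longer contends with the SCSI midlayer. */
            spin_lock_irqsave(&hba->hbalock, flags);
            hba->work_ha |= ha_bits;
            spin_unlock_irqrestore(&hba->hbalock, flags);
    }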
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h 2007-12-21 15:36:12.000000000 -0500
+@@ -20,6 +20,7 @@
+
+ /* forward declaration for LPFC_IOCB_t's use */
+ struct lpfc_hba;
++struct lpfc_vport;
+
+ /* Define the context types that SLI handles for abort and sums. */
+ typedef enum _lpfc_ctx_cmd {
+@@ -43,10 +44,12 @@
+ #define LPFC_IO_WAKE 2 /* High Priority Queue signal flag */
+ #define LPFC_IO_FCP 4 /* FCP command -- iocbq in scsi_buf */
+ #define LPFC_DRIVER_ABORTED 8 /* driver aborted this request */
++#define LPFC_IO_FABRIC 0x10 /* Iocb send using fabric scheduler */
+
+ uint8_t abort_count;
+ uint8_t rsvd2;
+ uint32_t drvrTimeout; /* driver timeout in seconds */
++ struct lpfc_vport *vport; /* virtual port pointer */
+ void *context1; /* caller context information */
+ void *context2; /* caller context information */
+ void *context3; /* caller context information */
+@@ -56,6 +59,8 @@
+ struct lpfcMboxq *mbox;
+ } context_un;
+
++ void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++ struct lpfc_iocbq *);
+ void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ struct lpfc_iocbq *);
+
+@@ -69,11 +74,13 @@
+ #define IOCB_TIMEDOUT 3
+
+ #define LPFC_MBX_WAKE 1
++#define LPFC_MBX_IMED_UNREG 2
+
+ typedef struct lpfcMboxq {
+ /* MBOXQs are used in single linked lists */
+ struct list_head list; /* ptr to next mailbox command */
+ MAILBOX_t mb; /* Mailbox cmd */
++ struct lpfc_vport *vport; /* virtual port pointer */
+ void *context1; /* caller context information */
+ void *context2; /* caller context information */
+
+@@ -135,6 +142,8 @@
+ uint8_t ringno; /* ring number */
+ uint16_t numCiocb; /* number of command iocb's per ring */
+ uint16_t numRiocb; /* number of rsp iocb's per ring */
++ uint16_t sizeCiocb; /* Size of command iocb's in this ring */
++ uint16_t sizeRiocb; /* Size of response iocb's in this ring */
+
+ uint32_t fast_iotag; /* max fastlookup based iotag */
+ uint32_t iotag_ctr; /* keeps track of the next iotag to use */
+@@ -165,6 +174,34 @@
+ struct lpfc_sli_ring *);
+ };
+
++/* Structure used for configuring rings to a specific profile or rctl / type */
++struct lpfc_hbq_init {
++ uint32_t rn; /* Receive buffer notification */
++ uint32_t entry_count; /* max # of entries in HBQ */
++ uint32_t headerLen; /* 0 if not profile 4 or 5 */
++ uint32_t logEntry; /* Set to 1 if this HBQ used for LogEntry */
++ uint32_t profile; /* Selection profile 0=all, 7=logentry */
++ uint32_t ring_mask; /* Binds HBQ to a ring e.g. Ring0=b0001,
++ * ring2=b0100 */
++ uint32_t hbq_index; /* index of this hbq in ring .HBQs[] */
++
++ uint32_t seqlenoff;
++ uint32_t maxlen;
++ uint32_t seqlenbcnt;
++ uint32_t cmdcodeoff;
++ uint32_t cmdmatch[8];
++ uint32_t mask_count; /* number of mask entries in prt array */
++ struct hbq_mask hbqMasks[6];
++
++ /* Non-config rings fields to keep track of buffer allocations */
++ uint32_t buffer_count; /* number of buffers allocated */
++ uint32_t init_count; /* number to allocate when initialized */
++ uint32_t add_count; /* number to allocate when starved */
++};
++
++#define LPFC_MAX_HBQ 16
++
+ /* Structure used to hold SLI statistical counters and info */
+ struct lpfc_sli_stat {
+ uint64_t mbox_stat_err; /* Mbox cmds completed status error */
+@@ -197,6 +234,7 @@
+ #define LPFC_SLI_MBOX_ACTIVE 0x100 /* HBA mailbox is currently active */
+ #define LPFC_SLI2_ACTIVE 0x200 /* SLI2 overlay in firmware is active */
+ #define LPFC_PROCESS_LA 0x400 /* Able to process link attention */
++#define LPFC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */
+
+ struct lpfc_sli_ring ring[LPFC_MAX_RING];
+ int fcp_ring; /* ring used for FCP initiator commands */
+@@ -209,6 +247,7 @@
+ uint16_t mboxq_cnt; /* current length of queue */
+ uint16_t mboxq_max; /* max length */
+ LPFC_MBOXQ_t *mbox_active; /* active mboxq information */
++ struct list_head mboxq_cmpl;
+
+ struct timer_list mbox_tmo; /* Hold clk to timeout active mbox
+ cmd */
+@@ -221,12 +260,6 @@
+ struct lpfc_lnk_stat lnk_stat_offsets;
+ };
+
+-/* Given a pointer to the start of the ring, and the slot number of
+- * the desired iocb entry, calc a pointer to that entry.
+- * (assume iocb entry size is 32 bytes, or 8 words)
+- */
+-#define IOCB_ENTRY(ring,slot) ((IOCB_t *)(((char *)(ring)) + ((slot) * 32)))
+-
+ #define LPFC_MBOX_TMO 30 /* Sec tmo for outstanding mbox
+ command */
+ #define LPFC_MBOX_TMO_FLASH_CMD 300 /* Sec tmo for outstanding FLASH write
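
The lpfc_sli.h hunk above drops the IOCB_ENTRY() macro, which hard-coded
32-byte ring entries, now that each ring records its own entry sizes in
the new sizeCiocb/sizeRiocb fields. A hedged sketch of the address
arithmetic those fields enable (the helper name and its parameters are
illustrative, not taken from this patch):

    /* Hypothetical replacement for IOCB_ENTRY(): the entry size comes
     * from the ring (sizeCiocb) instead of a fixed 32 bytes. */
    static inline void *ring_cmd_entry(void *base, unsigned int slot,
                                       unsigned int sizeCiocb)
    {
            return (char *)base + (size_t)slot * sizeCiocb;
    }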
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h 2007-12-21 15:36:12.000000000 -0500
+@@ -18,7 +18,7 @@
+ * included with this package. *
+ *******************************************************************/
+
+-#define LPFC_DRIVER_VERSION "8.1.12"
++#define LPFC_DRIVER_VERSION "8.2.1"
+
+ #define LPFC_DRIVER_NAME "lpfc"
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,523 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2004-2006 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * Portions Copyright (C) 2004-2005 Christoph Hellwig *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/idr.h>
++#include <linux/interrupt.h>
++#include <linux/kthread.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++
++#include <scsi/scsi.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_fc.h>
++#include "lpfc_hw.h"
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_crtn.h"
++#include "lpfc_version.h"
++#include "lpfc_vport.h"
++
++inline void lpfc_vport_set_state(struct lpfc_vport *vport,
++ enum fc_vport_state new_state)
++{
++ struct fc_vport *fc_vport = vport->fc_vport;
++
++ if (fc_vport) {
++ /*
++ * When the transport defines fc_vport_set_state we will replace
++ * this code with the following line
++ */
++ /* fc_vport_set_state(fc_vport, new_state); */
++ if (new_state != FC_VPORT_INITIALIZING)
++ fc_vport->vport_last_state = fc_vport->vport_state;
++ fc_vport->vport_state = new_state;
++ }
++
++ /* For all the error states we will set the internal state to FAILED */
++ switch (new_state) {
++ case FC_VPORT_NO_FABRIC_SUPP:
++ case FC_VPORT_NO_FABRIC_RSCS:
++ case FC_VPORT_FABRIC_LOGOUT:
++ case FC_VPORT_FABRIC_REJ_WWN:
++ case FC_VPORT_FAILED:
++ vport->port_state = LPFC_VPORT_FAILED;
++ break;
++ case FC_VPORT_LINKDOWN:
++ vport->port_state = LPFC_VPORT_UNKNOWN;
++ break;
++ default:
++ /* do nothing */
++ break;
++ }
++}
++
++static int
++lpfc_alloc_vpi(struct lpfc_hba *phba)
++{
++ int vpi;
++
++ spin_lock_irq(&phba->hbalock);
++ /* Start at bit 1 because vpi zero is reserved for the physical port */
++ vpi = find_next_zero_bit(phba->vpi_bmask, (phba->max_vpi + 1), 1);
++ if (vpi > phba->max_vpi)
++ vpi = 0;
++ else
++ set_bit(vpi, phba->vpi_bmask);
++ spin_unlock_irq(&phba->hbalock);
++ return vpi;
++}
++
++static void
++lpfc_free_vpi(struct lpfc_hba *phba, int vpi)
++{
++ spin_lock_irq(&phba->hbalock);
++ clear_bit(vpi, phba->vpi_bmask);
++ spin_unlock_irq(&phba->hbalock);
++}
++
++static int
++lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++ LPFC_MBOXQ_t *pmb;
++ MAILBOX_t *mb;
++ struct lpfc_dmabuf *mp;
++ int rc;
++
++ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++ if (!pmb)
++ return -ENOMEM;
++ mb = &pmb->mb;
++
++ lpfc_read_sparam(phba, pmb, vport->vpi);
++ /*
++ * Grab buffer pointer and clear context1 so we can use
++ * lpfc_sli_issue_mbox_wait
++ */
++ mp = (struct lpfc_dmabuf *) pmb->context1;
++ pmb->context1 = NULL;
++
++ pmb->vport = vport;
++ rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2);
++ if (rc != MBX_SUCCESS) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++ "%d (%d):1818 VPort failed init, mbxCmd x%x "
++ "READ_SPARM mbxStatus x%x, rc = x%x\n",
++ phba->brd_no, vport->vpi,
++ mb->mbxCommand, mb->mbxStatus, rc);
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ if (rc != MBX_TIMEOUT)
++ mempool_free(pmb, phba->mbox_mem_pool);
++ return -EIO;
++ }
++
++ memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
++ sizeof (struct lpfc_name));
++ memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
++ sizeof (struct lpfc_name));
++
++ lpfc_mbuf_free(phba, mp->virt, mp->phys);
++ kfree(mp);
++ mempool_free(pmb, phba->mbox_mem_pool);
++
++ return 0;
++}
++
++static int
++lpfc_valid_wwn_format(struct lpfc_hba *phba, struct lpfc_name *wwn,
++ const char *name_type)
++{
++ /* ensure that IEEE format 1 addresses
++ * contain zeros in bits 59-48
++ */
++ if (!((wwn->u.wwn[0] >> 4) == 1 &&
++ ((wwn->u.wwn[0] & 0xf) != 0 || (wwn->u.wwn[1] & 0xf) != 0)))
++ return 1;
++
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1822 Invalid %s: %02x:%02x:%02x:%02x:"
++ "%02x:%02x:%02x:%02x\n",
++ phba->brd_no, name_type,
++ wwn->u.wwn[0], wwn->u.wwn[1],
++ wwn->u.wwn[2], wwn->u.wwn[3],
++ wwn->u.wwn[4], wwn->u.wwn[5],
++ wwn->u.wwn[6], wwn->u.wwn[7]);
++ return 0;
++}
++
++static int
++lpfc_unique_wwpn(struct lpfc_hba *phba, struct lpfc_vport *new_vport)
++{
++ struct lpfc_vport *vport;
++
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ if (vport == new_vport)
++ continue;
++ /* If they match, return not unique */
++ if (memcmp(&vport->fc_sparam.portName,
++ &new_vport->fc_sparam.portName,
++ sizeof(struct lpfc_name)) == 0)
++ return 0;
++ }
++ return 1;
++}
++
++int
++lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
++{
++ struct lpfc_nodelist *ndlp;
++ struct lpfc_vport *pport =
++ (struct lpfc_vport *) fc_vport->shost->hostdata;
++ struct lpfc_hba *phba = pport->phba;
++ struct lpfc_vport *vport = NULL;
++ int instance;
++ int vpi;
++ int rc = VPORT_ERROR;
++
++ if ((phba->sli_rev < 3) ||
++ !(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1808 Create VPORT failed: "
++ "NPIV is not enabled: SLImode:%d\n",
++ phba->brd_no, phba->sli_rev);
++ rc = VPORT_INVAL;
++ goto error_out;
++ }
++
++ vpi = lpfc_alloc_vpi(phba);
++ if (vpi == 0) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1809 Create VPORT failed: "
++ "Max VPORTs (%d) exceeded\n",
++ phba->brd_no, phba->max_vpi);
++ rc = VPORT_NORESOURCES;
++ goto error_out;
++ }
++
++ /* Assign an unused board number */
++ if ((instance = lpfc_get_instance()) < 0) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1810 Create VPORT failed: Cannot get "
++ "instance number\n", phba->brd_no);
++ lpfc_free_vpi(phba, vpi);
++ rc = VPORT_NORESOURCES;
++ goto error_out;
++ }
++
++ vport = lpfc_create_port(phba, instance, fc_vport);
++ if (!vport) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1811 Create VPORT failed: vpi x%x\n",
++ phba->brd_no, vpi);
++ lpfc_free_vpi(phba, vpi);
++ rc = VPORT_NORESOURCES;
++ goto error_out;
++ }
++
++ vport->vpi = vpi;
++ lpfc_debugfs_initialize(vport);
++
++ if (lpfc_vport_sparm(phba, vport)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1813 Create VPORT failed: vpi:%d "
++ "Cannot get sparam\n",
++ phba->brd_no, vpi);
++ lpfc_free_vpi(phba, vpi);
++ destroy_port(vport);
++ rc = VPORT_NORESOURCES;
++ goto error_out;
++ }
++
++ memcpy(vport->fc_portname.u.wwn, vport->fc_sparam.portName.u.wwn, 8);
++ memcpy(vport->fc_nodename.u.wwn, vport->fc_sparam.nodeName.u.wwn, 8);
++
++ if (fc_vport->node_name != 0)
++ u64_to_wwn(fc_vport->node_name, vport->fc_nodename.u.wwn);
++ if (fc_vport->port_name != 0)
++ u64_to_wwn(fc_vport->port_name, vport->fc_portname.u.wwn);
++
++ memcpy(&vport->fc_sparam.portName, vport->fc_portname.u.wwn, 8);
++ memcpy(&vport->fc_sparam.nodeName, vport->fc_nodename.u.wwn, 8);
++
++ if (!lpfc_valid_wwn_format(phba, &vport->fc_sparam.nodeName, "WWNN") ||
++ !lpfc_valid_wwn_format(phba, &vport->fc_sparam.portName, "WWPN")) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1821 Create VPORT failed: vpi:%d "
++ "Invalid WWN format\n",
++ phba->brd_no, vpi);
++ lpfc_free_vpi(phba, vpi);
++ destroy_port(vport);
++ rc = VPORT_INVAL;
++ goto error_out;
++ }
++
++ if (!lpfc_unique_wwpn(phba, vport)) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1823 Create VPORT failed: vpi:%d "
++ "Duplicate WWN on HBA\n",
++ phba->brd_no, vpi);
++ lpfc_free_vpi(phba, vpi);
++ destroy_port(vport);
++ rc = VPORT_INVAL;
++ goto error_out;
++ }
++
++ *(struct lpfc_vport **)fc_vport->dd_data = vport;
++ vport->fc_vport = fc_vport;
++
++ if ((phba->link_state < LPFC_LINK_UP) ||
++ (phba->fc_topology == TOPOLOGY_LOOP)) {
++ lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN);
++ rc = VPORT_OK;
++ goto out;
++ }
++
++ if (disable) {
++ rc = VPORT_OK;
++ goto out;
++ }
++
++ /* Use the physical node's Fabric NDLP to determine if the link is
++ * up and ready to FDISC.
++ */
++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) {
++ lpfc_set_disctmo(vport);
++ lpfc_initial_fdisc(vport);
++ } else {
++ lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0262 No NPIV Fabric "
++ "support\n",
++ phba->brd_no, vport->vpi);
++ }
++ } else {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ }
++ rc = VPORT_OK;
++
++out:
++ lpfc_host_attrib_init(lpfc_shost_from_vport(vport));
++error_out:
++ return rc;
++}
++
++int
++disable_vport(struct fc_vport *fc_vport)
++{
++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_nodelist *ndlp = NULL, *next_ndlp = NULL;
++ long timeout;
++
++ ndlp = lpfc_findnode_did(vport, Fabric_DID);
++ if (ndlp && phba->link_state >= LPFC_LINK_UP) {
++ vport->unreg_vpi_cmpl = VPORT_INVAL;
++ timeout = msecs_to_jiffies(phba->fc_ratov * 2000);
++ if (!lpfc_issue_els_npiv_logo(vport, ndlp))
++ while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout)
++ timeout = schedule_timeout(timeout);
++ }
++
++ lpfc_sli_host_down(vport);
++
++ /* Mark all nodes for discovery so we can remove them by
++ * calling lpfc_cleanup_rpis(vport, 1)
++ */
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++ continue;
++ lpfc_disc_state_machine(vport, ndlp, NULL,
++ NLP_EVT_DEVICE_RECOVERY);
++ }
++ lpfc_cleanup_rpis(vport, 1);
++
++ lpfc_stop_vport_timers(vport);
++ lpfc_unreg_all_rpis(vport);
++ lpfc_unreg_default_rpis(vport);
++ /*
++ * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the
++ * scsi_host_put() to release the vport.
++ */
++ lpfc_mbx_unreg_vpi(vport);
++
++ lpfc_vport_set_state(vport, FC_VPORT_DISABLED);
++ return VPORT_OK;
++}
++
++int
++enable_vport(struct fc_vport *fc_vport)
++{
++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++ struct lpfc_hba *phba = vport->phba;
++ struct lpfc_nodelist *ndlp = NULL;
++
++ if ((phba->link_state < LPFC_LINK_UP) ||
++ (phba->fc_topology == TOPOLOGY_LOOP)) {
++ lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN);
++ return VPORT_OK;
++ }
++
++ vport->load_flag |= FC_LOADING;
++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++
++ /* Use the physical node's Fabric NDLP to determine if the link is
++ * up and ready to FDISC.
++ */
++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) {
++ lpfc_set_disctmo(vport);
++ lpfc_initial_fdisc(vport);
++ } else {
++ lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP);
++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++ "%d (%d):0264 No NPIV Fabric "
++ "support\n",
++ phba->brd_no, vport->vpi);
++ }
++ } else {
++ lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++ }
++
++ return VPORT_OK;
++}
++
++int
++lpfc_vport_disable(struct fc_vport *fc_vport, bool disable)
++{
++ if (disable)
++ return disable_vport(fc_vport);
++ else
++ return enable_vport(fc_vport);
++}
++
++
++int
++lpfc_vport_delete(struct fc_vport *fc_vport)
++{
++ struct lpfc_nodelist *ndlp = NULL;
++ struct lpfc_nodelist *next_ndlp;
++ struct Scsi_Host *shost = (struct Scsi_Host *) fc_vport->shost;
++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++ struct lpfc_hba *phba = vport->phba;
++ long timeout;
++ int rc = VPORT_ERROR;
++
++ /*
++ * This is a bit of a mess. We want to ensure the shost doesn't get
++ * torn down until we're done with the embedded lpfc_vport structure.
++ *
++ * Beyond holding a reference for this function, we also need a
++ * reference for outstanding I/O requests we schedule during delete
++ * processing. But once we scsi_remove_host() we can no longer obtain
++ * a reference through scsi_host_get().
++ *
++ * So we take two references here. We release one reference at the
++ * bottom of the function -- after delinking the vport. And we
++ * release the other at the completion of the unreg_vpi that gets
++ * initiated after we've disposed of all other resources associated
++ * with the port.
++ */
++ if (!scsi_host_get(shost) || !scsi_host_get(shost))
++ return VPORT_INVAL;
++
++ if (vport->port_type == LPFC_PHYSICAL_PORT) {
++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++ "%d:1812 vport_delete failed: Cannot delete "
++ "physical host\n", phba->brd_no);
++ goto out;
++ }
++
++ vport->load_flag |= FC_UNLOADING;
++
++ kfree(vport->vname);
++ lpfc_debugfs_terminate(vport);
++ fc_remove_host(lpfc_shost_from_vport(vport));
++ scsi_remove_host(lpfc_shost_from_vport(vport));
++
++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE &&
++ phba->link_state >= LPFC_LINK_UP) {
++
++ /* First look for the Fabric ndlp */
++ ndlp = lpfc_findnode_did(vport, Fabric_DID);
++ if (!ndlp) {
++ /* Cannot find existing Fabric ndlp, allocate one */
++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++ if (!ndlp)
++ goto skip_logo;
++ lpfc_nlp_init(vport, ndlp, Fabric_DID);
++ } else {
++ lpfc_dequeue_node(vport, ndlp);
++ }
++ vport->unreg_vpi_cmpl = VPORT_INVAL;
++ timeout = msecs_to_jiffies(phba->fc_ratov * 2000);
++ if (!lpfc_issue_els_npiv_logo(vport, ndlp))
++ while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout)
++ timeout = schedule_timeout(timeout);
++ }
++
++skip_logo:
++ lpfc_sli_host_down(vport);
++
++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++ lpfc_disc_state_machine(vport, ndlp, NULL,
++ NLP_EVT_DEVICE_RECOVERY);
++ lpfc_disc_state_machine(vport, ndlp, NULL,
++ NLP_EVT_DEVICE_RM);
++ }
++
++ lpfc_stop_vport_timers(vport);
++ lpfc_unreg_all_rpis(vport);
++ lpfc_unreg_default_rpis(vport);
++ /*
++ * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the
++ * scsi_host_put() to release the vport.
++ */
++ lpfc_mbx_unreg_vpi(vport);
++
++ lpfc_free_vpi(phba, vport->vpi);
++ vport->work_port_events = 0;
++ spin_lock_irq(&phba->hbalock);
++ list_del_init(&vport->listentry);
++ spin_unlock_irq(&phba->hbalock);
++
++ rc = VPORT_OK;
++out:
++ scsi_host_put(shost);
++ return rc;
++}
++
++EXPORT_SYMBOL(lpfc_vport_create);
++EXPORT_SYMBOL(lpfc_vport_delete);
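
lpfc_alloc_vpi() above reserves bit 0 of the bitmap for the physical port
and signals exhaustion by returning 0, which callers such as
lpfc_vport_create() treat as "no VPI available". A standalone sketch of
the same idiom with generic names (MAX_VPI and vpi_bmask here are
illustrative; the real code also holds phba->hbalock around the bitmap):

    #include <linux/bitmap.h>
    #include <linux/bitops.h>

    #define MAX_VPI 63                      /* illustrative limit */
    static DECLARE_BITMAP(vpi_bmask, MAX_VPI + 1);

    static int alloc_vpi(void)
    {
            /* Start at bit 1: vpi 0 belongs to the physical port. */
            int vpi = find_next_zero_bit(vpi_bmask, MAX_VPI + 1, 1);

            if (vpi > MAX_VPI)
                    return 0;               /* 0 doubles as "none free" */
            set_bit(vpi, vpi_bmask);
            return vpi;
    }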
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,113 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for *
++ * Fibre Channel Host Bus Adapters. *
++ * Copyright (C) 2004-2006 Emulex. All rights reserved. *
++ * EMULEX and SLI are trademarks of Emulex. *
++ * www.emulex.com *
++ * Portions Copyright (C) 2004-2005 Christoph Hellwig *
++ * *
++ * This program is free software; you can redistribute it and/or *
++ * modify it under the terms of version 2 of the GNU General *
++ * Public License as published by the Free Software Foundation. *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID. See the GNU General Public License for *
++ * more details, a copy of which can be found in the file COPYING *
++ * included with this package. *
++ *******************************************************************/
++
++#ifndef _H_LPFC_VPORT
++#define _H_LPFC_VPORT
++
++/* API version values (each will be an individual bit) */
++#define VPORT_API_VERSION_1 0x01
++
++/* Values returned via lpfc_vport_getinfo() */
++struct vport_info {
++
++ uint32_t api_versions;
++ uint8_t linktype;
++#define VPORT_TYPE_PHYSICAL 0
++#define VPORT_TYPE_VIRTUAL 1
++
++ uint8_t state;
++#define VPORT_STATE_OFFLINE 0
++#define VPORT_STATE_ACTIVE 1
++#define VPORT_STATE_FAILED 2
++
++ uint8_t fail_reason;
++ uint8_t prev_fail_reason;
++#define VPORT_FAIL_UNKNOWN 0
++#define VPORT_FAIL_LINKDOWN 1
++#define VPORT_FAIL_FAB_UNSUPPORTED 2
++#define VPORT_FAIL_FAB_NORESOURCES 3
++#define VPORT_FAIL_FAB_LOGOUT 4
++#define VPORT_FAIL_ADAP_NORESOURCES 5
++
++ uint8_t node_name[8]; /* WWNN */
++ uint8_t port_name[8]; /* WWPN */
++
++ struct Scsi_Host *shost;
++
++/* Following values are valid only on physical links */
++ uint32_t vports_max;
++ uint32_t vports_inuse;
++ uint32_t rpi_max;
++ uint32_t rpi_inuse;
++#define VPORT_CNT_INVALID 0xFFFFFFFF
++};
++
++/* data used in link creation */
++struct vport_data {
++ uint32_t api_version;
++
++ uint32_t options;
++#define VPORT_OPT_AUTORETRY 0x01
++
++ uint8_t node_name[8]; /* WWNN */
++ uint8_t port_name[8]; /* WWPN */
++
++/*
++ * Upon successful creation, vport_shost will point to the new Scsi_Host
++ * structure for the new virtual link.
++ */
++ struct Scsi_Host *vport_shost;
++};
++
++/* API function return codes */
++#define VPORT_OK 0
++#define VPORT_ERROR -1
++#define VPORT_INVAL -2
++#define VPORT_NOMEM -3
++#define VPORT_NORESOURCES -4
++
++int lpfc_vport_create(struct fc_vport *, bool);
++int lpfc_vport_delete(struct fc_vport *);
++int lpfc_vport_getinfo(struct Scsi_Host *, struct vport_info *);
++int lpfc_vport_tgt_remove(struct Scsi_Host *, uint, uint);
++
++/*
++ * queuecommand VPORT-specific return codes. Specified in the host byte code.
++ * Returned when the virtual link has failed or is not active.
++ */
++#define DID_VPORT_ERROR 0x0f
++
++#define VPORT_INFO 0x1
++#define VPORT_CREATE 0x2
++#define VPORT_DELETE 0x4
++
++struct vport_cmd_tag {
++ uint32_t cmd;
++ struct vport_data cdata;
++ struct vport_info cinfo;
++ void *vport;
++ int vport_num;
++};
++
++void lpfc_vport_set_state(struct lpfc_vport *vport,
++ enum fc_vport_state new_state);
++
++#endif /* _H_LPFC_VPORT */
+diff -Nurb linux-2.6.22-570/drivers/scsi/mac53c94.c linux-2.6.22-591/drivers/scsi/mac53c94.c
+--- linux-2.6.22-570/drivers/scsi/mac53c94.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/mac53c94.c 2007-12-21 15:36:12.000000000 -0500
+@@ -77,7 +77,7 @@
+ for (i = 0; i < cmd->cmd_len; ++i)
+ printk(" %.2x", cmd->cmnd[i]);
+ printk("\n" KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n",
+- cmd->use_sg, cmd->request_bufflen, cmd->request_buffer);
++ scsi_sg_count(cmd), scsi_bufflen(cmd), scsi_sglist(cmd));
+ }
+ #endif
+
+@@ -173,7 +173,6 @@
+ writeb(CMD_SELECT, ®s->command);
+ state->phase = selecting;
+
+- if (cmd->use_sg > 0 || cmd->request_bufflen != 0)
+ set_dma_cmds(state, cmd);
+ }
+
+@@ -262,7 +261,7 @@
+ writeb(CMD_NOP, ®s->command);
+ /* set DMA controller going if any data to transfer */
+ if ((stat & (STAT_MSG|STAT_CD)) == 0
+- && (cmd->use_sg > 0 || cmd->request_bufflen != 0)) {
++ && (scsi_sg_count(cmd) > 0 || scsi_bufflen(cmd))) {
+ nb = cmd->SCp.this_residual;
+ if (nb > 0xfff0)
+ nb = 0xfff0;
+@@ -310,14 +309,7 @@
+ printk(KERN_DEBUG "intr %x before data xfer complete\n", intr);
+ }
+ writel(RUN << 16, &dma->control); /* stop dma */
+- if (cmd->use_sg != 0) {
+- pci_unmap_sg(state->pdev,
+- (struct scatterlist *)cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- } else {
+- pci_unmap_single(state->pdev, state->dma_addr,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+ /* should check dma status */
+ writeb(CMD_I_COMPLETE, ®s->command);
+ state->phase = completing;
+@@ -365,23 +357,23 @@
+ */
+ static void set_dma_cmds(struct fsc_state *state, struct scsi_cmnd *cmd)
+ {
+- int i, dma_cmd, total;
++ int i, dma_cmd, total, nseg;
+ struct scatterlist *scl;
+ struct dbdma_cmd *dcmds;
+ dma_addr_t dma_addr;
+ u32 dma_len;
+
++ nseg = scsi_dma_map(cmd);
++ BUG_ON(nseg < 0);
++ if (!nseg)
++ return;
++
+ dma_cmd = cmd->sc_data_direction == DMA_TO_DEVICE ?
+ OUTPUT_MORE : INPUT_MORE;
+ dcmds = state->dma_cmds;
+- if (cmd->use_sg > 0) {
+- int nseg;
+-
+ total = 0;
+- scl = (struct scatterlist *) cmd->request_buffer;
+- nseg = pci_map_sg(state->pdev, scl, cmd->use_sg,
+- cmd->sc_data_direction);
+- for (i = 0; i < nseg; ++i) {
++
++ scsi_for_each_sg(cmd, scl, nseg, i) {
+ dma_addr = sg_dma_address(scl);
+ dma_len = sg_dma_len(scl);
+ if (dma_len > 0xffff)
+@@ -391,21 +383,9 @@
+ st_le16(&dcmds->command, dma_cmd);
+ st_le32(&dcmds->phy_addr, dma_addr);
+ dcmds->xfer_status = 0;
+- ++scl;
+- ++dcmds;
+- }
+- } else {
+- total = cmd->request_bufflen;
+- if (total > 0xffff)
+- panic("mac53c94: transfer size >= 64k");
+- dma_addr = pci_map_single(state->pdev, cmd->request_buffer,
+- total, cmd->sc_data_direction);
+- state->dma_addr = dma_addr;
+- st_le16(&dcmds->req_count, total);
+- st_le32(&dcmds->phy_addr, dma_addr);
+- dcmds->xfer_status = 0;
+ ++dcmds;
+ }
++
+ dma_cmd += OUTPUT_LAST - OUTPUT_MORE;
+ st_le16(&dcmds[-1].command, dma_cmd);
+ st_le16(&dcmds->command, DBDMA_STOP);
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c
+--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1378,8 +1378,6 @@
+ {
+ struct scatterlist *sgl;
+ mbox_ccb_t *ccb;
+- struct page *page;
+- unsigned long offset;
+ struct scsi_cmnd *scp;
+ int sgcnt;
+ int i;
+@@ -1388,48 +1386,16 @@
+ scp = scb->scp;
+ ccb = (mbox_ccb_t *)scb->ccb;
+
++ sgcnt = scsi_dma_map(scp);
++ BUG_ON(sgcnt < 0 || sgcnt > adapter->sglen);
++
+ // no mapping required if no data to be transferred
+- if (!scp->request_buffer || !scp->request_bufflen)
++ if (!sgcnt)
+ return 0;
+
+- if (!scp->use_sg) { /* scatter-gather list not used */
+-
+- page = virt_to_page(scp->request_buffer);
+-
+- offset = ((unsigned long)scp->request_buffer & ~PAGE_MASK);
+-
+- ccb->buf_dma_h = pci_map_page(adapter->pdev, page, offset,
+- scp->request_bufflen,
+- scb->dma_direction);
+- scb->dma_type = MRAID_DMA_WBUF;
+-
+- /*
+- * We need to handle special 64-bit commands that need a
+- * minimum of 1 SG
+- */
+- sgcnt = 1;
+- ccb->sgl64[0].address = ccb->buf_dma_h;
+- ccb->sgl64[0].length = scp->request_bufflen;
+-
+- return sgcnt;
+- }
+-
+- sgl = (struct scatterlist *)scp->request_buffer;
+-
+- // The number of sg elements returned must not exceed our limit
+- sgcnt = pci_map_sg(adapter->pdev, sgl, scp->use_sg,
+- scb->dma_direction);
+-
+- if (sgcnt > adapter->sglen) {
+- con_log(CL_ANN, (KERN_CRIT
+- "megaraid critical: too many sg elements:%d\n",
+- sgcnt));
+- BUG();
+- }
+-
+ scb->dma_type = MRAID_DMA_WSG;
+
+- for (i = 0; i < sgcnt; i++, sgl++) {
++ scsi_for_each_sg(scp, sgl, sgcnt, i) {
+ ccb->sgl64[i].address = sg_dma_address(sgl);
+ ccb->sgl64[i].length = sg_dma_len(sgl);
+ }
+@@ -1489,19 +1455,11 @@
+
+ adapter->outstanding_cmds++;
+
+- if (scb->dma_direction == PCI_DMA_TODEVICE) {
+- if (!scb->scp->use_sg) { // sg list not used
+- pci_dma_sync_single_for_device(adapter->pdev,
+- ccb->buf_dma_h,
+- scb->scp->request_bufflen,
+- PCI_DMA_TODEVICE);
+- }
+- else {
++ if (scb->dma_direction == PCI_DMA_TODEVICE)
+ pci_dma_sync_sg_for_device(adapter->pdev,
+- scb->scp->request_buffer,
+- scb->scp->use_sg, PCI_DMA_TODEVICE);
+- }
+- }
++ scsi_sglist(scb->scp),
++ scsi_sg_count(scb->scp),
++ PCI_DMA_TODEVICE);
+
+ mbox->busy = 1; // Set busy
+ mbox->poll = 0;
+@@ -1624,11 +1582,11 @@
+ return scb;
+
+ case MODE_SENSE:
+- if (scp->use_sg) {
++ {
+ struct scatterlist *sgl;
+ caddr_t vaddr;
+
+- sgl = (struct scatterlist *)scp->request_buffer;
++ sgl = scsi_sglist(scp);
+ if (sgl->page) {
+ vaddr = (caddr_t)
+ (page_address((&sgl[0])->page)
+@@ -1642,9 +1600,6 @@
+ __LINE__));
+ }
+ }
+- else {
+- memset(scp->request_buffer, 0, scp->cmnd[4]);
+- }
+ scp->result = (DID_OK << 16);
+ return NULL;
+
+@@ -1716,7 +1671,7 @@
+ mbox->cmd = MBOXCMD_PASSTHRU64;
+ scb->dma_direction = scp->sc_data_direction;
+
+- pthru->dataxferlen = scp->request_bufflen;
++ pthru->dataxferlen = scsi_bufflen(scp);
+ pthru->dataxferaddr = ccb->sgl_dma_h;
+ pthru->numsge = megaraid_mbox_mksgl(adapter,
+ scb);
+@@ -2050,8 +2005,8 @@
+
+ memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
+
+- if (scp->request_bufflen) {
+- pthru->dataxferlen = scp->request_bufflen;
++ if (scsi_bufflen(scp)) {
++ pthru->dataxferlen = scsi_bufflen(scp);
+ pthru->dataxferaddr = ccb->sgl_dma_h;
+ pthru->numsge = megaraid_mbox_mksgl(adapter, scb);
+ }
+@@ -2099,8 +2054,8 @@
+
+ memcpy(epthru->cdb, scp->cmnd, scp->cmd_len);
+
+- if (scp->request_bufflen) {
+- epthru->dataxferlen = scp->request_bufflen;
++ if (scsi_bufflen(scp)) {
++ epthru->dataxferlen = scsi_bufflen(scp);
+ epthru->dataxferaddr = ccb->sgl_dma_h;
+ epthru->numsge = megaraid_mbox_mksgl(adapter, scb);
+ }
+@@ -2266,37 +2221,13 @@
+
+ ccb = (mbox_ccb_t *)scb->ccb;
+
+- switch (scb->dma_type) {
+-
+- case MRAID_DMA_WBUF:
+- if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
+- pci_dma_sync_single_for_cpu(adapter->pdev,
+- ccb->buf_dma_h,
+- scb->scp->request_bufflen,
+- PCI_DMA_FROMDEVICE);
+- }
+-
+- pci_unmap_page(adapter->pdev, ccb->buf_dma_h,
+- scb->scp->request_bufflen, scb->dma_direction);
+-
+- break;
+-
+- case MRAID_DMA_WSG:
+- if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
++ if (scb->dma_direction == PCI_DMA_FROMDEVICE)
+ pci_dma_sync_sg_for_cpu(adapter->pdev,
+- scb->scp->request_buffer,
+- scb->scp->use_sg, PCI_DMA_FROMDEVICE);
+- }
+-
+- pci_unmap_sg(adapter->pdev, scb->scp->request_buffer,
+- scb->scp->use_sg, scb->dma_direction);
+-
+- break;
+-
+- default:
+- break;
+- }
++ scsi_sglist(scb->scp),
++ scsi_sg_count(scb->scp),
++ PCI_DMA_FROMDEVICE);
+
++ scsi_dma_unmap(scb->scp);
+ return;
+ }
+
+@@ -2399,25 +2330,17 @@
+ if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0
+ && IS_RAID_CH(raid_dev, scb->dev_channel)) {
+
+- if (scp->use_sg) {
+- sgl = (struct scatterlist *)
+- scp->request_buffer;
+-
++ sgl = scsi_sglist(scp);
+ if (sgl->page) {
+ c = *(unsigned char *)
+ (page_address((&sgl[0])->page) +
+ (&sgl[0])->offset);
+- }
+- else {
++ } else {
+ con_log(CL_ANN, (KERN_WARNING
+ "megaraid mailbox: invalid sg:%d\n",
+ __LINE__));
+ c = 0;
+ }
+- }
+- else {
+- c = *(uint8_t *)scp->request_buffer;
+- }
+
+ if ((c & 0x1F ) == TYPE_DISK) {
+ pdev_index = (scb->dev_channel * 16) +
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c
+--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c 2007-12-21 15:36:12.000000000 -0500
+@@ -433,34 +433,15 @@
+ int sge_count;
+ struct scatterlist *os_sgl;
+
+- /*
+- * Return 0 if there is no data transfer
+- */
+- if (!scp->request_buffer || !scp->request_bufflen)
+- return 0;
++ sge_count = scsi_dma_map(scp);
++ BUG_ON(sge_count < 0);
+
+- if (!scp->use_sg) {
+- mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev,
+- scp->
+- request_buffer,
+- scp->
+- request_bufflen,
+- scp->
+- sc_data_direction);
+- mfi_sgl->sge32[0].length = scp->request_bufflen;
+-
+- return 1;
+- }
+-
+- os_sgl = (struct scatterlist *)scp->request_buffer;
+- sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg,
+- scp->sc_data_direction);
+-
+- for (i = 0; i < sge_count; i++, os_sgl++) {
++ if (sge_count) {
++ scsi_for_each_sg(scp, os_sgl, sge_count, i) {
+ mfi_sgl->sge32[i].length = sg_dma_len(os_sgl);
+ mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl);
+ }
+-
++ }
+ return sge_count;
+ }
+
+@@ -481,35 +462,15 @@
+ int sge_count;
+ struct scatterlist *os_sgl;
+
+- /*
+- * Return 0 if there is no data transfer
+- */
+- if (!scp->request_buffer || !scp->request_bufflen)
+- return 0;
+-
+- if (!scp->use_sg) {
+- mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev,
+- scp->
+- request_buffer,
+- scp->
+- request_bufflen,
+- scp->
+- sc_data_direction);
++ sge_count = scsi_dma_map(scp);
++ BUG_ON(sge_count < 0);
+
+- mfi_sgl->sge64[0].length = scp->request_bufflen;
+-
+- return 1;
+- }
+-
+- os_sgl = (struct scatterlist *)scp->request_buffer;
+- sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg,
+- scp->sc_data_direction);
+-
+- for (i = 0; i < sge_count; i++, os_sgl++) {
++ if (sge_count) {
++ scsi_for_each_sg(scp, os_sgl, sge_count, i) {
+ mfi_sgl->sge64[i].length = sg_dma_len(os_sgl);
+ mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl);
+ }
+-
++ }
+ return sge_count;
+ }
+
+@@ -593,7 +554,7 @@
+ pthru->cdb_len = scp->cmd_len;
+ pthru->timeout = 0;
+ pthru->flags = flags;
+- pthru->data_xfer_len = scp->request_bufflen;
++ pthru->data_xfer_len = scsi_bufflen(scp);
+
+ memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
+
+@@ -1195,45 +1156,6 @@
+ }
+
+ /**
+- * megasas_unmap_sgbuf - Unmap SG buffers
+- * @instance: Adapter soft state
+- * @cmd: Completed command
+- */
+-static void
+-megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd)
+-{
+- dma_addr_t buf_h;
+- u8 opcode;
+-
+- if (cmd->scmd->use_sg) {
+- pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer,
+- cmd->scmd->use_sg, cmd->scmd->sc_data_direction);
+- return;
+- }
+-
+- if (!cmd->scmd->request_bufflen)
+- return;
+-
+- opcode = cmd->frame->hdr.cmd;
+-
+- if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) {
+- if (IS_DMA64)
+- buf_h = cmd->frame->io.sgl.sge64[0].phys_addr;
+- else
+- buf_h = cmd->frame->io.sgl.sge32[0].phys_addr;
+- } else {
+- if (IS_DMA64)
+- buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr;
+- else
+- buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr;
+- }
+-
+- pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen,
+- cmd->scmd->sc_data_direction);
+- return;
+-}
+-
+-/**
+ * megasas_complete_cmd - Completes a command
+ * @instance: Adapter soft state
+ * @cmd: Command to be completed
+@@ -1281,7 +1203,7 @@
+
+ atomic_dec(&instance->fw_outstanding);
+
+- megasas_unmap_sgbuf(instance, cmd);
++ scsi_dma_unmap(cmd->scmd);
+ cmd->scmd->scsi_done(cmd->scmd);
+ megasas_return_cmd(instance, cmd);
+
+@@ -1329,7 +1251,7 @@
+
+ atomic_dec(&instance->fw_outstanding);
+
+- megasas_unmap_sgbuf(instance, cmd);
++ scsi_dma_unmap(cmd->scmd);
+ cmd->scmd->scsi_done(cmd->scmd);
+ megasas_return_cmd(instance, cmd);
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid.c linux-2.6.22-591/drivers/scsi/megaraid.c
+--- linux-2.6.22-570/drivers/scsi/megaraid.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/megaraid.c 2007-12-21 15:36:12.000000000 -0500
+@@ -523,10 +523,8 @@
+ /*
+ * filter the internal and ioctl commands
+ */
+- if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) {
+- return cmd->request_buffer;
+- }
+-
++ if (cmd->cmnd[0] == MEGA_INTERNAL_CMD)
++ return (scb_t *)cmd->host_scribble;
+
+ /*
+ * We know what channels our logical drives are on - mega_find_card()
+@@ -657,22 +655,14 @@
+
+ case MODE_SENSE: {
+ char *buf;
+-
+- if (cmd->use_sg) {
+ struct scatterlist *sg;
+
+- sg = (struct scatterlist *)cmd->request_buffer;
+- buf = kmap_atomic(sg->page, KM_IRQ0) +
+- sg->offset;
+- } else
+- buf = cmd->request_buffer;
+- memset(buf, 0, cmd->cmnd[4]);
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
++ sg = scsi_sglist(cmd);
++ buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+
+- sg = (struct scatterlist *)cmd->request_buffer;
++ memset(buf, 0, cmd->cmnd[4]);
+ kunmap_atomic(buf - sg->offset, KM_IRQ0);
+- }
++
+ cmd->result = (DID_OK << 16);
+ cmd->scsi_done(cmd);
+ return NULL;
+@@ -1551,24 +1541,16 @@
+ islogical = adapter->logdrv_chan[cmd->device->channel];
+ if( cmd->cmnd[0] == INQUIRY && !islogical ) {
+
+- if( cmd->use_sg ) {
+- sgl = (struct scatterlist *)
+- cmd->request_buffer;
+-
++ sgl = scsi_sglist(cmd);
+ if( sgl->page ) {
+ c = *(unsigned char *)
+ page_address((&sgl[0])->page) +
+ (&sgl[0])->offset;
+- }
+- else {
++ } else {
+ printk(KERN_WARNING
+ "megaraid: invalid sg.\n");
+ c = 0;
+ }
+- }
+- else {
+- c = *(u8 *)cmd->request_buffer;
+- }
+
+ if(IS_RAID_CH(adapter, cmd->device->channel) &&
+ ((c & 0x1F ) == TYPE_DISK)) {
+@@ -1704,30 +1686,14 @@
+ static void
+ mega_free_scb(adapter_t *adapter, scb_t *scb)
+ {
+- unsigned long length;
+-
+ switch( scb->dma_type ) {
+
+ case MEGA_DMA_TYPE_NONE:
+ break;
+
+- case MEGA_BULK_DATA:
+- if (scb->cmd->use_sg == 0)
+- length = scb->cmd->request_bufflen;
+- else {
+- struct scatterlist *sgl =
+- (struct scatterlist *)scb->cmd->request_buffer;
+- length = sgl->length;
+- }
+- pci_unmap_page(adapter->dev, scb->dma_h_bulkdata,
+- length, scb->dma_direction);
+- break;
+-
+ case MEGA_SGLIST:
+- pci_unmap_sg(adapter->dev, scb->cmd->request_buffer,
+- scb->cmd->use_sg, scb->dma_direction);
++ scsi_dma_unmap(scb->cmd);
+ break;
+-
+ default:
+ break;
+ }
+@@ -1767,80 +1733,33 @@
+ static int
+ mega_build_sglist(adapter_t *adapter, scb_t *scb, u32 *buf, u32 *len)
+ {
+- struct scatterlist *sgl;
+- struct page *page;
+- unsigned long offset;
+- unsigned int length;
++ struct scatterlist *sg;
+ Scsi_Cmnd *cmd;
+ int sgcnt;
+ int idx;
+
+ cmd = scb->cmd;
+
+- /* Scatter-gather not used */
+- if( cmd->use_sg == 0 || (cmd->use_sg == 1 &&
+- !adapter->has_64bit_addr)) {
+-
+- if (cmd->use_sg == 0) {
+- page = virt_to_page(cmd->request_buffer);
+- offset = offset_in_page(cmd->request_buffer);
+- length = cmd->request_bufflen;
+- } else {
+- sgl = (struct scatterlist *)cmd->request_buffer;
+- page = sgl->page;
+- offset = sgl->offset;
+- length = sgl->length;
+- }
+-
+- scb->dma_h_bulkdata = pci_map_page(adapter->dev,
+- page, offset,
+- length,
+- scb->dma_direction);
+- scb->dma_type = MEGA_BULK_DATA;
+-
+- /*
+- * We need to handle special 64-bit commands that need a
+- * minimum of 1 SG
+- */
+- if( adapter->has_64bit_addr ) {
+- scb->sgl64[0].address = scb->dma_h_bulkdata;
+- scb->sgl64[0].length = length;
+- *buf = (u32)scb->sgl_dma_addr;
+- *len = (u32)length;
+- return 1;
+- }
+- else {
+- *buf = (u32)scb->dma_h_bulkdata;
+- *len = (u32)length;
+- }
+- return 0;
+- }
+-
+- sgl = (struct scatterlist *)cmd->request_buffer;
+-
+ /*
+ * Copy Scatter-Gather list info into controller structure.
+ *
+ * The number of sg elements returned must not exceed our limit
+ */
+- sgcnt = pci_map_sg(adapter->dev, sgl, cmd->use_sg,
+- scb->dma_direction);
++ sgcnt = scsi_dma_map(cmd);
+
+ scb->dma_type = MEGA_SGLIST;
+
+- BUG_ON(sgcnt > adapter->sglen);
++ BUG_ON(sgcnt > adapter->sglen || sgcnt < 0);
+
+ *len = 0;
+
+- for( idx = 0; idx < sgcnt; idx++, sgl++ ) {
+-
+- if( adapter->has_64bit_addr ) {
+- scb->sgl64[idx].address = sg_dma_address(sgl);
+- *len += scb->sgl64[idx].length = sg_dma_len(sgl);
+- }
+- else {
+- scb->sgl[idx].address = sg_dma_address(sgl);
+- *len += scb->sgl[idx].length = sg_dma_len(sgl);
++ scsi_for_each_sg(cmd, sg, sgcnt, idx) {
++ if (adapter->has_64bit_addr) {
++ scb->sgl64[idx].address = sg_dma_address(sg);
++ *len += scb->sgl64[idx].length = sg_dma_len(sg);
++ } else {
++ scb->sgl[idx].address = sg_dma_address(sg);
++ *len += scb->sgl[idx].length = sg_dma_len(sg);
+ }
+ }
+
+@@ -3571,7 +3490,7 @@
+ /*
+ * The user passthru structure
+ */
+- upthru = (mega_passthru __user *)MBOX(uioc)->xferaddr;
++ upthru = (mega_passthru __user *)(unsigned long)MBOX(uioc)->xferaddr;
+
+ /*
+ * Copy in the user passthru here.
+@@ -3623,7 +3542,7 @@
+ /*
+ * Get the user data
+ */
+- if( copy_from_user(data, (char __user *)uxferaddr,
++ if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr,
+ pthru->dataxferlen) ) {
+ rval = (-EFAULT);
+ goto freemem_and_return;
+@@ -3649,7 +3568,7 @@
+ * Is data going up-stream
+ */
+ if( pthru->dataxferlen && (uioc.flags & UIOC_RD) ) {
+- if( copy_to_user((char __user *)uxferaddr, data,
++ if( copy_to_user((char __user *)(unsigned long) uxferaddr, data,
+ pthru->dataxferlen) ) {
+ rval = (-EFAULT);
+ }
+@@ -3702,7 +3621,7 @@
+ /*
+ * Get the user data
+ */
+- if( copy_from_user(data, (char __user *)uxferaddr,
++ if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr,
+ uioc.xferlen) ) {
+
+ pci_free_consistent(pdev,
+@@ -3742,7 +3661,7 @@
+ * Is data going up-stream
+ */
+ if( uioc.xferlen && (uioc.flags & UIOC_RD) ) {
+- if( copy_to_user((char __user *)uxferaddr, data,
++ if( copy_to_user((char __user *)(unsigned long) uxferaddr, data,
+ uioc.xferlen) ) {
+
+ rval = (-EFAULT);
+@@ -4494,7 +4413,7 @@
+ scmd->device = sdev;
+
+ scmd->device->host = adapter->host;
+- scmd->request_buffer = (void *)scb;
++ scmd->host_scribble = (void *)scb;
+ scmd->cmnd[0] = MEGA_INTERNAL_CMD;
+
+ scb->state |= SCB_ACTIVE;
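
The megaraid MODE_SENSE emulation above now assumes a scatterlist is
always present and touches the first element through kmap_atomic(), since
scattered buffers may live in highmem. A hedged sketch of that access
pattern (sg->page and the KM_IRQ0 slot match the 2.6.22-era API used in
the hunk; later kernels use sg_page() and a slot-less kmap_atomic()):

    #include <linux/highmem.h>
    #include <linux/string.h>
    #include <scsi/scsi_cmnd.h>

    static void zero_first_sg_element(struct scsi_cmnd *cmd, unsigned int len)
    {
            struct scatterlist *sg = scsi_sglist(cmd);
            char *buf;

            /* Short-lived atomic mapping of the page backing segment 0. */
            buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
            memset(buf, 0, len);
            kunmap_atomic(buf - sg->offset, KM_IRQ0);
    }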
+diff -Nurb linux-2.6.22-570/drivers/scsi/mesh.c linux-2.6.22-591/drivers/scsi/mesh.c
+--- linux-2.6.22-570/drivers/scsi/mesh.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/mesh.c 2007-12-21 15:36:12.000000000 -0500
+@@ -421,7 +421,7 @@
+ for (i = 0; i < cmd->cmd_len; ++i)
+ printk(" %x", cmd->cmnd[i]);
+ printk(" use_sg=%d buffer=%p bufflen=%u\n",
+- cmd->use_sg, cmd->request_buffer, cmd->request_bufflen);
++ scsi_sg_count(cmd), scsi_sglist(cmd), scsi_bufflen(cmd));
+ }
+ #endif
+ if (ms->dma_started)
+@@ -602,13 +602,16 @@
+ cmd->result += (cmd->SCp.Message << 8);
+ if (DEBUG_TARGET(cmd)) {
+ printk(KERN_DEBUG "mesh_done: result = %x, data_ptr=%d, buflen=%d\n",
+- cmd->result, ms->data_ptr, cmd->request_bufflen);
++ cmd->result, ms->data_ptr, scsi_bufflen(cmd));
++#if 0
++ /* needs to use sg? */
+ if ((cmd->cmnd[0] == 0 || cmd->cmnd[0] == 0x12 || cmd->cmnd[0] == 3)
+ && cmd->request_buffer != 0) {
+ unsigned char *b = cmd->request_buffer;
+ printk(KERN_DEBUG "buffer = %x %x %x %x %x %x %x %x\n",
+ b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
+ }
++#endif
+ }
+ cmd->SCp.this_residual -= ms->data_ptr;
+ mesh_completed(ms, cmd);
+@@ -1265,15 +1268,18 @@
+ dcmds = ms->dma_cmds;
+ dtot = 0;
+ if (cmd) {
+- cmd->SCp.this_residual = cmd->request_bufflen;
+- if (cmd->use_sg > 0) {
+ int nseg;
++
++ cmd->SCp.this_residual = scsi_bufflen(cmd);
++
++ nseg = scsi_dma_map(cmd);
++ BUG_ON(nseg < 0);
++
++ if (nseg) {
+ total = 0;
+- scl = (struct scatterlist *) cmd->request_buffer;
+ off = ms->data_ptr;
+- nseg = pci_map_sg(ms->pdev, scl, cmd->use_sg,
+- cmd->sc_data_direction);
+- for (i = 0; i <nseg; ++i, ++scl) {
++
++ scsi_for_each_sg(cmd, scl, nseg, i) {
+ u32 dma_addr = sg_dma_address(scl);
+ u32 dma_len = sg_dma_len(scl);
+
+@@ -1292,16 +1298,6 @@
+ dtot += dma_len - off;
+ off = 0;
+ }
+- } else if (ms->data_ptr < cmd->request_bufflen) {
+- dtot = cmd->request_bufflen - ms->data_ptr;
+- if (dtot > 0xffff)
+- panic("mesh: transfer size >= 64k");
+- st_le16(&dcmds->req_count, dtot);
+- /* XXX Use pci DMA API here ... */
+- st_le32(&dcmds->phy_addr,
+- virt_to_phys(cmd->request_buffer) + ms->data_ptr);
+- dcmds->xfer_status = 0;
+- ++dcmds;
+ }
+ }
+ if (dtot == 0) {
+@@ -1356,18 +1352,14 @@
+ dumplog(ms, ms->conn_tgt);
+ dumpslog(ms);
+ #endif /* MESH_DBG */
+- } else if (cmd && cmd->request_bufflen != 0 &&
+- ms->data_ptr > cmd->request_bufflen) {
++ } else if (cmd && scsi_bufflen(cmd) &&
++ ms->data_ptr > scsi_bufflen(cmd)) {
+ printk(KERN_DEBUG "mesh: target %d overrun, "
+ "data_ptr=%x total=%x goes_out=%d\n",
+- ms->conn_tgt, ms->data_ptr, cmd->request_bufflen,
++ ms->conn_tgt, ms->data_ptr, scsi_bufflen(cmd),
+ ms->tgts[ms->conn_tgt].data_goes_out);
+ }
+- if (cmd->use_sg != 0) {
+- struct scatterlist *sg;
+- sg = (struct scatterlist *)cmd->request_buffer;
+- pci_unmap_sg(ms->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+ ms->dma_started = 0;
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.c linux-2.6.22-591/drivers/scsi/mvme16x.c
+--- linux-2.6.22-570/drivers/scsi/mvme16x.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/mvme16x.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,78 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux.
+- *
+- * Based on work by Alan Hourihane
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/mvme16xhw.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "mvme16x.h"
+-
+-#include<linux/stat.h>
+-
+-
+-int mvme16x_scsi_detect(struct scsi_host_template *tpnt)
+-{
+- static unsigned char called = 0;
+- int clock;
+- long long options;
+-
+- if (!MACH_IS_MVME16x)
+- return 0;
+- if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) {
+- printk ("SCSI detection disabled, SCSI chip not present\n");
+- return 0;
+- }
+- if (called)
+- return 0;
+-
+- tpnt->proc_name = "MVME16x";
+-
+- options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT;
+-
+- clock = 66000000; /* 66MHz SCSI Clock */
+-
+- ncr53c7xx_init(tpnt, 0, 710, (unsigned long)0xfff47000,
+- 0, MVME16x_IRQ_SCSI, DMA_NONE,
+- options, clock);
+- called = 1;
+- return 1;
+-}
+-
+-static int mvme16x_scsi_release(struct Scsi_Host *shost)
+-{
+- if (shost->irq)
+- free_irq(shost->irq, NULL);
+- if (shost->dma_channel != 0xff)
+- free_dma(shost->dma_channel);
+- if (shost->io_port && shost->n_io_port)
+- release_region(shost->io_port, shost->n_io_port);
+- scsi_unregister(shost);
+- return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+- .name = "MVME16x NCR53c710 SCSI",
+- .detect = mvme16x_scsi_detect,
+- .release = mvme16x_scsi_release,
+- .queuecommand = NCR53c7xx_queue_command,
+- .abort = NCR53c7xx_abort,
+- .reset = NCR53c7xx_reset,
+- .can_queue = 24,
+- .this_id = 7,
+- .sg_tablesize = 63,
+- .cmd_per_lun = 3,
+- .use_clustering = DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.h linux-2.6.22-591/drivers/scsi/mvme16x.h
+--- linux-2.6.22-570/drivers/scsi/mvme16x.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/mvme16x.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,24 +0,0 @@
+-#ifndef MVME16x_SCSI_H
+-#define MVME16x_SCSI_H
+-
+-#include <linux/types.h>
+-
+-int mvme16x_scsi_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* MVME16x_SCSI_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c
+--- linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,158 @@
++/*
++ * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux.
++ *
++ * Based on work by Alan Hourihane
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/blkdev.h>
++#include <linux/device.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/mvme16xhw.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("MVME16x NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++static struct scsi_host_template mvme16x_scsi_driver_template = {
++ .name = "MVME16x NCR53c710 SCSI",
++ .proc_name = "MVME16x",
++ .this_id = 7,
++ .module = THIS_MODULE,
++};
++
++static struct platform_device *mvme16x_scsi_device;
++
++static __devinit int
++mvme16x_probe(struct device *dev)
++{
++ struct Scsi_Host * host = NULL;
++ struct NCR_700_Host_Parameters *hostdata;
++
++ if (!MACH_IS_MVME16x)
++ goto out;
++
++ if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) {
++ printk(KERN_INFO "mvme16x-scsi: detection disabled, "
++ "SCSI chip not present\n");
++ goto out;
++ }
++
++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++ if (hostdata == NULL) {
++ printk(KERN_ERR "mvme16x-scsi: "
++ "Failed to allocate host data\n");
++ goto out;
++ }
++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++ /* Fill in the required pieces of hostdata */
++ hostdata->base = (void __iomem *)0xfff47000UL;
++ hostdata->clock = 50; /* XXX - depends on the CPU clock! */
++ hostdata->chip710 = 1;
++ hostdata->dmode_extra = DMODE_FC2;
++ hostdata->dcntl_extra = EA_710;
++ hostdata->ctest7_extra = CTEST7_TT1;
++
++ /* and register the chip */
++ host = NCR_700_detect(&mvme16x_scsi_driver_template, hostdata, dev);
++ if (!host) {
++ printk(KERN_ERR "mvme16x-scsi: No host detected; "
++ "board configuration problem?\n");
++ goto out_free;
++ }
++ host->this_id = 7;
++ host->base = 0xfff47000UL;
++ host->irq = MVME16x_IRQ_SCSI;
++ if (request_irq(host->irq, NCR_700_intr, 0, "mvme16x-scsi", host)) {
++ printk(KERN_ERR "mvme16x-scsi: request_irq failed\n");
++ goto out_put_host;
++ }
++
++ /* Enable scsi chip ints */
++ {
++ volatile unsigned long v;
++
++ /* Enable scsi interrupts at level 4 in PCCchip2 */
++ v = in_be32(0xfff4202c);
++ v = (v & ~0xff) | 0x10 | 4;
++ out_be32(0xfff4202c, v);
++ }
++
++ scsi_scan_host(host);
++
++ return 0;
++
++ out_put_host:
++ scsi_host_put(host);
++ out_free:
++ kfree(hostdata);
++ out:
++ return -ENODEV;
++}
++
++static __devexit int
++mvme16x_device_remove(struct device *dev)
++{
++ struct Scsi_Host *host = dev_to_shost(dev);
++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++ /* Disable scsi chip ints */
++ {
++ volatile unsigned long v;
++
++ v = in_be32(0xfff4202c);
++ v &= ~0x10;
++ out_be32(0xfff4202c, v);
++ }
++ scsi_remove_host(host);
++ NCR_700_release(host);
++ kfree(hostdata);
++ free_irq(host->irq, host);
++
++ return 0;
++}
++
++static struct device_driver mvme16x_scsi_driver = {
++ .name = "mvme16x-scsi",
++ .bus = &platform_bus_type,
++ .probe = mvme16x_probe,
++ .remove = __devexit_p(mvme16x_device_remove),
++};
++
++static int __init mvme16x_scsi_init(void)
++{
++ int err;
++
++ err = driver_register(&mvme16x_scsi_driver);
++ if (err)
++ return err;
++
++ mvme16x_scsi_device = platform_device_register_simple("mvme16x-scsi",
++ -1, NULL, 0);
++ if (IS_ERR(mvme16x_scsi_device)) {
++ driver_unregister(&mvme16x_scsi_driver);
++ return PTR_ERR(mvme16x_scsi_device);
++ }
++
++ return 0;
++}
++
++static void __exit mvme16x_scsi_exit(void)
++{
++ platform_device_unregister(mvme16x_scsi_device);
++ driver_unregister(&mvme16x_scsi_driver);
++}
++
++module_init(mvme16x_scsi_init);
++module_exit(mvme16x_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/nsp32.c linux-2.6.22-591/drivers/scsi/nsp32.c
+--- linux-2.6.22-570/drivers/scsi/nsp32.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/nsp32.c 2007-12-21 15:36:12.000000000 -0500
+@@ -49,10 +49,6 @@
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_ioctl.h>
+
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+-# include <linux/blk.h>
+-#endif
+-
+ #include "nsp32.h"
+
+
+@@ -199,17 +195,9 @@
+ static void __exit exit_nsp32 (void);
+
+ /* struct struct scsi_host_template */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ static int nsp32_proc_info (struct Scsi_Host *, char *, char **, off_t, int, int);
+-#else
+-static int nsp32_proc_info (char *, char **, off_t, int, int, int);
+-#endif
+
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ static int nsp32_detect (struct pci_dev *pdev);
+-#else
+-static int nsp32_detect (struct scsi_host_template *);
+-#endif
+ static int nsp32_queuecommand(struct scsi_cmnd *,
+ void (*done)(struct scsi_cmnd *));
+ static const char *nsp32_info (struct Scsi_Host *);
+@@ -296,15 +284,7 @@
+ .eh_abort_handler = nsp32_eh_abort,
+ .eh_bus_reset_handler = nsp32_eh_bus_reset,
+ .eh_host_reset_handler = nsp32_eh_host_reset,
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74))
+- .detect = nsp32_detect,
+- .release = nsp32_release,
+-#endif
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,2))
+- .use_new_eh_code = 1,
+-#else
+ /* .highmem_io = 1, */
+-#endif
+ };
+
+ #include "nsp32_io.h"
+@@ -739,7 +719,7 @@
+ command = 0;
+ command |= (TRANSFER_GO | ALL_COUNTER_CLR);
+ if (data->trans_method & NSP32_TRANSFER_BUSMASTER) {
+- if (SCpnt->request_bufflen > 0) {
++ if (scsi_bufflen(SCpnt) > 0) {
+ command |= BM_START;
+ }
+ } else if (data->trans_method & NSP32_TRANSFER_MMIO) {
+@@ -888,31 +868,28 @@
+ static int nsp32_setup_sg_table(struct scsi_cmnd *SCpnt)
+ {
+ nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata;
+- struct scatterlist *sgl;
++ struct scatterlist *sg;
+ nsp32_sgtable *sgt = data->cur_lunt->sglun->sgt;
+ int num, i;
+ u32_le l;
+
+- if (SCpnt->request_bufflen == 0) {
+- return TRUE;
+- }
+-
+ if (sgt == NULL) {
+ nsp32_dbg(NSP32_DEBUG_SGLIST, "SGT == null");
+ return FALSE;
+ }
+
+- if (SCpnt->use_sg) {
+- sgl = (struct scatterlist *)SCpnt->request_buffer;
+- num = pci_map_sg(data->Pci, sgl, SCpnt->use_sg,
+- SCpnt->sc_data_direction);
+- for (i = 0; i < num; i++) {
++ num = scsi_dma_map(SCpnt);
++ if (!num)
++ return TRUE;
++ else if (num < 0)
++ return FALSE;
++ else {
++ scsi_for_each_sg(SCpnt, sg, num, i) {
+ /*
+ * Build nsp32_sglist, substitute sg dma addresses.
+ */
+- sgt[i].addr = cpu_to_le32(sg_dma_address(sgl));
+- sgt[i].len = cpu_to_le32(sg_dma_len(sgl));
+- sgl++;
++ sgt[i].addr = cpu_to_le32(sg_dma_address(sg));
++ sgt[i].len = cpu_to_le32(sg_dma_len(sg));
+
+ if (le32_to_cpu(sgt[i].len) > 0x10000) {
+ nsp32_msg(KERN_ERR,
+@@ -929,23 +906,6 @@
+ /* set end mark */
+ l = le32_to_cpu(sgt[num-1].len);
+ sgt[num-1].len = cpu_to_le32(l | SGTEND);
+-
+- } else {
+- SCpnt->SCp.have_data_in = pci_map_single(data->Pci,
+- SCpnt->request_buffer, SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+-
+- sgt[0].addr = cpu_to_le32(SCpnt->SCp.have_data_in);
+- sgt[0].len = cpu_to_le32(SCpnt->request_bufflen | SGTEND); /* set end mark */
+-
+- if (SCpnt->request_bufflen > 0x10000) {
+- nsp32_msg(KERN_ERR,
+- "can't transfer over 64KB at a time, size=0x%lx", SCpnt->request_bufflen);
+- return FALSE;
+- }
+- nsp32_dbg(NSP32_DEBUG_SGLIST, "single : addr 0x%lx len=0x%lx",
+- le32_to_cpu(sgt[0].addr),
+- le32_to_cpu(sgt[0].len ));
+ }
+
+ return TRUE;
+@@ -962,7 +922,7 @@
+ "enter. target: 0x%x LUN: 0x%x cmnd: 0x%x cmndlen: 0x%x "
+ "use_sg: 0x%x reqbuf: 0x%lx reqlen: 0x%x",
+ SCpnt->device->id, SCpnt->device->lun, SCpnt->cmnd[0], SCpnt->cmd_len,
+- SCpnt->use_sg, SCpnt->request_buffer, SCpnt->request_bufflen);
++ scsi_sg_count(SCpnt), scsi_sglist(SCpnt), scsi_bufflen(SCpnt));
+
+ if (data->CurrentSC != NULL) {
+ nsp32_msg(KERN_ERR, "Currentsc != NULL. Cancel this command request");
+@@ -994,10 +954,10 @@
+ data->CurrentSC = SCpnt;
+ SCpnt->SCp.Status = CHECK_CONDITION;
+ SCpnt->SCp.Message = 0;
+- SCpnt->resid = SCpnt->request_bufflen;
++ scsi_set_resid(SCpnt, scsi_bufflen(SCpnt));
+
+- SCpnt->SCp.ptr = (char *) SCpnt->request_buffer;
+- SCpnt->SCp.this_residual = SCpnt->request_bufflen;
++ SCpnt->SCp.ptr = (char *)scsi_sglist(SCpnt);
++ SCpnt->SCp.this_residual = scsi_bufflen(SCpnt);
+ SCpnt->SCp.buffer = NULL;
+ SCpnt->SCp.buffers_residual = 0;
+
+@@ -1210,13 +1170,9 @@
+ unsigned long flags;
+ int ret;
+ int handled = 0;
+-
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ struct Scsi_Host *host = data->Host;
++
+ spin_lock_irqsave(host->host_lock, flags);
+-#else
+- spin_lock_irqsave(&io_request_lock, flags);
+-#endif
+
+ /*
+ * IRQ check, then enable IRQ mask
+@@ -1312,7 +1268,7 @@
+ }
+
+ if ((auto_stat & DATA_IN_PHASE) &&
+- (SCpnt->resid > 0) &&
++ (scsi_get_resid(SCpnt) > 0) &&
+ ((nsp32_read2(base, FIFO_REST_CNT) & FIFO_REST_MASK) != 0)) {
+ printk( "auto+fifo\n");
+ //nsp32_pio_read(SCpnt);
+@@ -1333,7 +1289,7 @@
+ nsp32_dbg(NSP32_DEBUG_INTR, "SSACK=0x%lx",
+ nsp32_read4(base, SAVED_SACK_CNT));
+
+- SCpnt->resid = 0; /* all data transfered! */
++		scsi_set_resid(SCpnt, 0); /* all data transferred! */
+ }
+
+ /*
+@@ -1480,11 +1436,7 @@
+ nsp32_write2(base, IRQ_CONTROL, 0);
+
+ out2:
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ spin_unlock_irqrestore(host->host_lock, flags);
+-#else
+- spin_unlock_irqrestore(&io_request_lock, flags);
+-#endif
+
+ nsp32_dbg(NSP32_DEBUG_INTR, "exit");
+
+@@ -1499,28 +1451,15 @@
+ nsp32_dbg(NSP32_DEBUG_PROC, "buffer=0x%p pos=0x%p length=%d %d\n", buffer, pos, length, length - (pos - buffer));\
+ } \
+ } while(0)
+-static int nsp32_proc_info(
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+- struct Scsi_Host *host,
+-#endif
+- char *buffer,
+- char **start,
+- off_t offset,
+- int length,
+-#if !(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+- int hostno,
+-#endif
+- int inout)
++
++static int nsp32_proc_info(struct Scsi_Host *host, char *buffer, char **start,
++ off_t offset, int length, int inout)
+ {
+ char *pos = buffer;
+ int thislength;
+ unsigned long flags;
+ nsp32_hw_data *data;
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ int hostno;
+-#else
+- struct Scsi_Host *host;
+-#endif
+ unsigned int base;
+ unsigned char mode_reg;
+ int id, speed;
+@@ -1531,15 +1470,7 @@
+ return -EINVAL;
+ }
+
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ hostno = host->host_no;
+-#else
+- /* search this HBA host */
+- host = scsi_host_hn_get(hostno);
+- if (host == NULL) {
+- return -ESRCH;
+- }
+-#endif
+ data = (nsp32_hw_data *)host->hostdata;
+ base = host->io_port;
+
+@@ -1626,25 +1557,8 @@
+ nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata;
+ unsigned int base = SCpnt->device->host->io_port;
+
+- /*
+- * unmap pci
+- */
+- if (SCpnt->request_bufflen == 0) {
+- goto skip;
+- }
+-
+- if (SCpnt->use_sg) {
+- pci_unmap_sg(data->Pci,
+- (struct scatterlist *)SCpnt->request_buffer,
+- SCpnt->use_sg, SCpnt->sc_data_direction);
+- } else {
+- pci_unmap_single(data->Pci,
+- (u32)SCpnt->SCp.have_data_in,
+- SCpnt->request_bufflen,
+- SCpnt->sc_data_direction);
+- }
++ scsi_dma_unmap(SCpnt);
+
+- skip:
+ /*
+ * clear TRANSFERCONTROL_BM_START
+ */
+@@ -1800,7 +1714,7 @@
+ SCpnt->SCp.Message = 0;
+ nsp32_dbg(NSP32_DEBUG_BUSFREE,
+ "normal end stat=0x%x resid=0x%x\n",
+- SCpnt->SCp.Status, SCpnt->resid);
++ SCpnt->SCp.Status, scsi_get_resid(SCpnt));
+ SCpnt->result = (DID_OK << 16) |
+ (SCpnt->SCp.Message << 8) |
+ (SCpnt->SCp.Status << 0);
+@@ -1844,7 +1758,7 @@
+ unsigned int restlen, sentlen;
+ u32_le len, addr;
+
+- nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", SCpnt->resid);
++ nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", scsi_get_resid(SCpnt));
+
+ /* adjust saved SACK count with 4 byte start address boundary */
+ s_sacklen -= le32_to_cpu(sgt[old_entry].addr) & 3;
+@@ -1888,12 +1802,12 @@
+ return;
+
+ last:
+- if (SCpnt->resid < sentlen) {
++ if (scsi_get_resid(SCpnt) < sentlen) {
+ nsp32_msg(KERN_ERR, "resid underflow");
+ }
+
+- SCpnt->resid -= sentlen;
+- nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", SCpnt->resid);
++ scsi_set_resid(SCpnt, scsi_get_resid(SCpnt) - sentlen);
++ nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", scsi_get_resid(SCpnt));
+
+ /* update hostdata and lun */
+
+@@ -2022,7 +1936,7 @@
+ transfer = 0;
+ transfer |= (TRANSFER_GO | ALL_COUNTER_CLR);
+ if (data->trans_method & NSP32_TRANSFER_BUSMASTER) {
+- if (SCpnt->request_bufflen > 0) {
++ if (scsi_bufflen(SCpnt) > 0) {
+ transfer |= BM_START;
+ }
+ } else if (data->trans_method & NSP32_TRANSFER_MMIO) {
+@@ -2674,17 +2588,7 @@
+ * 0x900-0xbff: (map same 0x800-0x8ff I/O port image repeatedly)
+ * 0xc00-0xfff: CardBus status registers
+ */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+-#define DETECT_OK 0
+-#define DETECT_NG 1
+-#define PCIDEV pdev
+ static int nsp32_detect(struct pci_dev *pdev)
+-#else
+-#define DETECT_OK 1
+-#define DETECT_NG 0
+-#define PCIDEV (data->Pci)
+-static int nsp32_detect(struct scsi_host_template *sht)
+-#endif
+ {
+ struct Scsi_Host *host; /* registered host structure */
+ struct resource *res;
+@@ -2697,11 +2601,7 @@
+ /*
+ * register this HBA as SCSI device
+ */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ host = scsi_host_alloc(&nsp32_template, sizeof(nsp32_hw_data));
+-#else
+- host = scsi_register(sht, sizeof(nsp32_hw_data));
+-#endif
+ if (host == NULL) {
+ nsp32_msg (KERN_ERR, "failed to scsi register");
+ goto err;
+@@ -2719,9 +2619,6 @@
+ host->unique_id = data->BaseAddress;
+ host->n_io_port = data->NumAddress;
+ host->base = (unsigned long)data->MmioAddress;
+-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,63))
+- scsi_set_pci_device(host, PCIDEV);
+-#endif
+
+ data->Host = host;
+ spin_lock_init(&(data->Lock));
+@@ -2776,7 +2673,7 @@
+ /*
+ * setup DMA
+ */
+- if (pci_set_dma_mask(PCIDEV, DMA_32BIT_MASK) != 0) {
++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) {
+ nsp32_msg (KERN_ERR, "failed to set PCI DMA mask");
+ goto scsi_unregister;
+ }
+@@ -2784,7 +2681,7 @@
+ /*
+ * allocate autoparam DMA resource.
+ */
+- data->autoparam = pci_alloc_consistent(PCIDEV, sizeof(nsp32_autoparam), &(data->auto_paddr));
++ data->autoparam = pci_alloc_consistent(pdev, sizeof(nsp32_autoparam), &(data->auto_paddr));
+ if (data->autoparam == NULL) {
+ nsp32_msg(KERN_ERR, "failed to allocate DMA memory");
+ goto scsi_unregister;
+@@ -2793,7 +2690,7 @@
+ /*
+ * allocate scatter-gather DMA resource.
+ */
+- data->sg_list = pci_alloc_consistent(PCIDEV, NSP32_SG_TABLE_SIZE,
++ data->sg_list = pci_alloc_consistent(pdev, NSP32_SG_TABLE_SIZE,
+ &(data->sg_paddr));
+ if (data->sg_list == NULL) {
+ nsp32_msg(KERN_ERR, "failed to allocate DMA memory");
+@@ -2883,16 +2780,14 @@
+ goto free_irq;
+ }
+
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+- ret = scsi_add_host(host, &PCIDEV->dev);
++ ret = scsi_add_host(host, &pdev->dev);
+ if (ret) {
+ nsp32_msg(KERN_ERR, "failed to add scsi host");
+ goto free_region;
+ }
+ scsi_scan_host(host);
+-#endif
+- pci_set_drvdata(PCIDEV, host);
+- return DETECT_OK;
++ pci_set_drvdata(pdev, host);
++ return 0;
+
+ free_region:
+ release_region(host->io_port, host->n_io_port);
+@@ -2901,22 +2796,19 @@
+ free_irq(host->irq, data);
+
+ free_sg_list:
+- pci_free_consistent(PCIDEV, NSP32_SG_TABLE_SIZE,
++ pci_free_consistent(pdev, NSP32_SG_TABLE_SIZE,
+ data->sg_list, data->sg_paddr);
+
+ free_autoparam:
+- pci_free_consistent(PCIDEV, sizeof(nsp32_autoparam),
++ pci_free_consistent(pdev, sizeof(nsp32_autoparam),
+ data->autoparam, data->auto_paddr);
+
+ scsi_unregister:
+ scsi_host_put(host);
+
+ err:
+- return DETECT_NG;
++ return 1;
+ }
+-#undef DETECT_OK
+-#undef DETECT_NG
+-#undef PCIDEV
+
+ static int nsp32_release(struct Scsi_Host *host)
+ {
+@@ -3525,11 +3417,7 @@
+
+ pci_set_master(pdev);
+
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ ret = nsp32_detect(pdev);
+-#else
+- ret = scsi_register_host(&nsp32_template);
+-#endif
+
+ nsp32_msg(KERN_INFO, "irq: %i mmio: %p+0x%lx slot: %s model: %s",
+ pdev->irq,
+@@ -3544,25 +3432,17 @@
+
+ static void __devexit nsp32_remove(struct pci_dev *pdev)
+ {
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ struct Scsi_Host *host = pci_get_drvdata(pdev);
+-#endif
+
+ nsp32_dbg(NSP32_DEBUG_REGISTER, "enter");
+
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ scsi_remove_host(host);
+
+ nsp32_release(host);
+
+ scsi_host_put(host);
+-#else
+- scsi_unregister_host(&nsp32_template);
+-#endif
+ }
+
+-
+-
+ static struct pci_driver nsp32_driver = {
+ .name = "nsp32",
+ .id_table = nsp32_pci_table,
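
For reference, the nsp32 hunks above (and the sym53c500/qla2xxx hunks below) all migrate from open-coded request_buffer/use_sg handling to the SCSI mid-layer DMA accessors. A minimal sketch of the target pattern, assuming a hypothetical per-driver program_dsd() hook that loads one DMA descriptor:

	#include <scsi/scsi_cmnd.h>

	extern void program_dsd(dma_addr_t addr, unsigned int len); /* hypothetical hook */

	static int example_map_and_build(struct scsi_cmnd *cmd)
	{
		struct scatterlist *sg;
		int i, nseg;

		nseg = scsi_dma_map(cmd);	/* 0 = no data, <0 = mapping error */
		if (nseg <= 0)
			return nseg;

		/* one descriptor per mapped scatter-gather segment */
		scsi_for_each_sg(cmd, sg, nseg, i)
			program_dsd(sg_dma_address(sg), sg_dma_len(sg));

		return nseg;
	}

On the completion and error paths a single scsi_dma_unmap(cmd) then replaces the old paired pci_unmap_sg()/pci_unmap_single() branches, since the mid-layer now hides the single-buffer-versus-sg distinction from the driver.
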
+diff -Nurb linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c
+--- linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -370,8 +370,6 @@
+ DEB(unsigned char seq_reg;)
+ unsigned char status, int_reg;
+ unsigned char pio_status;
+- struct scatterlist *sglist;
+- unsigned int sgcount;
+ int port_base = dev->io_port;
+ struct sym53c500_data *data =
+ (struct sym53c500_data *)dev->hostdata;
+@@ -434,20 +432,19 @@
+ switch (status & 0x07) { /* scsi phase */
+ case 0x00: /* DATA-OUT */
+ if (int_reg & 0x10) { /* Target requesting info transfer */
++ struct scatterlist *sg;
++ int i;
++
+ curSC->SCp.phase = data_out;
+ VDEB(printk("SYM53C500: Data-Out phase\n"));
+ outb(FLUSH_FIFO, port_base + CMD_REG);
+- LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */
++ LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */
+ outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG);
+- if (!curSC->use_sg) /* Don't use scatter-gather */
+- SYM53C500_pio_write(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen);
+- else { /* use scatter-gather */
+- sgcount = curSC->use_sg;
+- sglist = curSC->request_buffer;
+- while (sgcount--) {
+- SYM53C500_pio_write(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length);
+- sglist++;
+- }
++
++ scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
++ SYM53C500_pio_write(fast_pio, port_base,
++ page_address(sg->page) + sg->offset,
++ sg->length);
+ }
+ REG0(port_base);
+ }
+@@ -455,20 +452,19 @@
+
+ case 0x01: /* DATA-IN */
+ if (int_reg & 0x10) { /* Target requesting info transfer */
++ struct scatterlist *sg;
++ int i;
++
+ curSC->SCp.phase = data_in;
+ VDEB(printk("SYM53C500: Data-In phase\n"));
+ outb(FLUSH_FIFO, port_base + CMD_REG);
+- LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */
++ LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */
+ outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG);
+- if (!curSC->use_sg) /* Don't use scatter-gather */
+- SYM53C500_pio_read(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen);
+- else { /* Use scatter-gather */
+- sgcount = curSC->use_sg;
+- sglist = curSC->request_buffer;
+- while (sgcount--) {
+- SYM53C500_pio_read(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length);
+- sglist++;
+- }
++
++ scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
++ SYM53C500_pio_read(fast_pio, port_base,
++ page_address(sg->page) + sg->offset,
++ sg->length);
+ }
+ REG0(port_base);
+ }
+@@ -578,7 +574,7 @@
+
+ DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n",
+ SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->device->id,
+- SCpnt->device->lun, SCpnt->request_bufflen));
++ SCpnt->device->lun, scsi_bufflen(SCpnt)));
+
+ VDEB(for (i = 0; i < SCpnt->cmd_len; i++)
+ printk("cmd[%d]=%02x ", i, SCpnt->cmnd[i]));
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -11,8 +11,9 @@
+ /* SYSFS attributes --------------------------------------------------------- */
+
+ static ssize_t
+-qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -31,8 +32,9 @@
+ }
+
+ static ssize_t
+-qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -73,7 +75,6 @@
+ .attr = {
+ .name = "fw_dump",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 0,
+ .read = qla2x00_sysfs_read_fw_dump,
+@@ -81,8 +82,9 @@
+ };
+
+ static ssize_t
+-qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_read_nvram(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -101,8 +103,9 @@
+ }
+
+ static ssize_t
+-qla2x00_sysfs_write_nvram(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_write_nvram(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -149,7 +152,6 @@
+ .attr = {
+ .name = "nvram",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 512,
+ .read = qla2x00_sysfs_read_nvram,
+@@ -157,8 +159,9 @@
+ };
+
+ static ssize_t
+-qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_read_optrom(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -176,8 +179,9 @@
+ }
+
+ static ssize_t
+-qla2x00_sysfs_write_optrom(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_write_optrom(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -198,7 +202,6 @@
+ .attr = {
+ .name = "optrom",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = OPTROM_SIZE_24XX,
+ .read = qla2x00_sysfs_read_optrom,
+@@ -206,8 +209,9 @@
+ };
+
+ static ssize_t
+-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -279,15 +283,15 @@
+ .attr = {
+ .name = "optrom_ctl",
+ .mode = S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 0,
+ .write = qla2x00_sysfs_write_optrom_ctl,
+ };
+
+ static ssize_t
+-qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_read_vpd(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -305,8 +309,9 @@
+ }
+
+ static ssize_t
+-qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_write_vpd(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -327,7 +332,6 @@
+ .attr = {
+ .name = "vpd",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = 0,
+ .read = qla2x00_sysfs_read_vpd,
+@@ -335,8 +339,9 @@
+ };
+
+ static ssize_t
+-qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++qla2x00_sysfs_read_sfp(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+@@ -375,7 +380,6 @@
+ .attr = {
+ .name = "sfp",
+ .mode = S_IRUSR | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = SFP_DEV_SIZE * 2,
+ .read = qla2x00_sysfs_read_sfp,
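
The qla_attr.c hunks track the updated sysfs binary-attribute API, which passes the struct bin_attribute itself into the read/write methods and drops the .attr.owner initializer. A minimal sketch of a binary attribute under the new prototypes (names are illustrative, not from this patch):

	#include <linux/sysfs.h>
	#include <linux/stat.h>

	static ssize_t example_read(struct kobject *kobj, struct bin_attribute *attr,
				    char *buf, loff_t off, size_t count)
	{
		/* copy at most count bytes of device state into buf at offset off */
		return 0;	/* sketch only: report zero bytes */
	}

	static struct bin_attribute example_attr = {
		.attr	= { .name = "example", .mode = S_IRUSR },
		.size	= 0,	/* 0 = size not known in advance */
		.read	= example_read,
	};

Such an attribute would be registered with sysfs_create_bin_file(kobj, &example_attr) and torn down with sysfs_remove_bin_file().
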
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1411,9 +1411,9 @@
+ printk("0x%02x ", cmd->cmnd[i]);
+ }
+ printk("\n seg_cnt=%d, allowed=%d, retries=%d\n",
+- cmd->use_sg, cmd->allowed, cmd->retries);
++ scsi_sg_count(cmd), cmd->allowed, cmd->retries);
+ printk(" request buffer=0x%p, request buffer len=0x%x\n",
+- cmd->request_buffer, cmd->request_bufflen);
++ scsi_sglist(cmd), scsi_bufflen(cmd));
+ printk(" tag=%d, transfersize=0x%x\n",
+ cmd->tag, cmd->transfersize);
+ printk(" serial_number=%lx, SP=%p\n", cmd->serial_number, sp);
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -155,6 +155,8 @@
+ uint32_t *cur_dsd;
+ scsi_qla_host_t *ha;
+ struct scsi_cmnd *cmd;
++ struct scatterlist *sg;
++ int i;
+
+ cmd = sp->cmd;
+
+@@ -163,7 +165,7 @@
+ __constant_cpu_to_le32(COMMAND_TYPE);
+
+ /* No data transfer */
+- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ return;
+ }
+@@ -177,13 +179,8 @@
+ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+
+ /* Load data segments */
+- if (cmd->use_sg != 0) {
+- struct scatterlist *cur_seg;
+- struct scatterlist *end_seg;
+-
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- end_seg = cur_seg + tot_dsds;
+- while (cur_seg < end_seg) {
++
++ scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ cont_entry_t *cont_pkt;
+
+ /* Allocate additional continuation packets? */
+@@ -197,15 +194,9 @@
+ avail_dsds = 7;
+ }
+
+- *cur_dsd++ = cpu_to_le32(sg_dma_address(cur_seg));
+- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++ *cur_dsd++ = cpu_to_le32(sg_dma_address(sg));
++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ avail_dsds--;
+-
+- cur_seg++;
+- }
+- } else {
+- *cur_dsd++ = cpu_to_le32(sp->dma_handle);
+- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ }
+ }
+
+@@ -224,6 +215,8 @@
+ uint32_t *cur_dsd;
+ scsi_qla_host_t *ha;
+ struct scsi_cmnd *cmd;
++ struct scatterlist *sg;
++ int i;
+
+ cmd = sp->cmd;
+
+@@ -232,7 +225,7 @@
+ __constant_cpu_to_le32(COMMAND_A64_TYPE);
+
+ /* No data transfer */
+- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ return;
+ }
+@@ -246,13 +239,7 @@
+ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+
+ /* Load data segments */
+- if (cmd->use_sg != 0) {
+- struct scatterlist *cur_seg;
+- struct scatterlist *end_seg;
+-
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- end_seg = cur_seg + tot_dsds;
+- while (cur_seg < end_seg) {
++ scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ dma_addr_t sle_dma;
+ cont_a64_entry_t *cont_pkt;
+
+@@ -267,18 +254,11 @@
+ avail_dsds = 5;
+ }
+
+- sle_dma = sg_dma_address(cur_seg);
++ sle_dma = sg_dma_address(sg);
+ *cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+ *cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ avail_dsds--;
+-
+- cur_seg++;
+- }
+- } else {
+- *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle));
+- *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle));
+- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ }
+ }
+
+@@ -291,7 +271,7 @@
+ int
+ qla2x00_start_scsi(srb_t *sp)
+ {
+- int ret;
++ int ret, nseg;
+ unsigned long flags;
+ scsi_qla_host_t *ha;
+ struct scsi_cmnd *cmd;
+@@ -299,7 +279,6 @@
+ uint32_t index;
+ uint32_t handle;
+ cmd_entry_t *cmd_pkt;
+- struct scatterlist *sg;
+ uint16_t cnt;
+ uint16_t req_cnt;
+ uint16_t tot_dsds;
+@@ -337,23 +316,10 @@
+ goto queuing_error;
+
+ /* Map the sg table so we have an accurate count of sg entries needed */
+- if (cmd->use_sg) {
+- sg = (struct scatterlist *) cmd->request_buffer;
+- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- if (tot_dsds == 0)
++ nseg = scsi_dma_map(cmd);
++ if (nseg < 0)
+ goto queuing_error;
+- } else if (cmd->request_bufflen) {
+- dma_addr_t req_dma;
+-
+- req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- if (dma_mapping_error(req_dma))
+- goto queuing_error;
+-
+- sp->dma_handle = req_dma;
+- tot_dsds = 1;
+- }
++ tot_dsds = nseg;
+
+ /* Calculate the number of request entries needed. */
+ req_cnt = ha->isp_ops.calc_req_entries(tot_dsds);
+@@ -391,7 +357,7 @@
+
+ /* Load SCSI command packet. */
+ memcpy(cmd_pkt->scsi_cdb, cmd->cmnd, cmd->cmd_len);
+- cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen);
++ cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+
+ /* Build IOCB segments */
+ ha->isp_ops.build_iocbs(sp, cmd_pkt, tot_dsds);
+@@ -423,14 +389,9 @@
+ return (QLA_SUCCESS);
+
+ queuing_error:
+- if (cmd->use_sg && tot_dsds) {
+- sg = (struct scatterlist *) cmd->request_buffer;
+- pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- } else if (tot_dsds) {
+- pci_unmap_single(ha->pdev, sp->dma_handle,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- }
++ if (tot_dsds)
++ scsi_dma_unmap(cmd);
++
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ return (QLA_FUNCTION_FAILED);
+@@ -642,6 +603,8 @@
+ uint32_t *cur_dsd;
+ scsi_qla_host_t *ha;
+ struct scsi_cmnd *cmd;
++ struct scatterlist *sg;
++ int i;
+
+ cmd = sp->cmd;
+
+@@ -650,7 +613,7 @@
+ __constant_cpu_to_le32(COMMAND_TYPE_7);
+
+ /* No data transfer */
+- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ return;
+ }
+@@ -670,13 +633,8 @@
+ cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+
+ /* Load data segments */
+- if (cmd->use_sg != 0) {
+- struct scatterlist *cur_seg;
+- struct scatterlist *end_seg;
+-
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- end_seg = cur_seg + tot_dsds;
+- while (cur_seg < end_seg) {
++
++ scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ dma_addr_t sle_dma;
+ cont_a64_entry_t *cont_pkt;
+
+@@ -691,18 +649,11 @@
+ avail_dsds = 5;
+ }
+
+- sle_dma = sg_dma_address(cur_seg);
++ sle_dma = sg_dma_address(sg);
+ *cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+ *cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ avail_dsds--;
+-
+- cur_seg++;
+- }
+- } else {
+- *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle));
+- *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle));
+- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ }
+ }
+
+@@ -716,7 +667,7 @@
+ int
+ qla24xx_start_scsi(srb_t *sp)
+ {
+- int ret;
++ int ret, nseg;
+ unsigned long flags;
+ scsi_qla_host_t *ha;
+ struct scsi_cmnd *cmd;
+@@ -724,7 +675,6 @@
+ uint32_t index;
+ uint32_t handle;
+ struct cmd_type_7 *cmd_pkt;
+- struct scatterlist *sg;
+ uint16_t cnt;
+ uint16_t req_cnt;
+ uint16_t tot_dsds;
+@@ -762,23 +712,10 @@
+ goto queuing_error;
+
+ /* Map the sg table so we have an accurate count of sg entries needed */
+- if (cmd->use_sg) {
+- sg = (struct scatterlist *) cmd->request_buffer;
+- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- if (tot_dsds == 0)
+- goto queuing_error;
+- } else if (cmd->request_bufflen) {
+- dma_addr_t req_dma;
+-
+- req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- if (dma_mapping_error(req_dma))
++ nseg = scsi_dma_map(cmd);
++ if (nseg < 0)
+ goto queuing_error;
+-
+- sp->dma_handle = req_dma;
+- tot_dsds = 1;
+- }
++ tot_dsds = nseg;
+
+ req_cnt = qla24xx_calc_iocbs(tot_dsds);
+ if (ha->req_q_cnt < (req_cnt + 2)) {
+@@ -821,7 +758,7 @@
+ memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len);
+ host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb));
+
+- cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen);
++ cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+
+ /* Build IOCB segments */
+ qla24xx_build_scsi_iocbs(sp, cmd_pkt, tot_dsds);
+@@ -853,14 +790,9 @@
+ return QLA_SUCCESS;
+
+ queuing_error:
+- if (cmd->use_sg && tot_dsds) {
+- sg = (struct scatterlist *) cmd->request_buffer;
+- pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- } else if (tot_dsds) {
+- pci_unmap_single(ha->pdev, sp->dma_handle,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- }
++ if (tot_dsds)
++ scsi_dma_unmap(cmd);
++
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ return QLA_FUNCTION_FAILED;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -889,11 +889,11 @@
+ }
+ if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) {
+ resid = resid_len;
+- cp->resid = resid;
++ scsi_set_resid(cp, resid);
+ CMD_RESID_LEN(cp) = resid;
+
+ if (!lscsi_status &&
+- ((unsigned)(cp->request_bufflen - resid) <
++ ((unsigned)(scsi_bufflen(cp) - resid) <
+ cp->underflow)) {
+ qla_printk(KERN_INFO, ha,
+ "scsi(%ld:%d:%d:%d): Mid-layer underflow "
+@@ -901,7 +901,7 @@
+ "error status.\n", ha->host_no,
+ cp->device->channel, cp->device->id,
+ cp->device->lun, resid,
+- cp->request_bufflen);
++ scsi_bufflen(cp));
+
+ cp->result = DID_ERROR << 16;
+ break;
+@@ -963,7 +963,7 @@
+ resid = fw_resid_len;
+
+ if (scsi_status & SS_RESIDUAL_UNDER) {
+- cp->resid = resid;
++ scsi_set_resid(cp, resid);
+ CMD_RESID_LEN(cp) = resid;
+ } else {
+ DEBUG2(printk(KERN_INFO
+@@ -1046,14 +1046,14 @@
+ "retrying command.\n", ha->host_no,
+ cp->device->channel, cp->device->id,
+ cp->device->lun, resid,
+- cp->request_bufflen));
++ scsi_bufflen(cp)));
+
+ cp->result = DID_BUS_BUSY << 16;
+ break;
+ }
+
+ /* Handle mid-layer underflow */
+- if ((unsigned)(cp->request_bufflen - resid) <
++ if ((unsigned)(scsi_bufflen(cp) - resid) <
+ cp->underflow) {
+ qla_printk(KERN_INFO, ha,
+ "scsi(%ld:%d:%d:%d): Mid-layer underflow "
+@@ -1061,7 +1061,7 @@
+ "error status.\n", ha->host_no,
+ cp->device->channel, cp->device->id,
+ cp->device->lun, resid,
+- cp->request_bufflen);
++ scsi_bufflen(cp));
+
+ cp->result = DID_ERROR << 16;
+ break;
+@@ -1084,7 +1084,7 @@
+ DEBUG2(printk(KERN_INFO
+ "PID=0x%lx req=0x%x xtra=0x%x -- returning DID_ERROR "
+ "status!\n",
+- cp->serial_number, cp->request_bufflen, resid_len));
++ cp->serial_number, scsi_bufflen(cp), resid_len));
+
+ cp->result = DID_ERROR << 16;
+ break;
+@@ -1633,7 +1633,7 @@
+ uint16_t entry;
+ uint16_t index;
+ const char *name;
+- irqreturn_t (*handler)(int, void *);
++ irq_handler_t handler;
+ };
+
+ static struct qla_init_msix_entry imsix_entries[QLA_MSIX_ENTRIES] = {
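
The qla_isr.c hunks above make the same accessor conversion on the status-handling side: direct cp->resid accesses become scsi_set_resid()/scsi_get_resid() and cp->request_bufflen becomes scsi_bufflen(). A condensed sketch of the mid-layer underflow check in that form, with the handler's surrounding variables passed in as parameters:

	#include <linux/types.h>
	#include <scsi/scsi.h>
	#include <scsi/scsi_cmnd.h>

	static void example_check_underflow(struct scsi_cmnd *cp, u32 resid,
					    u8 lscsi_status)
	{
		scsi_set_resid(cp, resid);
		if (!lscsi_status &&
		    (unsigned)(scsi_bufflen(cp) - resid) < cp->underflow)
			cp->result = DID_ERROR << 16;	/* mid-layer underflow */
	}
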
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2426,13 +2426,7 @@
+ struct scsi_cmnd *cmd = sp->cmd;
+
+ if (sp->flags & SRB_DMA_VALID) {
+- if (cmd->use_sg) {
+- dma_unmap_sg(&ha->pdev->dev, cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- } else if (cmd->request_bufflen) {
+- dma_unmap_single(&ha->pdev->dev, sp->dma_handle,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+ sp->flags &= ~SRB_DMA_VALID;
+ }
+ CMD_SP(cmd) = NULL;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,176 +6,9 @@
+ */
+
+ #include "ql4_def.h"
+-#include <scsi/scsi_dbg.h>
+-
+-#if 0
+-
+-static void qla4xxx_print_srb_info(struct srb * srb)
+-{
+- printk("%s: srb = 0x%p, flags=0x%02x\n", __func__, srb, srb->flags);
+- printk("%s: cmd = 0x%p, saved_dma_handle = 0x%lx\n",
+- __func__, srb->cmd, (unsigned long) srb->dma_handle);
+- printk("%s: fw_ddb_index = %d, lun = %d\n",
+- __func__, srb->fw_ddb_index, srb->cmd->device->lun);
+- printk("%s: iocb_tov = %d\n",
+- __func__, srb->iocb_tov);
+- printk("%s: cc_stat = 0x%x, r_start = 0x%lx, u_start = 0x%lx\n\n",
+- __func__, srb->cc_stat, srb->r_start, srb->u_start);
+-}
+-
+-void qla4xxx_print_scsi_cmd(struct scsi_cmnd *cmd)
+-{
+- printk("SCSI Command = 0x%p, Handle=0x%p\n", cmd, cmd->host_scribble);
+- printk(" b=%d, t=%02xh, l=%02xh, cmd_len = %02xh\n",
+- cmd->device->channel, cmd->device->id, cmd->device->lun,
+- cmd->cmd_len);
+- scsi_print_command(cmd);
+- printk(" seg_cnt = %d\n", cmd->use_sg);
+- printk(" request buffer = 0x%p, request buffer len = 0x%x\n",
+- cmd->request_buffer, cmd->request_bufflen);
+- if (cmd->use_sg) {
+- struct scatterlist *sg;
+- sg = (struct scatterlist *)cmd->request_buffer;
+- printk(" SG buffer: \n");
+- qla4xxx_dump_buffer((caddr_t) sg,
+- (cmd->use_sg * sizeof(*sg)));
+- }
+- printk(" tag = %d, transfersize = 0x%x \n", cmd->tag,
+- cmd->transfersize);
+- printk(" Pid = %d, SP = 0x%p\n", (int)cmd->pid, cmd->SCp.ptr);
+- printk(" underflow size = 0x%x, direction=0x%x\n", cmd->underflow,
+- cmd->sc_data_direction);
+- printk(" Current time (jiffies) = 0x%lx, "
+- "timeout expires = 0x%lx\n", jiffies, cmd->eh_timeout.expires);
+- qla4xxx_print_srb_info((struct srb *) cmd->SCp.ptr);
+-}
+-
+-void __dump_registers(struct scsi_qla_host *ha)
+-{
+- uint8_t i;
+- for (i = 0; i < MBOX_REG_COUNT; i++) {
+- printk(KERN_INFO "0x%02X mailbox[%d] = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, mailbox[i]), i,
+- readw(&ha->reg->mailbox[i]));
+- }
+- printk(KERN_INFO "0x%02X flash_address = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, flash_address),
+- readw(&ha->reg->flash_address));
+- printk(KERN_INFO "0x%02X flash_data = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, flash_data),
+- readw(&ha->reg->flash_data));
+- printk(KERN_INFO "0x%02X ctrl_status = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, ctrl_status),
+- readw(&ha->reg->ctrl_status));
+- if (is_qla4010(ha)) {
+- printk(KERN_INFO "0x%02X nvram = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, u1.isp4010.nvram),
+- readw(&ha->reg->u1.isp4010.nvram));
+- }
+-
+- else if (is_qla4022(ha) | is_qla4032(ha)) {
+- printk(KERN_INFO "0x%02X intr_mask = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u1.isp4022.intr_mask),
+- readw(&ha->reg->u1.isp4022.intr_mask));
+- printk(KERN_INFO "0x%02X nvram = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, u1.isp4022.nvram),
+- readw(&ha->reg->u1.isp4022.nvram));
+- printk(KERN_INFO "0x%02X semaphore = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u1.isp4022.semaphore),
+- readw(&ha->reg->u1.isp4022.semaphore));
+- }
+- printk(KERN_INFO "0x%02X req_q_in = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, req_q_in),
+- readw(&ha->reg->req_q_in));
+- printk(KERN_INFO "0x%02X rsp_q_out = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, rsp_q_out),
+- readw(&ha->reg->rsp_q_out));
+- if (is_qla4010(ha)) {
+- printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4010.ext_hw_conf),
+- readw(&ha->reg->u2.isp4010.ext_hw_conf));
+- printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4010.port_ctrl),
+- readw(&ha->reg->u2.isp4010.port_ctrl));
+- printk(KERN_INFO "0x%02X port_status = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4010.port_status),
+- readw(&ha->reg->u2.isp4010.port_status));
+- printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4010.req_q_out),
+- readw(&ha->reg->u2.isp4010.req_q_out));
+- printk(KERN_INFO "0x%02X gp_out = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_out),
+- readw(&ha->reg->u2.isp4010.gp_out));
+- printk(KERN_INFO "0x%02X gp_in = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_in),
+- readw(&ha->reg->u2.isp4010.gp_in));
+- printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4010.port_err_status),
+- readw(&ha->reg->u2.isp4010.port_err_status));
+- }
+-
+- else if (is_qla4022(ha) | is_qla4032(ha)) {
+- printk(KERN_INFO "Page 0 Registers:\n");
+- printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p0.ext_hw_conf),
+- readw(&ha->reg->u2.isp4022.p0.ext_hw_conf));
+- printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p0.port_ctrl),
+- readw(&ha->reg->u2.isp4022.p0.port_ctrl));
+- printk(KERN_INFO "0x%02X port_status = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p0.port_status),
+- readw(&ha->reg->u2.isp4022.p0.port_status));
+- printk(KERN_INFO "0x%02X gp_out = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p0.gp_out),
+- readw(&ha->reg->u2.isp4022.p0.gp_out));
+- printk(KERN_INFO "0x%02X gp_in = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg, u2.isp4022.p0.gp_in),
+- readw(&ha->reg->u2.isp4022.p0.gp_in));
+- printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p0.port_err_status),
+- readw(&ha->reg->u2.isp4022.p0.port_err_status));
+- printk(KERN_INFO "Page 1 Registers:\n");
+- writel(HOST_MEM_CFG_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT),
+- &ha->reg->ctrl_status);
+- printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n",
+- (uint8_t) offsetof(struct isp_reg,
+- u2.isp4022.p1.req_q_out),
+- readw(&ha->reg->u2.isp4022.p1.req_q_out));
+- writel(PORT_CTRL_STAT_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT),
+- &ha->reg->ctrl_status);
+- }
+-}
+-
+-void qla4xxx_dump_mbox_registers(struct scsi_qla_host *ha)
+-{
+- unsigned long flags = 0;
+- int i = 0;
+- spin_lock_irqsave(&ha->hardware_lock, flags);
+- for (i = 1; i < MBOX_REG_COUNT; i++)
+- printk(KERN_INFO " Mailbox[%d] = %08x\n", i,
+- readw(&ha->reg->mailbox[i]));
+- spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-}
+-
+-void qla4xxx_dump_registers(struct scsi_qla_host *ha)
+-{
+- unsigned long flags = 0;
+- spin_lock_irqsave(&ha->hardware_lock, flags);
+- __dump_registers(ha);
+- spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-}
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+ void qla4xxx_dump_buffer(void *b, uint32_t size)
+ {
+@@ -198,4 +31,3 @@
+ printk(KERN_DEBUG "\n");
+ }
+
+-#endif /* 0 */
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h 2007-12-21 15:36:12.000000000 -0500
+@@ -122,8 +122,7 @@
+
+ #define ISCSI_IPADDR_SIZE 4 /* IP address size */
+#define ISCSI_ALIAS_SIZE		32	/* ISCSI Alias name size */
+-#define ISCSI_NAME_SIZE 255 /* ISCSI Name size -
+- * usually a string */
++#define ISCSI_NAME_SIZE 0xE0 /* ISCSI Name size */
+
+ #define LSDW(x) ((u32)((u64)(x)))
+ #define MSDW(x) ((u32)((((u64)(x)) >> 16) >> 16))
+@@ -187,7 +186,19 @@
+ u_long u_start; /* Time when we handed the cmd to F/W */
+ };
+
+- /*
++/*
++ * Asynchronous Event Queue structure
++ */
++struct aen {
++ uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
++};
++
++struct ql4_aen_log {
++ int count;
++ struct aen entry[MAX_AEN_ENTRIES];
++};
++
++/*
+ * Device Database (DDB) structure
+ */
+ struct ddb_entry {
+@@ -254,13 +265,6 @@
+ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */
+ #define DF_FO_MASKED 3
+
+-/*
+- * Asynchronous Event Queue structure
+- */
+-struct aen {
+- uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
+-};
+-
+
+ #include "ql4_fw.h"
+ #include "ql4_nvram.h"
+@@ -270,20 +274,17 @@
+ */
+ struct scsi_qla_host {
+ /* Linux adapter configuration data */
+- struct Scsi_Host *host; /* pointer to host data */
+- uint32_t tot_ddbs;
+ unsigned long flags;
+
+ #define AF_ONLINE 0 /* 0x00000001 */
+ #define AF_INIT_DONE 1 /* 0x00000002 */
+ #define AF_MBOX_COMMAND 2 /* 0x00000004 */
+ #define AF_MBOX_COMMAND_DONE 3 /* 0x00000008 */
+-#define AF_INTERRUPTS_ON 6 /* 0x00000040 Not Used */
++#define AF_INTERRUPTS_ON 6 /* 0x00000040 */
+ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */
+ #define AF_LINK_UP 8 /* 0x00000100 */
+ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */
+-#define AF_ISNS_CMD_IN_PROCESS 12 /* 0x00001000 */
+-#define AF_ISNS_CMD_DONE 13 /* 0x00002000 */
++#define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */
+
+ unsigned long dpc_flags;
+
+@@ -296,6 +297,9 @@
+ #define DPC_AEN 9 /* 0x00000200 */
+ #define DPC_GET_DHCP_IP_ADDR 15 /* 0x00008000 */
+
++ struct Scsi_Host *host; /* pointer to host data */
++ uint32_t tot_ddbs;
++
+ uint16_t iocb_cnt;
+ uint16_t iocb_hiwat;
+
+@@ -344,6 +348,7 @@
+ uint32_t firmware_version[2];
+ uint32_t patch_number;
+ uint32_t build_number;
++ uint32_t board_id;
+
+ /* --- From Init_FW --- */
+ /* init_cb_t *init_cb; */
+@@ -363,7 +368,6 @@
+
+ /* --- From GetFwState --- */
+ uint32_t firmware_state;
+- uint32_t board_id;
+ uint32_t addl_fw_state;
+
+ /* Linux kernel thread */
+@@ -414,6 +418,8 @@
+ uint16_t aen_out;
+ struct aen aen_q[MAX_AEN_ENTRIES];
+
++ struct ql4_aen_log aen_log;/* tracks all aens */
++
+ /* This mutex protects several threads to do mailbox commands
+ * concurrently.
+ */
+@@ -585,10 +591,4 @@
+ #define FLUSH_DDB_CHANGED_AENS 1
+ #define RELOGIN_DDB_CHANGED_AENS 2
+
+-#include "ql4_version.h"
+-#include "ql4_glbl.h"
+-#include "ql4_dbg.h"
+-#include "ql4_inline.h"
+-
+-
+ #endif /*_QLA4XXX_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h 2007-12-21 15:36:12.000000000 -0500
+@@ -20,143 +20,23 @@
+ *************************************************************************/
+
+ struct port_ctrl_stat_regs {
+- __le32 ext_hw_conf; /* 80 x50 R/W */
+- __le32 intChipConfiguration; /* 84 x54 */
+- __le32 port_ctrl; /* 88 x58 */
+- __le32 port_status; /* 92 x5c */
+- __le32 HostPrimMACHi; /* 96 x60 */
+- __le32 HostPrimMACLow; /* 100 x64 */
+- __le32 HostSecMACHi; /* 104 x68 */
+- __le32 HostSecMACLow; /* 108 x6c */
+- __le32 EPPrimMACHi; /* 112 x70 */
+- __le32 EPPrimMACLow; /* 116 x74 */
+- __le32 EPSecMACHi; /* 120 x78 */
+- __le32 EPSecMACLow; /* 124 x7c */
+- __le32 HostPrimIPHi; /* 128 x80 */
+- __le32 HostPrimIPMidHi; /* 132 x84 */
+- __le32 HostPrimIPMidLow; /* 136 x88 */
+- __le32 HostPrimIPLow; /* 140 x8c */
+- __le32 HostSecIPHi; /* 144 x90 */
+- __le32 HostSecIPMidHi; /* 148 x94 */
+- __le32 HostSecIPMidLow; /* 152 x98 */
+- __le32 HostSecIPLow; /* 156 x9c */
+- __le32 EPPrimIPHi; /* 160 xa0 */
+- __le32 EPPrimIPMidHi; /* 164 xa4 */
+- __le32 EPPrimIPMidLow; /* 168 xa8 */
+- __le32 EPPrimIPLow; /* 172 xac */
+- __le32 EPSecIPHi; /* 176 xb0 */
+- __le32 EPSecIPMidHi; /* 180 xb4 */
+- __le32 EPSecIPMidLow; /* 184 xb8 */
+- __le32 EPSecIPLow; /* 188 xbc */
+- __le32 IPReassemblyTimeout; /* 192 xc0 */
+- __le32 EthMaxFramePayload; /* 196 xc4 */
+- __le32 TCPMaxWindowSize; /* 200 xc8 */
+- __le32 TCPCurrentTimestampHi; /* 204 xcc */
+- __le32 TCPCurrentTimestampLow; /* 208 xd0 */
+- __le32 LocalRAMAddress; /* 212 xd4 */
+- __le32 LocalRAMData; /* 216 xd8 */
+- __le32 PCSReserved1; /* 220 xdc */
+- __le32 gp_out; /* 224 xe0 */
+- __le32 gp_in; /* 228 xe4 */
+- __le32 ProbeMuxAddr; /* 232 xe8 */
+- __le32 ProbeMuxData; /* 236 xec */
+- __le32 ERMQueueBaseAddr0; /* 240 xf0 */
+- __le32 ERMQueueBaseAddr1; /* 244 xf4 */
+- __le32 MACConfiguration; /* 248 xf8 */
+- __le32 port_err_status; /* 252 xfc COR */
++ __le32 ext_hw_conf; /* 0x50 R/W */
++ __le32 rsrvd0; /* 0x54 */
++ __le32 port_ctrl; /* 0x58 */
++ __le32 port_status; /* 0x5c */
++ __le32 rsrvd1[32]; /* 0x60-0xdf */
++ __le32 gp_out; /* 0xe0 */
++ __le32 gp_in; /* 0xe4 */
++ __le32 rsrvd2[5]; /* 0xe8-0xfb */
++ __le32 port_err_status; /* 0xfc */
+ };
+
+ struct host_mem_cfg_regs {
+- __le32 NetRequestQueueOut; /* 80 x50 */
+- __le32 NetRequestQueueOutAddrHi; /* 84 x54 */
+- __le32 NetRequestQueueOutAddrLow; /* 88 x58 */
+- __le32 NetRequestQueueBaseAddrHi; /* 92 x5c */
+- __le32 NetRequestQueueBaseAddrLow; /* 96 x60 */
+- __le32 NetRequestQueueLength; /* 100 x64 */
+- __le32 NetResponseQueueIn; /* 104 x68 */
+- __le32 NetResponseQueueInAddrHi; /* 108 x6c */
+- __le32 NetResponseQueueInAddrLow; /* 112 x70 */
+- __le32 NetResponseQueueBaseAddrHi; /* 116 x74 */
+- __le32 NetResponseQueueBaseAddrLow; /* 120 x78 */
+- __le32 NetResponseQueueLength; /* 124 x7c */
+- __le32 req_q_out; /* 128 x80 */
+- __le32 RequestQueueOutAddrHi; /* 132 x84 */
+- __le32 RequestQueueOutAddrLow; /* 136 x88 */
+- __le32 RequestQueueBaseAddrHi; /* 140 x8c */
+- __le32 RequestQueueBaseAddrLow; /* 144 x90 */
+- __le32 RequestQueueLength; /* 148 x94 */
+- __le32 ResponseQueueIn; /* 152 x98 */
+- __le32 ResponseQueueInAddrHi; /* 156 x9c */
+- __le32 ResponseQueueInAddrLow; /* 160 xa0 */
+- __le32 ResponseQueueBaseAddrHi; /* 164 xa4 */
+- __le32 ResponseQueueBaseAddrLow; /* 168 xa8 */
+- __le32 ResponseQueueLength; /* 172 xac */
+- __le32 NetRxLargeBufferQueueOut; /* 176 xb0 */
+- __le32 NetRxLargeBufferQueueBaseAddrHi; /* 180 xb4 */
+- __le32 NetRxLargeBufferQueueBaseAddrLow; /* 184 xb8 */
+- __le32 NetRxLargeBufferQueueLength; /* 188 xbc */
+- __le32 NetRxLargeBufferLength; /* 192 xc0 */
+- __le32 NetRxSmallBufferQueueOut; /* 196 xc4 */
+- __le32 NetRxSmallBufferQueueBaseAddrHi; /* 200 xc8 */
+- __le32 NetRxSmallBufferQueueBaseAddrLow; /* 204 xcc */
+- __le32 NetRxSmallBufferQueueLength; /* 208 xd0 */
+- __le32 NetRxSmallBufferLength; /* 212 xd4 */
+- __le32 HMCReserved0[10]; /* 216 xd8 */
+-};
+-
+-struct local_ram_cfg_regs {
+- __le32 BufletSize; /* 80 x50 */
+- __le32 BufletMaxCount; /* 84 x54 */
+- __le32 BufletCurrCount; /* 88 x58 */
+- __le32 BufletPauseThresholdCount; /* 92 x5c */
+- __le32 BufletTCPWinThresholdHi; /* 96 x60 */
+- __le32 BufletTCPWinThresholdLow; /* 100 x64 */
+- __le32 IPHashTableBaseAddr; /* 104 x68 */
+- __le32 IPHashTableSize; /* 108 x6c */
+- __le32 TCPHashTableBaseAddr; /* 112 x70 */
+- __le32 TCPHashTableSize; /* 116 x74 */
+- __le32 NCBAreaBaseAddr; /* 120 x78 */
+- __le32 NCBMaxCount; /* 124 x7c */
+- __le32 NCBCurrCount; /* 128 x80 */
+- __le32 DRBAreaBaseAddr; /* 132 x84 */
+- __le32 DRBMaxCount; /* 136 x88 */
+- __le32 DRBCurrCount; /* 140 x8c */
+- __le32 LRCReserved[28]; /* 144 x90 */
+-};
+-
+-struct prot_stat_regs {
+- __le32 MACTxFrameCount; /* 80 x50 R */
+- __le32 MACTxByteCount; /* 84 x54 R */
+- __le32 MACRxFrameCount; /* 88 x58 R */
+- __le32 MACRxByteCount; /* 92 x5c R */
+- __le32 MACCRCErrCount; /* 96 x60 R */
+- __le32 MACEncErrCount; /* 100 x64 R */
+- __le32 MACRxLengthErrCount; /* 104 x68 R */
+- __le32 IPTxPacketCount; /* 108 x6c R */
+- __le32 IPTxByteCount; /* 112 x70 R */
+- __le32 IPTxFragmentCount; /* 116 x74 R */
+- __le32 IPRxPacketCount; /* 120 x78 R */
+- __le32 IPRxByteCount; /* 124 x7c R */
+- __le32 IPRxFragmentCount; /* 128 x80 R */
+- __le32 IPDatagramReassemblyCount; /* 132 x84 R */
+- __le32 IPV6RxPacketCount; /* 136 x88 R */
+- __le32 IPErrPacketCount; /* 140 x8c R */
+- __le32 IPReassemblyErrCount; /* 144 x90 R */
+- __le32 TCPTxSegmentCount; /* 148 x94 R */
+- __le32 TCPTxByteCount; /* 152 x98 R */
+- __le32 TCPRxSegmentCount; /* 156 x9c R */
+- __le32 TCPRxByteCount; /* 160 xa0 R */
+- __le32 TCPTimerExpCount; /* 164 xa4 R */
+- __le32 TCPRxAckCount; /* 168 xa8 R */
+- __le32 TCPTxAckCount; /* 172 xac R */
+- __le32 TCPRxErrOOOCount; /* 176 xb0 R */
+- __le32 PSReserved0; /* 180 xb4 */
+- __le32 TCPRxWindowProbeUpdateCount; /* 184 xb8 R */
+- __le32 ECCErrCorrectionCount; /* 188 xbc R */
+- __le32 PSReserved1[16]; /* 192 xc0 */
++	__le32 rsrvd0[12];	/* 0x50-0x7f */
++ __le32 req_q_out; /* 0x80 */
++ __le32 rsrvd1[31]; /* 0x84-0xFF */
+ };
+
+-
+ /* remote register set (access via PCI memory read/write) */
+ struct isp_reg {
+ #define MBOX_REG_COUNT 8
+@@ -207,11 +87,7 @@
+ union {
+ struct port_ctrl_stat_regs p0;
+ struct host_mem_cfg_regs p1;
+- struct local_ram_cfg_regs p2;
+- struct prot_stat_regs p3;
+- __le32 r_union[44];
+ };
+-
+ } __attribute__ ((packed)) isp4022;
+ } u2;
+ }; /* 256 x100 */
+@@ -296,6 +172,7 @@
+ /* ISP Semaphore definitions */
+
+ /* ISP General Purpose Output definitions */
++#define GPOR_TOPCAT_RESET 0x00000004
+
+ /* shadow registers (DMA'd from HA to system memory. read only) */
+ struct shadow_regs {
+@@ -337,6 +214,7 @@
+
+ /* Mailbox command definitions */
+ #define MBOX_CMD_ABOUT_FW 0x0009
++#define MBOX_CMD_PING 0x000B
+ #define MBOX_CMD_LUN_RESET 0x0016
+ #define MBOX_CMD_GET_MANAGEMENT_DATA 0x001E
+ #define MBOX_CMD_GET_FW_STATUS 0x001F
+@@ -364,6 +242,17 @@
+ #define MBOX_CMD_GET_FW_STATE 0x0069
+ #define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS 0x006A
+ #define MBOX_CMD_RESTORE_FACTORY_DEFAULTS 0x0087
++#define MBOX_CMD_SET_ACB 0x0088
++#define MBOX_CMD_GET_ACB 0x0089
++#define MBOX_CMD_DISABLE_ACB 0x008A
++#define MBOX_CMD_GET_IPV6_NEIGHBOR_CACHE 0x008B
++#define MBOX_CMD_GET_IPV6_DEST_CACHE 0x008C
++#define MBOX_CMD_GET_IPV6_DEF_ROUTER_LIST 0x008D
++#define MBOX_CMD_GET_IPV6_LCL_PREFIX_LIST 0x008E
++#define MBOX_CMD_SET_IPV6_NEIGHBOR_CACHE 0x0090
++#define MBOX_CMD_GET_IP_ADDR_STATE 0x0091
++#define MBOX_CMD_SEND_IPV6_ROUTER_SOL 0x0092
++#define MBOX_CMD_GET_DB_ENTRY_CURRENT_IP_ADDR 0x0093
+
+ /* Mailbox 1 */
+ #define FW_STATE_READY 0x0000
+@@ -409,6 +298,16 @@
+ #define MBOX_ASTS_DHCP_LEASE_EXPIRED 0x801D
+ #define MBOX_ASTS_DHCP_LEASE_ACQUIRED 0x801F
+ #define MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED 0x8021
++#define MBOX_ASTS_DUPLICATE_IP 0x8025
++#define MBOX_ASTS_ARP_COMPLETE 0x8026
++#define MBOX_ASTS_SUBNET_STATE_CHANGE 0x8027
++#define MBOX_ASTS_RESPONSE_QUEUE_FULL 0x8028
++#define MBOX_ASTS_IP_ADDR_STATE_CHANGED 0x8029
++#define MBOX_ASTS_IPV6_PREFIX_EXPIRED 0x802B
++#define MBOX_ASTS_IPV6_ND_PREFIX_IGNORED 0x802C
++#define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D
++#define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E
++
+ #define ISNS_EVENT_DATA_RECEIVED 0x0000
+ #define ISNS_EVENT_CONNECTION_OPENED 0x0001
+ #define ISNS_EVENT_CONNECTION_FAILED 0x0002
+@@ -418,137 +317,166 @@
+ /*************************************************************************/
+
+ /* Host Adapter Initialization Control Block (from host) */
+-struct init_fw_ctrl_blk {
+- uint8_t Version; /* 00 */
+- uint8_t Control; /* 01 */
++struct addr_ctrl_blk {
++ uint8_t version; /* 00 */
++ uint8_t control; /* 01 */
+
+- uint16_t FwOptions; /* 02-03 */
++ uint16_t fw_options; /* 02-03 */
+ #define FWOPT_HEARTBEAT_ENABLE 0x1000
+ #define FWOPT_SESSION_MODE 0x0040
+ #define FWOPT_INITIATOR_MODE 0x0020
+ #define FWOPT_TARGET_MODE 0x0010
+
+- uint16_t ExecThrottle; /* 04-05 */
+- uint8_t RetryCount; /* 06 */
+- uint8_t RetryDelay; /* 07 */
+- uint16_t MaxEthFrPayloadSize; /* 08-09 */
+- uint16_t AddFwOptions; /* 0A-0B */
+-
+- uint8_t HeartbeatInterval; /* 0C */
+- uint8_t InstanceNumber; /* 0D */
+- uint16_t RES2; /* 0E-0F */
+- uint16_t ReqQConsumerIndex; /* 10-11 */
+- uint16_t ComplQProducerIndex; /* 12-13 */
+- uint16_t ReqQLen; /* 14-15 */
+- uint16_t ComplQLen; /* 16-17 */
+- uint32_t ReqQAddrLo; /* 18-1B */
+- uint32_t ReqQAddrHi; /* 1C-1F */
+- uint32_t ComplQAddrLo; /* 20-23 */
+- uint32_t ComplQAddrHi; /* 24-27 */
+- uint32_t ShadowRegBufAddrLo; /* 28-2B */
+- uint32_t ShadowRegBufAddrHi; /* 2C-2F */
+-
+- uint16_t iSCSIOptions; /* 30-31 */
+-
+- uint16_t TCPOptions; /* 32-33 */
+-
+- uint16_t IPOptions; /* 34-35 */
+-
+- uint16_t MaxPDUSize; /* 36-37 */
+- uint16_t RcvMarkerInt; /* 38-39 */
+- uint16_t SndMarkerInt; /* 3A-3B */
+- uint16_t InitMarkerlessInt; /* 3C-3D */
+- uint16_t FirstBurstSize; /* 3E-3F */
+- uint16_t DefaultTime2Wait; /* 40-41 */
+- uint16_t DefaultTime2Retain; /* 42-43 */
+- uint16_t MaxOutStndngR2T; /* 44-45 */
+- uint16_t KeepAliveTimeout; /* 46-47 */
+- uint16_t PortNumber; /* 48-49 */
+- uint16_t MaxBurstSize; /* 4A-4B */
+- uint32_t RES4; /* 4C-4F */
+- uint8_t IPAddr[4]; /* 50-53 */
+- uint8_t RES5[12]; /* 54-5F */
+- uint8_t SubnetMask[4]; /* 60-63 */
+- uint8_t RES6[12]; /* 64-6F */
+- uint8_t GatewayIPAddr[4]; /* 70-73 */
+- uint8_t RES7[12]; /* 74-7F */
+- uint8_t PriDNSIPAddr[4]; /* 80-83 */
+- uint8_t SecDNSIPAddr[4]; /* 84-87 */
+- uint8_t RES8[8]; /* 88-8F */
+- uint8_t Alias[32]; /* 90-AF */
+- uint8_t TargAddr[8]; /* B0-B7 *//* /FIXME: Remove?? */
+- uint8_t CHAPNameSecretsTable[8]; /* B8-BF */
+- uint8_t EthernetMACAddr[6]; /* C0-C5 */
+- uint16_t TargetPortalGroup; /* C6-C7 */
+- uint8_t SendScale; /* C8 */
+- uint8_t RecvScale; /* C9 */
+- uint8_t TypeOfService; /* CA */
+- uint8_t Time2Live; /* CB */
+- uint16_t VLANPriority; /* CC-CD */
+- uint16_t Reserved8; /* CE-CF */
+- uint8_t SecIPAddr[4]; /* D0-D3 */
+- uint8_t Reserved9[12]; /* D4-DF */
+- uint8_t iSNSIPAddr[4]; /* E0-E3 */
+- uint16_t iSNSServerPortNumber; /* E4-E5 */
+- uint8_t Reserved10[10]; /* E6-EF */
+- uint8_t SLPDAIPAddr[4]; /* F0-F3 */
+- uint8_t Reserved11[12]; /* F4-FF */
+- uint8_t iSCSINameString[256]; /* 100-1FF */
++ uint16_t exec_throttle; /* 04-05 */
++ uint8_t zio_count; /* 06 */
++ uint8_t res0; /* 07 */
++ uint16_t eth_mtu_size; /* 08-09 */
++ uint16_t add_fw_options; /* 0A-0B */
++
++ uint8_t hb_interval; /* 0C */
++ uint8_t inst_num; /* 0D */
++ uint16_t res1; /* 0E-0F */
++ uint16_t rqq_consumer_idx; /* 10-11 */
++ uint16_t compq_producer_idx; /* 12-13 */
++ uint16_t rqq_len; /* 14-15 */
++ uint16_t compq_len; /* 16-17 */
++ uint32_t rqq_addr_lo; /* 18-1B */
++ uint32_t rqq_addr_hi; /* 1C-1F */
++ uint32_t compq_addr_lo; /* 20-23 */
++ uint32_t compq_addr_hi; /* 24-27 */
++ uint32_t shdwreg_addr_lo; /* 28-2B */
++ uint32_t shdwreg_addr_hi; /* 2C-2F */
++
++ uint16_t iscsi_opts; /* 30-31 */
++ uint16_t ipv4_tcp_opts; /* 32-33 */
++ uint16_t ipv4_ip_opts; /* 34-35 */
++
++ uint16_t iscsi_max_pdu_size; /* 36-37 */
++ uint8_t ipv4_tos; /* 38 */
++ uint8_t ipv4_ttl; /* 39 */
++ uint8_t acb_version; /* 3A */
++ uint8_t res2; /* 3B */
++ uint16_t def_timeout; /* 3C-3D */
++ uint16_t iscsi_fburst_len; /* 3E-3F */
++ uint16_t iscsi_def_time2wait; /* 40-41 */
++ uint16_t iscsi_def_time2retain; /* 42-43 */
++ uint16_t iscsi_max_outstnd_r2t; /* 44-45 */
++ uint16_t conn_ka_timeout; /* 46-47 */
++ uint16_t ipv4_port; /* 48-49 */
++ uint16_t iscsi_max_burst_len; /* 4A-4B */
++ uint32_t res5; /* 4C-4F */
++ uint8_t ipv4_addr[4]; /* 50-53 */
++ uint16_t ipv4_vlan_tag; /* 54-55 */
++ uint8_t ipv4_addr_state; /* 56 */
++ uint8_t ipv4_cacheid; /* 57 */
++ uint8_t res6[8]; /* 58-5F */
++ uint8_t ipv4_subnet[4]; /* 60-63 */
++ uint8_t res7[12]; /* 64-6F */
++ uint8_t ipv4_gw_addr[4]; /* 70-73 */
++ uint8_t res8[0xc]; /* 74-7F */
++ uint8_t pri_dns_srvr_ip[4];/* 80-83 */
++ uint8_t sec_dns_srvr_ip[4];/* 84-87 */
++ uint16_t min_eph_port; /* 88-89 */
++ uint16_t max_eph_port; /* 8A-8B */
++ uint8_t res9[4]; /* 8C-8F */
++ uint8_t iscsi_alias[32];/* 90-AF */
++ uint8_t res9_1[0x16]; /* B0-C5 */
++ uint16_t tgt_portal_grp;/* C6-C7 */
++ uint8_t abort_timer; /* C8 */
++ uint8_t ipv4_tcp_wsf; /* C9 */
++ uint8_t res10[6]; /* CA-CF */
++ uint8_t ipv4_sec_ip_addr[4]; /* D0-D3 */
++ uint8_t ipv4_dhcp_vid_len; /* D4 */
++ uint8_t ipv4_dhcp_vid[11]; /* D5-DF */
++ uint8_t res11[20]; /* E0-F3 */
++ uint8_t ipv4_dhcp_alt_cid_len; /* F4 */
++ uint8_t ipv4_dhcp_alt_cid[11]; /* F5-FF */
++ uint8_t iscsi_name[224]; /* 100-1DF */
++ uint8_t res12[32]; /* 1E0-1FF */
++ uint32_t cookie; /* 200-203 */
++ uint16_t ipv6_port; /* 204-205 */
++ uint16_t ipv6_opts; /* 206-207 */
++ uint16_t ipv6_addtl_opts; /* 208-209 */
++ uint16_t ipv6_tcp_opts; /* 20A-20B */
++ uint8_t ipv6_tcp_wsf; /* 20C */
++ uint16_t ipv6_flow_lbl; /* 20D-20F */
++ uint8_t ipv6_gw_addr[16]; /* 210-21F */
++ uint16_t ipv6_vlan_tag; /* 220-221 */
++ uint8_t ipv6_lnk_lcl_addr_state;/* 222 */
++ uint8_t ipv6_addr0_state; /* 223 */
++ uint8_t ipv6_addr1_state; /* 224 */
++ uint8_t ipv6_gw_state; /* 225 */
++ uint8_t ipv6_traffic_class; /* 226 */
++ uint8_t ipv6_hop_limit; /* 227 */
++ uint8_t ipv6_if_id[8]; /* 228-22F */
++ uint8_t ipv6_addr0[16]; /* 230-23F */
++ uint8_t ipv6_addr1[16]; /* 240-24F */
++ uint32_t ipv6_nd_reach_time; /* 250-253 */
++ uint32_t ipv6_nd_rexmit_timer; /* 254-257 */
++ uint32_t ipv6_nd_stale_timeout; /* 258-25B */
++ uint8_t ipv6_dup_addr_detect_count; /* 25C */
++ uint8_t ipv6_cache_id; /* 25D */
++ uint8_t res13[18]; /* 25E-26F */
++ uint32_t ipv6_gw_advrt_mtu; /* 270-273 */
++ uint8_t res14[140]; /* 274-2FF */
++};
++
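++/*
++ * The firmware control block now carries two address control blocks, a
++ * primary and a secondary (each with IPv4 and IPv6 fields); the mailbox
++ * callers in ql4_mbx.c pass sizeof(struct init_fw_ctrl_blk), i.e. both
++ * blocks, in mbox_cmd[4] when getting or setting it.
++ */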
++struct init_fw_ctrl_blk {
++ struct addr_ctrl_blk pri;
++ struct addr_ctrl_blk sec;
+ };
+
+ /*************************************************************************/
+
+ struct dev_db_entry {
+- uint8_t options; /* 00 */
++ uint16_t options; /* 00-01 */
+ #define DDB_OPT_DISC_SESSION 0x10
+ #define DDB_OPT_TARGET 0x02 /* device is a target */
+
+- uint8_t control; /* 01 */
+-
+- uint16_t exeThrottle; /* 02-03 */
+- uint16_t exeCount; /* 04-05 */
+- uint8_t retryCount; /* 06 */
+- uint8_t retryDelay; /* 07 */
+- uint16_t iSCSIOptions; /* 08-09 */
+-
+- uint16_t TCPOptions; /* 0A-0B */
+-
+- uint16_t IPOptions; /* 0C-0D */
+-
+- uint16_t maxPDUSize; /* 0E-0F */
+- uint16_t rcvMarkerInt; /* 10-11 */
+- uint16_t sndMarkerInt; /* 12-13 */
+- uint16_t iSCSIMaxSndDataSegLen; /* 14-15 */
+- uint16_t firstBurstSize; /* 16-17 */
+- uint16_t minTime2Wait; /* 18-19 : RA :default_time2wait */
+- uint16_t maxTime2Retain; /* 1A-1B */
+- uint16_t maxOutstndngR2T; /* 1C-1D */
+- uint16_t keepAliveTimeout; /* 1E-1F */
+- uint8_t ISID[6]; /* 20-25 big-endian, must be converted
++ uint16_t exec_throttle; /* 02-03 */
++ uint16_t exec_count; /* 04-05 */
++ uint16_t res0; /* 06-07 */
++ uint16_t iscsi_options; /* 08-09 */
++ uint16_t tcp_options; /* 0A-0B */
++ uint16_t ip_options; /* 0C-0D */
++ uint16_t iscsi_max_rcv_data_seg_len; /* 0E-0F */
++ uint32_t res1; /* 10-13 */
++ uint16_t iscsi_max_snd_data_seg_len; /* 14-15 */
++ uint16_t iscsi_first_burst_len; /* 16-17 */
++ uint16_t iscsi_def_time2wait; /* 18-19 */
++ uint16_t iscsi_def_time2retain; /* 1A-1B */
++ uint16_t iscsi_max_outsnd_r2t; /* 1C-1D */
++ uint16_t ka_timeout; /* 1E-1F */
++ uint8_t isid[6]; /* 20-25 big-endian, must be converted
+ * to little-endian */
+- uint16_t TSID; /* 26-27 */
+- uint16_t portNumber; /* 28-29 */
+- uint16_t maxBurstSize; /* 2A-2B */
+- uint16_t taskMngmntTimeout; /* 2C-2D */
+- uint16_t reserved1; /* 2E-2F */
+- uint8_t ipAddr[0x10]; /* 30-3F */
+- uint8_t iSCSIAlias[0x20]; /* 40-5F */
+- uint8_t targetAddr[0x20]; /* 60-7F */
+- uint8_t userID[0x20]; /* 80-9F */
+- uint8_t password[0x20]; /* A0-BF */
+- uint8_t iscsiName[0x100]; /* C0-1BF : xxzzy Make this a
++ uint16_t tsid; /* 26-27 */
++ uint16_t port; /* 28-29 */
++ uint16_t iscsi_max_burst_len; /* 2A-2B */
++ uint16_t def_timeout; /* 2C-2D */
++ uint16_t res2; /* 2E-2F */
++ uint8_t ip_addr[0x10]; /* 30-3F */
++ uint8_t iscsi_alias[0x20]; /* 40-5F */
++ uint8_t tgt_addr[0x20]; /* 60-7F */
++ uint16_t mss; /* 80-81 */
++ uint16_t res3; /* 82-83 */
++ uint16_t lcl_port; /* 84-85 */
++ uint8_t ipv4_tos; /* 86 */
++ uint16_t ipv6_flow_lbl; /* 87-89 */
++ uint8_t res4[0x36]; /* 8A-BF */
++ uint8_t iscsi_name[0xE0]; /* C0-19F : xxzzy Make this a
+ * pointer to a string so we
+ * don't have to reserve soooo
+ * much RAM */
+- uint16_t ddbLink; /* 1C0-1C1 */
+- uint16_t CHAPTableIndex; /* 1C2-1C3 */
+- uint16_t TargetPortalGroup; /* 1C4-1C5 */
+- uint16_t reserved2[2]; /* 1C6-1C7 */
+- uint32_t statSN; /* 1C8-1CB */
+- uint32_t expStatSN; /* 1CC-1CF */
+- uint16_t reserved3[0x2C]; /* 1D0-1FB */
+- uint16_t ddbValidCookie; /* 1FC-1FD */
+- uint16_t ddbValidSize; /* 1FE-1FF */
++ uint8_t ipv6_addr[0x10];/* 1A0-1AF */
++ uint8_t res5[0x10]; /* 1B0-1BF */
++ uint16_t ddb_link; /* 1C0-1C1 */
++ uint16_t chap_tbl_idx; /* 1C2-1C3 */
++ uint16_t tgt_portal_grp; /* 1C4-1C5 */
++ uint8_t tcp_xmt_wsf; /* 1C6 */
++ uint8_t tcp_rcv_wsf; /* 1C7 */
++ uint32_t stat_sn; /* 1C8-1CB */
++ uint32_t exp_stat_sn; /* 1CC-1CF */
++ uint8_t res6[0x30]; /* 1D0-1FF */
+ };
+
+ /*************************************************************************/
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h 2007-12-21 15:36:12.000000000 -0500
+@@ -8,6 +8,9 @@
+ #ifndef __QLA4x_GBL_H
+ #define __QLA4x_GBL_H
+
++struct iscsi_cls_conn;
++
++void qla4xxx_hw_reset(struct scsi_qla_host *ha);
+ int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a);
+ int qla4xxx_send_tgts(struct scsi_qla_host *ha, char *ip, uint16_t port);
+ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb);
+@@ -58,11 +61,13 @@
+ void qla4xxx_interrupt_service_routine(struct scsi_qla_host * ha,
+ uint32_t intr_status);
+ int qla4xxx_init_rings(struct scsi_qla_host * ha);
+-struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, uint32_t index);
++struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha,
++ uint32_t index);
+ void qla4xxx_srb_compl(struct scsi_qla_host *ha, struct srb *srb);
+ int qla4xxx_reinitialize_ddb_list(struct scsi_qla_host * ha);
+ int qla4xxx_process_ddb_changed(struct scsi_qla_host * ha,
+ uint32_t fw_ddb_index, uint32_t state);
++void qla4xxx_dump_buffer(void *b, uint32_t size);
+
+ extern int ql4xextended_error_logging;
+ extern int ql4xdiscoverywait;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,6 +6,9 @@
+ */
+
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+ static struct ddb_entry * qla4xxx_alloc_ddb(struct scsi_qla_host *ha,
+ uint32_t fw_ddb_index);
+@@ -300,12 +303,12 @@
+ if (!qla4xxx_fw_ready(ha))
+ return status;
+
+- set_bit(AF_ONLINE, &ha->flags);
+ return qla4xxx_get_firmware_status(ha);
+ }
+
+ static struct ddb_entry* qla4xxx_get_ddb_entry(struct scsi_qla_host *ha,
+- uint32_t fw_ddb_index)
++ uint32_t fw_ddb_index,
++ uint32_t *new_tgt)
+ {
+ struct dev_db_entry *fw_ddb_entry = NULL;
+ dma_addr_t fw_ddb_entry_dma;
+@@ -313,6 +316,7 @@
+ int found = 0;
+ uint32_t device_state;
+
++ *new_tgt = 0;
+ /* Make sure the dma buffer is valid */
+ fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev,
+ sizeof(*fw_ddb_entry),
+@@ -337,7 +341,7 @@
+ DEBUG2(printk("scsi%ld: %s: Looking for ddb[%d]\n", ha->host_no,
+ __func__, fw_ddb_index));
+ list_for_each_entry(ddb_entry, &ha->ddb_list, list) {
+- if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsiName,
++ if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name,
+ ISCSI_NAME_SIZE) == 0) {
+ found++;
+ break;
+@@ -348,6 +352,7 @@
+ DEBUG2(printk("scsi%ld: %s: ddb[%d] not found - allocating "
+ "new ddb\n", ha->host_no, __func__,
+ fw_ddb_index));
++ *new_tgt = 1;
+ ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
+ }
+
+@@ -409,26 +414,26 @@
+ }
+
+ status = QLA_SUCCESS;
+- ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->TSID);
++ ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->tsid);
+ ddb_entry->task_mgmt_timeout =
+- le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
++ le16_to_cpu(fw_ddb_entry->def_timeout);
+ ddb_entry->CmdSn = 0;
+- ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exeThrottle);
++ ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exec_throttle);
+ ddb_entry->default_relogin_timeout =
+- le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
+- ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->minTime2Wait);
++ le16_to_cpu(fw_ddb_entry->def_timeout);
++ ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait);
+
+ /* Update index in case it changed */
+ ddb_entry->fw_ddb_index = fw_ddb_index;
+ ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
+
+- ddb_entry->port = le16_to_cpu(fw_ddb_entry->portNumber);
+- ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->TargetPortalGroup);
+- memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsiName[0],
++ ddb_entry->port = le16_to_cpu(fw_ddb_entry->port);
++ ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp);
++ memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0],
+ min(sizeof(ddb_entry->iscsi_name),
+- sizeof(fw_ddb_entry->iscsiName)));
+- memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ipAddr[0],
+- min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ipAddr)));
++ sizeof(fw_ddb_entry->iscsi_name)));
++ memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0],
++ min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr)));
+
+ DEBUG2(printk("scsi%ld: %s: ddb[%d] - State= %x status= %d.\n",
+ ha->host_no, __func__, fw_ddb_index,
+@@ -495,6 +500,7 @@
+ uint32_t ddb_state;
+ uint32_t conn_err, err_code;
+ struct ddb_entry *ddb_entry;
++ uint32_t new_tgt;
+
+ dev_info(&ha->pdev->dev, "Initializing DDBs ...\n");
+ for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES;
+@@ -526,8 +532,19 @@
+ "completed "
+ "or access denied failure\n",
+ ha->host_no, __func__));
+- } else
++ } else {
+ qla4xxx_set_ddb_entry(ha, fw_ddb_index, 0);
++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index,
++ NULL, 0, NULL, &next_fw_ddb_index,
++ &ddb_state, &conn_err, NULL, NULL)
++ == QLA_ERROR) {
++ DEBUG2(printk("scsi%ld: %s:"
++ "get_ddb_entry %d failed\n",
++ ha->host_no,
++ __func__, fw_ddb_index));
++ return QLA_ERROR;
++ }
++ }
+ }
+
+ if (ddb_state != DDB_DS_SESSION_ACTIVE)
+@@ -540,7 +557,7 @@
+ ha->host_no, __func__, fw_ddb_index));
+
+ /* Add DDB to internal our ddb list. */
+- ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt);
+ if (ddb_entry == NULL) {
+ DEBUG2(printk("scsi%ld: %s: Unable to allocate memory "
+ "for device at fw_ddb_index %d\n",
+@@ -865,21 +882,20 @@
+
+ static void qla4x00_pci_config(struct scsi_qla_host *ha)
+ {
+- uint16_t w, mwi;
++ uint16_t w;
++ int status;
+
+ dev_info(&ha->pdev->dev, "Configuring PCI space...\n");
+
+ pci_set_master(ha->pdev);
+- mwi = 0;
+- if (pci_set_mwi(ha->pdev))
+- mwi = PCI_COMMAND_INVALIDATE;
++ status = pci_set_mwi(ha->pdev);
+ /*
+ * We want to respect framework's setting of PCI configuration space
+ * command register and also want to make sure that all bits of
+ * interest to us are properly set in command register.
+ */
+ pci_read_config_word(ha->pdev, PCI_COMMAND, &w);
+- w |= mwi | (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
++ w |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR;
+ w &= ~PCI_COMMAND_INTX_DISABLE;
+ pci_write_config_word(ha->pdev, PCI_COMMAND, w);
+ }
+@@ -911,6 +927,9 @@
+ writel(set_rmask(NVR_WRITE_ENABLE),
+ &ha->reg->u1.isp4022.nvram);
+
++ writel(2, &ha->reg->mailbox[6]);
++ readl(&ha->reg->mailbox[6]);
++
+ writel(set_rmask(CSR_BOOT_ENABLE), &ha->reg->ctrl_status);
+ readl(&ha->reg->ctrl_status);
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+@@ -958,25 +977,25 @@
+ return status;
+ }
+
+-int ql4xxx_lock_drvr_wait(struct scsi_qla_host *ha)
++int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a)
+ {
+-#define QL4_LOCK_DRVR_WAIT 30
++#define QL4_LOCK_DRVR_WAIT 60
+ #define QL4_LOCK_DRVR_SLEEP 1
+
+ int drvr_wait = QL4_LOCK_DRVR_WAIT;
+ while (drvr_wait) {
+- if (ql4xxx_lock_drvr(ha) == 0) {
++ if (ql4xxx_lock_drvr(a) == 0) {
+ ssleep(QL4_LOCK_DRVR_SLEEP);
+ if (drvr_wait) {
+ DEBUG2(printk("scsi%ld: %s: Waiting for "
+- "Global Init Semaphore(%d)...n",
+- ha->host_no,
++ "Global Init Semaphore(%d)...\n",
++ a->host_no,
+ __func__, drvr_wait));
+ }
+ drvr_wait -= QL4_LOCK_DRVR_SLEEP;
+ } else {
+ DEBUG2(printk("scsi%ld: %s: Global Init Semaphore "
+- "acquired.n", ha->host_no, __func__));
++ "acquired\n", a->host_no, __func__));
+ return QLA_SUCCESS;
+ }
+ }
+@@ -1125,17 +1144,17 @@
+
+ /* Initialize the Host adapter request/response queues and firmware */
+ if (qla4xxx_start_firmware(ha) == QLA_ERROR)
+- return status;
++ goto exit_init_hba;
+
+ if (qla4xxx_validate_mac_address(ha) == QLA_ERROR)
+- return status;
++ goto exit_init_hba;
+
+ if (qla4xxx_init_local_data(ha) == QLA_ERROR)
+- return status;
++ goto exit_init_hba;
+
+ status = qla4xxx_init_firmware(ha);
+ if (status == QLA_ERROR)
+- return status;
++ goto exit_init_hba;
+
+ /*
+ * FW is waiting to get an IP address from DHCP server: Skip building
+@@ -1143,12 +1162,12 @@
+ * followed by 0x8014 aen" to trigger the tgt discovery process.
+ */
+ if (ha->firmware_state & FW_STATE_DHCP_IN_PROGRESS)
+- return status;
++ goto exit_init_online;
+
+ /* Skip device discovery if ip and subnet is zero */
+ if (memcmp(ha->ip_address, ip_address, IP_ADDR_LEN) == 0 ||
+ memcmp(ha->subnet_mask, ip_address, IP_ADDR_LEN) == 0)
+- return status;
++ goto exit_init_online;
+
+ if (renew_ddb_list == PRESERVE_DDB_LIST) {
+ /*
+@@ -1177,9 +1196,10 @@
+ ha->host_no));
+ }
+
+- exit_init_hba:
++exit_init_online:
++ set_bit(AF_ONLINE, &ha->flags);
++exit_init_hba:
+ return status;
+-
+ }
+
+ /**
+@@ -1193,9 +1213,10 @@
+ uint32_t fw_ddb_index)
+ {
+ struct ddb_entry * ddb_entry;
++ uint32_t new_tgt;
+
+ /* First allocate a device structure */
+- ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt);
+ if (ddb_entry == NULL) {
+ DEBUG2(printk(KERN_WARNING
+ "scsi%ld: Unable to allocate memory to add "
+@@ -1203,6 +1224,18 @@
+ return;
+ }
+
++ if (!new_tgt && (ddb_entry->fw_ddb_index != fw_ddb_index)) {
++ /* Target has been bound to a new fw_ddb_index */
++ qla4xxx_free_ddb(ha, ddb_entry);
++ ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
++ if (ddb_entry == NULL) {
++ DEBUG2(printk(KERN_WARNING
++ "scsi%ld: Unable to allocate memory"
++ " to add fw_ddb_index %d\n",
++ ha->host_no, fw_ddb_index));
++ return;
++ }
++ }
+ if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) ==
+ QLA_ERROR) {
+ ha->fw_ddb_index_map[fw_ddb_index] =
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,6 +6,10 @@
+ */
+
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
++
+
+ #include <scsi/scsi_tcq.h>
+
+@@ -141,11 +145,13 @@
+ uint16_t avail_dsds;
+ struct data_seg_a64 *cur_dsd;
+ struct scsi_cmnd *cmd;
++ struct scatterlist *sg;
++ int i;
+
+ cmd = srb->cmd;
+ ha = srb->ha;
+
+- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ /* No data being transferred */
+ cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
+ return;
+@@ -154,14 +160,7 @@
+ avail_dsds = COMMAND_SEG;
+ cur_dsd = (struct data_seg_a64 *) & (cmd_entry->dataseg[0]);
+
+- /* Load data segments */
+- if (cmd->use_sg) {
+- struct scatterlist *cur_seg;
+- struct scatterlist *end_seg;
+-
+- cur_seg = (struct scatterlist *)cmd->request_buffer;
+- end_seg = cur_seg + tot_dsds;
+- while (cur_seg < end_seg) {
++ scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ dma_addr_t sle_dma;
+
+ /* Allocate additional continuation packets? */
+@@ -175,19 +174,13 @@
+ avail_dsds = CONTINUE_SEG;
+ }
+
+- sle_dma = sg_dma_address(cur_seg);
++ sle_dma = sg_dma_address(sg);
+ cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma));
+ cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma));
+- cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg));
++ cur_dsd->count = cpu_to_le32(sg_dma_len(sg));
+ avail_dsds--;
+
+ cur_dsd++;
+- cur_seg++;
+- }
+- } else {
+- cur_dsd->base.addrLow = cpu_to_le32(LSDW(srb->dma_handle));
+- cur_dsd->base.addrHigh = cpu_to_le32(MSDW(srb->dma_handle));
+- cur_dsd->count = cpu_to_le32(cmd->request_bufflen);
+ }
+ }
+
+@@ -204,8 +197,8 @@
+ struct scsi_cmnd *cmd = srb->cmd;
+ struct ddb_entry *ddb_entry;
+ struct command_t3_entry *cmd_entry;
+- struct scatterlist *sg = NULL;
+
++ int nseg;
+ uint16_t tot_dsds;
+ uint16_t req_cnt;
+
+@@ -233,24 +226,11 @@
+ index = (uint32_t)cmd->request->tag;
+
+ /* Calculate the number of request entries needed. */
+- if (cmd->use_sg) {
+- sg = (struct scatterlist *)cmd->request_buffer;
+- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- if (tot_dsds == 0)
++ nseg = scsi_dma_map(cmd);
++ if (nseg < 0)
+ goto queuing_error;
+- } else if (cmd->request_bufflen) {
+- dma_addr_t req_dma;
++ tot_dsds = nseg;
+
+- req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- if (dma_mapping_error(req_dma))
+- goto queuing_error;
+-
+- srb->dma_handle = req_dma;
+- tot_dsds = 1;
+- }
+ req_cnt = qla4xxx_calc_request_entries(tot_dsds);
+
+ if (ha->req_q_count < (req_cnt + 2)) {
+@@ -279,7 +259,7 @@
+
+ int_to_scsilun(cmd->device->lun, &cmd_entry->lun);
+ cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
+- cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen);
++ cmd_entry->ttlByteCnt = cpu_to_le32(scsi_bufflen(cmd));
+ memcpy(cmd_entry->cdb, cmd->cmnd, cmd->cmd_len);
+ cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds);
+ cmd_entry->hdr.entryCount = req_cnt;
+@@ -289,13 +269,13 @@
+ * transferred, as the data direction bit is sometimed filled
+ * in when there is no data to be transferred */
+ cmd_entry->control_flags = CF_NO_DATA;
+- if (cmd->request_bufflen) {
++ if (scsi_bufflen(cmd)) {
+ if (cmd->sc_data_direction == DMA_TO_DEVICE)
+ cmd_entry->control_flags = CF_WRITE;
+ else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+ cmd_entry->control_flags = CF_READ;
+
+- ha->bytes_xfered += cmd->request_bufflen;
++ ha->bytes_xfered += scsi_bufflen(cmd);
+ if (ha->bytes_xfered & ~0xFFFFF){
+ ha->total_mbytes_xferred += ha->bytes_xfered >> 20;
+ ha->bytes_xfered &= 0xFFFFF;
+@@ -359,14 +339,9 @@
+ return QLA_SUCCESS;
+
+ queuing_error:
++ if (tot_dsds)
++ scsi_dma_unmap(cmd);
+
+- if (cmd->use_sg && tot_dsds) {
+- sg = (struct scatterlist *) cmd->request_buffer;
+- pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+- cmd->sc_data_direction);
+- } else if (tot_dsds)
+- pci_unmap_single(ha->pdev, srb->dma_handle,
+- cmd->request_bufflen, cmd->sc_data_direction);
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+ return QLA_ERROR;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,6 +6,9 @@
+ */
+
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+ /**
+ * qla2x00_process_completed_request() - Process a Fast Post response.
+@@ -92,7 +95,7 @@
+
+ if (sts_entry->iscsiFlags &
+ (ISCSI_FLAG_RESIDUAL_OVER|ISCSI_FLAG_RESIDUAL_UNDER))
+- cmd->resid = residual;
++ scsi_set_resid(cmd, residual);
+
+ cmd->result = DID_OK << 16 | scsi_status;
+
+@@ -176,14 +179,14 @@
+ * Firmware detected a SCSI transport underrun
+ * condition
+ */
+- cmd->resid = residual;
++ scsi_set_resid(cmd, residual);
+ DEBUG2(printk("scsi%ld:%d:%d:%d: %s: UNDERRUN status "
+ "detected, xferlen = 0x%x, residual = "
+ "0x%x\n",
+ ha->host_no, cmd->device->channel,
+ cmd->device->id,
+ cmd->device->lun, __func__,
+- cmd->request_bufflen,
++ scsi_bufflen(cmd),
+ residual));
+ }
+
+@@ -227,7 +230,7 @@
+ if ((sts_entry->iscsiFlags &
+ ISCSI_FLAG_RESIDUAL_UNDER) == 0) {
+ cmd->result = DID_BUS_BUSY << 16;
+- } else if ((cmd->request_bufflen - residual) <
++ } else if ((scsi_bufflen(cmd) - residual) <
+ cmd->underflow) {
+ /*
+ * Handle mid-layer underflow???
+@@ -248,7 +251,7 @@
+ cmd->device->channel,
+ cmd->device->id,
+ cmd->device->lun, __func__,
+- cmd->request_bufflen, residual));
++ scsi_bufflen(cmd), residual));
+
+ cmd->result = DID_ERROR << 16;
+ } else {
+@@ -417,6 +420,7 @@
+ uint32_t mbox_status)
+ {
+ int i;
++ uint32_t mbox_stat2, mbox_stat3;
+
+ if ((mbox_status == MBOX_STS_BUSY) ||
+ (mbox_status == MBOX_STS_INTERMEDIATE_COMPLETION) ||
+@@ -437,6 +441,12 @@
+ } else if (mbox_status >> 12 == MBOX_ASYNC_EVENT_STATUS) {
+ /* Immediately process the AENs that don't require much work.
+ * Only queue the database_changed AENs */
++ if (ha->aen_log.count < MAX_AEN_ENTRIES) {
++ for (i = 0; i < MBOX_AEN_REG_COUNT; i++)
++ ha->aen_log.entry[ha->aen_log.count].mbox_sts[i] =
++ readl(&ha->reg->mailbox[i]);
++ ha->aen_log.count++;
++ }
+ switch (mbox_status) {
+ case MBOX_ASTS_SYSTEM_ERROR:
+ /* Log Mailbox registers */
+@@ -493,6 +503,16 @@
+ mbox_status));
+ break;
+
++ case MBOX_ASTS_IP_ADDR_STATE_CHANGED:
++ mbox_stat2 = readl(&ha->reg->mailbox[2]);
++ mbox_stat3 = readl(&ha->reg->mailbox[3]);
++
++ if ((mbox_stat3 == 5) && (mbox_stat2 == 3))
++ set_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags);
++ else if ((mbox_stat3 == 2) && (mbox_stat2 == 5))
++ set_bit(DPC_RESET_HA, &ha->dpc_flags);
++ break;
++
+ case MBOX_ASTS_MAC_ADDRESS_CHANGED:
+ case MBOX_ASTS_DNS:
+ /* No action */
+@@ -518,11 +538,6 @@
+ /* Queue AEN information and process it in the DPC
+ * routine */
+ if (ha->aen_q_count > 0) {
+- /* advance pointer */
+- if (ha->aen_in == (MAX_AEN_ENTRIES - 1))
+- ha->aen_in = 0;
+- else
+- ha->aen_in++;
+
+ /* decrement available counter */
+ ha->aen_q_count--;
+@@ -542,6 +557,10 @@
+ ha->aen_q[ha->aen_in].mbox_sts[2],
+ ha->aen_q[ha->aen_in].mbox_sts[3],
+ ha->aen_q[ha->aen_in]. mbox_sts[4]));
++ /* advance pointer */
++ ha->aen_in++;
++ if (ha->aen_in == MAX_AEN_ENTRIES)
++ ha->aen_in = 0;
+
+ /* The DPC routine will process the aen */
+ set_bit(DPC_AEN, &ha->dpc_flags);
+@@ -724,25 +743,24 @@
+
+ spin_lock_irqsave(&ha->hardware_lock, flags);
+ while (ha->aen_out != ha->aen_in) {
+- /* Advance pointers for next entry */
+- if (ha->aen_out == (MAX_AEN_ENTRIES - 1))
+- ha->aen_out = 0;
+- else
+- ha->aen_out++;
+-
+- ha->aen_q_count++;
+ aen = &ha->aen_q[ha->aen_out];
+-
+ /* copy aen information to local structure */
+ for (i = 0; i < MBOX_AEN_REG_COUNT; i++)
+ mbox_sts[i] = aen->mbox_sts[i];
+
++ ha->aen_q_count++;
++ ha->aen_out++;
++
++ if (ha->aen_out == MAX_AEN_ENTRIES)
++ ha->aen_out = 0;
++
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+- DEBUG(printk("scsi%ld: AEN[%d] %04x, index [%d] state=%04x "
+- "mod=%x conerr=%08x \n", ha->host_no, ha->aen_out,
+- mbox_sts[0], mbox_sts[2], mbox_sts[3],
+- mbox_sts[1], mbox_sts[4]));
++ DEBUG2(printk("qla4xxx(%ld): AEN[%d]=0x%08x, mbx1=0x%08x mbx2=0x%08x"
++ " mbx3=0x%08x mbx4=0x%08x\n", ha->host_no,
++ (ha->aen_out ? (ha->aen_out-1): (MAX_AEN_ENTRIES-1)),
++ mbox_sts[0], mbox_sts[1], mbox_sts[2],
++ mbox_sts[3], mbox_sts[4]));
+
+ switch (mbox_sts[0]) {
+ case MBOX_ASTS_DATABASE_CHANGED:
+@@ -792,6 +810,5 @@
+ spin_lock_irqsave(&ha->hardware_lock, flags);
+ }
+ spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-
+ }
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,6 +6,9 @@
+ */
+
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+
+ /**
+@@ -169,84 +172,6 @@
+ return status;
+ }
+
+-
+-#if 0
+-
+-/**
+- * qla4xxx_issue_iocb - issue mailbox iocb command
+- * @ha: adapter state pointer.
+- * @buffer: buffer pointer.
+- * @phys_addr: physical address of buffer.
+- * @size: size of buffer.
+- *
+- * Issues iocbs via mailbox commands.
+- * TARGET_QUEUE_LOCK must be released.
+- * ADAPTER_STATE_LOCK must be released.
+- **/
+-int
+-qla4xxx_issue_iocb(struct scsi_qla_host * ha, void *buffer,
+- dma_addr_t phys_addr, size_t size)
+-{
+- uint32_t mbox_cmd[MBOX_REG_COUNT];
+- uint32_t mbox_sts[MBOX_REG_COUNT];
+- int status;
+-
+- memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+- memset(&mbox_sts, 0, sizeof(mbox_sts));
+- mbox_cmd[0] = MBOX_CMD_EXECUTE_IOCB_A64;
+- mbox_cmd[1] = 0;
+- mbox_cmd[2] = LSDW(phys_addr);
+- mbox_cmd[3] = MSDW(phys_addr);
+- status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
+- return status;
+-}
+-
+-int qla4xxx_conn_close_sess_logout(struct scsi_qla_host * ha,
+- uint16_t fw_ddb_index,
+- uint16_t connection_id,
+- uint16_t option)
+-{
+- uint32_t mbox_cmd[MBOX_REG_COUNT];
+- uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+- memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+- memset(&mbox_sts, 0, sizeof(mbox_sts));
+- mbox_cmd[0] = MBOX_CMD_CONN_CLOSE_SESS_LOGOUT;
+- mbox_cmd[1] = fw_ddb_index;
+- mbox_cmd[2] = connection_id;
+- mbox_cmd[3] = LOGOUT_OPTION_RELOGIN;
+- if (qla4xxx_mailbox_command(ha, 4, 2, &mbox_cmd[0], &mbox_sts[0]) !=
+- QLA_SUCCESS) {
+- DEBUG2(printk("scsi%ld: %s: MBOX_CMD_CONN_CLOSE_SESS_LOGOUT "
+- "option %04x failed sts %04X %04X",
+- ha->host_no, __func__,
+- option, mbox_sts[0], mbox_sts[1]));
+- if (mbox_sts[0] == 0x4005)
+- DEBUG2(printk("%s reason %04X\n", __func__,
+- mbox_sts[1]));
+- }
+- return QLA_SUCCESS;
+-}
+-
+-int qla4xxx_clear_database_entry(struct scsi_qla_host * ha,
+- uint16_t fw_ddb_index)
+-{
+- uint32_t mbox_cmd[MBOX_REG_COUNT];
+- uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+- memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+- memset(&mbox_sts, 0, sizeof(mbox_sts));
+- mbox_cmd[0] = MBOX_CMD_CLEAR_DATABASE_ENTRY;
+- mbox_cmd[1] = fw_ddb_index;
+- if (qla4xxx_mailbox_command(ha, 2, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+- QLA_SUCCESS)
+- return QLA_ERROR;
+-
+- return QLA_SUCCESS;
+-}
+-
+-#endif /* 0 */
+-
+ /**
+ * qla4xxx_initialize_fw_cb - initializes firmware control block.
+ * @ha: Pointer to host adapter structure.
+@@ -272,10 +197,13 @@
+ /* Get Initialize Firmware Control Block. */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
+ mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ mbox_cmd[3] = MSDW(init_fw_cb_dma);
+- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ dma_free_coherent(&ha->pdev->dev,
+ sizeof(struct init_fw_ctrl_blk),
+@@ -287,51 +215,56 @@
+ qla4xxx_init_rings(ha);
+
+ /* Fill in the request and response queue information. */
+- init_fw_cb->ReqQConsumerIndex = cpu_to_le16(ha->request_out);
+- init_fw_cb->ComplQProducerIndex = cpu_to_le16(ha->response_in);
+- init_fw_cb->ReqQLen = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
+- init_fw_cb->ComplQLen = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
+- init_fw_cb->ReqQAddrLo = cpu_to_le32(LSDW(ha->request_dma));
+- init_fw_cb->ReqQAddrHi = cpu_to_le32(MSDW(ha->request_dma));
+- init_fw_cb->ComplQAddrLo = cpu_to_le32(LSDW(ha->response_dma));
+- init_fw_cb->ComplQAddrHi = cpu_to_le32(MSDW(ha->response_dma));
+- init_fw_cb->ShadowRegBufAddrLo =
++ init_fw_cb->pri.rqq_consumer_idx = cpu_to_le16(ha->request_out);
++ init_fw_cb->pri.compq_producer_idx = cpu_to_le16(ha->response_in);
++ init_fw_cb->pri.rqq_len = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
++ init_fw_cb->pri.compq_len = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
++ init_fw_cb->pri.rqq_addr_lo = cpu_to_le32(LSDW(ha->request_dma));
++ init_fw_cb->pri.rqq_addr_hi = cpu_to_le32(MSDW(ha->request_dma));
++ init_fw_cb->pri.compq_addr_lo = cpu_to_le32(LSDW(ha->response_dma));
++ init_fw_cb->pri.compq_addr_hi = cpu_to_le32(MSDW(ha->response_dma));
++ init_fw_cb->pri.shdwreg_addr_lo =
+ cpu_to_le32(LSDW(ha->shadow_regs_dma));
+- init_fw_cb->ShadowRegBufAddrHi =
++ init_fw_cb->pri.shdwreg_addr_hi =
+ cpu_to_le32(MSDW(ha->shadow_regs_dma));
+
+ /* Set up required options. */
+- init_fw_cb->FwOptions |=
++ init_fw_cb->pri.fw_options |=
+ __constant_cpu_to_le16(FWOPT_SESSION_MODE |
+ FWOPT_INITIATOR_MODE);
+- init_fw_cb->FwOptions &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
++ init_fw_cb->pri.fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
+
+ /* Save some info in adapter structure. */
+- ha->firmware_options = le16_to_cpu(init_fw_cb->FwOptions);
+- ha->tcp_options = le16_to_cpu(init_fw_cb->TCPOptions);
+- ha->heartbeat_interval = init_fw_cb->HeartbeatInterval;
+- memcpy(ha->ip_address, init_fw_cb->IPAddr,
+- min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr)));
+- memcpy(ha->subnet_mask, init_fw_cb->SubnetMask,
+- min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask)));
+- memcpy(ha->gateway, init_fw_cb->GatewayIPAddr,
+- min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr)));
+- memcpy(ha->name_string, init_fw_cb->iSCSINameString,
++ ha->firmware_options = le16_to_cpu(init_fw_cb->pri.fw_options);
++ ha->tcp_options = le16_to_cpu(init_fw_cb->pri.ipv4_tcp_opts);
++ ha->heartbeat_interval = init_fw_cb->pri.hb_interval;
++ memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr,
++ min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr)));
++ memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet,
++ min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet)));
++ memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr,
++ min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr)));
++ memcpy(ha->name_string, init_fw_cb->pri.iscsi_name,
+ min(sizeof(ha->name_string),
+- sizeof(init_fw_cb->iSCSINameString)));
+- memcpy(ha->alias, init_fw_cb->Alias,
+- min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));
++ sizeof(init_fw_cb->pri.iscsi_name)));
++ /*memcpy(ha->alias, init_fw_cb->Alias,
++ min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/
+
+ /* Save Command Line Paramater info */
+- ha->port_down_retry_count = le16_to_cpu(init_fw_cb->KeepAliveTimeout);
++ ha->port_down_retry_count = le16_to_cpu(init_fw_cb->pri.conn_ka_timeout);
+ ha->discovery_wait = ql4xdiscoverywait;
+
+ /* Send Initialize Firmware Control Block. */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE;
+ mbox_cmd[1] = 0;
+ mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ mbox_cmd[3] = MSDW(init_fw_cb_dma);
+- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) ==
+ QLA_SUCCESS)
+ status = QLA_SUCCESS;
+ else {
+@@ -368,12 +301,14 @@
+ /* Get Initialize Firmware Control Block. */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ memset(init_fw_cb, 0, sizeof(struct init_fw_ctrl_blk));
+ mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
+ mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ mbox_cmd[3] = MSDW(init_fw_cb_dma);
++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
+
+- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: Failed to get init_fw_ctrl_blk\n",
+ ha->host_no, __func__));
+@@ -384,12 +319,12 @@
+ }
+
+ /* Save IP Address. */
+- memcpy(ha->ip_address, init_fw_cb->IPAddr,
+- min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr)));
+- memcpy(ha->subnet_mask, init_fw_cb->SubnetMask,
+- min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask)));
+- memcpy(ha->gateway, init_fw_cb->GatewayIPAddr,
+- min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr)));
++ memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr,
++ min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr)));
++ memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet,
++ min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet)));
++ memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr,
++ min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr)));
+
+ dma_free_coherent(&ha->pdev->dev, sizeof(struct init_fw_ctrl_blk),
+ init_fw_cb, init_fw_cb_dma);
+@@ -409,8 +344,10 @@
+ /* Get firmware version */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_GET_FW_STATE;
+- if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 4, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATE failed w/ "
+ "status %04X\n", ha->host_no, __func__,
+@@ -438,8 +375,10 @@
+ /* Get firmware version */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_GET_FW_STATUS;
+- if (qla4xxx_mailbox_command(ha, 1, 3, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATUS failed w/ "
+ "status %04X\n", ha->host_no, __func__,
+@@ -491,11 +430,14 @@
+ }
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY;
+ mbox_cmd[1] = (uint32_t) fw_ddb_index;
+ mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
+ mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
+- if (qla4xxx_mailbox_command(ha, 4, 7, &mbox_cmd[0], &mbox_sts[0]) ==
++ mbox_cmd[4] = sizeof(struct dev_db_entry);
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 7, &mbox_cmd[0], &mbox_sts[0]) ==
+ QLA_ERROR) {
+ DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_DATABASE_ENTRY failed"
+ " with status 0x%04X\n", ha->host_no, __func__,
+@@ -512,11 +454,11 @@
+ dev_info(&ha->pdev->dev, "DDB[%d] MB0 %04x Tot %d Next %d "
+ "State %04x ConnErr %08x %d.%d.%d.%d:%04d \"%s\"\n",
+ fw_ddb_index, mbox_sts[0], mbox_sts[2], mbox_sts[3],
+- mbox_sts[4], mbox_sts[5], fw_ddb_entry->ipAddr[0],
+- fw_ddb_entry->ipAddr[1], fw_ddb_entry->ipAddr[2],
+- fw_ddb_entry->ipAddr[3],
+- le16_to_cpu(fw_ddb_entry->portNumber),
+- fw_ddb_entry->iscsiName);
++ mbox_sts[4], mbox_sts[5], fw_ddb_entry->ip_addr[0],
++ fw_ddb_entry->ip_addr[1], fw_ddb_entry->ip_addr[2],
++ fw_ddb_entry->ip_addr[3],
++ le16_to_cpu(fw_ddb_entry->port),
++ fw_ddb_entry->iscsi_name);
+ }
+ if (num_valid_ddb_entries)
+ *num_valid_ddb_entries = mbox_sts[2];
+@@ -571,35 +513,10 @@
+ mbox_cmd[1] = (uint32_t) fw_ddb_index;
+ mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
+ mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
+- return qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
+-}
+-
+-#if 0
+-int qla4xxx_conn_open_session_login(struct scsi_qla_host * ha,
+- uint16_t fw_ddb_index)
+-{
+- int status = QLA_ERROR;
+- uint32_t mbox_cmd[MBOX_REG_COUNT];
+- uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+- /* Do not wait for completion. The firmware will send us an
+- * ASTS_DATABASE_CHANGED (0x8014) to notify us of the login status.
+- */
+- memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+- memset(&mbox_sts, 0, sizeof(mbox_sts));
+- mbox_cmd[0] = MBOX_CMD_CONN_OPEN_SESS_LOGIN;
+- mbox_cmd[1] = (uint32_t) fw_ddb_index;
+- mbox_cmd[2] = 0;
+- mbox_cmd[3] = 0;
+- mbox_cmd[4] = 0;
+- status = qla4xxx_mailbox_command(ha, 4, 0, &mbox_cmd[0], &mbox_sts[0]);
+- DEBUG2(printk("%s fw_ddb_index=%d status=%d mbx0_1=0x%x :0x%x\n",
+- __func__, fw_ddb_index, status, mbox_sts[0],
+- mbox_sts[1]);)
++ mbox_cmd[4] = sizeof(struct dev_db_entry);
+
+- return status;
++ return qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]);
+ }
+-#endif /* 0 */
+
+ /**
+ * qla4xxx_get_crash_record - retrieves crash record.
+@@ -614,12 +531,14 @@
+ struct crash_record *crash_record = NULL;
+ dma_addr_t crash_record_dma = 0;
+ uint32_t crash_record_size = 0;
++
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_cmd));
+
+ /* Get size of crash record. */
+ mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
+- if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve size!\n",
+ ha->host_no, __func__));
+@@ -639,11 +558,15 @@
+ goto exit_get_crash_record;
+
+ /* Get Crash Record. */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_cmd));
++
+ mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
+ mbox_cmd[2] = LSDW(crash_record_dma);
+ mbox_cmd[3] = MSDW(crash_record_dma);
+ mbox_cmd[4] = crash_record_size;
+- if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS)
+ goto exit_get_crash_record;
+
+@@ -655,7 +578,6 @@
+ crash_record, crash_record_dma);
+ }
+
+-#if 0
+ /**
+ * qla4xxx_get_conn_event_log - retrieves connection event log
+ * @ha: Pointer to host adapter structure.
+@@ -678,7 +600,8 @@
+
+ /* Get size of crash record. */
+ mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG;
+- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS)
+ goto exit_get_event_log;
+
+@@ -693,10 +616,14 @@
+ goto exit_get_event_log;
+
+ /* Get Crash Record. */
++ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++ memset(&mbox_sts, 0, sizeof(mbox_cmd));
++
+ mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG;
+ mbox_cmd[2] = LSDW(event_log_dma);
+ mbox_cmd[3] = MSDW(event_log_dma);
+- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve event "
+ "log!\n", ha->host_no, __func__));
+@@ -745,7 +672,6 @@
+ dma_free_coherent(&ha->pdev->dev, event_log_size, event_log,
+ event_log_dma);
+ }
+-#endif /* 0 */
+
+ /**
+ * qla4xxx_reset_lun - issues LUN Reset
+@@ -773,11 +699,13 @@
+ */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_LUN_RESET;
+ mbox_cmd[1] = ddb_entry->fw_ddb_index;
+ mbox_cmd[2] = lun << 8;
+ mbox_cmd[5] = 0x01; /* Immediate Command Enable */
+- qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]);
++
++ qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]);
+ if (mbox_sts[0] != MBOX_STS_COMMAND_COMPLETE &&
+ mbox_sts[0] != MBOX_STS_COMMAND_ERROR)
+ status = QLA_ERROR;
+@@ -794,12 +722,14 @@
+
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_READ_FLASH;
+ mbox_cmd[1] = LSDW(dma_addr);
+ mbox_cmd[2] = MSDW(dma_addr);
+ mbox_cmd[3] = offset;
+ mbox_cmd[4] = len;
+- if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 2, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: MBOX_CMD_READ_FLASH, failed w/ "
+ "status %04X %04X, offset %08x, len %08x\n", ha->host_no,
+@@ -825,8 +755,10 @@
+ /* Get firmware version. */
+ memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ mbox_cmd[0] = MBOX_CMD_ABOUT_FW;
+- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: MBOX_CMD_ABOUT_FW failed w/ "
+ "status %04X\n", ha->host_no, __func__, mbox_sts[0]));
+@@ -855,7 +787,7 @@
+ mbox_cmd[2] = LSDW(dma_addr);
+ mbox_cmd[3] = MSDW(dma_addr);
+
+- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ DEBUG2(printk("scsi%ld: %s: failed status %04X\n",
+ ha->host_no, __func__, mbox_sts[0]));
+@@ -875,7 +807,7 @@
+ mbox_cmd[0] = MBOX_CMD_REQUEST_DATABASE_ENTRY;
+ mbox_cmd[1] = MAX_PRST_DEV_DB_ENTRIES;
+
+- if (qla4xxx_mailbox_command(ha, 2, 3, &mbox_cmd[0], &mbox_sts[0]) !=
++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) !=
+ QLA_SUCCESS) {
+ if (mbox_sts[0] == MBOX_STS_COMMAND_ERROR) {
+ *ddb_index = mbox_sts[2];
+@@ -918,23 +850,23 @@
+ if (ret_val != QLA_SUCCESS)
+ goto qla4xxx_send_tgts_exit;
+
+- memset((void *)fw_ddb_entry->iSCSIAlias, 0,
+- sizeof(fw_ddb_entry->iSCSIAlias));
++ memset(fw_ddb_entry->iscsi_alias, 0,
++ sizeof(fw_ddb_entry->iscsi_alias));
+
+- memset((void *)fw_ddb_entry->iscsiName, 0,
+- sizeof(fw_ddb_entry->iscsiName));
++ memset(fw_ddb_entry->iscsi_name, 0,
++ sizeof(fw_ddb_entry->iscsi_name));
+
+- memset((void *)fw_ddb_entry->ipAddr, 0, sizeof(fw_ddb_entry->ipAddr));
+- memset((void *)fw_ddb_entry->targetAddr, 0,
+- sizeof(fw_ddb_entry->targetAddr));
++ memset(fw_ddb_entry->ip_addr, 0, sizeof(fw_ddb_entry->ip_addr));
++ memset(fw_ddb_entry->tgt_addr, 0,
++ sizeof(fw_ddb_entry->tgt_addr));
+
+ fw_ddb_entry->options = (DDB_OPT_DISC_SESSION | DDB_OPT_TARGET);
+- fw_ddb_entry->portNumber = cpu_to_le16(ntohs(port));
++ fw_ddb_entry->port = cpu_to_le16(ntohs(port));
+
+- fw_ddb_entry->ipAddr[0] = *ip;
+- fw_ddb_entry->ipAddr[1] = *(ip + 1);
+- fw_ddb_entry->ipAddr[2] = *(ip + 2);
+- fw_ddb_entry->ipAddr[3] = *(ip + 3);
++ fw_ddb_entry->ip_addr[0] = *ip;
++ fw_ddb_entry->ip_addr[1] = *(ip + 1);
++ fw_ddb_entry->ip_addr[2] = *(ip + 2);
++ fw_ddb_entry->ip_addr[3] = *(ip + 3);
+
+ ret_val = qla4xxx_set_ddb_entry(ha, ddb_index, fw_ddb_entry_dma);
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c 2007-12-21 15:36:12.000000000 -0500
+@@ -6,6 +6,9 @@
+ */
+
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+ static inline void eeprom_cmd(uint32_t cmd, struct scsi_qla_host *ha)
+ {
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c 2007-12-21 15:36:12.000000000 -0500
+@@ -10,6 +10,10 @@
+ #include <scsi/scsicam.h>
+
+ #include "ql4_def.h"
++#include "ql4_version.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+
+ /*
+ * Driver version
+@@ -50,12 +54,15 @@
+ /*
+ * iSCSI template entry points
+ */
+-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no,
+- uint32_t enable, struct sockaddr *dst_addr);
++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost,
++ enum iscsi_tgt_dscvr type, uint32_t enable,
++ struct sockaddr *dst_addr);
+ static int qla4xxx_conn_get_param(struct iscsi_cls_conn *conn,
+ enum iscsi_param param, char *buf);
+ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess,
+ enum iscsi_param param, char *buf);
++static int qla4xxx_host_get_param(struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf);
+ static void qla4xxx_conn_stop(struct iscsi_cls_conn *conn, int flag);
+ static int qla4xxx_conn_start(struct iscsi_cls_conn *conn);
+ static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session);
+@@ -95,16 +102,20 @@
+ static struct iscsi_transport qla4xxx_iscsi_transport = {
+ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+- .param_mask = ISCSI_CONN_PORT |
+- ISCSI_CONN_ADDRESS |
+- ISCSI_TARGET_NAME |
+- ISCSI_TPGT,
++ .caps = CAP_FW_DB | CAP_SENDTARGETS_OFFLOAD |
++ CAP_DATA_PATH_OFFLOAD,
++ .param_mask = ISCSI_CONN_PORT | ISCSI_CONN_ADDRESS |
++ ISCSI_TARGET_NAME | ISCSI_TPGT,
++ .host_param_mask = ISCSI_HOST_HWADDRESS |
++ ISCSI_HOST_IPADDRESS |
++ ISCSI_HOST_INITIATOR_NAME,
+ .sessiondata_size = sizeof(struct ddb_entry),
+ .host_template = &qla4xxx_driver_template,
+
+ .tgt_dscvr = qla4xxx_tgt_dscvr,
+ .get_conn_param = qla4xxx_conn_get_param,
+ .get_session_param = qla4xxx_sess_get_param,
++ .get_host_param = qla4xxx_host_get_param,
+ .start_conn = qla4xxx_conn_start,
+ .stop_conn = qla4xxx_conn_stop,
+ .session_recovery_timedout = qla4xxx_recovery_timedout,
+@@ -161,6 +172,43 @@
+ printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag);
+ }
+
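++/*
++ * format_addr - render @len bytes of @addr as colon-separated hex pairs
++ *
++ * For a 6-byte MAC this yields e.g. "00:0c:29:d3:4e:7a\n"; the final
++ * separator is replaced by a newline. Returns the number of characters
++ * written to @buf.
++ */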
++static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
++{
++ int i;
++ char *cp = buf;
++
++ for (i = 0; i < len; i++)
++ cp += sprintf(cp, "%02x%c", addr[i],
++ i == (len - 1) ? '\n' : ':');
++ return cp - buf;
++}
++
++
++static int qla4xxx_host_get_param(struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf)
++{
++ struct scsi_qla_host *ha = to_qla_host(shost);
++ int len;
++
++ switch (param) {
++ case ISCSI_HOST_PARAM_HWADDRESS:
++ len = format_addr(buf, ha->my_mac, MAC_ADDR_LEN);
++ break;
++ case ISCSI_HOST_PARAM_IPADDRESS:
++ len = sprintf(buf, "%d.%d.%d.%d\n", ha->ip_address[0],
++ ha->ip_address[1], ha->ip_address[2],
++ ha->ip_address[3]);
++ break;
++ case ISCSI_HOST_PARAM_INITIATOR_NAME:
++ len = sprintf(buf, "%s\n", ha->name_string);
++ break;
++ default:
++ return -ENOSYS;
++ }
++
++ return len;
++}
++
+ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess,
+ enum iscsi_param param, char *buf)
+ {
+@@ -208,21 +256,15 @@
+ return len;
+ }
+
+-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no,
+- uint32_t enable, struct sockaddr *dst_addr)
++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost,
++ enum iscsi_tgt_dscvr type, uint32_t enable,
++ struct sockaddr *dst_addr)
+ {
+ struct scsi_qla_host *ha;
+- struct Scsi_Host *shost;
+ struct sockaddr_in *addr;
+ struct sockaddr_in6 *addr6;
+ int ret = 0;
+
+- shost = scsi_host_lookup(host_no);
+- if (IS_ERR(shost)) {
+- printk(KERN_ERR "Could not find host no %u\n", host_no);
+- return -ENODEV;
+- }
+-
+ ha = (struct scsi_qla_host *) shost->hostdata;
+
+ switch (type) {
+@@ -246,8 +288,6 @@
+ default:
+ ret = -ENOSYS;
+ }
+-
+- scsi_host_put(shost);
+ return ret;
+ }
+
+@@ -369,14 +409,7 @@
+ struct scsi_cmnd *cmd = srb->cmd;
+
+ if (srb->flags & SRB_DMA_VALID) {
+- if (cmd->use_sg) {
+- pci_unmap_sg(ha->pdev, cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- } else if (cmd->request_bufflen) {
+- pci_unmap_single(ha->pdev, srb->dma_handle,
+- cmd->request_bufflen,
+- cmd->sc_data_direction);
+- }
++ scsi_dma_unmap(cmd);
+ srb->flags &= ~SRB_DMA_VALID;
+ }
+ cmd->SCp.ptr = NULL;
+@@ -711,7 +744,7 @@
+ return stat;
+ }
+
+-static void qla4xxx_hw_reset(struct scsi_qla_host *ha)
++void qla4xxx_hw_reset(struct scsi_qla_host *ha)
+ {
+ uint32_t ctrl_status;
+ unsigned long flags = 0;
+@@ -1081,13 +1114,13 @@
+ if (ha->timer_active)
+ qla4xxx_stop_timer(ha);
+
+- /* free extra memory */
+- qla4xxx_mem_free(ha);
+-
+ /* Detach interrupts */
+ if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags))
+ free_irq(ha->pdev->irq, ha);
+
++ /* free extra memory */
++ qla4xxx_mem_free(ha);
++
+ pci_disable_device(ha->pdev);
+
+ }
+@@ -1332,6 +1365,11 @@
+
+ ha = pci_get_drvdata(pdev);
+
++ qla4xxx_disable_intrs(ha);
++
++ while (test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags))
++ ssleep(1);
++
+ /* remove devs from iscsi_sessions to scsi_devices */
+ qla4xxx_free_ddb_list(ha);
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h 2007-12-21 15:36:12.000000000 -0500
+@@ -5,4 +5,5 @@
+ * See LICENSE.qla4xxx for copyright and licensing details.
+ */
+
+-#define QLA4XXX_DRIVER_VERSION "5.00.07-k1"
++#define QLA4XXX_DRIVER_VERSION "5.01.00-k7"
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/qlogicfas408.c linux-2.6.22-591/drivers/scsi/qlogicfas408.c
+--- linux-2.6.22-570/drivers/scsi/qlogicfas408.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/qlogicfas408.c 2007-12-21 15:36:12.000000000 -0500
+@@ -265,8 +265,6 @@
+ unsigned int message; /* scsi returned message */
+ unsigned int phase; /* recorded scsi phase */
+ unsigned int reqlen; /* total length of transfer */
+- struct scatterlist *sglist; /* scatter-gather list pointer */
+- unsigned int sgcount; /* sg counter */
+ char *buf;
+ struct qlogicfas408_priv *priv = get_priv_by_cmd(cmd);
+ int qbase = priv->qbase;
+@@ -301,9 +299,10 @@
+ if (inb(qbase + 7) & 0x1f) /* if some bytes in fifo */
+ outb(1, qbase + 3); /* clear fifo */
+ /* note that request_bufflen is the total xfer size when sg is used */
+- reqlen = cmd->request_bufflen;
++ reqlen = scsi_bufflen(cmd);
+ /* note that it won't work if transfers > 16M are requested */
+ if (reqlen && !((phase = inb(qbase + 4)) & 6)) { /* data phase */
++ struct scatterlist *sg;
+ rtrc(2)
+ outb(reqlen, qbase); /* low-mid xfer cnt */
+ outb(reqlen >> 8, qbase + 1); /* low-mid xfer cnt */
+@@ -311,23 +310,16 @@
+ outb(0x90, qbase + 3); /* command do xfer */
+ /* PIO pseudo DMA to buffer or sglist */
+ REG1;
+- if (!cmd->use_sg)
+- ql_pdma(priv, phase, cmd->request_buffer,
+- cmd->request_bufflen);
+- else {
+- sgcount = cmd->use_sg;
+- sglist = cmd->request_buffer;
+- while (sgcount--) {
++
++ scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
+ if (priv->qabort) {
+ REG0;
+ return ((priv->qabort == 1 ?
+ DID_ABORT : DID_RESET) << 16);
+ }
+- buf = page_address(sglist->page) + sglist->offset;
+- if (ql_pdma(priv, phase, buf, sglist->length))
++ buf = page_address(sg->page) + sg->offset;
++ if (ql_pdma(priv, phase, buf, sg->length))
+ break;
+- sglist++;
+- }
+ }
+ REG0;
+ rtrc(2)
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_debug.c linux-2.6.22-591/drivers/scsi/scsi_debug.c
+--- linux-2.6.22-570/drivers/scsi/scsi_debug.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_debug.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2405,7 +2405,7 @@
+ MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)");
+ MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
+ MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
+-MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)");
++MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)");
+ MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)");
+ MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
+ MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)");
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_error.c linux-2.6.22-591/drivers/scsi/scsi_error.c
+--- linux-2.6.22-570/drivers/scsi/scsi_error.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_error.c 2007-12-21 15:36:12.000000000 -0500
+@@ -18,12 +18,13 @@
+ #include <linux/sched.h>
+ #include <linux/timer.h>
+ #include <linux/string.h>
+-#include <linux/slab.h>
+ #include <linux/kernel.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/interrupt.h>
+ #include <linux/blkdev.h>
+ #include <linux/delay.h>
++#include <linux/scatterlist.h>
+
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+@@ -640,16 +641,8 @@
+ memcpy(scmd->cmnd, cmnd, cmnd_size);
+
+ if (copy_sense) {
+- gfp_t gfp_mask = GFP_ATOMIC;
+-
+- if (shost->hostt->unchecked_isa_dma)
+- gfp_mask |= __GFP_DMA;
+-
+- sgl.page = alloc_page(gfp_mask);
+- if (!sgl.page)
+- return FAILED;
+- sgl.offset = 0;
+- sgl.length = 252;
++ sg_init_one(&sgl, scmd->sense_buffer,
++ sizeof(scmd->sense_buffer));
+
+ scmd->sc_data_direction = DMA_FROM_DEVICE;
+ scmd->request_bufflen = sgl.length;
+@@ -720,18 +713,6 @@
+
+
+ /*
+- * Last chance to have valid sense data.
+- */
+- if (copy_sense) {
+- if (!SCSI_SENSE_VALID(scmd)) {
+- memcpy(scmd->sense_buffer, page_address(sgl.page),
+- sizeof(scmd->sense_buffer));
+- }
+- __free_page(sgl.page);
+- }
+-
+-
+- /*
+ * Restore original data
+ */
+ scmd->request_buffer = old_buffer;
+@@ -1536,8 +1517,6 @@
+ {
+ struct Scsi_Host *shost = data;
+
+- current->flags |= PF_NOFREEZE;
+-
+ /*
+ * We use TASK_INTERRUPTIBLE so that the thread is not
+ * counted against the load average as a running process.
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_lib.c linux-2.6.22-591/drivers/scsi/scsi_lib.c
+--- linux-2.6.22-570/drivers/scsi/scsi_lib.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_lib.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2290,3 +2290,41 @@
+ kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+ }
+ EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
++
++/**
++ * scsi_dma_map - perform DMA mapping against command's sg lists
++ * @cmd: scsi command
++ *
++ * Returns the number of sg entries actually mapped, zero if the sg list
++ * is NULL, or -ENOMEM if the mapping failed.
++ */
++int scsi_dma_map(struct scsi_cmnd *cmd)
++{
++ int nseg = 0;
++
++ if (scsi_sg_count(cmd)) {
++ struct device *dev = cmd->device->host->shost_gendev.parent;
++
++ nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
++ cmd->sc_data_direction);
++ if (unlikely(!nseg))
++ return -ENOMEM;
++ }
++ return nseg;
++}
++EXPORT_SYMBOL(scsi_dma_map);
++
++/**
++ * scsi_dma_unmap - unmap command's sg lists mapped by scsi_dma_map
++ * @cmd: scsi command
++ */
++void scsi_dma_unmap(struct scsi_cmnd *cmd)
++{
++ if (scsi_sg_count(cmd)) {
++ struct device *dev = cmd->device->host->shost_gendev.parent;
++
++ dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
++ cmd->sc_data_direction);
++ }
++}
++EXPORT_SYMBOL(scsi_dma_unmap);
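++
++/*
++ * Typical caller pattern (illustrative sketch; build_dsd() stands in for
++ * a driver-specific descriptor builder):
++ *
++ *	nseg = scsi_dma_map(cmd);
++ *	if (nseg < 0)
++ *		goto queuing_error;
++ *	scsi_for_each_sg(cmd, sg, nseg, i)
++ *		build_dsd(sg_dma_address(sg), sg_dma_len(sg));
++ *	...
++ *	scsi_dma_unmap(cmd);
++ */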
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_netlink.c linux-2.6.22-591/drivers/scsi/scsi_netlink.c
+--- linux-2.6.22-570/drivers/scsi/scsi_netlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_netlink.c 2007-12-21 15:36:14.000000000 -0500
+@@ -167,7 +167,7 @@
+ return;
+ }
+
+- scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
++ scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
+ SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+ THIS_MODULE);
+ if (!scsi_nl_sock) {
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_scan.c linux-2.6.22-591/drivers/scsi/scsi_scan.c
+--- linux-2.6.22-570/drivers/scsi/scsi_scan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_scan.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1213,7 +1213,7 @@
+ * Given a struct scsi_lun of: 0a 04 0b 03 00 00 00 00, this function returns
+ * the integer: 0x0b030a04
+ **/
+-static int scsilun_to_int(struct scsi_lun *scsilun)
++int scsilun_to_int(struct scsi_lun *scsilun)
+ {
+ int i;
+ unsigned int lun;
+@@ -1224,6 +1224,7 @@
+ scsilun->scsi_lun[i + 1]) << (i * 8));
+ return lun;
+ }
++EXPORT_SYMBOL(scsilun_to_int);
+
+ /**
+ * int_to_scsilun: reverts an int into a scsi_lun
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_sysfs.c linux-2.6.22-591/drivers/scsi/scsi_sysfs.c
+--- linux-2.6.22-570/drivers/scsi/scsi_sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -293,30 +293,18 @@
+ {
+ struct device_driver *drv = dev->driver;
+ struct scsi_device *sdev = to_scsi_device(dev);
+- struct scsi_host_template *sht = sdev->host->hostt;
+ int err;
+
+ err = scsi_device_quiesce(sdev);
+ if (err)
+ return err;
+
+- /* call HLD suspend first */
+ if (drv && drv->suspend) {
+ err = drv->suspend(dev, state);
+ if (err)
+ return err;
+ }
+
+- /* then, call host suspend */
+- if (sht->suspend) {
+- err = sht->suspend(sdev, state);
+- if (err) {
+- if (drv && drv->resume)
+- drv->resume(dev);
+- return err;
+- }
+- }
+-
+ return 0;
+ }
+
+@@ -324,21 +312,14 @@
+ {
+ struct device_driver *drv = dev->driver;
+ struct scsi_device *sdev = to_scsi_device(dev);
+- struct scsi_host_template *sht = sdev->host->hostt;
+- int err = 0, err2 = 0;
++ int err = 0;
+
+- /* call host resume first */
+- if (sht->resume)
+- err = sht->resume(sdev);
+-
+- /* then, call HLD resume */
+ if (drv && drv->resume)
+- err2 = drv->resume(dev);
++ err = drv->resume(dev);
+
+ scsi_device_resume(sdev);
+
+- /* favor LLD failure */
+- return err ? err : err2;;
++ return err;
+ }
+
+ struct bus_type scsi_bus_type = {
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c
+--- linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -19,9 +19,10 @@
+ *
+ * ========
+ *
+- * Copyright (C) 2004-2005 James Smart, Emulex Corporation
++ * Copyright (C) 2004-2007 James Smart, Emulex Corporation
+ * Rewrite for host, target, device, and remote port attributes,
+ * statistics, and service functions...
++ * Add vports, etc
+ *
+ */
+ #include <linux/module.h>
+@@ -37,6 +38,34 @@
+ #include "scsi_priv.h"
+
+ static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
++static void fc_vport_sched_delete(struct work_struct *work);
++
++/*
++ * This is a temporary carrier for creating a vport. It will eventually
++ * be replaced by a real message definition for sgio or netlink.
++ *
++ * fc_vport_identifiers: This set of data contains all elements
++ * to uniquely identify and instantiate a FC virtual port.
++ *
++ * Notes:
++ * symbolic_name: The driver is to append the symbolic_name string data
++ * to the symbolic_node_name data that it generates by default.
++ * The resulting combination should then be registered with the switch.
++ * It is expected that things like Xen may stuff a VM title into
++ * this field.
++ */
++struct fc_vport_identifiers {
++ u64 node_name;
++ u64 port_name;
++ u32 roles;
++ bool disable;
++ enum fc_port_type vport_type; /* only FC_PORTTYPE_NPIV allowed */
++ char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
++};
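++
++/*
++ * For illustration (the WWNs are made-up values), a caller fills
++ * this the same way store_fc_host_vport_create() below does:
++ *
++ *	struct fc_vport_identifiers vid;
++ *
++ *	memset(&vid, 0, sizeof(vid));
++ *	vid.port_name = 0x2101001b32a9d5feULL;
++ *	vid.node_name = 0x2001001b32a9d5feULL;
++ *	vid.roles = FC_PORT_ROLE_FCP_INITIATOR;
++ *	vid.vport_type = FC_PORTTYPE_NPIV;
++ *	vid.disable = false;
++ */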
++
++static int fc_vport_create(struct Scsi_Host *shost, int channel,
++ struct device *pdev, struct fc_vport_identifiers *ids,
++ struct fc_vport **vport);
+
+ /*
+ * Redefine so that we can have same named attributes in the
+@@ -90,10 +119,14 @@
+ { FC_PORTTYPE_NLPORT, "NLPort (fabric via loop)" },
+ { FC_PORTTYPE_LPORT, "LPort (private loop)" },
+ { FC_PORTTYPE_PTP, "Point-To-Point (direct nport connection)" },
++ { FC_PORTTYPE_NPIV, "NPIV VPORT" },
+ };
+ fc_enum_name_search(port_type, fc_port_type, fc_port_type_names)
+ #define FC_PORTTYPE_MAX_NAMELEN 50
+
++/* Reuse fc_port_type enum function for vport_type */
++#define get_fc_vport_type_name get_fc_port_type_name
++
+
+ /* Convert fc_host_event_code values to ascii string name */
+ static const struct {
+@@ -139,6 +172,29 @@
+ #define FC_PORTSTATE_MAX_NAMELEN 20
+
+
++/* Convert fc_vport_state values to ascii string name */
++static struct {
++ enum fc_vport_state value;
++ char *name;
++} fc_vport_state_names[] = {
++ { FC_VPORT_UNKNOWN, "Unknown" },
++ { FC_VPORT_ACTIVE, "Active" },
++ { FC_VPORT_DISABLED, "Disabled" },
++ { FC_VPORT_LINKDOWN, "Linkdown" },
++ { FC_VPORT_INITIALIZING, "Initializing" },
++ { FC_VPORT_NO_FABRIC_SUPP, "No Fabric Support" },
++ { FC_VPORT_NO_FABRIC_RSCS, "No Fabric Resources" },
++ { FC_VPORT_FABRIC_LOGOUT, "Fabric Logout" },
++ { FC_VPORT_FABRIC_REJ_WWN, "Fabric Rejected WWN" },
++ { FC_VPORT_FAILED, "VPort Failed" },
++};
++fc_enum_name_search(vport_state, fc_vport_state, fc_vport_state_names)
++#define FC_VPORTSTATE_MAX_NAMELEN 24
++
++/* Reuse fc_vport_state enum function for vport_last_state */
++#define get_fc_vport_last_state_name get_fc_vport_state_name
++
++
+ /* Convert fc_tgtid_binding_type values to ascii string name */
+ static const struct {
+ enum fc_tgtid_binding_type value;
+@@ -219,16 +275,16 @@
+ }
+
+
+-/* Convert FC_RPORT_ROLE bit values to ascii string name */
++/* Convert FC_PORT_ROLE bit values to ascii string name */
+ static const struct {
+ u32 value;
+ char *name;
+-} fc_remote_port_role_names[] = {
+- { FC_RPORT_ROLE_FCP_TARGET, "FCP Target" },
+- { FC_RPORT_ROLE_FCP_INITIATOR, "FCP Initiator" },
+- { FC_RPORT_ROLE_IP_PORT, "IP Port" },
++} fc_port_role_names[] = {
++ { FC_PORT_ROLE_FCP_TARGET, "FCP Target" },
++ { FC_PORT_ROLE_FCP_INITIATOR, "FCP Initiator" },
++ { FC_PORT_ROLE_IP_PORT, "IP Port" },
+ };
+-fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names)
++fc_bitfield_name_search(port_roles, fc_port_role_names)
+
+ /*
+ * Define roles that are specific to port_id. Values are relative to ROLE_MASK.
+@@ -252,7 +308,8 @@
+ */
+ #define FC_STARGET_NUM_ATTRS 3
+ #define FC_RPORT_NUM_ATTRS 10
+-#define FC_HOST_NUM_ATTRS 17
++#define FC_VPORT_NUM_ATTRS 9
++#define FC_HOST_NUM_ATTRS 21
+
+ struct fc_internal {
+ struct scsi_transport_template t;
+@@ -278,6 +335,10 @@
+ struct transport_container rport_attr_cont;
+ struct class_device_attribute private_rport_attrs[FC_RPORT_NUM_ATTRS];
+ struct class_device_attribute *rport_attrs[FC_RPORT_NUM_ATTRS + 1];
++
++ struct transport_container vport_attr_cont;
++ struct class_device_attribute private_vport_attrs[FC_VPORT_NUM_ATTRS];
++ struct class_device_attribute *vport_attrs[FC_VPORT_NUM_ATTRS + 1];
+ };
+
+ #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t)
+@@ -331,6 +392,7 @@
+ sizeof(fc_host->supported_fc4s));
+ fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN;
+ fc_host->maxframe_size = -1;
++ fc_host->max_npiv_vports = 0;
+ memset(fc_host->serial_number, 0,
+ sizeof(fc_host->serial_number));
+
+@@ -348,8 +410,11 @@
+
+ INIT_LIST_HEAD(&fc_host->rports);
+ INIT_LIST_HEAD(&fc_host->rport_bindings);
++ INIT_LIST_HEAD(&fc_host->vports);
+ fc_host->next_rport_number = 0;
+ fc_host->next_target_id = 0;
++ fc_host->next_vport_number = 0;
++ fc_host->npiv_vports_inuse = 0;
+
+ snprintf(fc_host->work_q_name, KOBJ_NAME_LEN, "fc_wq_%d",
+ shost->host_no);
+@@ -388,6 +453,16 @@
+ NULL);
+
+ /*
++ * Setup and Remove actions for virtual ports are handled
++ * in the service functions below.
++ */
++static DECLARE_TRANSPORT_CLASS(fc_vport_class,
++ "fc_vports",
++ NULL,
++ NULL,
++ NULL);
++
++/*
+ * Module Parameters
+ */
+
+@@ -585,6 +660,9 @@
+ error = transport_class_register(&fc_host_class);
+ if (error)
+ return error;
++ error = transport_class_register(&fc_vport_class);
++ if (error)
++ return error;
+ error = transport_class_register(&fc_rport_class);
+ if (error)
+ return error;
+@@ -596,6 +674,7 @@
+ transport_class_unregister(&fc_transport_class);
+ transport_class_unregister(&fc_rport_class);
+ transport_class_unregister(&fc_host_class);
++ transport_class_unregister(&fc_vport_class);
+ }
+
+ /*
+@@ -800,9 +879,9 @@
+ return snprintf(buf, 30, "Unknown Fabric Entity\n");
+ }
+ } else {
+- if (rport->roles == FC_RPORT_ROLE_UNKNOWN)
++ if (rport->roles == FC_PORT_ROLE_UNKNOWN)
+ return snprintf(buf, 20, "unknown\n");
+- return get_fc_remote_port_roles_names(rport->roles, buf);
++ return get_fc_port_roles_names(rport->roles, buf);
+ }
+ }
+ static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO,
+@@ -857,7 +936,7 @@
+
+ /*
+ * Note: in the target show function we recognize when the remote
+- * port is in the hierarchy and do not allow the driver to get
++ * port is in the hierarchy and do not allow the driver to get
+ * involved in sysfs functions. The driver only gets involved if
+ * it's the "old" style that doesn't use rports.
+ */
+@@ -912,6 +991,257 @@
+
+
+ /*
++ * FC Virtual Port Attribute Management
++ */
++
++#define fc_vport_show_function(field, format_string, sz, cast) \
++static ssize_t \
++show_fc_vport_##field (struct class_device *cdev, char *buf) \
++{ \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ struct Scsi_Host *shost = vport_to_shost(vport); \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ if ((i->f->get_vport_##field) && \
++ !(vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))) \
++ i->f->get_vport_##field(vport); \
++ return snprintf(buf, sz, format_string, cast vport->field); \
++}
++
++#define fc_vport_store_function(field) \
++static ssize_t \
++store_fc_vport_##field(struct class_device *cdev, const char *buf, \
++ size_t count) \
++{ \
++ int val; \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ struct Scsi_Host *shost = vport_to_shost(vport); \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ char *cp; \
++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \
++ return -EBUSY; \
++ val = simple_strtoul(buf, &cp, 0); \
++ if (*cp && (*cp != '\n')) \
++ return -EINVAL; \
++ i->f->set_vport_##field(vport, val); \
++ return count; \
++}
++
++#define fc_vport_store_str_function(field, slen) \
++static ssize_t \
++store_fc_vport_##field(struct class_device *cdev, const char *buf, \
++ size_t count) \
++{ \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ struct Scsi_Host *shost = vport_to_shost(vport); \
++ struct fc_internal *i = to_fc_internal(shost->transportt); \
++ unsigned int cnt=count; \
++ \
++ /* count may include a LF at end of string */ \
++ if (buf[cnt-1] == '\n') \
++ cnt--; \
++ if (cnt > ((slen) - 1)) \
++ return -EINVAL; \
++ memcpy(vport->field, buf, cnt); \
++ i->f->set_vport_##field(vport); \
++ return count; \
++}
++
++#define fc_vport_rd_attr(field, format_string, sz) \
++ fc_vport_show_function(field, format_string, sz, ) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \
++ show_fc_vport_##field, NULL)
++
++#define fc_vport_rd_attr_cast(field, format_string, sz, cast) \
++ fc_vport_show_function(field, format_string, sz, (cast)) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \
++ show_fc_vport_##field, NULL)
++
++#define fc_vport_rw_attr(field, format_string, sz) \
++ fc_vport_show_function(field, format_string, sz, ) \
++ fc_vport_store_function(field) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \
++ show_fc_vport_##field, \
++ store_fc_vport_##field)
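++
++/*
++ * For example, fc_vport_rd_attr(channel, "%d\n", 20) would expand to
++ * a show_fc_vport_channel() routine plus a read-only class-device
++ * attribute (illustrative only - it assumes the transport template
++ * declares a matching get_vport_channel() hook; this patch only
++ * instantiates the private variants defined below).
++ */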
++
++#define fc_private_vport_show_function(field, format_string, sz, cast) \
++static ssize_t \
++show_fc_vport_##field (struct class_device *cdev, char *buf) \
++{ \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ return snprintf(buf, sz, format_string, cast vport->field); \
++}
++
++#define fc_private_vport_store_u32_function(field) \
++static ssize_t \
++store_fc_vport_##field(struct class_device *cdev, const char *buf, \
++ size_t count) \
++{ \
++ u32 val; \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ char *cp; \
++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \
++ return -EBUSY; \
++ val = simple_strtoul(buf, &cp, 0); \
++ if (*cp && (*cp != '\n')) \
++ return -EINVAL; \
++ vport->field = val; \
++ return count; \
++}
++
++
++#define fc_private_vport_rd_attr(field, format_string, sz) \
++ fc_private_vport_show_function(field, format_string, sz, ) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \
++ show_fc_vport_##field, NULL)
++
++#define fc_private_vport_rd_attr_cast(field, format_string, sz, cast) \
++ fc_private_vport_show_function(field, format_string, sz, (cast)) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \
++ show_fc_vport_##field, NULL)
++
++#define fc_private_vport_rw_u32_attr(field, format_string, sz) \
++ fc_private_vport_show_function(field, format_string, sz, ) \
++ fc_private_vport_store_u32_function(field) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \
++ show_fc_vport_##field, \
++ store_fc_vport_##field)
++
++
++#define fc_private_vport_rd_enum_attr(title, maxlen) \
++static ssize_t \
++show_fc_vport_##title (struct class_device *cdev, char *buf) \
++{ \
++ struct fc_vport *vport = transport_class_to_vport(cdev); \
++ const char *name; \
++ name = get_fc_##title##_name(vport->title); \
++ if (!name) \
++ return -EINVAL; \
++ return snprintf(buf, maxlen, "%s\n", name); \
++} \
++static FC_CLASS_DEVICE_ATTR(vport, title, S_IRUGO, \
++ show_fc_vport_##title, NULL)
++
++
++#define SETUP_VPORT_ATTRIBUTE_RD(field) \
++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \
++ i->private_vport_attrs[count].store = NULL; \
++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \
++ if (i->f->get_##field) \
++ count++
++ /* NOTE: Above MACRO differs: checks function not show bit */
++
++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(field) \
++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \
++ i->private_vport_attrs[count].store = NULL; \
++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \
++ count++
++
++#define SETUP_VPORT_ATTRIBUTE_WR(field) \
++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \
++ if (i->f->field) \
++ count++
++ /* NOTE: Above MACRO differs: checks function */
++
++#define SETUP_VPORT_ATTRIBUTE_RW(field) \
++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++ if (!i->f->set_vport_##field) { \
++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \
++ i->private_vport_attrs[count].store = NULL; \
++ } \
++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \
++ count++
++ /* NOTE: Above MACRO differs: does not check show bit */
++
++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RW(field) \
++{ \
++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \
++ count++; \
++}
++
++
++/* The FC Transport Virtual Port Attributes: */
++
++/* Fixed Virtual Port Attributes */
++
++/* Dynamic Virtual Port Attributes */
++
++/* Private Virtual Port Attributes */
++
++fc_private_vport_rd_enum_attr(vport_state, FC_VPORTSTATE_MAX_NAMELEN);
++fc_private_vport_rd_enum_attr(vport_last_state, FC_VPORTSTATE_MAX_NAMELEN);
++fc_private_vport_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long);
++fc_private_vport_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long);
++
++static ssize_t
++show_fc_vport_roles (struct class_device *cdev, char *buf)
++{
++ struct fc_vport *vport = transport_class_to_vport(cdev);
++
++ if (vport->roles == FC_PORT_ROLE_UNKNOWN)
++ return snprintf(buf, 20, "unknown\n");
++ return get_fc_port_roles_names(vport->roles, buf);
++}
++static FC_CLASS_DEVICE_ATTR(vport, roles, S_IRUGO, show_fc_vport_roles, NULL);
++
++fc_private_vport_rd_enum_attr(vport_type, FC_PORTTYPE_MAX_NAMELEN);
++
++fc_private_vport_show_function(symbolic_name, "%s\n",
++ FC_VPORT_SYMBOLIC_NAMELEN + 1, )
++fc_vport_store_str_function(symbolic_name, FC_VPORT_SYMBOLIC_NAMELEN)
++static FC_CLASS_DEVICE_ATTR(vport, symbolic_name, S_IRUGO | S_IWUSR,
++ show_fc_vport_symbolic_name, store_fc_vport_symbolic_name);
++
++static ssize_t
++store_fc_vport_delete(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct fc_vport *vport = transport_class_to_vport(cdev);
++ struct Scsi_Host *shost = vport_to_shost(vport);
++
++ fc_queue_work(shost, &vport->vport_delete_work);
++ return count;
++}
++static FC_CLASS_DEVICE_ATTR(vport, vport_delete, S_IWUSR,
++ NULL, store_fc_vport_delete);
++
++
++/*
++ * Enable/Disable vport
++ * Write "1" to disable, write "0" to enable
++ */
++static ssize_t
++store_fc_vport_disable(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct fc_vport *vport = transport_class_to_vport(cdev);
++ struct Scsi_Host *shost = vport_to_shost(vport);
++ struct fc_internal *i = to_fc_internal(shost->transportt);
++ int stat;
++
++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))
++ return -EBUSY;
++
++ if (*buf == '0') {
++ if (vport->vport_state != FC_VPORT_DISABLED)
++ return -EALREADY;
++ } else if (*buf == '1') {
++ if (vport->vport_state == FC_VPORT_DISABLED)
++ return -EALREADY;
++ } else
++ return -EINVAL;
++
++ stat = i->f->vport_disable(vport, ((*buf == '0') ? false : true));
++ return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(vport, vport_disable, S_IWUSR,
++ NULL, store_fc_vport_disable);
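++
++/*
++ * e.g. (illustrative path; "vport-4:0-0" follows the
++ * "vport-%d:%d-%d" bus_id format assigned at creation):
++ *
++ *	echo 1 > /sys/class/fc_vports/vport-4:0-0/vport_disable
++ */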
++
++
++/*
+ * Host Attribute Management
+ */
+
+@@ -1003,6 +1333,13 @@
+ if (i->f->show_host_##field) \
+ count++
+
++#define SETUP_HOST_ATTRIBUTE_RD_NS(field) \
++ i->private_host_attrs[count] = class_device_attr_host_##field; \
++ i->private_host_attrs[count].attr.mode = S_IRUGO; \
++ i->private_host_attrs[count].store = NULL; \
++ i->host_attrs[count] = &i->private_host_attrs[count]; \
++ count++
++
+ #define SETUP_HOST_ATTRIBUTE_RW(field) \
+ i->private_host_attrs[count] = class_device_attr_host_##field; \
+ if (!i->f->set_host_##field) { \
+@@ -1090,6 +1427,7 @@
+ fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20,
+ unsigned long long);
+ fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20);
++fc_private_host_rd_attr(max_npiv_vports, "%u\n", 20);
+ fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1));
+
+
+@@ -1210,6 +1548,9 @@
+ static FC_CLASS_DEVICE_ATTR(host, issue_lip, S_IWUSR, NULL,
+ store_fc_private_host_issue_lip);
+
++fc_private_host_rd_attr(npiv_vports_inuse, "%u\n", 20);
++
++
+ /*
+ * Host Statistics Management
+ */
+@@ -1285,7 +1626,6 @@
+ static FC_CLASS_DEVICE_ATTR(host, reset_statistics, S_IWUSR, NULL,
+ fc_reset_statistics);
+
+-
+ static struct attribute *fc_statistics_attrs[] = {
+ &class_device_attr_host_seconds_since_last_reset.attr,
+ &class_device_attr_host_tx_frames.attr,
+@@ -1316,6 +1656,142 @@
+ .attrs = fc_statistics_attrs,
+ };
+
++
++/* Host Vport Attributes */
++
++static int
++fc_parse_wwn(const char *ns, u64 *nm)
++{
++ unsigned int i, j;
++ u8 wwn[8];
++
++ memset(wwn, 0, sizeof(wwn));
++
++ /* Validate and store the new name */
++ for (i=0, j=0; i < 16; i++) {
++ if ((*ns >= 'a') && (*ns <= 'f'))
++ j = ((j << 4) | ((*ns++ -'a') + 10));
++ else if ((*ns >= 'A') && (*ns <= 'F'))
++ j = ((j << 4) | ((*ns++ -'A') + 10));
++ else if ((*ns >= '0') && (*ns <= '9'))
++ j = ((j << 4) | (*ns++ -'0'));
++ else
++ return -EINVAL;
++ if (i % 2) {
++ wwn[i/2] = j & 0xff;
++ j = 0;
++ }
++ }
++
++ *nm = wwn_to_u64(wwn);
++
++ return 0;
++}
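++
++/*
++ * e.g. fc_parse_wwn("2000001b32a9d5fe", &nm) stores
++ * 0x2000001b32a9d5feULL in nm and returns 0; any non-hex
++ * character makes it return -EINVAL.
++ */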
++
++
++/*
++ * "Short-cut" sysfs variable to create a new vport on a FC Host.
++ * Input is a string of the form "<WWPN>:<WWNN>". Other attributes
++ * will default to an NPIV-based FCP_Initiator; the WWNs are specified
++ * as hex characters, and may *not* contain any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_create(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct fc_vport_identifiers vid;
++ struct fc_vport *vport;
++ unsigned int cnt=count;
++ int stat;
++
++ memset(&vid, 0, sizeof(vid));
++
++ /* count may include a LF at end of string */
++ if (buf[cnt-1] == '\n')
++ cnt--;
++
++ /* validate the fixed "<WWPN>:<WWNN>" form: 16 hex chars, ':', 16 hex chars */
++ if ((cnt != (16+1+16)) || (buf[16] != ':'))
++ return -EINVAL;
++
++ stat = fc_parse_wwn(&buf[0], &vid.port_name);
++ if (stat)
++ return stat;
++
++ stat = fc_parse_wwn(&buf[17], &vid.node_name);
++ if (stat)
++ return stat;
++
++ vid.roles = FC_PORT_ROLE_FCP_INITIATOR;
++ vid.vport_type = FC_PORTTYPE_NPIV;
++ /* vid.symbolic_name is already zeroed by the memset above */
++ vid.disable = false; /* always enabled */
++
++ /* we only allow support on Channel 0 !!! */
++ stat = fc_vport_create(shost, 0, &shost->shost_gendev, &vid, &vport);
++ return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(host, vport_create, S_IWUSR, NULL,
++ store_fc_host_vport_create);
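++
++/*
++ * e.g. (illustrative WWNs and host number; WWPN first, then WWNN):
++ *
++ *	echo '2101001b32a9d5fe:2001001b32a9d5fe' > \
++ *		/sys/class/fc_host/host4/vport_create
++ */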
++
++
++/*
++ * "Short-cut" sysfs variable to delete a vport on a FC Host.
++ * Vport is identified by a string containing "<WWPN>:<WWNN>".
++ * The WWNs are specified as hex characters, and may *not* contain
++ * any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_delete(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_vport *vport;
++ u64 wwpn, wwnn;
++ unsigned long flags;
++ unsigned int cnt=count;
++ int stat, match;
++
++ /* count may include a LF at end of string */
++ if (buf[cnt-1] == '\n')
++ cnt--;
++
++ /* validate the fixed "<WWPN>:<WWNN>" form: 16 hex chars, ':', 16 hex chars */
++ if ((cnt != (16+1+16)) || (buf[16] != ':'))
++ return -EINVAL;
++
++ stat = fc_parse_wwn(&buf[0], &wwpn);
++ if (stat)
++ return stat;
++
++ stat = fc_parse_wwn(&buf[17], &wwnn);
++ if (stat)
++ return stat;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ match = 0;
++ /* we only allow support on Channel 0 !!! */
++ list_for_each_entry(vport, &fc_host->vports, peers) {
++ if ((vport->channel == 0) &&
++ (vport->port_name == wwpn) && (vport->node_name == wwnn)) {
++ match = 1;
++ break;
++ }
++ }
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (!match)
++ return -ENODEV;
++
++ stat = fc_vport_terminate(vport);
++ return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(host, vport_delete, S_IWUSR, NULL,
++ store_fc_host_vport_delete);
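++
++/*
++ * e.g. (illustrative; the WWPN:WWNN pair must match a vport that
++ * exists on channel 0 of this host):
++ *
++ *	echo '2101001b32a9d5fe:2001001b32a9d5fe' > \
++ *		/sys/class/fc_host/host4/vport_delete
++ */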
++
++
+ static int fc_host_match(struct attribute_container *cont,
+ struct device *dev)
+ {
+@@ -1387,6 +1863,40 @@
+ }
+
+
++static void fc_vport_dev_release(struct device *dev)
++{
++ struct fc_vport *vport = dev_to_vport(dev);
++ put_device(dev->parent); /* release kobj parent */
++ kfree(vport);
++}
++
++int scsi_is_fc_vport(const struct device *dev)
++{
++ return dev->release == fc_vport_dev_release;
++}
++EXPORT_SYMBOL(scsi_is_fc_vport);
++
++static int fc_vport_match(struct attribute_container *cont,
++ struct device *dev)
++{
++ struct fc_vport *vport;
++ struct Scsi_Host *shost;
++ struct fc_internal *i;
++
++ if (!scsi_is_fc_vport(dev))
++ return 0;
++ vport = dev_to_vport(dev);
++
++ shost = vport_to_shost(vport);
++ if (!shost->transportt || shost->transportt->host_attrs.ac.class
++ != &fc_host_class.class)
++ return 0;
++
++ i = to_fc_internal(shost->transportt);
++ return &i->vport_attr_cont.ac == cont;
++}
++
++
+ /**
+ * fc_timed_out - FC Transport I/O timeout intercept handler
+ *
+@@ -1433,6 +1943,9 @@
+ if (rport->scsi_target_id == -1)
+ continue;
+
++ if (rport->port_state != FC_PORTSTATE_ONLINE)
++ continue;
++
+ if ((channel == SCAN_WILD_CARD || channel == rport->channel) &&
+ (id == SCAN_WILD_CARD || id == rport->scsi_target_id)) {
+ scsi_scan_target(&rport->dev, rport->channel,
+@@ -1472,6 +1985,11 @@
+ i->rport_attr_cont.ac.match = fc_rport_match;
+ transport_container_register(&i->rport_attr_cont);
+
++ i->vport_attr_cont.ac.attrs = &i->vport_attrs[0];
++ i->vport_attr_cont.ac.class = &fc_vport_class.class;
++ i->vport_attr_cont.ac.match = fc_vport_match;
++ transport_container_register(&i->vport_attr_cont);
++
+ i->f = ft;
+
+ /* Transport uses the shost workq for scsi scanning */
+@@ -1505,6 +2023,10 @@
+ SETUP_HOST_ATTRIBUTE_RD(supported_fc4s);
+ SETUP_HOST_ATTRIBUTE_RD(supported_speeds);
+ SETUP_HOST_ATTRIBUTE_RD(maxframe_size);
++ if (ft->vport_create) {
++ SETUP_HOST_ATTRIBUTE_RD_NS(max_npiv_vports);
++ SETUP_HOST_ATTRIBUTE_RD_NS(npiv_vports_inuse);
++ }
+ SETUP_HOST_ATTRIBUTE_RD(serial_number);
+
+ SETUP_HOST_ATTRIBUTE_RD(port_id);
+@@ -1520,6 +2042,10 @@
+ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type);
+ if (ft->issue_fc_host_lip)
+ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(issue_lip);
++ if (ft->vport_create)
++ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_create);
++ if (ft->vport_delete)
++ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_delete);
+
+ BUG_ON(count > FC_HOST_NUM_ATTRS);
+
+@@ -1545,6 +2071,24 @@
+
+ i->rport_attrs[count] = NULL;
+
++ /*
++ * Setup Virtual Port Attributes.
++ */
++ count=0;
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_state);
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_last_state);
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(node_name);
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(port_name);
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(roles);
++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_type);
++ SETUP_VPORT_ATTRIBUTE_RW(symbolic_name);
++ SETUP_VPORT_ATTRIBUTE_WR(vport_delete);
++ SETUP_VPORT_ATTRIBUTE_WR(vport_disable);
++
++ BUG_ON(count > FC_VPORT_NUM_ATTRS);
++
++ i->vport_attrs[count] = NULL;
++
+ return &i->t;
+ }
+ EXPORT_SYMBOL(fc_attach_transport);
+@@ -1556,6 +2100,7 @@
+ transport_container_unregister(&i->t.target_attrs);
+ transport_container_unregister(&i->t.host_attrs);
+ transport_container_unregister(&i->rport_attr_cont);
++ transport_container_unregister(&i->vport_attr_cont);
+
+ kfree(i);
+ }
+@@ -1667,9 +2212,17 @@
+ void
+ fc_remove_host(struct Scsi_Host *shost)
+ {
+- struct fc_rport *rport, *next_rport;
++ struct fc_vport *vport = NULL, *next_vport = NULL;
++ struct fc_rport *rport = NULL, *next_rport = NULL;
+ struct workqueue_struct *work_q;
+ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ unsigned long flags;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++
++ /* Remove any vports */
++ list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers)
++ fc_queue_work(shost, &vport->vport_delete_work);
+
+ /* Remove any remote ports */
+ list_for_each_entry_safe(rport, next_rport,
+@@ -1686,6 +2239,8 @@
+ fc_queue_work(shost, &rport->rport_delete_work);
+ }
+
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
+ /* flush all scan work items */
+ scsi_flush_work(shost);
+
+@@ -1844,7 +2399,7 @@
+ spin_lock_irqsave(shost->host_lock, flags);
+
+ rport->number = fc_host->next_rport_number++;
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
+ rport->scsi_target_id = fc_host->next_target_id++;
+ else
+ rport->scsi_target_id = -1;
+@@ -1869,7 +2424,7 @@
+ transport_add_device(dev);
+ transport_configure_device(dev);
+
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) {
++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) {
+ /* initiate a scan of the target */
+ rport->flags |= FC_RPORT_SCAN_PENDING;
+ scsi_queue_work(shost, &rport->scan_work);
+@@ -2003,7 +2558,7 @@
+
+ /* was a target, not in roles */
+ if ((rport->scsi_target_id != -1) &&
+- (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET)))
++ (!(ids->roles & FC_PORT_ROLE_FCP_TARGET)))
+ return rport;
+
+ /*
+@@ -2086,7 +2641,7 @@
+ memset(rport->dd_data, 0,
+ fci->f->dd_fcrport_size);
+
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) {
++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) {
+ /* initiate a scan of the target */
+ rport->flags |= FC_RPORT_SCAN_PENDING;
+ scsi_queue_work(shost, &rport->scan_work);
+@@ -2243,11 +2798,11 @@
+ int create = 0;
+
+ spin_lock_irqsave(shost->host_lock, flags);
+- if (roles & FC_RPORT_ROLE_FCP_TARGET) {
++ if (roles & FC_PORT_ROLE_FCP_TARGET) {
+ if (rport->scsi_target_id == -1) {
+ rport->scsi_target_id = fc_host->next_target_id++;
+ create = 1;
+- } else if (!(rport->roles & FC_RPORT_ROLE_FCP_TARGET))
++ } else if (!(rport->roles & FC_PORT_ROLE_FCP_TARGET))
+ create = 1;
+ }
+
+@@ -2317,7 +2872,7 @@
+ */
+ if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+ (rport->scsi_target_id != -1) &&
+- !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
++ !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
+ dev_printk(KERN_ERR, &rport->dev,
+ "blocked FC remote port time out: no longer"
+ " a FCP target, removing starget\n");
+@@ -2367,7 +2922,7 @@
+ */
+ rport->maxframe_size = -1;
+ rport->supported_classes = FC_COS_UNSPECIFIED;
+- rport->roles = FC_RPORT_ROLE_UNKNOWN;
++ rport->roles = FC_PORT_ROLE_UNKNOWN;
+ rport->port_state = FC_PORTSTATE_NOTPRESENT;
+
+ /* remove the identifiers that aren't used in the consisting binding */
+@@ -2436,7 +2991,7 @@
+ unsigned long flags;
+
+ if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+- (rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
++ (rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
+ scsi_scan_target(&rport->dev, rport->channel,
+ rport->scsi_target_id, SCAN_WILD_CARD, 1);
+ }
+@@ -2447,7 +3002,227 @@
+ }
+
+
+-MODULE_AUTHOR("Martin Hicks");
++/**
++ * fc_vport_create - allocates and creates a FC virtual port.
++ * @shost: scsi host the virtual port is connected to.
++ * @channel: Channel on shost port connected to.
++ * @pdev: parent device for vport
++ * @ids: The world wide names, FC4 port roles, etc for
++ * the virtual port.
++ * @ret_vport: The pointer to the created vport.
++ *
++ * Allocates and creates the vport structure, calls the parent host
++ * to instantiate the vport, then completes with class and sysfs creation.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++static int
++fc_vport_create(struct Scsi_Host *shost, int channel, struct device *pdev,
++ struct fc_vport_identifiers *ids, struct fc_vport **ret_vport)
++{
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_internal *fci = to_fc_internal(shost->transportt);
++ struct fc_vport *vport;
++ struct device *dev;
++ unsigned long flags;
++ size_t size;
++ int error;
++
++ *ret_vport = NULL;
++
++ if ( ! fci->f->vport_create)
++ return -ENOENT;
++
++ size = (sizeof(struct fc_vport) + fci->f->dd_fcvport_size);
++ vport = kzalloc(size, GFP_KERNEL);
++ if (unlikely(!vport)) {
++ printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__);
++ return -ENOMEM;
++ }
++
++ vport->vport_state = FC_VPORT_UNKNOWN;
++ vport->vport_last_state = FC_VPORT_UNKNOWN;
++ vport->node_name = ids->node_name;
++ vport->port_name = ids->port_name;
++ vport->roles = ids->roles;
++ vport->vport_type = ids->vport_type;
++ if (fci->f->dd_fcvport_size)
++ vport->dd_data = &vport[1];
++ vport->shost = shost;
++ vport->channel = channel;
++ vport->flags = FC_VPORT_CREATING;
++ INIT_WORK(&vport->vport_delete_work, fc_vport_sched_delete);
++
++ spin_lock_irqsave(shost->host_lock, flags);
++
++ if (fc_host->npiv_vports_inuse >= fc_host->max_npiv_vports) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ kfree(vport);
++ return -ENOSPC;
++ }
++ fc_host->npiv_vports_inuse++;
++ vport->number = fc_host->next_vport_number++;
++ list_add_tail(&vport->peers, &fc_host->vports);
++ get_device(&shost->shost_gendev); /* for fc_host->vport list */
++
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ dev = &vport->dev;
++ device_initialize(dev); /* takes self reference */
++ dev->parent = get_device(pdev); /* takes parent reference */
++ dev->release = fc_vport_dev_release;
++ sprintf(dev->bus_id, "vport-%d:%d-%d",
++ shost->host_no, channel, vport->number);
++ transport_setup_device(dev);
++
++ error = device_add(dev);
++ if (error) {
++ printk(KERN_ERR "FC Virtual Port device_add failed\n");
++ goto delete_vport;
++ }
++ transport_add_device(dev);
++ transport_configure_device(dev);
++
++ error = fci->f->vport_create(vport, ids->disable);
++ if (error) {
++ printk(KERN_ERR "FC Virtual Port LLDD Create failed\n");
++ goto delete_vport_all;
++ }
++
++ /*
++ * If the parent isn't the physical adapter's Scsi_Host, ensure
++ * the Scsi_Host at least contains a symlink to the vport.
++ */
++ if (pdev != &shost->shost_gendev) {
++ error = sysfs_create_link(&shost->shost_gendev.kobj,
++ &dev->kobj, dev->bus_id);
++ if (error)
++ printk(KERN_ERR
++ "%s: Cannot create vport symlinks for "
++ "%s, err=%d\n",
++ __FUNCTION__, dev->bus_id, error);
++ }
++ spin_lock_irqsave(shost->host_lock, flags);
++ vport->flags &= ~FC_VPORT_CREATING;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ dev_printk(KERN_NOTICE, pdev,
++ "%s created via shost%d channel %d\n", dev->bus_id,
++ shost->host_no, channel);
++
++ *ret_vport = vport;
++
++ return 0;
++
++delete_vport_all:
++ transport_remove_device(dev);
++ device_del(dev);
++delete_vport:
++ transport_destroy_device(dev);
++ spin_lock_irqsave(shost->host_lock, flags);
++ list_del(&vport->peers);
++ put_device(&shost->shost_gendev); /* for fc_host->vport list */
++ fc_host->npiv_vports_inuse--;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ put_device(dev->parent);
++ kfree(vport);
++
++ return error;
++}
++
++
++/**
++ * fc_vport_terminate - Admin App or LLDD requests termination of a vport
++ * @vport: fc_vport to be terminated
++ *
++ * Calls the LLDD vport_delete() function, then deallocates and removes
++ * the vport from the shost and object tree.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++int
++fc_vport_terminate(struct fc_vport *vport)
++{
++ struct Scsi_Host *shost = vport_to_shost(vport);
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_internal *i = to_fc_internal(shost->transportt);
++ struct device *dev = &vport->dev;
++ unsigned long flags;
++ int stat;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ if (vport->flags & FC_VPORT_CREATING) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ return -EBUSY;
++ }
++ if (vport->flags & (FC_VPORT_DEL)) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ return -EALREADY;
++ }
++ vport->flags |= FC_VPORT_DELETING;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (i->f->vport_delete)
++ stat = i->f->vport_delete(vport);
++ else
++ stat = -ENOENT;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ vport->flags &= ~FC_VPORT_DELETING;
++ if (!stat) {
++ vport->flags |= FC_VPORT_DELETED;
++ list_del(&vport->peers);
++ fc_host->npiv_vports_inuse--;
++ put_device(&shost->shost_gendev); /* for fc_host->vport list */
++ }
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (stat)
++ return stat;
++
++ if (dev->parent != &shost->shost_gendev)
++ sysfs_remove_link(&shost->shost_gendev.kobj, dev->bus_id);
++ transport_remove_device(dev);
++ device_del(dev);
++ transport_destroy_device(dev);
++
++ /*
++ * Removing our self-reference should mean our
++ * release function gets called, which will drop the remaining
++ * parent reference and free the data structure.
++ */
++ put_device(dev); /* for self-reference */
++
++ return 0; /* SUCCESS */
++}
++EXPORT_SYMBOL(fc_vport_terminate);
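++
++/*
++ * fc_vport_terminate() is reached several ways: directly from the
++ * fc_host "vport_delete" attribute, via the fc_vport_sched_delete()
++ * work handler below (queued by the per-vport "vport_delete"
++ * attribute and by fc_remove_host()), or by an LLDD through the
++ * export above.
++ */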
++
++/**
++ * fc_vport_sched_delete - workq-based delete request for a vport
++ *
++ * @work: the vport_delete_work of the vport to be deleted.
++ **/
++static void
++fc_vport_sched_delete(struct work_struct *work)
++{
++ struct fc_vport *vport =
++ container_of(work, struct fc_vport, vport_delete_work);
++ int stat;
++
++ stat = fc_vport_terminate(vport);
++ if (stat)
++ dev_printk(KERN_ERR, vport->dev.parent,
++ "%s: %s, created via shost%d channel %d, could not"
++ " be deleted - error %d\n", __FUNCTION__,
++ vport->dev.bus_id, vport->shost->host_no,
++ vport->channel, stat);
++}
++
++
++/* Original Author: Martin Hicks */
++MODULE_AUTHOR("James Smart");
+ MODULE_DESCRIPTION("FC Transport Attributes");
+ MODULE_LICENSE("GPL");
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c
+--- linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c 2007-12-21 15:36:14.000000000 -0500
+@@ -30,9 +30,9 @@
+ #include <scsi/scsi_transport_iscsi.h>
+ #include <scsi/iscsi_if.h>
+
+-#define ISCSI_SESSION_ATTRS 11
++#define ISCSI_SESSION_ATTRS 15
+ #define ISCSI_CONN_ATTRS 11
+-#define ISCSI_HOST_ATTRS 0
++#define ISCSI_HOST_ATTRS 4
+ #define ISCSI_TRANSPORT_VERSION "2.0-724"
+
+ struct iscsi_internal {
+@@ -609,12 +609,10 @@
+ int t = done ? NLMSG_DONE : type;
+
+ skb = alloc_skb(len, GFP_ATOMIC);
+- /*
+- * FIXME:
+- * user is supposed to react on iferror == -ENOMEM;
+- * see iscsi_if_rx().
+- */
+- BUG_ON(!skb);
++ if (!skb) {
++ printk(KERN_ERR "Could not allocate skb to send reply.\n");
++ return -ENOMEM;
++ }
+
+ nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+ nlh->nlmsg_flags = flags;
+@@ -816,6 +814,8 @@
+ uint32_t hostno;
+
+ session = transport->create_session(transport, &priv->t,
++ ev->u.c_session.cmds_max,
++ ev->u.c_session.queue_depth,
+ ev->u.c_session.initial_cmdsn,
+ &hostno);
+ if (!session)
+@@ -947,15 +947,50 @@
+ iscsi_tgt_dscvr(struct iscsi_transport *transport,
+ struct iscsi_uevent *ev)
+ {
++ struct Scsi_Host *shost;
+ struct sockaddr *dst_addr;
++ int err;
+
+ if (!transport->tgt_dscvr)
+ return -EINVAL;
+
++ shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no);
++ if (IS_ERR(shost)) {
++ printk(KERN_ERR "target discovery could not find host no %u\n",
++ ev->u.tgt_dscvr.host_no);
++ return -ENODEV;
++ }
++
++
+ dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+- return transport->tgt_dscvr(ev->u.tgt_dscvr.type,
+- ev->u.tgt_dscvr.host_no,
++ err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type,
+ ev->u.tgt_dscvr.enable, dst_addr);
++ scsi_host_put(shost);
++ return err;
++}
++
++static int
++iscsi_set_host_param(struct iscsi_transport *transport,
++ struct iscsi_uevent *ev)
++{
++ char *data = (char*)ev + sizeof(*ev);
++ struct Scsi_Host *shost;
++ int err;
++
++ if (!transport->set_host_param)
++ return -ENOSYS;
++
++ shost = scsi_host_lookup(ev->u.set_host_param.host_no);
++ if (IS_ERR(shost)) {
++ printk(KERN_ERR "set_host_param could not find host no %u\n",
++ ev->u.set_host_param.host_no);
++ return -ENODEV;
++ }
++
++ err = transport->set_host_param(shost, ev->u.set_host_param.param,
++ data, ev->u.set_host_param.len);
++ scsi_host_put(shost);
++ return err;
+ }
+
+ static int
+@@ -1049,8 +1084,11 @@
+ case ISCSI_UEVENT_TGT_DSCVR:
+ err = iscsi_tgt_dscvr(transport, ev);
+ break;
++ case ISCSI_UEVENT_SET_HOST_PARAM:
++ err = iscsi_set_host_param(transport, ev);
++ break;
+ default:
+- err = -EINVAL;
++ err = -ENOSYS;
+ break;
+ }
+
+@@ -1160,30 +1198,37 @@
+ /*
+ * iSCSI session attrs
+ */
+-#define iscsi_session_attr_show(param) \
++#define iscsi_session_attr_show(param, perm) \
+ static ssize_t \
+ show_session_param_##param(struct class_device *cdev, char *buf) \
+ { \
+ struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \
+ struct iscsi_transport *t = session->transport; \
++ \
++ if (perm && !capable(CAP_SYS_ADMIN)) \
++ return -EACCES; \
+ return t->get_session_param(session, param, buf); \
+ }
+
+-#define iscsi_session_attr(field, param) \
+- iscsi_session_attr_show(param) \
++#define iscsi_session_attr(field, param, perm) \
++ iscsi_session_attr_show(param, perm) \
+ static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \
+ NULL);
+
+-iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME);
+-iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN);
+-iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T);
+-iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN);
+-iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST);
+-iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST);
+-iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN);
+-iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN);
+-iscsi_session_attr(erl, ISCSI_PARAM_ERL);
+-iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT);
++iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0);
++iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0);
++iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0);
++iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0);
++iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0);
++iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0);
++iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0);
++iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0);
++iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0);
++iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0);
++iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1);
++iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1);
++iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1);
++iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1);
+
+ #define iscsi_priv_session_attr_show(field, format) \
+ static ssize_t \
+@@ -1199,6 +1244,28 @@
+ NULL)
+ iscsi_priv_session_attr(recovery_tmo, "%d");
+
++/*
++ * iSCSI host attrs
++ */
++#define iscsi_host_attr_show(param) \
++static ssize_t \
++show_host_param_##param(struct class_device *cdev, char *buf) \
++{ \
++ struct Scsi_Host *shost = transport_class_to_shost(cdev); \
++ struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \
++ return priv->iscsi_transport->get_host_param(shost, param, buf); \
++}
++
++#define iscsi_host_attr(field, param) \
++ iscsi_host_attr_show(param) \
++static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param, \
++ NULL);
++
++iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME);
++iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS);
++iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS);
++iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME);
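++
++/*
++ * These appear under the iscsi_host class device, e.g.
++ * /sys/class/iscsi_host/host3/initiatorname (host number
++ * illustrative).
++ */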
++
+ #define SETUP_PRIV_SESSION_RD_ATTR(field) \
+ do { \
+ priv->session_attrs[count] = &class_device_attr_priv_sess_##field; \
+@@ -1222,6 +1289,14 @@
+ } \
+ } while (0)
+
++#define SETUP_HOST_RD_ATTR(field, param_flag) \
++do { \
++ if (tt->host_param_mask & param_flag) { \
++ priv->host_attrs[count] = &class_device_attr_host_##field; \
++ count++; \
++ } \
++} while (0)
++
+ static int iscsi_session_match(struct attribute_container *cont,
+ struct device *dev)
+ {
+@@ -1323,9 +1398,16 @@
+ priv->t.host_attrs.ac.class = &iscsi_host_class.class;
+ priv->t.host_attrs.ac.match = iscsi_host_match;
+ priv->t.host_size = sizeof(struct iscsi_host);
+- priv->host_attrs[0] = NULL;
+ transport_container_register(&priv->t.host_attrs);
+
++ SETUP_HOST_RD_ATTR(netdev, ISCSI_HOST_NETDEV_NAME);
++ SETUP_HOST_RD_ATTR(ipaddress, ISCSI_HOST_IPADDRESS);
++ SETUP_HOST_RD_ATTR(hwaddress, ISCSI_HOST_HWADDRESS);
++ SETUP_HOST_RD_ATTR(initiatorname, ISCSI_HOST_INITIATOR_NAME);
++ BUG_ON(count > ISCSI_HOST_ATTRS);
++ priv->host_attrs[count] = NULL;
++ count = 0;
++
+ /* connection parameters */
+ priv->conn_cont.ac.attrs = &priv->conn_attrs[0];
+ priv->conn_cont.ac.class = &iscsi_connection_class.class;
+@@ -1364,6 +1446,10 @@
+ SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL);
+ SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME);
+ SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT);
++ SETUP_SESSION_RD_ATTR(username, ISCSI_USERNAME);
++ SETUP_SESSION_RD_ATTR(username_in, ISCSI_USERNAME_IN);
++ SETUP_SESSION_RD_ATTR(password, ISCSI_PASSWORD);
++ SETUP_SESSION_RD_ATTR(password_in, ISCSI_PASSWORD_IN);
+ SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo);
+
+ BUG_ON(count > ISCSI_SESSION_ATTRS);
+@@ -1437,7 +1523,7 @@
+ if (err)
+ goto unregister_conn_class;
+
+- nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
++ nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
+ THIS_MODULE);
+ if (!nls) {
+ err = -ENOBUFS;
+diff -Nurb linux-2.6.22-570/drivers/scsi/sd.c linux-2.6.22-591/drivers/scsi/sd.c
+--- linux-2.6.22-570/drivers/scsi/sd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sd.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1515,7 +1515,7 @@
+ if (!scsi_device_online(sdp))
+ goto out;
+
+- buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);
++ buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL);
+ if (!buffer) {
+ sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "
+ "allocation failure.\n");
+diff -Nurb linux-2.6.22-570/drivers/scsi/sg.c linux-2.6.22-591/drivers/scsi/sg.c
+--- linux-2.6.22-570/drivers/scsi/sg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sg.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1842,7 +1842,7 @@
+ int blk_size = buff_size;
+ struct page *p = NULL;
+
+- if ((blk_size < 0) || (!sfp))
++ if (blk_size < 0)
+ return -EFAULT;
+ if (0 == blk_size)
+ ++blk_size; /* don't know why */
+diff -Nurb linux-2.6.22-570/drivers/scsi/stex.c linux-2.6.22-591/drivers/scsi/stex.c
+--- linux-2.6.22-570/drivers/scsi/stex.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/stex.c 2007-12-21 15:36:12.000000000 -0500
+@@ -395,53 +395,34 @@
+ static int stex_map_sg(struct st_hba *hba,
+ struct req_msg *req, struct st_ccb *ccb)
+ {
+- struct pci_dev *pdev = hba->pdev;
+ struct scsi_cmnd *cmd;
+- dma_addr_t dma_handle;
+- struct scatterlist *src;
++ struct scatterlist *sg;
+ struct st_sgtable *dst;
+- int i;
++ int i, nseg;
+
+ cmd = ccb->cmd;
+ dst = (struct st_sgtable *)req->variable;
+ dst->max_sg_count = cpu_to_le16(ST_MAX_SG);
+- dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen);
+-
+- if (cmd->use_sg) {
+- int n_elem;
++ dst->sz_in_byte = cpu_to_le32(scsi_bufflen(cmd));
+
+- src = (struct scatterlist *) cmd->request_buffer;
+- n_elem = pci_map_sg(pdev, src,
+- cmd->use_sg, cmd->sc_data_direction);
+- if (n_elem <= 0)
++ nseg = scsi_dma_map(cmd);
++ if (nseg < 0)
+ return -EIO;
++ if (nseg) {
++ ccb->sg_count = nseg;
++ dst->sg_count = cpu_to_le16((u16)nseg);
+
+- ccb->sg_count = n_elem;
+- dst->sg_count = cpu_to_le16((u16)n_elem);
+-
+- for (i = 0; i < n_elem; i++, src++) {
+- dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src));
++ scsi_for_each_sg(cmd, sg, nseg, i) {
++ dst->table[i].count = cpu_to_le32((u32)sg_dma_len(sg));
+ dst->table[i].addr =
+- cpu_to_le32(sg_dma_address(src) & 0xffffffff);
++ cpu_to_le32(sg_dma_address(sg) & 0xffffffff);
+ dst->table[i].addr_hi =
+- cpu_to_le32((sg_dma_address(src) >> 16) >> 16);
++ cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
+ dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST;
+ }
+ dst->table[--i].ctrl |= SG_CF_EOT;
+- return 0;
+ }
+
+- dma_handle = pci_map_single(pdev, cmd->request_buffer,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- cmd->SCp.dma_handle = dma_handle;
+-
+- ccb->sg_count = 1;
+- dst->sg_count = cpu_to_le16(1);
+- dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff);
+- dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16);
+- dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen);
+- dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST;
+-
+ return 0;
+ }
+
+@@ -451,24 +432,24 @@
+ size_t lcount;
+ size_t len;
+ void *s, *d, *base = NULL;
+- if (*count > cmd->request_bufflen)
+- *count = cmd->request_bufflen;
++ size_t offset;
++
++ if (*count > scsi_bufflen(cmd))
++ *count = scsi_bufflen(cmd);
+ lcount = *count;
+ while (lcount) {
+ len = lcount;
+ s = (void *)src;
+- if (cmd->use_sg) {
+- size_t offset = *count - lcount;
++
++ offset = *count - lcount;
+ s += offset;
+- base = scsi_kmap_atomic_sg(cmd->request_buffer,
++ base = scsi_kmap_atomic_sg(scsi_sglist(cmd),
+ sg_count, &offset, &len);
+- if (base == NULL) {
++ if (!base) {
+ *count -= lcount;
+ return;
+ }
+ d = base + offset;
+- } else
+- d = cmd->request_buffer;
+
+ if (direction == ST_TO_CMD)
+ memcpy(d, s, len);
+@@ -476,7 +457,6 @@
+ memcpy(s, d, len);
+
+ lcount -= len;
+- if (cmd->use_sg)
+ scsi_kunmap_atomic_sg(base);
+ }
+ }
+@@ -484,22 +464,17 @@
+ static int stex_direct_copy(struct scsi_cmnd *cmd,
+ const void *src, size_t count)
+ {
+- struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+ size_t cp_len = count;
+ int n_elem = 0;
+
+- if (cmd->use_sg) {
+- n_elem = pci_map_sg(hba->pdev, cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- if (n_elem <= 0)
++ n_elem = scsi_dma_map(cmd);
++ if (n_elem < 0)
+ return 0;
+- }
+
+ stex_internal_copy(cmd, src, &cp_len, n_elem, ST_TO_CMD);
+
+- if (cmd->use_sg)
+- pci_unmap_sg(hba->pdev, cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
++ scsi_dma_unmap(cmd);
++
+ return cp_len == count;
+ }
+
+@@ -678,18 +653,6 @@
+ return 0;
+ }
+
+-static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd)
+-{
+- if (cmd->sc_data_direction != DMA_NONE) {
+- if (cmd->use_sg)
+- pci_unmap_sg(hba->pdev, cmd->request_buffer,
+- cmd->use_sg, cmd->sc_data_direction);
+- else
+- pci_unmap_single(hba->pdev, cmd->SCp.dma_handle,
+- cmd->request_bufflen, cmd->sc_data_direction);
+- }
+-}
+-
+ static void stex_scsi_done(struct st_ccb *ccb)
+ {
+ struct scsi_cmnd *cmd = ccb->cmd;
+@@ -756,7 +719,7 @@
+
+ if (ccb->cmd->cmnd[0] == MGT_CMD &&
+ resp->scsi_status != SAM_STAT_CHECK_CONDITION) {
+- ccb->cmd->request_bufflen =
++ scsi_bufflen(ccb->cmd) =
+ le32_to_cpu(*(__le32 *)&resp->variable[0]);
+ return;
+ }
+@@ -855,7 +818,7 @@
+ ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER))
+ stex_controller_info(hba, ccb);
+
+- stex_unmap_sg(hba, ccb->cmd);
++ scsi_dma_unmap(ccb->cmd);
+ stex_scsi_done(ccb);
+ hba->out_req_cnt--;
+ } else if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+@@ -1028,7 +991,7 @@
+ }
+
+ fail_out:
+- stex_unmap_sg(hba, cmd);
++ scsi_dma_unmap(cmd);
+ hba->wait_ccb->req = NULL; /* nullify the req's future return */
+ hba->wait_ccb = NULL;
+ result = FAILED;
+diff -Nurb linux-2.6.22-570/drivers/scsi/sun_esp.c linux-2.6.22-591/drivers/scsi/sun_esp.c
+--- linux-2.6.22-570/drivers/scsi/sun_esp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sun_esp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -493,7 +493,7 @@
+ goto fail;
+
+ host->max_id = (hme ? 16 : 8);
+- esp = host_to_esp(host);
++ esp = shost_priv(host);
+
+ esp->host = host;
+ esp->dev = esp_dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/sym53c416.c linux-2.6.22-591/drivers/scsi/sym53c416.c
+--- linux-2.6.22-570/drivers/scsi/sym53c416.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sym53c416.c 2007-12-21 15:36:12.000000000 -0500
+@@ -332,8 +332,7 @@
+ int i;
+ unsigned long flags = 0;
+ unsigned char status_reg, pio_int_reg, int_reg;
+- struct scatterlist *sglist;
+- unsigned int sgcount;
++ struct scatterlist *sg;
+ unsigned int tot_trans = 0;
+
+ /* We search the base address of the host adapter which caused the interrupt */
+@@ -429,19 +428,15 @@
+ {
+ current_command->SCp.phase = data_out;
+ outb(FLUSH_FIFO, base + COMMAND_REG);
+- sym53c416_set_transfer_counter(base, current_command->request_bufflen);
++ sym53c416_set_transfer_counter(base,
++ scsi_bufflen(current_command));
+ outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG);
+- if(!current_command->use_sg)
+- tot_trans = sym53c416_write(base, current_command->request_buffer, current_command->request_bufflen);
+- else
+- {
+- sgcount = current_command->use_sg;
+- sglist = current_command->request_buffer;
+- while(sgcount--)
+- {
+- tot_trans += sym53c416_write(base, SG_ADDRESS(sglist), sglist->length);
+- sglist++;
+- }
++
++ scsi_for_each_sg(current_command,
++ sg, scsi_sg_count(current_command), i) {
++ tot_trans += sym53c416_write(base,
++ SG_ADDRESS(sg),
++ sg->length);
+ }
+ if(tot_trans < current_command->underflow)
+ printk(KERN_WARNING "sym53c416: Underflow, wrote %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow);
+@@ -455,19 +450,16 @@
+ {
+ current_command->SCp.phase = data_in;
+ outb(FLUSH_FIFO, base + COMMAND_REG);
+- sym53c416_set_transfer_counter(base, current_command->request_bufflen);
++ sym53c416_set_transfer_counter(base,
++ scsi_bufflen(current_command));
++
+ outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG);
+- if(!current_command->use_sg)
+- tot_trans = sym53c416_read(base, current_command->request_buffer, current_command->request_bufflen);
+- else
+- {
+- sgcount = current_command->use_sg;
+- sglist = current_command->request_buffer;
+- while(sgcount--)
+- {
+- tot_trans += sym53c416_read(base, SG_ADDRESS(sglist), sglist->length);
+- sglist++;
+- }
++
++ scsi_for_each_sg(current_command,
++ sg, scsi_sg_count(current_command), i) {
++ tot_trans += sym53c416_read(base,
++ SG_ADDRESS(sg),
++ sg->length);
+ }
+ if(tot_trans < current_command->underflow)
+ printk(KERN_WARNING "sym53c416: Underflow, read %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow);
+diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.c linux-2.6.22-591/drivers/scsi/tmscsim.c
+--- linux-2.6.22-570/drivers/scsi/tmscsim.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/tmscsim.c 2007-12-21 15:36:12.000000000 -0500
+@@ -457,27 +457,20 @@
+ error = 1;
+ DEBUG1(printk("%s(): Mapped sense buffer %p at %x\n", __FUNCTION__, pcmd->sense_buffer, cmdp->saved_dma_handle));
+ /* Map SG list */
+- } else if (pcmd->use_sg) {
+- pSRB->pSegmentList = (struct scatterlist *) pcmd->request_buffer;
+- pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, pcmd->use_sg,
+- pcmd->sc_data_direction);
++ } else if (scsi_sg_count(pcmd)) {
++ int nseg;
++
++ nseg = scsi_dma_map(pcmd);
++
++ pSRB->pSegmentList = scsi_sglist(pcmd);
++ pSRB->SGcount = nseg;
++
+ /* TODO: error handling */
+- if (!pSRB->SGcount)
++ if (nseg < 0)
+ error = 1;
+ DEBUG1(printk("%s(): Mapped SG %p with %d (%d) elements\n",\
+- __FUNCTION__, pcmd->request_buffer, pSRB->SGcount, pcmd->use_sg));
++ __FUNCTION__, scsi_sglist(pcmd), nseg, scsi_sg_count(pcmd)));
+ /* Map single segment */
+- } else if (pcmd->request_buffer && pcmd->request_bufflen) {
+- pSRB->pSegmentList = dc390_sg_build_single(&pSRB->Segmentx, pcmd->request_buffer, pcmd->request_bufflen);
+- pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, 1,
+- pcmd->sc_data_direction);
+- cmdp->saved_dma_handle = sg_dma_address(pSRB->pSegmentList);
+-
+- /* TODO: error handling */
+- if (pSRB->SGcount != 1)
+- error = 1;
+- DEBUG1(printk("%s(): Mapped request buffer %p at %x\n", __FUNCTION__, pcmd->request_buffer, cmdp->saved_dma_handle));
+- /* No mapping !? */
+ } else
+ pSRB->SGcount = 0;
+
+@@ -494,12 +487,10 @@
+ if (pSRB->SRBFlag) {
+ pci_unmap_sg(pdev, &pSRB->Segmentx, 1, DMA_FROM_DEVICE);
+ DEBUG1(printk("%s(): Unmapped sense buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle));
+- } else if (pcmd->use_sg) {
+- pci_unmap_sg(pdev, pcmd->request_buffer, pcmd->use_sg, pcmd->sc_data_direction);
+- DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", __FUNCTION__, pcmd->request_buffer, pcmd->use_sg));
+- } else if (pcmd->request_buffer && pcmd->request_bufflen) {
+- pci_unmap_sg(pdev, &pSRB->Segmentx, 1, pcmd->sc_data_direction);
+- DEBUG1(printk("%s(): Unmapped request buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle));
++ } else {
++ scsi_dma_unmap(pcmd);
++ DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n",
++ __FUNCTION__, scsi_sglist(pcmd), scsi_sg_count(pcmd)));
+ }
+ }
+
+@@ -1153,9 +1144,9 @@
+ struct scatterlist *psgl;
+ pSRB->TotalXferredLen = 0;
+ pSRB->SGIndex = 0;
+- if (pcmd->use_sg) {
++ if (scsi_sg_count(pcmd)) {
+ size_t saved;
+- pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
++ pSRB->pSegmentList = scsi_sglist(pcmd);
+ psgl = pSRB->pSegmentList;
+ //dc390_pci_sync(pSRB);
+
+@@ -1179,12 +1170,6 @@
+ printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+ pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
+
+- } else if(pcmd->request_buffer) {
+- //dc390_pci_sync(pSRB);
+-
+- sg_dma_len(&pSRB->Segmentx) = pcmd->request_bufflen - pSRB->Saved_Ptr;
+- pSRB->SGcount = 1;
+- pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
+ } else {
+ pSRB->SGcount = 0;
+ printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+@@ -1579,7 +1564,8 @@
+ if( (pSRB->SRBState & (SRB_START_+SRB_MSGOUT)) ||
+ !(pSRB->SRBState & (SRB_DISCONNECT+SRB_COMPLETED)) )
+ { /* Selection time out */
+- pSRB->TargetStatus = SCSI_STAT_SEL_TIMEOUT;
++ pSRB->AdaptStatus = H_SEL_TIMEOUT;
++ pSRB->TargetStatus = 0;
+ goto disc1;
+ }
+ else if (!(pSRB->SRBState & SRB_DISCONNECT) && (pSRB->SRBState & SRB_COMPLETED))
+@@ -1612,7 +1598,7 @@
+ if( !( pACB->scan_devices ) )
+ {
+ struct scsi_cmnd *pcmd = pSRB->pcmd;
+- pcmd->resid = pcmd->request_bufflen;
++ scsi_set_resid(pcmd, scsi_bufflen(pcmd));
+ SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
+ dc390_Going_remove(pDCB, pSRB);
+ dc390_Free_insert(pACB, pSRB);
+@@ -1695,7 +1681,7 @@
+ pcmd->cmnd[0], pDCB->TargetID, pDCB->TargetLUN));
+
+ pSRB->SRBFlag |= AUTO_REQSENSE;
+- pSRB->SavedSGCount = pcmd->use_sg;
++ pSRB->SavedSGCount = scsi_sg_count(pcmd);
+ pSRB->SavedTotXLen = pSRB->TotalXferredLen;
+ pSRB->AdaptStatus = 0;
+ pSRB->TargetStatus = 0; /* CHECK_CONDITION<<1; */
+@@ -1728,22 +1714,22 @@
+ { /* Last command was a Request Sense */
+ pSRB->SRBFlag &= ~AUTO_REQSENSE;
+ pSRB->AdaptStatus = 0;
+- pSRB->TargetStatus = CHECK_CONDITION << 1;
++ pSRB->TargetStatus = SAM_STAT_CHECK_CONDITION;
+
+ //pcmd->result = MK_RES(DRIVER_SENSE,DID_OK,0,status);
+- if (status == (CHECK_CONDITION << 1))
++ if (status == SAM_STAT_CHECK_CONDITION)
+ pcmd->result = MK_RES_LNX(0, DID_BAD_TARGET, 0, /*CHECK_CONDITION*/0);
+ else /* Retry */
+ {
+ if( pSRB->pcmd->cmnd[0] == TEST_UNIT_READY /* || pSRB->pcmd->cmnd[0] == START_STOP */)
+ {
+ /* Don't retry on TEST_UNIT_READY */
+- pcmd->result = MK_RES_LNX(DRIVER_SENSE,DID_OK,0,CHECK_CONDITION);
++ pcmd->result = MK_RES_LNX(DRIVER_SENSE, DID_OK, 0, SAM_STAT_CHECK_CONDITION);
+ REMOVABLEDEBUG(printk(KERN_INFO "Cmd=%02x, Result=%08x, XferL=%08x\n",pSRB->pcmd->cmnd[0],\
+ (u32) pcmd->result, (u32) pSRB->TotalXferredLen));
+ } else {
+ SET_RES_DRV(pcmd->result, DRIVER_SENSE);
+- pcmd->use_sg = pSRB->SavedSGCount;
++ scsi_sg_count(pcmd) = pSRB->SavedSGCount;
+ //pSRB->ScsiCmdLen = (u8) (pSRB->Segment1[0] >> 8);
+ DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+ pSRB->TotalXferredLen = 0;
+@@ -1754,7 +1740,7 @@
+ }
+ if( status )
+ {
+- if( status_byte(status) == CHECK_CONDITION )
++ if (status == SAM_STAT_CHECK_CONDITION)
+ {
+ if (dc390_RequestSense(pACB, pDCB, pSRB)) {
+ SET_RES_DID(pcmd->result, DID_ERROR);
+@@ -1762,22 +1748,15 @@
+ }
+ return;
+ }
+- else if( status_byte(status) == QUEUE_FULL )
++ else if (status == SAM_STAT_TASK_SET_FULL)
+ {
+ scsi_track_queue_full(pcmd->device, pDCB->GoingSRBCnt - 1);
+- pcmd->use_sg = pSRB->SavedSGCount;
++ scsi_sg_count(pcmd) = pSRB->SavedSGCount;
+ DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+ pSRB->TotalXferredLen = 0;
+ SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
+ }
+- else if(status == SCSI_STAT_SEL_TIMEOUT)
+- {
+- pSRB->AdaptStatus = H_SEL_TIMEOUT;
+- pSRB->TargetStatus = 0;
+- pcmd->result = MK_RES(0,DID_NO_CONNECT,0,0);
+- /* Devices are removed below ... */
+- }
+- else if (status_byte(status) == BUSY &&
++ else if (status == SAM_STAT_BUSY &&
+ (pcmd->cmnd[0] == TEST_UNIT_READY || pcmd->cmnd[0] == INQUIRY) &&
+ pACB->scan_devices)
+ {
+@@ -1795,12 +1774,17 @@
+ else
+ { /* Target status == 0 */
+ status = pSRB->AdaptStatus;
+- if(status & H_OVER_UNDER_RUN)
++ if (status == H_OVER_UNDER_RUN)
+ {
+ pSRB->TargetStatus = 0;
+ SET_RES_DID(pcmd->result,DID_OK);
+ SET_RES_MSG(pcmd->result,pSRB->EndMessage);
+ }
++ else if (status == H_SEL_TIMEOUT)
++ {
++ pcmd->result = MK_RES(0, DID_NO_CONNECT, 0, 0);
++ /* Devices are removed below ... */
++ }
+ else if( pSRB->SRBStatus & PARITY_ERROR)
+ {
+ //pcmd->result = MK_RES(0,DID_PARITY,pSRB->EndMessage,0);
+@@ -1816,7 +1800,7 @@
+ }
+
+ cmd_done:
+- pcmd->resid = pcmd->request_bufflen - pSRB->TotalXferredLen;
++ scsi_set_resid(pcmd, scsi_bufflen(pcmd) - pSRB->TotalXferredLen);
+
+ dc390_Going_remove (pDCB, pSRB);
+ /* Add to free list */
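
The tmscsim hunks above are one instance of the conversion this patch applies across drivers/scsi: instead of touching cmd->use_sg, cmd->request_buffer and pci_map_sg() directly, a driver calls the scsi_dma_map()/scsi_dma_unmap() accessors and keeps only the returned segment count. A minimal sketch of the pattern, assuming those accessors are available; 'struct my_srb' is illustration only, not a structure from this patch:

/* Minimal sketch of the accessor-based mapping pattern used above. */
#include <linux/errno.h>
#include <scsi/scsi_cmnd.h>

struct my_srb {
	struct scatterlist *sglist;
	int sg_count;
};

static int my_map_cmd(struct scsi_cmnd *cmd, struct my_srb *srb)
{
	int nseg = scsi_dma_map(cmd);	/* 0: no data, < 0: mapping error */

	if (nseg < 0)
		return -EIO;
	srb->sglist = scsi_sglist(cmd);
	srb->sg_count = nseg;
	return 0;
}

static void my_unmap_cmd(struct scsi_cmnd *cmd)
{
	scsi_dma_unmap(cmd);	/* no-op when nothing was mapped */
}

The negative return from scsi_dma_map() replaces the old zero-segment check, which is why the tmscsim hunk now tests nseg < 0 instead of !pSRB->SGcount.
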
+diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.h linux-2.6.22-591/drivers/scsi/tmscsim.h
+--- linux-2.6.22-570/drivers/scsi/tmscsim.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/tmscsim.h 2007-12-21 15:36:12.000000000 -0500
+@@ -258,13 +258,6 @@
+ #define H_BAD_CCB_OR_SG 0x1A
+ #define H_ABORT 0x0FF
+
+-/*; SCSI Status byte codes*/
+-/* The values defined in include/scsi/scsi.h, to be shifted << 1 */
+-
+-#define SCSI_STAT_UNEXP_BUS_F 0xFD /*; Unexpect Bus Free */
+-#define SCSI_STAT_BUS_RST_DETECT 0xFE /*; Scsi Bus Reset detected */
+-#define SCSI_STAT_SEL_TIMEOUT 0xFF /*; Selection Time out */
+-
+ /* cmd->result */
+ #define RES_TARGET 0x000000FF /* Target State */
+ #define RES_TARGET_LNX STATUS_MASK /* Only official ... */
+@@ -273,7 +266,7 @@
+ #define RES_DRV 0xFF000000 /* DRIVER_ codes */
+
+ #define MK_RES(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt))
+-#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)<<1)
++#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt))
+
+ #define SET_RES_TARGET(who, tgt) do { who &= ~RES_TARGET; who |= (int)(tgt); } while (0)
+ #define SET_RES_TARGET_LNX(who, tgt) do { who &= ~RES_TARGET_LNX; who |= (int)(tgt) << 1; } while (0)
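
Dropping the <<1 in MK_RES_LNX is safe because callers now pass SAM_STAT_* codes, which are the already-shifted forms of the old status_byte() values. A standalone check of that equivalence; macro bodies are copied from the hunk above, constants from include/scsi/scsi.h (DRIVER_SENSE 0x08, CHECK_CONDITION 0x01, SAM_STAT_CHECK_CONDITION 0x02):

#include <assert.h>

#define MK_RES_OLD(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)<<1)
#define MK_RES_NEW(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt))

int main(void)
{
	assert(MK_RES_OLD(0x08, 0, 0, 0x01) == MK_RES_NEW(0x08, 0, 0, 0x02));
	return 0;	/* both evaluate to 0x08000002 */
}
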
+diff -Nurb linux-2.6.22-570/drivers/scsi/u14-34f.c linux-2.6.22-591/drivers/scsi/u14-34f.c
+--- linux-2.6.22-570/drivers/scsi/u14-34f.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/u14-34f.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1111,7 +1111,7 @@
+ static void map_dma(unsigned int i, unsigned int j) {
+ unsigned int data_len = 0;
+ unsigned int k, count, pci_dir;
+- struct scatterlist *sgpnt;
++ struct scatterlist *sg;
+ struct mscp *cpp;
+ struct scsi_cmnd *SCpnt;
+
+@@ -1124,33 +1124,28 @@
+
+ cpp->sense_len = sizeof SCpnt->sense_buffer;
+
+- if (!SCpnt->use_sg) {
+-
+- /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */
+- if (!SCpnt->request_bufflen) pci_dir = PCI_DMA_BIDIRECTIONAL;
+-
+- if (SCpnt->request_buffer)
+- cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev,
+- SCpnt->request_buffer, SCpnt->request_bufflen, pci_dir));
+-
+- cpp->data_len = H2DEV(SCpnt->request_bufflen);
+- return;
+- }
+-
+- sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+- count = pci_map_sg(HD(j)->pdev, sgpnt, SCpnt->use_sg, pci_dir);
+-
+- for (k = 0; k < count; k++) {
+- cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k]));
+- cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k]));
+- data_len += sgpnt[k].length;
++ if (scsi_bufflen(SCpnt)) {
++ count = scsi_dma_map(SCpnt);
++ BUG_ON(count < 0);
++
++ scsi_for_each_sg(SCpnt, sg, count, k) {
++ cpp->sglist[k].address = H2DEV(sg_dma_address(sg));
++ cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg));
++ data_len += sg->length;
+ }
+
+ cpp->sg = TRUE;
+- cpp->use_sg = SCpnt->use_sg;
+- cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist,
+- SCpnt->use_sg * sizeof(struct sg_list), pci_dir));
++ cpp->use_sg = scsi_sg_count(SCpnt);
++ cpp->data_address =
++ H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist,
++ cpp->use_sg * sizeof(struct sg_list),
++ pci_dir));
+ cpp->data_len = H2DEV(data_len);
++
++ } else {
++ pci_dir = PCI_DMA_BIDIRECTIONAL;
++ cpp->data_len = H2DEV(scsi_bufflen(SCpnt));
++ }
+ }
+
+ static void unmap_dma(unsigned int i, unsigned int j) {
+@@ -1165,8 +1160,7 @@
+ pci_unmap_single(HD(j)->pdev, DEV2H(cpp->sense_addr),
+ DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+
+- if (SCpnt->use_sg)
+- pci_unmap_sg(HD(j)->pdev, SCpnt->request_buffer, SCpnt->use_sg, pci_dir);
++ scsi_dma_unmap(SCpnt);
+
+ if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
+
+@@ -1187,9 +1181,9 @@
+ pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr),
+ DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+
+- if (SCpnt->use_sg)
+- pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer,
+- SCpnt->use_sg, pci_dir);
++ if (scsi_sg_count(SCpnt))
++ pci_dma_sync_sg_for_cpu(HD(j)->pdev, scsi_sglist(SCpnt),
++ scsi_sg_count(SCpnt), pci_dir);
+
+ if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
+
+diff -Nurb linux-2.6.22-570/drivers/scsi/ultrastor.c linux-2.6.22-591/drivers/scsi/ultrastor.c
+--- linux-2.6.22-570/drivers/scsi/ultrastor.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/ultrastor.c 2007-12-21 15:36:12.000000000 -0500
+@@ -675,16 +675,15 @@
+
+ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
+ {
+- struct scatterlist *sl;
++ struct scatterlist *sg;
+ long transfer_length = 0;
+ int i, max;
+
+- sl = (struct scatterlist *) SCpnt->request_buffer;
+- max = SCpnt->use_sg;
+- for (i = 0; i < max; i++) {
+- mscp->sglist[i].address = isa_page_to_bus(sl[i].page) + sl[i].offset;
+- mscp->sglist[i].num_bytes = sl[i].length;
+- transfer_length += sl[i].length;
++ max = scsi_sg_count(SCpnt);
++ scsi_for_each_sg(SCpnt, sg, max, i) {
++ mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset;
++ mscp->sglist[i].num_bytes = sg->length;
++ transfer_length += sg->length;
+ }
+ mscp->number_of_sg_list = max;
+ mscp->transfer_data = isa_virt_to_bus(mscp->sglist);
+@@ -730,15 +729,15 @@
+ my_mscp->target_id = SCpnt->device->id;
+ my_mscp->ch_no = 0;
+ my_mscp->lun = SCpnt->device->lun;
+- if (SCpnt->use_sg) {
++ if (scsi_sg_count(SCpnt)) {
+ /* Set scatter/gather flag in SCSI command packet */
+ my_mscp->sg = TRUE;
+ build_sg_list(my_mscp, SCpnt);
+ } else {
+ /* Unset scatter/gather flag in SCSI command packet */
+ my_mscp->sg = FALSE;
+- my_mscp->transfer_data = isa_virt_to_bus(SCpnt->request_buffer);
+- my_mscp->transfer_data_length = SCpnt->request_bufflen;
++ my_mscp->transfer_data = isa_virt_to_bus(scsi_sglist(SCpnt));
++ my_mscp->transfer_data_length = scsi_bufflen(SCpnt);
+ }
+ my_mscp->command_link = 0; /*???*/
+ my_mscp->scsi_command_link_id = 0; /*???*/
+diff -Nurb linux-2.6.22-570/drivers/scsi/wd7000.c linux-2.6.22-591/drivers/scsi/wd7000.c
+--- linux-2.6.22-570/drivers/scsi/wd7000.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/wd7000.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1091,6 +1091,7 @@
+ unchar *cdb = (unchar *) SCpnt->cmnd;
+ unchar idlun;
+ short cdblen;
++ int nseg;
+ Adapter *host = (Adapter *) SCpnt->device->host->hostdata;
+
+ cdblen = SCpnt->cmd_len;
+@@ -1106,28 +1107,29 @@
+ SCpnt->host_scribble = (unchar *) scb;
+ scb->host = host;
+
+- if (SCpnt->use_sg) {
+- struct scatterlist *sg = (struct scatterlist *) SCpnt->request_buffer;
++ nseg = scsi_sg_count(SCpnt);
++ if (nseg) {
++ struct scatterlist *sg;
+ unsigned i;
+
+ if (SCpnt->device->host->sg_tablesize == SG_NONE) {
+ panic("wd7000_queuecommand: scatter/gather not supported.\n");
+ }
+- dprintk("Using scatter/gather with %d elements.\n", SCpnt->use_sg);
++ dprintk("Using scatter/gather with %d elements.\n", nseg);
+
+ sgb = scb->sgb;
+ scb->op = 1;
+ any2scsi(scb->dataptr, (int) sgb);
+- any2scsi(scb->maxlen, SCpnt->use_sg * sizeof(Sgb));
++ any2scsi(scb->maxlen, nseg * sizeof(Sgb));
+
+- for (i = 0; i < SCpnt->use_sg; i++) {
+- any2scsi(sgb[i].ptr, isa_page_to_bus(sg[i].page) + sg[i].offset);
+- any2scsi(sgb[i].len, sg[i].length);
++ scsi_for_each_sg(SCpnt, sg, nseg, i) {
++ any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset);
++ any2scsi(sgb[i].len, sg->length);
+ }
+ } else {
+ scb->op = 0;
+- any2scsi(scb->dataptr, isa_virt_to_bus(SCpnt->request_buffer));
+- any2scsi(scb->maxlen, SCpnt->request_bufflen);
++ any2scsi(scb->dataptr, isa_virt_to_bus(scsi_sglist(SCpnt)));
++ any2scsi(scb->maxlen, scsi_bufflen(SCpnt));
+ }
+
+ /* FIXME: drop lock and yield here ? */
+diff -Nurb linux-2.6.22-570/drivers/scsi/zorro7xx.c linux-2.6.22-591/drivers/scsi/zorro7xx.c
+--- linux-2.6.22-570/drivers/scsi/zorro7xx.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/scsi/zorro7xx.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,180 @@
++/*
++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
++ * Amiga MacroSystemUS WarpEngine SCSI controller.
++ * Amiga Technologies/DKB A4091 SCSI controller.
++ *
++ * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
++ * plus modifications of the 53c7xx.c driver to support the Amiga.
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/zorro.h>
++#include <asm/amigaints.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("Amiga Zorro NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++
++static struct scsi_host_template zorro7xx_scsi_driver_template = {
++ .proc_name = "zorro7xx",
++ .this_id = 7,
++ .module = THIS_MODULE,
++};
++
++static struct zorro_driver_data {
++ const char *name;
++ unsigned long offset;
++ int absolute; /* offset is absolute address */
++} zorro7xx_driver_data[] __devinitdata = {
++ { .name = "PowerUP 603e+", .offset = 0xf40000, .absolute = 1 },
++ { .name = "WarpEngine 40xx", .offset = 0x40000 },
++ { .name = "A4091", .offset = 0x800000 },
++ { .name = "GForce 040/060", .offset = 0x40000 },
++ { 0 }
++};
++
++static struct zorro_device_id zorro7xx_zorro_tbl[] __devinitdata = {
++ {
++ .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS,
++ .driver_data = (unsigned long)&zorro7xx_driver_data[0],
++ },
++ {
++ .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx,
++ .driver_data = (unsigned long)&zorro7xx_driver_data[1],
++ },
++ {
++ .id = ZORRO_PROD_CBM_A4091_1,
++ .driver_data = (unsigned long)&zorro7xx_driver_data[2],
++ },
++ {
++ .id = ZORRO_PROD_CBM_A4091_2,
++ .driver_data = (unsigned long)&zorro7xx_driver_data[2],
++ },
++ {
++ .id = ZORRO_PROD_GVP_GFORCE_040_060,
++ .driver_data = (unsigned long)&zorro7xx_driver_data[3],
++ },
++ { 0 }
++};
++
++static int __devinit zorro7xx_init_one(struct zorro_dev *z,
++ const struct zorro_device_id *ent)
++{
++ struct Scsi_Host * host = NULL;
++ struct NCR_700_Host_Parameters *hostdata;
++ struct zorro_driver_data *zdd;
++ unsigned long board, ioaddr;
++
++ board = zorro_resource_start(z);
++ zdd = (struct zorro_driver_data *)ent->driver_data;
++
++ if (zdd->absolute) {
++ ioaddr = zdd->offset;
++ } else {
++ ioaddr = board + zdd->offset;
++ }
++
++ if (!zorro_request_device(z, zdd->name)) {
++ printk(KERN_ERR "zorro7xx: cannot reserve region 0x%lx, abort\n",
++ board);
++ return -EBUSY;
++ }
++
++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++ if (hostdata == NULL) {
++ printk(KERN_ERR "zorro7xx: Failed to allocate host data\n");
++ goto out_release;
++ }
++
++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++ /* Fill in the required pieces of hostdata */
++ if (ioaddr > 0x01000000)
++ hostdata->base = ioremap(ioaddr, zorro_resource_len(z));
++ else
++ hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr);
++
++ hostdata->clock = 50;
++ hostdata->chip710 = 1;
++
++ /* Settings for at least WarpEngine 40xx */
++ hostdata->ctest7_extra = CTEST7_TT1;
++
++ zorro7xx_scsi_driver_template.name = zdd->name;
++
++ /* and register the chip */
++ host = NCR_700_detect(&zorro7xx_scsi_driver_template, hostdata,
++ &z->dev);
++ if (!host) {
++ printk(KERN_ERR "zorro7xx: No host detected; "
++ "board configuration problem?\n");
++ goto out_free;
++ }
++
++ host->this_id = 7;
++ host->base = ioaddr;
++ host->irq = IRQ_AMIGA_PORTS;
++
++ if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "zorro7xx-scsi",
++ host)) {
++ printk(KERN_ERR "zorro7xx: request_irq failed\n");
++ goto out_put_host;
++ }
++
++ scsi_scan_host(host);
++
++ return 0;
++
++ out_put_host:
++ scsi_host_put(host);
++ out_free:
++ if (ioaddr > 0x01000000)
++ iounmap(hostdata->base);
++ kfree(hostdata);
++ out_release:
++ zorro_release_device(z);
++
++ return -ENODEV;
++}
++
++static __devexit void zorro7xx_remove_one(struct zorro_dev *z)
++{
++ struct Scsi_Host *host = dev_to_shost(&z->dev);
++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++ scsi_remove_host(host);
++
++ NCR_700_release(host);
++ kfree(hostdata);
++ free_irq(host->irq, host);
++ zorro_release_device(z);
++}
++
++static struct zorro_driver zorro7xx_driver = {
++ .name = "zorro7xx-scsi",
++ .id_table = zorro7xx_zorro_tbl,
++ .probe = zorro7xx_init_one,
++ .remove = __devexit_p(zorro7xx_remove_one),
++};
++
++static int __init zorro7xx_scsi_init(void)
++{
++ return zorro_register_driver(&zorro7xx_driver);
++}
++
++static void __exit zorro7xx_scsi_exit(void)
++{
++ zorro_unregister_driver(&zorro7xx_driver);
++}
++
++module_init(zorro7xx_scsi_init);
++module_exit(zorro7xx_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/serial/8250.c linux-2.6.22-591/drivers/serial/8250.c
+--- linux-2.6.22-570/drivers/serial/8250.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/8250.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2845,6 +2845,25 @@
+ }
+ EXPORT_SYMBOL(serial8250_unregister_port);
+
++/**
++ * serial8250_unregister_by_port - remove a 16x50 serial port
++ * at runtime.
++ * @port: A &struct uart_port that describes the port to remove.
++ *
++ * Remove one serial port. This may not be called from interrupt
++ * context. We hand the port back to our control.
++ */
++void serial8250_unregister_by_port(struct uart_port *port)
++{
++ struct uart_8250_port *uart;
++
++ uart = serial8250_find_match_or_unused(port);
++
++ if (uart)
++ serial8250_unregister_port(uart->port.line);
++}
++EXPORT_SYMBOL(serial8250_unregister_by_port);
++
+ static int __init serial8250_init(void)
+ {
+ int ret, i;
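
serial8250_unregister_by_port() matches on the uart_port contents rather than a line number, so a caller only needs a description of the port it wants to take over; the 8250_kgdb.c stub added below uses it exactly this way. A hypothetical caller sketch, with made-up port values:

#include <linux/serial_core.h>

extern void serial8250_unregister_by_port(struct uart_port *port);

static struct uart_port dbg_port = {
	.iobase = 0x3f8,	/* assumed: must match the registered port */
	.iotype = UPIO_PORT,
};

static void dbg_claim_uart(void)
{
	serial8250_unregister_by_port(&dbg_port);
}
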
+diff -Nurb linux-2.6.22-570/drivers/serial/8250_kgdb.c linux-2.6.22-591/drivers/serial/8250_kgdb.c
+--- linux-2.6.22-570/drivers/serial/8250_kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/8250_kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,515 @@
++/*
++ * 8250 interface for kgdb.
++ *
++ * This merges code from many different drivers, and all of these people
++ * have had an impact in some form or another:
++ *
++ * 2004-2005 (c) MontaVista Software, Inc.
++ * 2005-2006 (c) Wind River Systems, Inc.
++ *
++ * Amit Kale <amitkale@emsyssoft.com>, David Grothe <dave@gcom.com>,
++ * Scott Foehner <sfoehner@engr.sgi.com>, George Anzinger <george@mvista.com>,
++ * Robert Walsh <rjwalsh@durables.org>, wangdi <wangdi@clusterfs.com>,
++ * San Mehat, Tom Rini <trini@mvista.com>,
++ * Jason Wessel <jason.wessel@windriver.com>
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/serial.h>
++#include <linux/serial_reg.h>
++#include <linux/serialP.h>
++#include <linux/ioport.h>
++
++#include <asm/io.h>
++#include <asm/serial.h> /* For BASE_BAUD and SERIAL_PORT_DFNS */
++
++#include "8250.h"
++
++#define GDB_BUF_SIZE 512 /* power of 2, please */
++
++MODULE_DESCRIPTION("KGDB driver for the 8250");
++MODULE_LICENSE("GPL");
++/* These will conflict with early_param otherwise. */
++#ifdef CONFIG_KGDB_8250_MODULE
++static char config[256];
++module_param_string(kgdb8250, config, 256, 0);
++MODULE_PARM_DESC(kgdb8250,
++ " kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>\n");
++static struct kgdb_io local_kgdb_io_ops;
++#endif /* CONFIG_KGDB_8250_MODULE */
++
++/* Speed of the UART. */
++static int kgdb8250_baud;
++
++/* Flag for if we need to call request_mem_region */
++static int kgdb8250_needs_request_mem_region;
++
++static char kgdb8250_buf[GDB_BUF_SIZE];
++static atomic_t kgdb8250_buf_in_cnt;
++static int kgdb8250_buf_out_inx;
++
++/* Old-style serial definitions, if present, and a counter. */
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++static int __initdata should_copy_rs_table = 1;
++static struct serial_state old_rs_table[] __initdata = {
++#ifdef SERIAL_PORT_DFNS
++ SERIAL_PORT_DFNS
++#endif
++};
++#endif
++
++/* Our internal table of UARTS. */
++#define UART_NR CONFIG_SERIAL_8250_NR_UARTS
++static struct uart_port kgdb8250_ports[UART_NR];
++
++static struct uart_port *current_port;
++
++/* Base of the UART. */
++static void *kgdb8250_addr;
++
++/* Forward declarations. */
++static int kgdb8250_uart_init(void);
++static int __init kgdb_init_io(void);
++static int __init kgdb8250_opt(char *str);
++
++/* These are much shorter calls to ioread8/iowrite8 that take into
++ * account our shifts, etc. */
++static inline unsigned int kgdb_ioread(u8 mask)
++{
++ return ioread8(kgdb8250_addr + (mask << current_port->regshift));
++}
++
++static inline void kgdb_iowrite(u8 val, u8 mask)
++{
++ iowrite8(val, kgdb8250_addr + (mask << current_port->regshift));
++}
++
++/*
++ * Wait until the interface can accept a char, then write it.
++ */
++static void kgdb_put_debug_char(u8 chr)
++{
++ while (!(kgdb_ioread(UART_LSR) & UART_LSR_THRE)) ;
++
++ kgdb_iowrite(chr, UART_TX);
++}
++
++/*
++ * Get a byte from the hardware data buffer and return it
++ */
++static int read_data_bfr(void)
++{
++ char it = kgdb_ioread(UART_LSR);
++
++ if (it & UART_LSR_DR)
++ return kgdb_ioread(UART_RX);
++
++ /*
++ * If we have a framing error assume somebody messed with
++ * our uart. Reprogram it and send '-' both ways...
++ */
++ if (it & 0xc) {
++ kgdb8250_uart_init();
++ kgdb_put_debug_char('-');
++ return '-';
++ }
++
++ return -1;
++}
++
++/*
++ * Get a char if available, return -1 if nothing available.
++ * Empty the receive buffer first, then look at the interface hardware.
++ */
++static int kgdb_get_debug_char(void)
++{
++ int retchr;
++
++ /* the interrupt routine has queued chars */
++ if (atomic_read(&kgdb8250_buf_in_cnt) != 0) {
++ retchr = kgdb8250_buf[kgdb8250_buf_out_inx++];
++ kgdb8250_buf_out_inx &= (GDB_BUF_SIZE - 1);
++ atomic_dec(&kgdb8250_buf_in_cnt);
++ return retchr;
++ }
++
++ do {
++ retchr = read_data_bfr();
++ } while (retchr < 0);
++
++ return retchr;
++}
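
The receive queue above depends on GDB_BUF_SIZE being a power of two ("power of 2, please") so the out-index can wrap with a cheap mask instead of a modulo. A standalone illustration of that wrap:

#include <assert.h>

#define BUF_SIZE 512			/* must be a power of 2 */

int main(void)
{
	unsigned idx = 510;
	int i;

	for (i = 0; i < 4; i++) {
		idx = (idx + 1) & (BUF_SIZE - 1);	/* equals idx % BUF_SIZE */
		assert(idx < BUF_SIZE);
	}
	return 0;		/* idx stepped 511, 0, 1, 2 */
}
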
++
++/*
++ * This is the receiver interrupt routine for the GDB stub.
++ * All that we need to do is verify that the interrupt happened on the
++ * line we're in charge of. If this is true, schedule a breakpoint and
++ * return.
++ */
++static irqreturn_t
++kgdb8250_interrupt(int irq, void *dev_id)
++{
++ if (kgdb_ioread(UART_IIR) & UART_IIR_RDI) {
++ /* Throw away the data if another I/O routine is active. */
++ if (kgdb_io_ops.read_char != kgdb_get_debug_char &&
++ (kgdb_ioread(UART_LSR) & UART_LSR_DR))
++ kgdb_ioread(UART_RX);
++ else
++ breakpoint();
++ }
++
++ return IRQ_HANDLED;
++}
++
++/*
++ * Initializes the UART.
++ * Returns:
++ * 0 on success, 1 on failure.
++ */
++static int
++kgdb8250_uart_init(void)
++{
++ unsigned int ier, base_baud = current_port->uartclk ?
++ current_port->uartclk / 16 : BASE_BAUD;
++
++ /* test UART existence */
++ if (kgdb_ioread(UART_LSR) == 0xff)
++ return -1;
++
++ /* disable interrupts */
++ kgdb_iowrite(0, UART_IER);
++
++#if defined(CONFIG_ARCH_OMAP1510)
++ /* Workaround to enable 115200 baud on OMAP1510 internal ports */
++ if (cpu_is_omap1510() && is_omap_port((void *)kgdb8250_addr)) {
++ if (kgdb8250_baud == 115200) {
++ base_baud = 1;
++ kgdb8250_baud = 1;
++ kgdb_iowrite(1, UART_OMAP_OSC_12M_SEL);
++ } else
++ kgdb_iowrite(0, UART_OMAP_OSC_12M_SEL);
++ }
++#endif
++ /* set DLAB */
++ kgdb_iowrite(UART_LCR_DLAB, UART_LCR);
++
++ /* set baud */
++ kgdb_iowrite((base_baud / kgdb8250_baud) & 0xff, UART_DLL);
++ kgdb_iowrite((base_baud / kgdb8250_baud) >> 8, UART_DLM);
++
++ /* reset DLAB, set LCR */
++ kgdb_iowrite(UART_LCR_WLEN8, UART_LCR);
++
++ /* set DTR and RTS */
++ kgdb_iowrite(UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS, UART_MCR);
++
++ /* setup fifo */
++ kgdb_iowrite(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR
++ | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_8,
++ UART_FCR);
++
++ /* clear pending interrupts */
++ kgdb_ioread(UART_IIR);
++ kgdb_ioread(UART_RX);
++ kgdb_ioread(UART_LSR);
++ kgdb_ioread(UART_MSR);
++
++ /* turn on RX interrupt only */
++ kgdb_iowrite(UART_IER_RDI, UART_IER);
++
++ /*
++ * Borrowed from the main 8250 driver.
++ * Try writing and reading the UART_IER_UUE bit (b6).
++ * If it works, this is probably one of the Xscale platform's
++ * internal UARTs.
++ * We're going to explicitly set the UUE bit to 0 before
++ * trying to write and read a 1 just to make sure it's not
++ * already a 1 and maybe locked there before we even start.
++ */
++ ier = kgdb_ioread(UART_IER);
++ kgdb_iowrite(ier & ~UART_IER_UUE, UART_IER);
++ if (!(kgdb_ioread(UART_IER) & UART_IER_UUE)) {
++ /*
++ * OK it's in a known zero state, try writing and reading
++ * without disturbing the current state of the other bits.
++ */
++ kgdb_iowrite(ier | UART_IER_UUE, UART_IER);
++ if (kgdb_ioread(UART_IER) & UART_IER_UUE)
++ /*
++ * It's an Xscale.
++ */
++ ier |= UART_IER_UUE | UART_IER_RTOIE;
++ }
++ kgdb_iowrite(ier, UART_IER);
++ return 0;
++}
++
++/*
++ * Copy the old serial_state table to our uart_port table if we haven't
++ * had values specifically configured in. We need to make sure this only
++ * happens once.
++ */
++static void __init kgdb8250_copy_rs_table(void)
++{
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++ int i;
++
++ if (!should_copy_rs_table)
++ return;
++
++ for (i = 0; i < ARRAY_SIZE(old_rs_table); i++) {
++ kgdb8250_ports[i].iobase = old_rs_table[i].port;
++ kgdb8250_ports[i].irq = irq_canonicalize(old_rs_table[i].irq);
++ kgdb8250_ports[i].uartclk = old_rs_table[i].baud_base * 16;
++ kgdb8250_ports[i].membase = old_rs_table[i].iomem_base;
++ kgdb8250_ports[i].iotype = old_rs_table[i].io_type;
++ kgdb8250_ports[i].regshift = old_rs_table[i].iomem_reg_shift;
++ kgdb8250_ports[i].line = i;
++ }
++
++ should_copy_rs_table = 0;
++#endif
++}
++
++/*
++ * Hookup our IRQ line now that it is safe to do so, after we grab any
++ * memory regions we might need to. If we haven't been initialized yet,
++ * go ahead and copy the old_rs_table in.
++ */
++static void __init kgdb8250_late_init(void)
++{
++ /* Try and copy the old_rs_table. */
++ kgdb8250_copy_rs_table();
++
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE)
++ /* Take the port away from the main driver. */
++ serial8250_unregister_by_port(current_port);
++
++ /* Now reinit the port as the above has disabled things. */
++ kgdb8250_uart_init();
++#endif
++ /* We may need to call request_mem_region() first. */
++ if (kgdb8250_needs_request_mem_region)
++ request_mem_region(current_port->mapbase,
++ 8 << current_port->regshift, "kgdb");
++ if (request_irq(current_port->irq, kgdb8250_interrupt, IRQF_SHARED,
++ "GDB-stub", current_port) < 0)
++ printk(KERN_ERR "KGDB failed to request the serial IRQ (%d)\n",
++ current_port->irq);
++}
++
++static __init int kgdb_init_io(void)
++{
++ /* Give us the basic table of uarts. */
++ kgdb8250_copy_rs_table();
++
++ /* We're either a module and parse a config string, or we have a
++ * semi-static config. */
++#ifdef CONFIG_KGDB_8250_MODULE
++ if (strlen(config)) {
++ if (kgdb8250_opt(config))
++ return -EINVAL;
++ } else {
++ printk(KERN_ERR "kgdb8250: argument error, usage: "
++ "kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>\n");
++ return -EINVAL;
++ }
++#elif defined(CONFIG_KGDB_SIMPLE_SERIAL)
++ kgdb8250_baud = CONFIG_KGDB_BAUDRATE;
++
++ /* Setup our pointer to the serial port now. */
++ current_port = &kgdb8250_ports[CONFIG_KGDB_PORT_NUM];
++#else
++ if (kgdb8250_opt(CONFIG_KGDB_8250_CONF_STRING))
++ return -EINVAL;
++#endif
++
++
++ /* Internal driver setup. */
++ switch (current_port->iotype) {
++ case UPIO_MEM:
++ if (current_port->mapbase)
++ kgdb8250_needs_request_mem_region = 1;
++ if (current_port->flags & UPF_IOREMAP) {
++ current_port->membase = ioremap(current_port->mapbase,
++ 8 << current_port->regshift);
++ if (!current_port->membase)
++ return -EIO; /* Failed. */
++ }
++ kgdb8250_addr = current_port->membase;
++ break;
++ case UPIO_PORT:
++ default:
++ kgdb8250_addr = ioport_map(current_port->iobase,
++ 8 << current_port->regshift);
++ if (!kgdb8250_addr)
++ return -EIO; /* Failed. */
++ }
++
++ if (kgdb8250_uart_init() == -1) {
++ printk(KERN_ERR "kgdb8250: init failed\n");
++ return -EIO;
++ }
++#ifdef CONFIG_KGDB_8250_MODULE
++ /* Attach the kgdb irq. When this is built into the kernel, it
++ * is called as a part of late_init sequence.
++ */
++ kgdb8250_late_init();
++ if (kgdb_register_io_module(&local_kgdb_io_ops))
++ return -EINVAL;
++
++ printk(KERN_INFO "kgdb8250: debugging enabled\n");
++#endif /* CONFIG_KGDB_8250_MODULE */
++
++ return 0;
++}
++
++#ifdef CONFIG_KGDB_8250_MODULE
++/* If it is a module the kgdb_io_ops should be a static which
++ * is passed to the KGDB I/O initialization
++ */
++static struct kgdb_io local_kgdb_io_ops = {
++#else /* ! CONFIG_KGDB_8250_MODULE */
++struct kgdb_io kgdb_io_ops = {
++#endif /* ! CONFIG_KGDB_8250_MODULE */
++ .read_char = kgdb_get_debug_char,
++ .write_char = kgdb_put_debug_char,
++ .init = kgdb_init_io,
++ .late_init = kgdb8250_late_init,
++};
++
++/**
++ * kgdb8250_add_port - Define a serial port for use with KGDB
++ * @i: The index of the port being added
++ * @serial_req: The &struct uart_port describing the port
++ *
++ * On platforms where we must register the serial device
++ * dynamically, this is the best option if a platform also normally
++ * calls early_serial_setup().
++ */
++void __init kgdb8250_add_port(int i, struct uart_port *serial_req)
++{
++ /* Make sure we've got the built-in data before we override. */
++ kgdb8250_copy_rs_table();
++
++ /* Copy the whole thing over. */
++ if (current_port != &kgdb8250_ports[i])
++ memcpy(&kgdb8250_ports[i], serial_req, sizeof(struct uart_port));
++}
++
++/**
++ * kgdb8250_add_platform_port - Define a serial port for use with KGDB
++ * @i: The index of the port being added
++ * @p: The &struct plat_serial8250_port describing the port
++ *
++ * On platforms where we must register the serial device
++ * dynamically, this is the best option if a platform normally
++ * handles uart setup with an array of &struct plat_serial8250_port.
++ */
++void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *p)
++{
++ /* Make sure we've got the built-in data before we override. */
++ kgdb8250_copy_rs_table();
++
++ kgdb8250_ports[i].iobase = p->iobase;
++ kgdb8250_ports[i].membase = p->membase;
++ kgdb8250_ports[i].irq = p->irq;
++ kgdb8250_ports[i].uartclk = p->uartclk;
++ kgdb8250_ports[i].regshift = p->regshift;
++ kgdb8250_ports[i].iotype = p->iotype;
++ kgdb8250_ports[i].flags = p->flags;
++ kgdb8250_ports[i].mapbase = p->mapbase;
++}
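
Both helpers let platform code describe the debug UART to KGDB before the normal serial driver is up. A hypothetical board-setup sketch for kgdb8250_add_port(); the base address, IRQ and clock here are invented example values, not taken from this patch:

#include <linux/init.h>
#include <linux/serial_core.h>

extern void kgdb8250_add_port(int i, struct uart_port *serial_req);

static struct uart_port my_debug_uart = {
	.mapbase  = 0x44e09000,		/* assumed register base */
	.irq      = 72,			/* assumed IRQ */
	.uartclk  = 48000000,
	.iotype   = UPIO_MEM,
	.flags    = UPF_IOREMAP,
	.regshift = 2,
};

static void __init my_board_setup_debug_uart(void)
{
	kgdb8250_add_port(0, &my_debug_uart);
}
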
++
++/*
++ * Syntax for this cmdline option is:
++ * kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>"
++ */
++static int __init kgdb8250_opt(char *str)
++{
++ /* We'll fill out and use the first slot. */
++ current_port = &kgdb8250_ports[0];
++
++ if (!strncmp(str, "io", 2)) {
++ current_port->iotype = UPIO_PORT;
++ str += 2;
++ } else if (!strncmp(str, "mmap", 4)) {
++ current_port->iotype = UPIO_MEM;
++ current_port->flags |= UPF_IOREMAP;
++ str += 4;
++ } else if (!strncmp(str, "mmio", 4)) {
++ current_port->iotype = UPIO_MEM;
++ current_port->flags &= ~UPF_IOREMAP;
++ str += 4;
++ } else
++ goto errout;
++
++ if (*str != ',')
++ goto errout;
++ str++;
++
++ if (current_port->iotype == UPIO_PORT)
++ current_port->iobase = simple_strtoul(str, &str, 16);
++ else {
++ if (current_port->flags & UPF_IOREMAP)
++ current_port->mapbase =
++ (unsigned long) simple_strtoul(str, &str, 16);
++ else
++ current_port->membase =
++ (void *) simple_strtoul(str, &str, 16);
++ }
++
++ if (*str != ',')
++ goto errout;
++ str++;
++
++ kgdb8250_baud = simple_strtoul(str, &str, 10);
++ if (!kgdb8250_baud)
++ goto errout;
++
++ if (*str != ',')
++ goto errout;
++ str++;
++
++ current_port->irq = simple_strtoul(str, &str, 10);
++
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++ should_copy_rs_table = 0;
++#endif
++
++ return 0;
++
++ errout:
++ printk(KERN_ERR "Invalid syntax for option kgdb8250=\n");
++ return 1;
++}
++
++#ifdef CONFIG_KGDB_8250_MODULE
++static void cleanup_kgdb8250(void)
++{
++ kgdb_unregister_io_module(&local_kgdb_io_ops);
++
++ /* Clean up the irq and memory */
++ free_irq(current_port->irq, current_port);
++
++ if (kgdb8250_needs_request_mem_region)
++ release_mem_region(current_port->mapbase,
++ 8 << current_port->regshift);
++ /* Hook up the serial port back to what it was previously
++ * hooked up to.
++ */
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE)
++ /* Give the port back to the 8250 driver. */
++ serial8250_register_port(current_port);
++#endif
++}
++
++module_init(kgdb_init_io);
++module_exit(cleanup_kgdb8250);
++#else /* ! CONFIG_KGDB_8250_MODULE */
++early_param("kgdb8250", kgdb8250_opt);
++#endif /* ! CONFIG_KGDB_8250_MODULE */
+diff -Nurb linux-2.6.22-570/drivers/serial/Kconfig linux-2.6.22-591/drivers/serial/Kconfig
+--- linux-2.6.22-570/drivers/serial/Kconfig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -107,7 +107,7 @@
+
+ config SERIAL_8250_NR_UARTS
+ int "Maximum number of 8250/16550 serial ports"
+- depends on SERIAL_8250
++ depends on SERIAL_8250 || KGDB_8250
+ default "4"
+ help
+ Set this to the number of serial ports you want the driver
+diff -Nurb linux-2.6.22-570/drivers/serial/Makefile linux-2.6.22-591/drivers/serial/Makefile
+--- linux-2.6.22-570/drivers/serial/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -23,6 +23,7 @@
+ obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o
+ obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o
+ obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o
++obj-$(CONFIG_KGDB_AMBA_PL011) += pl011_kgdb.o
+ obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o
+ obj-$(CONFIG_SERIAL_PXA) += pxa.o
+ obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o
+@@ -50,10 +51,12 @@
+ obj-$(CONFIG_SERIAL_MPC52xx) += mpc52xx_uart.o
+ obj-$(CONFIG_SERIAL_ICOM) += icom.o
+ obj-$(CONFIG_SERIAL_M32R_SIO) += m32r_sio.o
++obj-$(CONFIG_KGDB_MPSC) += mpsc_kgdb.o
+ obj-$(CONFIG_SERIAL_MPSC) += mpsc.o
+ obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o
+ obj-$(CONFIG_SERIAL_JSM) += jsm/
+ obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o
++obj-$(CONFIG_KGDB_TXX9) += serial_txx9_kgdb.o
+ obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o
+ obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o
+ obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o
+@@ -62,3 +65,4 @@
+ obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
+ obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
+ obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
++obj-$(CONFIG_KGDB_8250) += 8250_kgdb.o
+diff -Nurb linux-2.6.22-570/drivers/serial/amba-pl011.c linux-2.6.22-591/drivers/serial/amba-pl011.c
+--- linux-2.6.22-570/drivers/serial/amba-pl011.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/amba-pl011.c 2007-12-21 15:36:12.000000000 -0500
+@@ -332,7 +332,7 @@
+ /*
+ * Allocate the IRQ
+ */
+- retval = request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap);
++ retval = request_irq(uap->port.irq, pl011_int, IRQF_SHARED, "uart-pl011", uap);
+ if (retval)
+ goto clk_dis;
+
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/Makefile linux-2.6.22-591/drivers/serial/cpm_uart/Makefile
+--- linux-2.6.22-570/drivers/serial/cpm_uart/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/cpm_uart/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -7,5 +7,6 @@
+ # Select the correct platform objects.
+ cpm_uart-objs-$(CONFIG_CPM2) += cpm_uart_cpm2.o
+ cpm_uart-objs-$(CONFIG_8xx) += cpm_uart_cpm1.o
++cpm_uart-objs-$(CONFIG_KGDB_CPM_UART) += cpm_uart_kgdb.o
+
+ cpm_uart-objs := cpm_uart_core.o $(cpm_uart-objs-y)
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h 2007-12-21 15:36:12.000000000 -0500
+@@ -50,6 +50,41 @@
+
+ #define SCC_WAIT_CLOSING 100
+
++#ifdef CONFIG_KGDB_CPM_UART
++
++/* Speed of the debug UART. */
++#if CONFIG_KGDB_BAUDRATE == 9600
++#define KGDB_BAUD B9600
++#elif CONFIG_KGDB_BAUDRATE == 19200
++#define KGDB_BAUD B19200
++#elif CONFIG_KGDB_BAUDRATE == 38400
++#define KGDB_BAUD B38400
++#elif CONFIG_KGDB_BAUDRATE == 57600
++#define KGDB_BAUD B57600
++#elif CONFIG_KGDB_BAUDRATE == 115200
++#define KGDB_BAUD B115200 /* Start with this if not given */
++#else
++#error Unsupported baud rate!
++#endif
++
++#if defined(CONFIG_KGDB_CPM_UART_SCC1)
++#define KGDB_PINFO_INDEX UART_SCC1
++#elif defined(CONFIG_KGDB_CPM_UART_SCC2)
++#define KGDB_PINFO_INDEX UART_SCC2
++#elif defined(CONFIG_KGDB_CPM_UART_SCC3)
++#define KGDB_PINFO_INDEX UART_SCC3
++#elif defined(CONFIG_KGDB_CPM_UART_SCC4)
++#define KGDB_PINFO_INDEX UART_SCC4
++#elif defined(CONFIG_KGDB_CPM_UART_SMC1)
++#define KGDB_PINFO_INDEX UART_SMC1
++#elif defined(CONFIG_KGDB_CPM_UART_SMC2)
++#define KGDB_PINFO_INDEX UART_SMC2
++#else
++#error The port for KGDB is undefined!
++#endif
++
++#endif /* CONFIG_KGDB_CPM_UART */
++
+ struct uart_cpm_port {
+ struct uart_port port;
+ u16 rx_nrfifos;
+@@ -86,6 +121,9 @@
+ extern int cpm_uart_nr;
+ extern struct uart_cpm_port cpm_uart_ports[UART_NR];
+
++void cpm_uart_early_write(int index, const char *s, u_int count);
++int cpm_uart_early_setup(int index,int early);
++
+ /* these are located in their respective files */
+ void cpm_line_cr_cmd(int line, int cmd);
+ int cpm_uart_init_portdesc(void);
+@@ -132,5 +170,4 @@
+ return 0;
+ }
+
+-
+ #endif /* CPM_UART_H */
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1073,22 +1073,17 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_SERIAL_CPM_CONSOLE
+-/*
+- * Print a string to the serial port trying not to disturb
+- * any possible real use of the port...
+- *
+- * Note that this is called with interrupts already disabled
+- */
+-static void cpm_uart_console_write(struct console *co, const char *s,
++void cpm_uart_early_write(int index, const char *s,
+ u_int count)
+ {
+- struct uart_cpm_port *pinfo =
+- &cpm_uart_ports[cpm_uart_port_map[co->index]];
++ struct uart_cpm_port *pinfo;
+ unsigned int i;
+ volatile cbd_t *bdp, *bdbase;
+ volatile unsigned char *cp;
+
++ BUG_ON(index >= UART_NR);
++ pinfo = &cpm_uart_ports[index];
++
+ /* Get the address of the host memory buffer.
+ */
+ bdp = pinfo->tx_cur;
+@@ -1152,19 +1147,14 @@
+ pinfo->tx_cur = (volatile cbd_t *) bdp;
+ }
+
+-
+-static int __init cpm_uart_console_setup(struct console *co, char *options)
++int cpm_uart_early_setup(int index, int early)
+ {
++ int ret;
+ struct uart_port *port;
+ struct uart_cpm_port *pinfo;
+- int baud = 38400;
+- int bits = 8;
+- int parity = 'n';
+- int flow = 'n';
+- int ret;
+
+ struct fs_uart_platform_info *pdata;
+- struct platform_device* pdev = early_uart_get_pdev(co->index);
++ struct platform_device* pdev = early_uart_get_pdev(index);
+
+ if (!pdev) {
+ pr_info("cpm_uart: console: compat mode\n");
+@@ -1172,8 +1162,9 @@
+ cpm_uart_init_portdesc();
+ }
+
++ BUG_ON(index >= UART_NR);
+ port =
+- (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]];
++ (struct uart_port *)&cpm_uart_ports[index];
+ pinfo = (struct uart_cpm_port *)port;
+ if (!pdev) {
+ if (pinfo->set_lineif)
+@@ -1187,15 +1178,6 @@
+ cpm_uart_drv_get_platform_data(pdev, 1);
+ }
+
+- pinfo->flags |= FLAG_CONSOLE;
+-
+- if (options) {
+- uart_parse_options(options, &baud, &parity, &bits, &flow);
+- } else {
+- if ((baud = uart_baudrate()) == -1)
+- baud = 9600;
+- }
+-
+ if (IS_SMC(pinfo)) {
+ pinfo->smcp->smc_smcm &= ~(SMCM_RX | SMCM_TX);
+ pinfo->smcp->smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
+@@ -1203,8 +1185,7 @@
+ pinfo->sccp->scc_sccm &= ~(UART_SCCM_TX | UART_SCCM_RX);
+ pinfo->sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT);
+ }
+-
+- ret = cpm_uart_allocbuf(pinfo, 1);
++ ret = cpm_uart_allocbuf(pinfo, early);
+
+ if (ret)
+ return ret;
+@@ -1216,6 +1197,62 @@
+ else
+ cpm_uart_init_scc(pinfo);
+
++ return 0;
++}
++
++#ifdef CONFIG_SERIAL_CPM_CONSOLE
++/*
++ * Print a string to the serial port trying not to disturb
++ * any possible real use of the port...
++ *
++ * Note that this is called with interrupts already disabled
++ */
++
++static void cpm_uart_console_write(struct console *co, const char *s,
++ u_int count)
++{
++ cpm_uart_early_write(cpm_uart_port_map[co->index], s, count);
++}
++
++/*
++ * Set up the console. Be careful: this is called early!
++ */
++static int __init cpm_uart_console_setup(struct console *co, char *options)
++{
++ struct uart_port *port;
++ struct uart_cpm_port *pinfo;
++ int baud = 115200;
++ int bits = 8;
++ int parity = 'n';
++ int flow = 'n';
++ int ret;
++
++#ifdef CONFIG_KGDB_CPM_UART
++ /* Skip ports already claimed by kgdb */
++ if (co->index == KGDB_PINFO_INDEX)
++ return 0;
++#endif
++
++ port =
++ (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]];
++ pinfo = (struct uart_cpm_port *)port;
++
++ pinfo->flags |= FLAG_CONSOLE;
++
++ if (options) {
++ uart_parse_options(options, &baud, &parity, &bits, &flow);
++ } else {
++ bd_t *bd = (bd_t *) __res;
++
++ if (bd->bi_baudrate)
++ baud = bd->bi_baudrate;
++ else
++ baud = 9600;
++ }
++
++ ret = cpm_uart_early_setup(cpm_uart_port_map[co->index], 1);
++ if (ret)
++ return ret;
+ uart_set_options(port, co, baud, parity, bits, flow);
+
+ return 0;
+@@ -1266,6 +1303,12 @@
+
+ pdata = pdev->dev.platform_data;
+
++#ifdef CONFIG_KGDB_CPM_UART
++ /* Skip ports already claimed by kgdb */
++ if (cpm_uart_id2nr(fs_uart_get_id(pdata)) == KGDB_PINFO_INDEX)
++ return ret;
++#endif
++
+ if ((ret = cpm_uart_drv_get_platform_data(pdev, 0)))
+ return ret;
+
+@@ -1363,6 +1406,12 @@
+
+ for (i = 0; i < cpm_uart_nr; i++) {
+ int con = cpm_uart_port_map[i];
++
++#ifdef CONFIG_KGDB_CPM_UART
++ /* Skip ports already claimed by kgdb */
++ if (con == KGDB_PINFO_INDEX)
++ continue;
++#endif
+ cpm_uart_ports[con].port.line = i;
+ cpm_uart_ports[con].port.flags = UPF_BOOT_AUTOCONF;
+ if (cpm_uart_ports[con].set_lineif)
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-12-21 15:36:12.000000000 -0500
+@@ -53,6 +53,7 @@
+ {
+ ushort val;
+ volatile cpm8xx_t *cp = cpmp;
++ unsigned *bcsr_io;
+
+ switch (line) {
+ case UART_SMC1:
+@@ -95,12 +96,35 @@
+ {
+ /* XXX SCC1: insert port configuration here */
+ pinfo->brg = 1;
++
++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS)
++ bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
++
++ if (bcsr_io == NULL) {
++ printk(KERN_CRIT "Could not remap BCSR\n");
++ return;
++ }
++ out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_1);
++ iounmap(bcsr_io);
++#endif
+ }
+
+ void scc2_lineif(struct uart_cpm_port *pinfo)
+ {
+ /* XXX SCC2: insert port configuration here */
+ pinfo->brg = 2;
++ unsigned *bcsr_io;
++
++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS)
++ bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
++
++ if (bcsr_io == NULL) {
++ printk(KERN_CRIT "Could not remap BCSR\n");
++ return;
++ }
++ out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_2);
++ iounmap(bcsr_io);
++#endif
+ }
+
+ void scc3_lineif(struct uart_cpm_port *pinfo)
+@@ -189,6 +213,10 @@
+ {
+ pr_debug("CPM uart[-]:init portdesc\n");
+
++ /* Check whether this has already been called; that can happen
++ when an early kgdb breakpoint is set. */
++ if (cpm_uart_nr)
++ return 0;
+ cpm_uart_nr = 0;
+ #ifdef CONFIG_SERIAL_CPM_SMC1
+ cpm_uart_ports[UART_SMC1].smcp = &cpmp->cp_smc[0];
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-12-21 15:36:12.000000000 -0500
+@@ -289,6 +289,10 @@
+ #endif
+ pr_debug("CPM uart[-]:init portdesc\n");
+
++ /* Check whether this has already been called; that can happen
++ when an early kgdb breakpoint is set. */
++ if (cpm_uart_nr)
++ return 0;
+ cpm_uart_nr = 0;
+ #ifdef CONFIG_SERIAL_CPM_SMC1
+ cpm_uart_ports[UART_SMC1].smcp = (smc_t *) cpm2_map(im_smc[0]);
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,185 @@
++/*
++ * drivers/serial/cpm_uart/cpm_uart_kgdb.c
++ *
++ * CPM UART interface for kgdb.
++ *
++ * Author: Vitaly Bordug <vbordug@ru.mvista.com>
++ *
++ * Used some bits from drivers/serial/8250_kgdb.c as a template
++ *
++ * 2005-2007 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++#include <linux/kgdb.h>
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/serial.h>
++#include <linux/serial_core.h>
++#include <linux/serial_reg.h>
++
++#include <asm/io.h>
++#include <asm/serial.h> /* For BASE_BAUD and SERIAL_PORT_DFNS */
++
++#include "cpm_uart.h"
++
++#define GDB_BUF_SIZE 512 /* power of 2, please */
++
++
++static char kgdb_buf[GDB_BUF_SIZE], *kgdbp;
++static int kgdb_chars;
++
++/* Forward declarations. */
++
++/*
++ * Receive character from the serial port. This only works well
++ * before the port is initialize for real use.
++ */
++static int kgdb_wait_key(char *obuf)
++{
++ struct uart_cpm_port *pinfo;
++ u_char c, *cp;
++ volatile cbd_t *bdp;
++ int i;
++
++ pinfo = &cpm_uart_ports[KGDB_PINFO_INDEX];
++
++ /* Get the address of the host memory buffer.
++ */
++ bdp = pinfo->rx_cur;
++ while (bdp->cbd_sc & BD_SC_EMPTY);
++
++ /* If the buffer address is in the CPM DPRAM, don't
++ * convert it.
++ */
++ cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo);
++
++ if (obuf) {
++ i = c = bdp->cbd_datlen;
++ while (i-- > 0)
++ *obuf++ = *cp++;
++ } else
++ c = *cp;
++ bdp->cbd_sc |= BD_SC_EMPTY;
++
++ if (bdp->cbd_sc & BD_SC_WRAP)
++ bdp = pinfo->rx_bd_base;
++ else
++ bdp++;
++ pinfo->rx_cur = (cbd_t *)bdp;
++
++ return (int)c;
++}
++
++
++/*
++ * Wait until the interface can accept a char, then write it.
++ */
++static void kgdb_put_debug_char(u8 chr)
++{
++ static char ch[2];
++
++ ch[0] = (char)chr;
++ cpm_uart_early_write(KGDB_PINFO_INDEX, ch, 1);
++}
++
++
++/*
++ * Get a char if available, return -1 if nothing available.
++ * Empty the receive buffer first, then look at the interface hardware.
++ */
++static int kgdb_get_debug_char(void)
++{
++ if (kgdb_chars <= 0) {
++ kgdb_chars = kgdb_wait_key(kgdb_buf);
++ kgdbp = kgdb_buf;
++ }
++ kgdb_chars--;
++
++ return (*kgdbp++);
++}
++
++static void termios_set_options(int index,
++ int baud, int parity, int bits, int flow)
++{
++ struct ktermios termios;
++ struct uart_port *port;
++ struct uart_cpm_port *pinfo;
++
++ BUG_ON(index >= UART_NR);
++
++ port = (struct uart_port *)&cpm_uart_ports[index];
++ pinfo = (struct uart_cpm_port *)port;
++
++ /*
++ * Ensure that the serial console lock is initialised
++ * early.
++ */
++ spin_lock_init(&port->lock);
++
++ memset(&termios, 0, sizeof(struct ktermios));
++
++ termios.c_cflag = CREAD | HUPCL | CLOCAL;
++
++ termios.c_cflag |= baud;
++
++ if (bits == 7)
++ termios.c_cflag |= CS7;
++ else
++ termios.c_cflag |= CS8;
++
++ switch (parity) {
++ case 'o': case 'O':
++ termios.c_cflag |= PARODD;
++ /*fall through*/
++ case 'e': case 'E':
++ termios.c_cflag |= PARENB;
++ break;
++ }
++
++ if (flow == 'r')
++ termios.c_cflag |= CRTSCTS;
++
++ port->ops->set_termios(port, &termios, NULL);
++}
++
++/*
++ * Returns:
++ * 0 on success, 1 on failure.
++ */
++static int kgdb_init(void)
++{
++ struct uart_port *port;
++ struct uart_cpm_port *pinfo;
++ int use_bootmem = 0; /* use dma by default */
++
++ if (!cpm_uart_nr) {
++ use_bootmem = 1;
++ cpm_uart_init_portdesc();
++ }
++ port = (struct uart_port *)&cpm_uart_ports[KGDB_PINFO_INDEX];
++ pinfo = (struct uart_cpm_port *)port;
++
++ if (cpm_uart_early_setup(KGDB_PINFO_INDEX, use_bootmem))
++ return 1;
++
++ termios_set_options(KGDB_PINFO_INDEX, KGDB_BAUD, 'n', 8, 'n');
++ if (IS_SMC(pinfo))
++ pinfo->smcp->smc_smcm |= SMCM_TX;
++ else
++ pinfo->sccp->scc_sccm |= UART_SCCM_TX;
++
++ return 0;
++}
++
++
++struct kgdb_io kgdb_io_ops = {
++ .read_char = kgdb_get_debug_char,
++ .write_char = kgdb_put_debug_char,
++ .init = kgdb_init,
++};
++
+diff -Nurb linux-2.6.22-570/drivers/serial/mpsc_kgdb.c linux-2.6.22-591/drivers/serial/mpsc_kgdb.c
+--- linux-2.6.22-570/drivers/serial/mpsc_kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/mpsc_kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,345 @@
++/*
++ * drivers/serial/mpsc_kgdb.c
++ *
++ * KGDB driver for the Marvell MultiProtocol Serial Controller (MPCS)
++ *
++ * Based on the polled boot loader driver by Ajit Prem (ajit.prem@motorola.com)
++ *
++ * Author: Randy Vinson <rvinson@mvista.com>
++ *
++ * Copyright (C) 2005-2006 MontaVista Software, Inc.
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ */
++
++#include <linux/kgdb.h>
++#include <linux/mv643xx.h>
++#include <linux/device.h>
++#include <asm/mv64x60.h>
++#include <asm/serial.h>
++#include <asm/io.h>
++#include <asm/delay.h>
++
++/* Main MPSC Configuration Register Offsets */
++#define MPSC_MMCRL 0x0000
++#define MPSC_MMCRH 0x0004
++#define MPSC_MPCR 0x0008
++#define MPSC_CHR_1 0x000c
++#define MPSC_CHR_2 0x0010
++#define MPSC_CHR_3 0x0014
++#define MPSC_CHR_4 0x0018
++#define MPSC_CHR_5 0x001c
++#define MPSC_CHR_6 0x0020
++#define MPSC_CHR_7 0x0024
++#define MPSC_CHR_8 0x0028
++#define MPSC_CHR_9 0x002c
++#define MPSC_CHR_10 0x0030
++#define MPSC_CHR_11 0x0034
++
++#define MPSC_MPCR_FRZ (1 << 9)
++#define MPSC_MPCR_CL_5 0
++#define MPSC_MPCR_CL_6 1
++#define MPSC_MPCR_CL_7 2
++#define MPSC_MPCR_CL_8 3
++#define MPSC_MPCR_SBL_1 0
++#define MPSC_MPCR_SBL_2 1
++
++#define MPSC_CHR_2_TEV (1<<1)
++#define MPSC_CHR_2_TA (1<<7)
++#define MPSC_CHR_2_TTCS (1<<9)
++#define MPSC_CHR_2_REV (1<<17)
++#define MPSC_CHR_2_RA (1<<23)
++#define MPSC_CHR_2_CRD (1<<25)
++#define MPSC_CHR_2_EH (1<<31)
++#define MPSC_CHR_2_PAR_ODD 0
++#define MPSC_CHR_2_PAR_SPACE 1
++#define MPSC_CHR_2_PAR_EVEN 2
++#define MPSC_CHR_2_PAR_MARK 3
++
++/* MPSC Signal Routing */
++#define MPSC_MRR 0x0000
++#define MPSC_RCRR 0x0004
++#define MPSC_TCRR 0x0008
++
++/* MPSC Interrupt registers (offset from MV64x60_SDMA_INTR_OFFSET) */
++#define MPSC_INTR_CAUSE 0x0004
++#define MPSC_INTR_MASK 0x0084
++#define MPSC_INTR_CAUSE_RCC (1<<6)
++
++/* Baud Rate Generator Interface Registers */
++#define BRG_BCR 0x0000
++#define BRG_BTR 0x0004
++
++/* Speed of the UART. */
++static int kgdbmpsc_baud = CONFIG_KGDB_BAUDRATE;
++
++/* Index of the UART, matches ttyMM naming. */
++static int kgdbmpsc_ttyMM = CONFIG_KGDB_PORT_NUM;
++
++#define MPSC_INTR_REG_SELECT(x) ((x) + (8 * kgdbmpsc_ttyMM))
++
++static int kgdbmpsc_init(void);
++
++static struct platform_device mpsc_dev, shared_dev;
++
++static void __iomem *mpsc_base;
++static void __iomem *brg_base;
++static void __iomem *routing_base;
++static void __iomem *sdma_base;
++
++static unsigned int mpsc_irq;
++
++static void kgdb_write_debug_char(u8 c)
++{
++ u32 data;
++
++ data = readl(mpsc_base + MPSC_MPCR);
++ writeb(c, mpsc_base + MPSC_CHR_1);
++ mb();
++ data = readl(mpsc_base + MPSC_CHR_2);
++ data |= MPSC_CHR_2_TTCS;
++ writel(data, mpsc_base + MPSC_CHR_2);
++ mb();
++
++ while (readl(mpsc_base + MPSC_CHR_2) & MPSC_CHR_2_TTCS) ;
++}
++
++static int kgdb_get_debug_char(void)
++{
++ unsigned char c;
++
++ while (!(readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) &
++ MPSC_INTR_CAUSE_RCC)) ;
++
++ c = readb(mpsc_base + MPSC_CHR_10 + (1 << 1));
++ mb();
++ writeb(c, mpsc_base + MPSC_CHR_10 + (1 << 1));
++ mb();
++ writel(~MPSC_INTR_CAUSE_RCC, sdma_base +
++ MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE));
++ return (c);
++}
++
++/*
++ * This is the receiver interrupt routine for the GDB stub.
++ * All that we need to do is verify that the interrupt happened on the
++ * line we're in charge of. If this is true, schedule a breakpoint and
++ * return.
++ */
++static irqreturn_t kgdbmpsc_interrupt(int irq, void *dev_id)
++{
++ if (irq != mpsc_irq)
++ return IRQ_NONE;
++ /*
++ * If there is some other CPU in KGDB then this is a
++ * spurious interrupt. so return without even checking a byte
++ */
++ if (atomic_read(&debugger_active))
++ return IRQ_NONE;
++
++ if (readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) &
++ MPSC_INTR_CAUSE_RCC)
++ breakpoint();
++
++ return IRQ_HANDLED;
++}
++
++static int __init kgdbmpsc_init(void)
++{
++ struct mpsc_pdata *pdata;
++ u32 cdv;
++
++ if (!brg_base || !mpsc_base || !routing_base || !sdma_base)
++ return -1;
++
++ /* Set MPSC Routing to enable both ports */
++ writel(0x0, routing_base + MPSC_MRR);
++
++ /* MPSC 0/1 Rx & Tx get clocks BRG0/1 */
++ writel(0x00000100, routing_base + MPSC_RCRR);
++ writel(0x00000100, routing_base + MPSC_TCRR);
++
++ /* Disable all MPSC interrupts and clear any pending interrupts */
++ writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++ writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE));
++
++ pdata = (struct mpsc_pdata *)mpsc_dev.dev.platform_data;
++
++ /* cdv = (clock/(2*16*baud rate)) for 16X mode. */
++ cdv = ((pdata->brg_clk_freq / (32 * kgdbmpsc_baud)) - 1);
++ writel((pdata->brg_clk_src << 18) | (1 << 16) | cdv,
++ brg_base + BRG_BCR);
++
++ /* Put MPSC into UART mode, no null modem, 16x clock mode */
++ writel(0x000004c4, mpsc_base + MPSC_MMCRL);
++ writel(0x04400400, mpsc_base + MPSC_MMCRH);
++
++ writel(0, mpsc_base + MPSC_CHR_1);
++ writel(0, mpsc_base + MPSC_CHR_9);
++ writel(0, mpsc_base + MPSC_CHR_10);
++ writel(4, mpsc_base + MPSC_CHR_3);
++ writel(0x20000000, mpsc_base + MPSC_CHR_4);
++ writel(0x9000, mpsc_base + MPSC_CHR_5);
++ writel(0, mpsc_base + MPSC_CHR_6);
++ writel(0, mpsc_base + MPSC_CHR_7);
++ writel(0, mpsc_base + MPSC_CHR_8);
++
++ /* 8 data bits, 1 stop bit */
++ writel((3 << 12), mpsc_base + MPSC_MPCR);
++
++ /* Enter "hunt" mode */
++ writel((1 << 31), mpsc_base + MPSC_CHR_2);
++
++ udelay(100);
++ return 0;
++}
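
The divisor comment in kgdbmpsc_init() is easy to check by hand. A standalone program working the formula for an assumed 100 MHz BRG clock; the real brg_clk_freq comes from platform data and varies by board:

#include <stdio.h>

int main(void)
{
	unsigned int clk = 100000000, baud = 115200;
	unsigned int cdv = clk / (32 * baud) - 1;	/* clock/(2*16*baud), 16X mode */

	printf("cdv=%u, actual rate=%u\n", cdv, clk / (32 * (cdv + 1)));
	/* prints: cdv=26, actual rate=115740 (about 0.5% fast) */
	return 0;
}
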
++
++static void __iomem *__init
++kgdbmpsc_map_resource(struct platform_device *pd, int type, int num)
++{
++ void __iomem *base = NULL;
++ struct resource *r;
++
++ if ((r = platform_get_resource(pd, IORESOURCE_MEM, num)))
++ base = ioremap(r->start, r->end - r->start + 1);
++ return base;
++}
++
++static void __iomem *__init
++kgdbmpsc_unmap_resource(struct platform_device *pd, int type, int num,
++ void __iomem * base)
++{
++ if (base)
++ iounmap(base);
++ return NULL;
++}
++
++static void __init
++kgdbmpsc_reserve_resource(struct platform_device *pd, int type, int num)
++{
++ struct resource *r;
++
++ if ((r = platform_get_resource(pd, IORESOURCE_MEM, num)))
++ request_mem_region(r->start, r->end - r->start + 1, "kgdb");
++}
++
++static int __init kgdbmpsc_local_init(void)
++{
++ if (!mpsc_dev.num_resources || !shared_dev.num_resources)
++ return 1; /* failure */
++
++ mpsc_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BASE_ORDER);
++ brg_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BRG_BASE_ORDER);
++
++ /* get the platform data for the shared registers and get them mapped */
++ routing_base = kgdbmpsc_map_resource(&shared_dev,
++ IORESOURCE_MEM,
++ MPSC_ROUTING_BASE_ORDER);
++ sdma_base =
++ kgdbmpsc_map_resource(&shared_dev, IORESOURCE_MEM,
++ MPSC_SDMA_INTR_BASE_ORDER);
++
++ mpsc_irq = platform_get_irq(&mpsc_dev, 1);
++
++ if (mpsc_base && brg_base && routing_base && sdma_base)
++ return 0; /* success */
++
++ return 1; /* failure */
++}
++
++static void __init kgdbmpsc_local_exit(void)
++{
++ if (sdma_base)
++ sdma_base = kgdbmpsc_unmap_resource(&shared_dev, IORESOURCE_MEM,
++ MPSC_SDMA_INTR_BASE_ORDER,
++ sdma_base);
++ if (routing_base)
++ routing_base = kgdbmpsc_unmap_resource(&shared_dev,
++ IORESOURCE_MEM,
++ MPSC_ROUTING_BASE_ORDER,
++ routing_base);
++ if (brg_base)
++ brg_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BRG_BASE_ORDER,
++ brg_base);
++ if (mpsc_base)
++ mpsc_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BASE_ORDER, mpsc_base);
++}
++
++static void __init kgdbmpsc_update_pdata(struct platform_device *pdev)
++{
++
++ snprintf(pdev->dev.bus_id, BUS_ID_SIZE, "%s.%u", pdev->name, pdev->id);
++}
++
++static int __init kgdbmpsc_pdev_init(void)
++{
++ struct platform_device *pdev;
++
++ /* get the platform data for the specified port. */
++ pdev = mv64x60_early_get_pdev_data(MPSC_CTLR_NAME, kgdbmpsc_ttyMM, 1);
++ if (pdev) {
++ memcpy(&mpsc_dev, pdev, sizeof(struct platform_device));
++ if (platform_notify) {
++ kgdbmpsc_update_pdata(&mpsc_dev);
++ platform_notify(&mpsc_dev.dev);
++ }
++
++ /* get the platform data for the shared registers. */
++ pdev = mv64x60_early_get_pdev_data(MPSC_SHARED_NAME, 0, 0);
++ if (pdev) {
++ memcpy(&shared_dev, pdev,
++ sizeof(struct platform_device));
++ if (platform_notify) {
++ kgdbmpsc_update_pdata(&shared_dev);
++ platform_notify(&shared_dev.dev);
++ }
++ }
++ }
++ return 0;
++}
++
++postcore_initcall(kgdbmpsc_pdev_init);
++
++static int __init kgdbmpsc_init_io(void)
++{
++
++ kgdbmpsc_pdev_init();
++
++ if (kgdbmpsc_local_init()) {
++ kgdbmpsc_local_exit();
++ return -1;
++ }
++
++ if (kgdbmpsc_init() == -1)
++ return -1;
++ return 0;
++}
++
++static void __init kgdbmpsc_hookup_irq(void)
++{
++ unsigned int msk;
++ if (!request_irq(mpsc_irq, kgdbmpsc_interrupt, 0, "kgdb mpsc", NULL)) {
++ /* Enable interrupt */
++ msk = readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++ msk |= MPSC_INTR_CAUSE_RCC;
++ writel(msk, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++
++ kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BASE_ORDER);
++ kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM,
++ MPSC_BRG_BASE_ORDER);
++ }
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .read_char = kgdb_get_debug_char,
++ .write_char = kgdb_write_debug_char,
++ .init = kgdbmpsc_init_io,
++ .late_init = kgdbmpsc_hookup_irq,
++};
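
The three serial back ends added in this section (the MPSC code above, plus the PL011 and TXX9 drivers below) all plug into the debugger core through the same polled hook table: init() brings the port up before interrupts are usable, read_char()/write_char() busy-wait on the line from the trap handler, and late_init() optionally wires up an IRQ so a ^C from gdb can stop a running kernel. A minimal sketch of that contract follows; the demo_uart_* accessors are hypothetical stand-ins for real device registers and do not exist in this patch:

    #include <linux/kgdb.h>
    #include <asm/processor.h>

    /* Stand-ins for board-specific register access (not in this patch). */
    extern int demo_uart_rx_ready(void);
    extern int demo_uart_rx_byte(void);
    extern int demo_uart_tx_full(void);
    extern void demo_uart_tx_byte(u8 ch);

    static int demo_read_char(void)
    {
        while (!demo_uart_rx_ready())   /* poll: no IRQs in the trap handler */
            cpu_relax();
        return demo_uart_rx_byte();
    }

    static void demo_write_char(u8 ch)
    {
        while (demo_uart_tx_full())     /* wait for room in the TX FIFO */
            cpu_relax();
        demo_uart_tx_byte(ch);
    }

    static int demo_init(void)
    {
        /* map registers, set the line to 8N1 at the configured baud rate */
        return 0;                       /* 0 on success, like the drivers here */
    }

    struct kgdb_io kgdb_io_ops = {
        .read_char  = demo_read_char,
        .write_char = demo_write_char,
        .init       = demo_init,
        /* .late_init usually requests the IRQ, as kgdbmpsc_hookup_irq() does */
    };
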
+diff -Nurb linux-2.6.22-570/drivers/serial/pl011_kgdb.c linux-2.6.22-591/drivers/serial/pl011_kgdb.c
+--- linux-2.6.22-570/drivers/serial/pl011_kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/pl011_kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,111 @@
++/*
++ * drivers/serial/pl011_kgdb.c
++ *
++ * Support for KGDB on ARM AMBA PL011 UARTs
++ *
++ * Authors: Manish Lachwani <mlachwani@mvista.com>
++ * Deepak Saxena <dsaxena@plexity.net>
++ *
++ * Copyright (c) 2005-2007 MontaVista Software, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ *
++ */
++#include <linux/kgdb.h>
++#include <linux/amba/bus.h>
++#include <linux/amba/serial.h>
++
++#include <asm/io.h>
++#include <asm/processor.h>
++#include <asm/hardware.h>
++
++static int kgdb_irq = CONFIG_KGDB_AMBA_IRQ;
++
++#define UART_DIVISOR (CONFIG_KGDB_AMBA_UARTCLK * 4 / CONFIG_KGDB_BAUDRATE)
++/*
++ * Todo: IO_ADDRESS is not very generic across ARM...
++ */
++static volatile unsigned char *kgdb_port =
++ (unsigned char*)IO_ADDRESS(CONFIG_KGDB_AMBA_BASE);
++
++/*
++ * Init code taken from amba-pl011.c.
++ */
++static int kgdb_serial_init(void)
++{
++ writew(0, kgdb_port + UART010_CR);
++
++ /* Set baud rate */
++ writew(UART_DIVISOR & 0x3f, kgdb_port + UART011_FBRD);
++ writew(UART_DIVISOR >> 6, kgdb_port + UART011_IBRD);
++
++ writew(UART01x_LCRH_WLEN_8 | UART01x_LCRH_FEN, kgdb_port + UART010_LCRH);
++ writew(UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_RXE,
++ kgdb_port + UART010_CR);
++
++ writew(UART011_RXIM, kgdb_port + UART011_IMSC);
++
++ return 0;
++}
++
++static void kgdb_serial_putchar(u8 ch)
++{
++ unsigned int status;
++
++ do {
++ status = readw(kgdb_port + UART01x_FR);
++ } while (status & UART01x_FR_TXFF);
++
++ writew(ch, kgdb_port + UART01x_DR);
++}
++
++static int kgdb_serial_getchar(void)
++{
++ unsigned int status;
++ int ch;
++
++#ifdef CONFIG_DEBUG_LL
++ printascii("Entering serial_getchar loop");
++#endif
++ do {
++ status = readw(kgdb_port + UART01x_FR);
++ } while (status & UART01x_FR_RXFE);
++ ch = readw(kgdb_port + UART01x_DR);
++#ifdef CONFIG_DEBUG_LL
++ printascii("Exited serial_getchar loop");
++ printascii("Read char: ");
++ printch(ch);
++ printascii("\n");
++#endif
++ return ch;
++}
++
++static irqreturn_t kgdb_interrupt(int irq, void *dev_id)
++{
++ int status = readw(kgdb_port + UART011_MIS);
++
++#ifdef CONFIG_DEBUG_LL
++ printascii("KGDB irq\n");
++#endif
++ if (irq != kgdb_irq)
++ return IRQ_NONE;
++
++ if (status & 0x40)
++ breakpoint();
++
++ return IRQ_HANDLED;
++}
++
++static void __init kgdb_hookup_irq(void)
++{
++ request_irq(kgdb_irq, kgdb_interrupt, IRQF_SHARED, "KGDB-serial", kgdb_port);
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .init = kgdb_serial_init,
++ .write_char = kgdb_serial_putchar,
++ .read_char = kgdb_serial_getchar,
++ .late_init = kgdb_hookup_irq,
++};
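
UART_DIVISOR above packs the PL011's two baud-rate registers into a single value: the hardware divisor is UARTCLK / (16 * baud) with a 6-bit binary fraction, and since 4/1 = 64/16, computing UARTCLK * 4 / baud leaves that fraction in the low six bits (FBRD) and the integer part above them (IBRD), exactly as kgdb_serial_init() splits it. A standalone check of the arithmetic; the 24 MHz clock is only an example, not a value taken from this patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned long uartclk = 24000000;       /* example UARTCLK */
        unsigned long baud = 115200;
        unsigned long div = uartclk * 4 / baud; /* same as UART_DIVISOR */

        /* 24e6 / (16 * 115200) = 13.02...; the 0.02 fraction * 64 rounds to 1 */
        printf("IBRD = %lu, FBRD = %lu\n", div >> 6, div & 0x3f);
        return 0;   /* prints IBRD = 13, FBRD = 1 */
    }
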
+diff -Nurb linux-2.6.22-570/drivers/serial/pxa.c linux-2.6.22-591/drivers/serial/pxa.c
+--- linux-2.6.22-570/drivers/serial/pxa.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/pxa.c 2007-12-21 15:36:12.000000000 -0500
+@@ -42,6 +42,9 @@
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/serial_core.h>
++#ifdef CONFIG_KGDB_CONSOLE
++#include <linux/kgdb.h>
++#endif
+
+ #include <asm/io.h>
+ #include <asm/hardware.h>
+@@ -690,6 +693,8 @@
+ console_initcall(serial_pxa_console_init);
+
+ #define PXA_CONSOLE &serial_pxa_console
++#elif defined(CONFIG_KGDB_CONSOLE)
++#define PXA_CONSOLE &kgdbcons
+ #else
+ #define PXA_CONSOLE NULL
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_core.c linux-2.6.22-591/drivers/serial/serial_core.c
+--- linux-2.6.22-570/drivers/serial/serial_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/serial_core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/serial.h> /* for serial_state and serial_icounter_struct */
+ #include <linux/delay.h>
+ #include <linux/mutex.h>
++#include <linux/kgdb.h>
+
+ #include <asm/irq.h>
+ #include <asm/uaccess.h>
+@@ -58,6 +59,12 @@
+ #define uart_console(port) (0)
+ #endif
+
++#ifdef CONFIG_KGDB_CONSOLE
++#define uart_kgdb(port) ((port)->cons && !strcmp((port)->cons->name, "kgdb"))
++#else
++#define uart_kgdb(port) (0)
++#endif
++
+ static void uart_change_speed(struct uart_state *state, struct ktermios *old_termios);
+ static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
+ static void uart_change_pm(struct uart_state *state, int pm_state);
+@@ -1671,6 +1678,9 @@
+ mmio ? "mmio:0x" : "port:",
+ mmio ? port->mapbase : (unsigned long) port->iobase,
+ port->irq);
++ if (port->iotype == UPIO_MEM)
++ ret += sprintf(buf+ret, " membase 0x%08lX",
++ (unsigned long) port->membase);
+
+ if (port->type == PORT_UNKNOWN) {
+ strcat(buf, "\n");
+@@ -2063,7 +2073,8 @@
+ case UPIO_TSI:
+ case UPIO_DWAPB:
+ snprintf(address, sizeof(address),
+- "MMIO 0x%lx", port->mapbase);
++ "MMIO map 0x%lx mem 0x%lx", port->mapbase,
++ (unsigned long) port->membase);
+ break;
+ default:
+ strlcpy(address, "*unknown*", sizeof(address));
+@@ -2118,9 +2129,9 @@
+
+ /*
+ * Power down all ports by default, except the
+- * console if we have one.
++ * console (real or kgdb) if we have one.
+ */
+- if (!uart_console(port))
++ if (!uart_console(port) && !uart_kgdb(port))
+ uart_change_pm(state, 3);
+ }
+ }
+@@ -2311,6 +2322,12 @@
+ */
+ port->flags &= ~UPF_DEAD;
+
++#if defined(CONFIG_KGDB_8250)
++ /* Add any 8250-like ports we find later. */
++ if (port->type <= PORT_MAX_8250)
++ kgdb8250_add_port(port->line, port);
++#endif
++
+ out:
+ mutex_unlock(&state->mutex);
+ mutex_unlock(&port_mutex);
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9.c linux-2.6.22-591/drivers/serial/serial_txx9.c
+--- linux-2.6.22-570/drivers/serial/serial_txx9.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/serial_txx9.c 2007-12-21 15:36:12.000000000 -0500
+@@ -40,6 +40,10 @@
+ static char *serial_version = "1.09";
+ static char *serial_name = "TX39/49 Serial driver";
+
++#ifndef CONFIG_KGDB_TXX9
++#define CONFIG_KGDB_PORT_NUM -1
++#endif
++
+ #define PASS_LIMIT 256
+
+ #if !defined(CONFIG_SERIAL_TXX9_STDSERIAL)
+@@ -471,6 +475,9 @@
+ unsigned long flags;
+ int retval;
+
++ if (up->port.line == CONFIG_KGDB_PORT_NUM)
++ return -EBUSY;
++
+ /*
+ * Clear the FIFO buffers and disable them.
+ * (they will be reenabled in set_termios())
+@@ -799,6 +806,9 @@
+ for (i = 0; i < UART_NR; i++) {
+ struct uart_txx9_port *up = &serial_txx9_ports[i];
+
++ if (up->port.line == CONFIG_KGDB_PORT_NUM)
++ continue;
++
+ up->port.line = i;
+ up->port.ops = &serial_txx9_pops;
+ up->port.dev = dev;
+@@ -967,6 +977,9 @@
+
+ mutex_lock(&serial_txx9_mutex);
+ for (i = 0; i < UART_NR; i++) {
++ if (i == CONFIG_KGDB_PORT_NUM)
++ continue;
++
+ uart = &serial_txx9_ports[i];
+ if (uart_match_port(&uart->port, port)) {
+ uart_remove_one_port(&serial_txx9_reg, &uart->port);
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c
+--- linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,150 @@
++/*
++ * drivers/serial/serial_txx9_kgdb.c
++ *
++ * kgdb interface for gdb
++ *
++ * Author: MontaVista Software, Inc.
++ * source@mvista.com
++ *
++ * Copyright (C) 2005-2006 MontaVista Software Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ */
++
++#include <linux/delay.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <asm/io.h>
++
++/* Speed of the UART. */
++static unsigned int kgdb_txx9_baud = CONFIG_KGDB_BAUDRATE;
++
++#define TXX9_NPORT 4 /* TX4939 has 4 UARTs, others only have 2 */
++
++static struct uart_port kgdb_txx9_ports[TXX9_NPORT];
++static struct uart_port *kgdb_port;
++
++/* TXX9 Serial Registers */
++#define TXX9_SILCR 0x00
++#define TXX9_SIDISR 0x08
++#define TXX9_SISCISR 0x0c
++#define TXX9_SIFCR 0x10
++#define TXX9_SIFLCR 0x14
++#define TXX9_SIBGR 0x18
++#define TXX9_SITFIFO 0x1c
++#define TXX9_SIRFIFO 0x20
++
++/* SILCR : Line Control */
++#define TXX9_SILCR_SCS_IMCLK_BG 0x00000020
++#define TXX9_SILCR_SCS_SCLK_BG 0x00000060
++#define TXX9_SILCR_USBL_1BIT 0x00000000
++#define TXX9_SILCR_UMODE_8BIT 0x00000000
++
++/* SIDISR : DMA/Int. Status */
++#define TXX9_SIDISR_RFDN_MASK 0x0000001f
++
++/* SISCISR : Status Change Int. Status */
++#define TXX9_SISCISR_TRDY 0x00000004
++
++/* SIFCR : FIFO Control */
++#define TXX9_SIFCR_SWRST 0x00008000
++
++/* SIBGR : Baud Rate Control */
++#define TXX9_SIBGR_BCLK_T0 0x00000000
++#define TXX9_SIBGR_BCLK_T2 0x00000100
++#define TXX9_SIBGR_BCLK_T4 0x00000200
++#define TXX9_SIBGR_BCLK_T6 0x00000300
++
++static inline unsigned int sio_in(struct uart_port *port, int offset)
++{
++ return *(volatile u32 *)(port->membase + offset);
++}
++
++static inline void sio_out(struct uart_port *port, int offset, unsigned int value)
++{
++ *(volatile u32 *)(port->membase + offset) = value;
++}
++
++void __init txx9_kgdb_add_port(int n, struct uart_port *port)
++{
++ memcpy(&kgdb_txx9_ports[n], port, sizeof(struct uart_port));
++}
++
++static int txx9_kgdb_init(void)
++{
++ unsigned int quot, sibgr;
++
++ kgdb_port = &kgdb_txx9_ports[CONFIG_KGDB_PORT_NUM];
++
++ if (kgdb_port->iotype != UPIO_MEM &&
++ kgdb_port->iotype != UPIO_MEM32)
++ return -1;
++
++ /* Reset the UART. */
++ sio_out(kgdb_port, TXX9_SIFCR, TXX9_SIFCR_SWRST);
++#ifdef CONFIG_CPU_TX49XX
++ /*
++ * TX4925 BUG WORKAROUND. Accessing SIOC register
++ * immediately after soft reset causes bus error.
++ */
++ iob();
++ udelay(1);
++#endif
++ /* Wait until reset is complete. */
++ while (sio_in(kgdb_port, TXX9_SIFCR) & TXX9_SIFCR_SWRST);
++
++ /* Select the frame format and input clock. */
++ sio_out(kgdb_port, TXX9_SILCR,
++ TXX9_SILCR_UMODE_8BIT | TXX9_SILCR_USBL_1BIT |
++ ((kgdb_port->flags & UPF_MAGIC_MULTIPLIER) ?
++ TXX9_SILCR_SCS_SCLK_BG : TXX9_SILCR_SCS_IMCLK_BG));
++
++ /* Select the input clock prescaler that fits the baud rate. */
++ quot = (kgdb_port->uartclk + 8 * kgdb_txx9_baud) / (16 * kgdb_txx9_baud);
++ if (quot < (256 << 1))
++ sibgr = (quot >> 1) | TXX9_SIBGR_BCLK_T0;
++ else if (quot < (256 << 3))
++ sibgr = (quot >> 3) | TXX9_SIBGR_BCLK_T2;
++ else if (quot < (256 << 5))
++ sibgr = (quot >> 5) | TXX9_SIBGR_BCLK_T4;
++ else if (quot < (256 << 7))
++ sibgr = (quot >> 7) | TXX9_SIBGR_BCLK_T6;
++ else
++ sibgr = 0xff | TXX9_SIBGR_BCLK_T6;
++
++ sio_out(kgdb_port, TXX9_SIBGR, sibgr);
++
++ /* Enable receiver and transmitter. */
++ sio_out(kgdb_port, TXX9_SIFLCR, 0);
++
++ return 0;
++}
++
++static void txx9_kgdb_late_init(void)
++{
++ request_mem_region(kgdb_port->mapbase, 0x40, "serial_txx9(debug)");
++}
++
++static int txx9_kgdb_read(void)
++{
++ while (!(sio_in(kgdb_port, TXX9_SIDISR) & TXX9_SIDISR_RFDN_MASK));
++
++ return sio_in(kgdb_port, TXX9_SIRFIFO);
++}
++
++static void txx9_kgdb_write(u8 ch)
++{
++ while (!(sio_in(kgdb_port, TXX9_SISCISR) & TXX9_SISCISR_TRDY));
++
++ sio_out(kgdb_port, TXX9_SITFIFO, ch);
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .read_char = txx9_kgdb_read,
++ .write_char = txx9_kgdb_write,
++ .init = txx9_kgdb_init,
++ .late_init = txx9_kgdb_late_init
++};
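
The SIBGR setup in txx9_kgdb_init() first forms a rounded 16x-clock divisor (adding 8 * baud before dividing by 16 * baud rounds to nearest), then steps through the T0/T2/T4/T6 prescalers until the divisor fits the 8-bit counter. Pulled out of the driver for clarity; the 14.7456 MHz input clock is an example value only:

    #include <stdio.h>

    /* Mirrors the prescaler cascade in txx9_kgdb_init() (illustration only). */
    static unsigned int pick_sibgr(unsigned int uartclk, unsigned int baud)
    {
        /* round-to-nearest divisor for a 16x sampling clock */
        unsigned int quot = (uartclk + 8 * baud) / (16 * baud);

        if (quot < (256 << 1))
            return (quot >> 1) | 0x000;  /* TXX9_SIBGR_BCLK_T0 */
        else if (quot < (256 << 3))
            return (quot >> 3) | 0x100;  /* TXX9_SIBGR_BCLK_T2 */
        else if (quot < (256 << 5))
            return (quot >> 5) | 0x200;  /* TXX9_SIBGR_BCLK_T4 */
        else if (quot < (256 << 7))
            return (quot >> 7) | 0x300;  /* TXX9_SIBGR_BCLK_T6 */
        return 0xff | 0x300;             /* clamp to the slowest rate */
    }

    int main(void)
    {
        /* 14.7456 MHz is an example clock, not a value from this patch */
        printf("SIBGR = 0x%03x\n", pick_sibgr(14745600, 115200));
        return 0;   /* quot = 8, so 0x004: T0 prescale, count 4 */
    }
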
+diff -Nurb linux-2.6.22-570/drivers/serial/sh-sci.c linux-2.6.22-591/drivers/serial/sh-sci.c
+--- linux-2.6.22-570/drivers/serial/sh-sci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/sh-sci.c 2007-12-21 15:36:12.000000000 -0500
+@@ -118,7 +118,8 @@
+ do {
+ status = sci_in(port, SCxSR);
+ if (status & SCxSR_ERRORS(port)) {
+- handle_error(port);
++ /* Clear error flags. */
++ sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));
+ continue;
+ }
+ } while (!(status & SCxSR_RDxF(port)));
+@@ -184,18 +185,18 @@
+ int h, l;
+
+ c = *p++;
+- h = highhex(c);
+- l = lowhex(c);
++ h = hexchars[c >> 4];
++ l = hexchars[c % 16];
+ put_char(port, h);
+ put_char(port, l);
+ checksum += h + l;
+ }
+ put_char(port, '#');
+- put_char(port, highhex(checksum));
+- put_char(port, lowhex(checksum));
++ put_char(port, hexchars[(checksum >> 4) & 15]);
++ put_char(port, hexchars[checksum & 15]);
+ } while (get_char(port) != '+');
+ } else
+-#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */
++#endif /* CONFIG_SH_STANDARD_BIOS */
+ for (i=0; i<count; i++) {
+ if (*p == 10)
+ put_char(port, '\r');
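
The hunk above sits inside gdb's remote-protocol 'O' (console output) packet framing: the text is hex-encoded between "$O" and '#', and the two trailing hex digits are the sum of every payload byte, including the leading 'O', modulo 256, which is why the nibble masking matters. A standalone illustration with "Hi" as example text:

    #include <stdio.h>

    int main(void)
    {
        static const char hexchars[] = "0123456789abcdef";
        const unsigned char msg[] = "Hi";   /* example console text */
        unsigned int checksum = 'O';        /* the 'O' counts toward the sum */
        int i;

        printf("$O");
        for (i = 0; msg[i]; i++) {
            char h = hexchars[msg[i] >> 4];
            char l = hexchars[msg[i] & 15];
            printf("%c%c", h, l);
            checksum += h + l;              /* sum the bytes actually sent */
        }
        printf("#%c%c\n", hexchars[(checksum >> 4) & 15],
               hexchars[checksum & 15]);
        return 0;   /* prints $O4869#2a */
    }
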
+@@ -547,6 +548,16 @@
+ continue;
+ }
+
++#ifdef CONFIG_KGDB_SH_SCI
++ /* We assume that a ^C on the port KGDB
++ * is using means that KGDB wants to
++ * interrupt the running system.
++ */
++ if (port->line == KGDBPORT.port.line &&
++ c == 3)
++ breakpoint();
++#endif
++
+ /* Store data and status */
+ if (status&SCxSR_FER(port)) {
+ flag = TTY_FRAME;
+@@ -1279,6 +1290,7 @@
+ console_initcall(sci_console_init);
+ #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
+
++#if 0
+ #ifdef CONFIG_SH_KGDB
+ /*
+ * FIXME: Most of this can go away.. at the moment, we rely on
+diff -Nurb linux-2.6.22-570/drivers/spi/at25.c linux-2.6.22-591/drivers/spi/at25.c
+--- linux-2.6.22-570/drivers/spi/at25.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/spi/at25.c 2007-12-21 15:36:12.000000000 -0500
+@@ -111,7 +111,8 @@
+ }
+
+ static ssize_t
+-at25_bin_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct device *dev;
+ struct at25_data *at25;
+@@ -236,7 +237,8 @@
+ }
+
+ static ssize_t
+-at25_bin_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct device *dev;
+ struct at25_data *at25;
+@@ -314,7 +316,6 @@
+ */
+ at25->bin.attr.name = "eeprom";
+ at25->bin.attr.mode = S_IRUSR;
+- at25->bin.attr.owner = THIS_MODULE;
+ at25->bin.read = at25_bin_read;
+
+ at25->bin.size = at25->chip.byte_len;
+diff -Nurb linux-2.6.22-570/drivers/usb/atm/cxacru.c linux-2.6.22-591/drivers/usb/atm/cxacru.c
+--- linux-2.6.22-570/drivers/usb/atm/cxacru.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/atm/cxacru.c 2007-12-21 15:36:14.000000000 -0500
+@@ -171,7 +171,7 @@
+ struct delayed_work poll_work;
+ u32 card_info[CXINF_MAX];
+ struct mutex poll_state_serialize;
+- int poll_state;
++ enum cxacru_poll_state poll_state;
+
+ /* contol handles */
+ struct mutex cm_serialize;
+@@ -226,58 +226,48 @@
+
+ static ssize_t cxacru_sysfs_showattr_dB(s16 value, char *buf)
+ {
+- if (unlikely(value < 0)) {
+ return snprintf(buf, PAGE_SIZE, "%d.%02u\n",
+- value / 100, -value % 100);
+- } else {
+- return snprintf(buf, PAGE_SIZE, "%d.%02u\n",
+- value / 100, value % 100);
+- }
++ value / 100, abs(value) % 100);
+ }
+
+ static ssize_t cxacru_sysfs_showattr_bool(u32 value, char *buf)
+ {
+- switch (value) {
+- case 0: return snprintf(buf, PAGE_SIZE, "no\n");
+- case 1: return snprintf(buf, PAGE_SIZE, "yes\n");
+- default: return 0;
+- }
++ static char *str[] = { "no", "yes" };
++ if (unlikely(value >= ARRAY_SIZE(str)))
++ return snprintf(buf, PAGE_SIZE, "%u\n", value);
++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ }
+
+ static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf)
+ {
+- switch (value) {
+- case 1: return snprintf(buf, PAGE_SIZE, "not connected\n");
+- case 2: return snprintf(buf, PAGE_SIZE, "connected\n");
+- case 3: return snprintf(buf, PAGE_SIZE, "lost\n");
+- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value);
+- }
++ static char *str[] = { NULL, "not connected", "connected", "lost" };
++ if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL))
++ return snprintf(buf, PAGE_SIZE, "%u\n", value);
++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ }
+
+ static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf)
+ {
+- switch (value) {
+- case 0: return snprintf(buf, PAGE_SIZE, "down\n");
+- case 1: return snprintf(buf, PAGE_SIZE, "attempting to activate\n");
+- case 2: return snprintf(buf, PAGE_SIZE, "training\n");
+- case 3: return snprintf(buf, PAGE_SIZE, "channel analysis\n");
+- case 4: return snprintf(buf, PAGE_SIZE, "exchange\n");
+- case 5: return snprintf(buf, PAGE_SIZE, "up\n");
+- case 6: return snprintf(buf, PAGE_SIZE, "waiting\n");
+- case 7: return snprintf(buf, PAGE_SIZE, "initialising\n");
+- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value);
+- }
++ static char *str[] = { "down", "attempting to activate",
++ "training", "channel analysis", "exchange", "up",
++ "waiting", "initialising"
++ };
++ if (unlikely(value >= ARRAY_SIZE(str)))
++ return snprintf(buf, PAGE_SIZE, "%u\n", value);
++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ }
+
+ static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf)
+ {
+- switch (value) {
+- case 0: return 0;
+- case 1: return snprintf(buf, PAGE_SIZE, "ANSI T1.413\n");
+- case 2: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.1 (G.DMT)\n");
+- case 3: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.2 (G.LITE)\n");
+- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value);
+- }
++ static char *str[] = {
++ NULL,
++ "ANSI T1.413",
++ "ITU-T G.992.1 (G.DMT)",
++ "ITU-T G.992.2 (G.LITE)"
++ };
++ if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL))
++ return snprintf(buf, PAGE_SIZE, "%u\n", value);
++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ }
+
+ /*
+@@ -308,11 +298,10 @@
+ struct cxacru_data *instance = usbatm_instance->driver_data;
+ u32 value = instance->card_info[CXINF_LINE_STARTABLE];
+
+- switch (value) {
+- case 0: return snprintf(buf, PAGE_SIZE, "running\n");
+- case 1: return snprintf(buf, PAGE_SIZE, "stopped\n");
+- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value);
+- }
++ static char *str[] = { "running", "stopped" };
++ if (unlikely(value >= ARRAY_SIZE(str)))
++ return snprintf(buf, PAGE_SIZE, "%u\n", value);
++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ }
+
+ static ssize_t cxacru_sysfs_store_adsl_state(struct device *dev,
+diff -Nurb linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c
+--- linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1168,6 +1168,7 @@
+ struct uea_softc *sc = data;
+ int ret = -EAGAIN;
+
++ set_freezable();
+ uea_enters(INS_TO_USBDEV(sc));
+ while (!kthread_should_stop()) {
+ if (ret < 0 || sc->reset)
+diff -Nurb linux-2.6.22-570/drivers/usb/core/config.c linux-2.6.22-591/drivers/usb/core/config.c
+--- linux-2.6.22-570/drivers/usb/core/config.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/core/config.c 2007-12-21 15:36:14.000000000 -0500
+@@ -274,6 +274,7 @@
+ struct usb_descriptor_header *header;
+ int len, retval;
+ u8 inums[USB_MAXINTERFACES], nalts[USB_MAXINTERFACES];
++ unsigned iad_num = 0;
+
+ memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
+ if (config->desc.bDescriptorType != USB_DT_CONFIG ||
+@@ -351,6 +352,20 @@
+ ++n;
+ }
+
++ } else if (header->bDescriptorType ==
++ USB_DT_INTERFACE_ASSOCIATION) {
++ if (iad_num == USB_MAXIADS) {
++ dev_warn(ddev, "found more Interface "
++ "Association Descriptors "
++ "than allocated for in "
++ "configuration %d\n", cfgno);
++ } else {
++ config->intf_assoc[iad_num] =
++ (struct usb_interface_assoc_descriptor
++ *)header;
++ iad_num++;
++ }
++
+ } else if (header->bDescriptorType == USB_DT_DEVICE ||
+ header->bDescriptorType == USB_DT_CONFIG)
+ dev_warn(ddev, "config %d contains an unexpected "
+diff -Nurb linux-2.6.22-570/drivers/usb/core/devices.c linux-2.6.22-591/drivers/usb/core/devices.c
+--- linux-2.6.22-570/drivers/usb/core/devices.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/core/devices.c 2007-12-21 15:36:14.000000000 -0500
+@@ -102,6 +102,10 @@
+ /* C: #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA */
+ "C:%c #Ifs=%2d Cfg#=%2d Atr=%02x MxPwr=%3dmA\n";
+
++static const char *format_iad =
++/* A: FirstIf#=dd IfCount=dd Cls=xx(sssss) Sub=xx Prot=xx */
++ "A: FirstIf#=%2d IfCount=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x\n";
++
+ static const char *format_iface =
+ /* I: If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=xxxx*/
+ "I:%c If#=%2d Alt=%2d #EPs=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x Driver=%s\n";
+@@ -146,6 +150,7 @@
+ {USB_CLASS_STILL_IMAGE, "still"},
+ {USB_CLASS_CSCID, "scard"},
+ {USB_CLASS_CONTENT_SEC, "c-sec"},
++ {USB_CLASS_VIDEO, "video"},
+ {-1, "unk."} /* leave as last */
+ };
+
+@@ -286,6 +291,21 @@
+ return start;
+ }
+
++static char *usb_dump_iad_descriptor(char *start, char *end,
++ const struct usb_interface_assoc_descriptor *iad)
++{
++ if (start > end)
++ return start;
++ start += sprintf(start, format_iad,
++ iad->bFirstInterface,
++ iad->bInterfaceCount,
++ iad->bFunctionClass,
++ class_decode(iad->bFunctionClass),
++ iad->bFunctionSubClass,
++ iad->bFunctionProtocol);
++ return start;
++}
++
+ /* TBD:
+ * 0. TBDs
+ * 1. marking active interface altsettings (code lists all, but should mark
+@@ -322,6 +342,12 @@
+ if (!config) /* getting these some in 2.3.7; none in 2.3.6 */
+ return start + sprintf(start, "(null Cfg. desc.)\n");
+ start = usb_dump_config_descriptor(start, end, &config->desc, active);
++ for (i = 0; i < USB_MAXIADS; i++) {
++ if (config->intf_assoc[i] == NULL)
++ break;
++ start = usb_dump_iad_descriptor(start, end,
++ config->intf_assoc[i]);
++ }
+ for (i = 0; i < config->desc.bNumInterfaces; i++) {
+ intfc = config->intf_cache[i];
+ interface = config->interface[i];
+diff -Nurb linux-2.6.22-570/drivers/usb/core/hub.c linux-2.6.22-591/drivers/usb/core/hub.c
+--- linux-2.6.22-570/drivers/usb/core/hub.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/usb/core/hub.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2831,6 +2831,7 @@
+
+ static int hub_thread(void *__unused)
+ {
++ set_freezable();
+ do {
+ hub_events();
+ wait_event_interruptible(khubd_wait,
+diff -Nurb linux-2.6.22-570/drivers/usb/core/message.c linux-2.6.22-591/drivers/usb/core/message.c
+--- linux-2.6.22-570/drivers/usb/core/message.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/usb/core/message.c 2007-12-21 15:36:14.000000000 -0500
+@@ -1409,6 +1409,36 @@
+ .uevent = usb_if_uevent,
+ };
+
++static struct usb_interface_assoc_descriptor *find_iad(struct usb_device *dev,
++ struct usb_host_config *config,
++ u8 inum)
++{
++ struct usb_interface_assoc_descriptor *retval = NULL;
++ struct usb_interface_assoc_descriptor *intf_assoc;
++ int first_intf;
++ int last_intf;
++ int i;
++
++ for (i = 0; (i < USB_MAXIADS && config->intf_assoc[i]); i++) {
++ intf_assoc = config->intf_assoc[i];
++ if (intf_assoc->bInterfaceCount == 0)
++ continue;
++
++ first_intf = intf_assoc->bFirstInterface;
++ last_intf = first_intf + (intf_assoc->bInterfaceCount - 1);
++ if (inum >= first_intf && inum <= last_intf) {
++ if (!retval)
++ retval = intf_assoc;
++ else
++ dev_err(&dev->dev, "Interface #%d referenced"
++ " by multiple IADs\n", inum);
++ }
++ }
++
++ return retval;
++}
++
++
+ /*
+ * usb_set_configuration - Makes a particular device setting be current
+ * @dev: the device whose configuration is being updated
+@@ -1555,6 +1585,7 @@
+ intfc = cp->intf_cache[i];
+ intf->altsetting = intfc->altsetting;
+ intf->num_altsetting = intfc->num_altsetting;
++ intf->intf_assoc = find_iad(dev, cp, i);
+ kref_get(&intfc->ref);
+
+ alt = usb_altnum_to_altsetting(intf, 0);
+diff -Nurb linux-2.6.22-570/drivers/usb/core/sysfs.c linux-2.6.22-591/drivers/usb/core/sysfs.c
+--- linux-2.6.22-570/drivers/usb/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/core/sysfs.c 2007-12-21 15:36:14.000000000 -0500
+@@ -424,6 +424,25 @@
+ sysfs_remove_group(&dev->kobj, &dev_attr_grp);
+ }
+
++/* Interface Association Descriptor fields */
++#define usb_intf_assoc_attr(field, format_string) \
++static ssize_t \
++show_iad_##field (struct device *dev, struct device_attribute *attr, \
++ char *buf) \
++{ \
++ struct usb_interface *intf = to_usb_interface (dev); \
++ \
++ return sprintf (buf, format_string, \
++ intf->intf_assoc->field); \
++} \
++static DEVICE_ATTR(iad_##field, S_IRUGO, show_iad_##field, NULL);
++
++usb_intf_assoc_attr (bFirstInterface, "%02x\n")
++usb_intf_assoc_attr (bInterfaceCount, "%02d\n")
++usb_intf_assoc_attr (bFunctionClass, "%02x\n")
++usb_intf_assoc_attr (bFunctionSubClass, "%02x\n")
++usb_intf_assoc_attr (bFunctionProtocol, "%02x\n")
++
+ /* Interface fields */
+ #define usb_intf_attr(field, format_string) \
+ static ssize_t \
+@@ -487,6 +506,18 @@
+ }
+ static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL);
+
++static struct attribute *intf_assoc_attrs[] = {
++ &dev_attr_iad_bFirstInterface.attr,
++ &dev_attr_iad_bInterfaceCount.attr,
++ &dev_attr_iad_bFunctionClass.attr,
++ &dev_attr_iad_bFunctionSubClass.attr,
++ &dev_attr_iad_bFunctionProtocol.attr,
++ NULL,
++};
++static struct attribute_group intf_assoc_attr_grp = {
++ .attrs = intf_assoc_attrs,
++};
++
+ static struct attribute *intf_attrs[] = {
+ &dev_attr_bInterfaceNumber.attr,
+ &dev_attr_bAlternateSetting.attr,
+@@ -538,6 +569,8 @@
+ alt->string = usb_cache_string(udev, alt->desc.iInterface);
+ if (alt->string)
+ retval = device_create_file(dev, &dev_attr_interface);
++ if (intf->intf_assoc)
++ retval = sysfs_create_group(&dev->kobj, &intf_assoc_attr_grp);
+ usb_create_intf_ep_files(intf, udev);
+ return 0;
+ }
+@@ -549,4 +582,5 @@
+ usb_remove_intf_ep_files(intf);
+ device_remove_file(dev, &dev_attr_interface);
+ sysfs_remove_group(&dev->kobj, &intf_attr_grp);
++ sysfs_remove_group(&intf->dev.kobj, &intf_assoc_attr_grp);
+ }
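
To make the generated code concrete, the usb_intf_assoc_attr() macro above expands, for the first field, to the following (reformatted), producing one read-only sysfs attribute per IAD field:

    static ssize_t show_iad_bFirstInterface(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
    {
        struct usb_interface *intf = to_usb_interface(dev);

        return sprintf(buf, "%02x\n", intf->intf_assoc->bFirstInterface);
    }
    static DEVICE_ATTR(iad_bFirstInterface, S_IRUGO,
                       show_iad_bFirstInterface, NULL);
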
+diff -Nurb linux-2.6.22-570/drivers/usb/gadget/file_storage.c linux-2.6.22-591/drivers/usb/gadget/file_storage.c
+--- linux-2.6.22-570/drivers/usb/gadget/file_storage.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/gadget/file_storage.c 2007-12-21 15:36:12.000000000 -0500
+@@ -3434,6 +3434,9 @@
+ allow_signal(SIGKILL);
+ allow_signal(SIGUSR1);
+
++ /* Allow the thread to be frozen */
++ set_freezable();
++
+ /* Arrange for userspace references to be interpreted as kernel
+ * pointers. That way we can pass a kernel pointer to a routine
+ * that expects a __user pointer and it will work okay. */
+diff -Nurb linux-2.6.22-570/drivers/usb/storage/usb.c linux-2.6.22-591/drivers/usb/storage/usb.c
+--- linux-2.6.22-570/drivers/usb/storage/usb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/usb/storage/usb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -301,8 +301,6 @@
+ struct us_data *us = (struct us_data *)__us;
+ struct Scsi_Host *host = us_to_host(us);
+
+- current->flags |= PF_NOFREEZE;
+-
+ for(;;) {
+ US_DEBUGP("*** thread sleeping.\n");
+ if(down_interruptible(&us->sema))
+@@ -909,6 +907,7 @@
+ printk(KERN_DEBUG
+ "usb-storage: device found at %d\n", us->pusb_dev->devnum);
+
++ set_freezable();
+ /* Wait for the timeout to expire or for a disconnect */
+ if (delay_use > 0) {
+ printk(KERN_DEBUG "usb-storage: waiting for device "
+diff -Nurb linux-2.6.22-570/drivers/video/Kconfig linux-2.6.22-591/drivers/video/Kconfig
+--- linux-2.6.22-570/drivers/video/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -12,6 +12,13 @@
+ tristate
+ default n
+
++config VIDEO_OUTPUT_CONTROL
++ tristate "Lowlevel video output switch controls"
++ default m
++ help
++ This framework adds support for low-level control of the video
++ output switch.
++
+ config FB
+ tristate "Support for frame buffer devices"
+ ---help---
+diff -Nurb linux-2.6.22-570/drivers/video/Makefile linux-2.6.22-591/drivers/video/Makefile
+--- linux-2.6.22-570/drivers/video/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -122,3 +122,6 @@
+
+ # the test framebuffer is last
+ obj-$(CONFIG_FB_VIRTUAL) += vfb.o
++
++#video output switch sysfs driver
++obj-$(CONFIG_VIDEO_OUTPUT_CONTROL) += output.o
+diff -Nurb linux-2.6.22-570/drivers/video/aty/radeon_base.c linux-2.6.22-591/drivers/video/aty/radeon_base.c
+--- linux-2.6.22-570/drivers/video/aty/radeon_base.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/aty/radeon_base.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2102,7 +2102,9 @@
+ }
+
+
+-static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t radeon_show_edid1(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+@@ -2113,7 +2115,9 @@
+ }
+
+
+-static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t radeon_show_edid2(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+@@ -2126,7 +2130,6 @@
+ static struct bin_attribute edid1_attr = {
+ .attr = {
+ .name = "edid1",
+- .owner = THIS_MODULE,
+ .mode = 0444,
+ },
+ .size = EDID_LENGTH,
+@@ -2136,7 +2139,6 @@
+ static struct bin_attribute edid2_attr = {
+ .attr = {
+ .name = "edid2",
+- .owner = THIS_MODULE,
+ .mode = 0444,
+ },
+ .size = EDID_LENGTH,
+diff -Nurb linux-2.6.22-570/drivers/video/backlight/backlight.c linux-2.6.22-591/drivers/video/backlight/backlight.c
+--- linux-2.6.22-570/drivers/video/backlight/backlight.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/backlight/backlight.c 2007-12-21 15:36:12.000000000 -0500
+@@ -172,7 +172,7 @@
+
+ #define DECLARE_ATTR(_name,_mode,_show,_store) \
+ { \
+- .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \
++ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ }
+diff -Nurb linux-2.6.22-570/drivers/video/backlight/lcd.c linux-2.6.22-591/drivers/video/backlight/lcd.c
+--- linux-2.6.22-570/drivers/video/backlight/lcd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/backlight/lcd.c 2007-12-21 15:36:12.000000000 -0500
+@@ -157,7 +157,7 @@
+
+ #define DECLARE_ATTR(_name,_mode,_show,_store) \
+ { \
+- .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \
++ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ }
+diff -Nurb linux-2.6.22-570/drivers/video/ps3fb.c linux-2.6.22-591/drivers/video/ps3fb.c
+--- linux-2.6.22-570/drivers/video/ps3fb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/video/ps3fb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -812,6 +812,7 @@
+
+ static int ps3fbd(void *arg)
+ {
++ set_freezable();
+ while (!kthread_should_stop()) {
+ try_to_freeze();
+ set_current_state(TASK_INTERRUPTIBLE);
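
set_freezable() appears in many hunks of this section (cifs, usb-storage, ueagle-atm, the hub and w1 threads) because the freezer is flipped from opt-out (PF_NOFREEZE, removed from usb-storage above) to opt-in for kernel threads: once a thread opts in, it must poll try_to_freeze() as ps3fbd does in this hunk. A generic skeleton of the pattern, not taken from any one driver:

    #include <linux/freezer.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int demo_thread(void *unused)
    {
        set_freezable();        /* opt in: kthreads no longer freeze by default */
        while (!kthread_should_stop()) {
            try_to_freeze();    /* park here across suspend/resume */
            set_current_state(TASK_INTERRUPTIBLE);
            schedule();         /* a real driver waits for its event here */
        }
        return 0;
    }
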
+diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c
+--- linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c 2007-12-21 15:36:12.000000000 -0500
+@@ -91,8 +91,9 @@
+ }
+ #endif /* CONFIG_W1_SLAVE_DS2433_CRC */
+
+-static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++static ssize_t w1_f23_read_bin(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ #ifdef CONFIG_W1_SLAVE_DS2433_CRC
+@@ -199,8 +200,9 @@
+ return 0;
+ }
+
+-static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++static ssize_t w1_f23_write_bin(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ int addr, len, idx;
+@@ -252,7 +254,6 @@
+ .attr = {
+ .name = "eeprom",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = W1_EEPROM_SIZE,
+ .read = w1_f23_read_bin,
+diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_therm.c linux-2.6.22-591/drivers/w1/slaves/w1_therm.c
+--- linux-2.6.22-570/drivers/w1/slaves/w1_therm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/w1/slaves/w1_therm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -42,13 +42,13 @@
+ {}
+ };
+
+-static ssize_t w1_therm_read_bin(struct kobject *, char *, loff_t, size_t);
++static ssize_t w1_therm_read_bin(struct kobject *, struct bin_attribute *,
++ char *, loff_t, size_t);
+
+ static struct bin_attribute w1_therm_bin_attr = {
+ .attr = {
+ .name = "w1_slave",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = W1_SLAVE_DATA_SIZE,
+ .read = w1_therm_read_bin,
+@@ -159,7 +159,9 @@
+ return 0;
+ }
+
+-static ssize_t w1_therm_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_therm_read_bin(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ struct w1_master *dev = sl->master;
+diff -Nurb linux-2.6.22-570/drivers/w1/w1.c linux-2.6.22-591/drivers/w1/w1.c
+--- linux-2.6.22-570/drivers/w1/w1.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/w1/w1.c 2007-12-21 15:36:12.000000000 -0500
+@@ -105,7 +105,9 @@
+ return sprintf(buf, "%s\n", sl->name);
+ }
+
+-static ssize_t w1_slave_read_id(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_slave_read_id(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+
+@@ -128,7 +130,6 @@
+ .attr = {
+ .name = "id",
+ .mode = S_IRUGO,
+- .owner = THIS_MODULE,
+ },
+ .size = 8,
+ .read = w1_slave_read_id,
+@@ -136,7 +137,9 @@
+
+ /* Default family */
+
+-static ssize_t w1_default_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_default_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+
+@@ -153,7 +156,9 @@
+ return count;
+ }
+
+-static ssize_t w1_default_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_default_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct w1_slave *sl = kobj_to_w1_slave(kobj);
+
+@@ -167,7 +172,6 @@
+ .attr = {
+ .name = "rw",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = PAGE_SIZE,
+ .read = w1_default_read,
+@@ -801,6 +805,7 @@
+ struct w1_master *dev, *n;
+ int have_to_wait = 0;
+
++ set_freezable();
+ while (!kthread_should_stop() || have_to_wait) {
+ have_to_wait = 0;
+
+diff -Nurb linux-2.6.22-570/drivers/zorro/zorro-sysfs.c linux-2.6.22-591/drivers/zorro/zorro-sysfs.c
+--- linux-2.6.22-570/drivers/zorro/zorro-sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/zorro/zorro-sysfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -49,8 +49,9 @@
+
+ static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL);
+
+-static ssize_t zorro_read_config(struct kobject *kobj, char *buf, loff_t off,
+- size_t count)
++static ssize_t zorro_read_config(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct zorro_dev *z = to_zorro_dev(container_of(kobj, struct device,
+ kobj));
+@@ -78,7 +79,6 @@
+ .attr = {
+ .name = "config",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE
+ },
+ .size = sizeof(struct ConfigDev),
+ .read = zorro_read_config,
+diff -Nurb linux-2.6.22-570/fs/Kconfig linux-2.6.22-591/fs/Kconfig
+--- linux-2.6.22-570/fs/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -1030,6 +1030,41 @@
+
+ endmenu
+
++menu "Layered filesystems"
++
++config ECRYPT_FS
++ tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
++ depends on EXPERIMENTAL && KEYS && CRYPTO && NET
++ help
++ Encrypted filesystem that operates on the VFS layer. See
++ <file:Documentation/ecryptfs.txt> to learn more about
++ eCryptfs. Userspace components are required and can be
++ obtained from <http://ecryptfs.sf.net>.
++
++ To compile this file system support as a module, choose M here: the
++ module will be called ecryptfs.
++
++config UNION_FS
++ tristate "Union file system (EXPERIMENTAL)"
++ depends on EXPERIMENTAL
++ help
++ Unionfs is a stackable unification file system, which appears to
++ merge the contents of several directories (branches), while keeping
++ their physical content separate.
++
++ See <http://unionfs.filesystems.org> for details.
++
++config UNION_FS_XATTR
++ bool "Unionfs extended attributes"
++ depends on UNION_FS
++ help
++ Extended attributes are name:value pairs associated with inodes by
++ the kernel or by users (see the attr(5) manual page).
++
++ If unsure, say N.
++
++endmenu
++
+ menu "Miscellaneous filesystems"
+
+ config ADFS_FS
+@@ -1082,18 +1117,6 @@
+ To compile this file system support as a module, choose M here: the
+ module will be called affs. If unsure, say N.
+
+-config ECRYPT_FS
+- tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
+- depends on EXPERIMENTAL && KEYS && CRYPTO && NET
+- help
+- Encrypted filesystem that operates on the VFS layer. See
+- <file:Documentation/ecryptfs.txt> to learn more about
+- eCryptfs. Userspace components are required and can be
+- obtained from <http://ecryptfs.sf.net>.
+-
+- To compile this file system support as a module, choose M here: the
+- module will be called ecryptfs.
+-
+ config HFS_FS
+ tristate "Apple Macintosh file system support (EXPERIMENTAL)"
+ depends on BLOCK && EXPERIMENTAL
+diff -Nurb linux-2.6.22-570/fs/Makefile linux-2.6.22-591/fs/Makefile
+--- linux-2.6.22-570/fs/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -19,6 +19,7 @@
+ obj-y += no-block.o
+ endif
+
++obj-$(CONFIG_MMU) += revoke.o revoked_inode.o
+ obj-$(CONFIG_INOTIFY) += inotify.o
+ obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
+ obj-$(CONFIG_EPOLL) += eventpoll.o
+@@ -118,3 +119,4 @@
+ obj-$(CONFIG_DEBUG_FS) += debugfs/
+ obj-$(CONFIG_OCFS2_FS) += ocfs2/
+ obj-$(CONFIG_GFS2_FS) += gfs2/
++obj-$(CONFIG_UNION_FS) += unionfs/
+diff -Nurb linux-2.6.22-570/fs/afs/netdevices.c linux-2.6.22-591/fs/afs/netdevices.c
+--- linux-2.6.22-570/fs/afs/netdevices.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/afs/netdevices.c 2007-12-21 15:36:14.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/inetdevice.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
++#include <net/net_namespace.h>
+ #include "internal.h"
+
+ /*
+@@ -23,7 +24,7 @@
+ BUG();
+
+ rtnl_lock();
+- dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
++ dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
+ if (dev) {
+ memcpy(mac, dev->dev_addr, maclen);
+ ret = 0;
+@@ -47,7 +48,7 @@
+ ASSERT(maxbufs > 0);
+
+ rtnl_lock();
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
+ continue;
+ idev = __in_dev_get_rtnl(dev);
+diff -Nurb linux-2.6.22-570/fs/buffer.c linux-2.6.22-591/fs/buffer.c
+--- linux-2.6.22-570/fs/buffer.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/buffer.c 2007-12-21 15:36:12.000000000 -0500
+@@ -982,7 +982,7 @@
+ struct buffer_head *bh;
+
+ page = find_or_create_page(inode->i_mapping, index,
+- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
++ (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+ if (!page)
+ return NULL;
+
+@@ -2899,7 +2899,8 @@
+
+ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
+ {
+- struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
++ struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
++ set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
+ if (ret) {
+ INIT_LIST_HEAD(&ret->b_assoc_buffers);
+ get_cpu_var(bh_accounting).nr++;
+diff -Nurb linux-2.6.22-570/fs/cifs/cifsfs.c linux-2.6.22-591/fs/cifs/cifsfs.c
+--- linux-2.6.22-570/fs/cifs/cifsfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/cifs/cifsfs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -849,6 +849,7 @@
+ __u16 netfid;
+ int rc;
+
++ set_freezable();
+ do {
+ if (try_to_freeze())
+ continue;
+diff -Nurb linux-2.6.22-570/fs/cifs/connect.c linux-2.6.22-591/fs/cifs/connect.c
+--- linux-2.6.22-570/fs/cifs/connect.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/cifs/connect.c 2007-12-21 15:36:12.000000000 -0500
+@@ -363,6 +363,7 @@
+ GFP_KERNEL);
+ }
+
++ set_freezable();
+ while (!kthread_should_stop()) {
+ if (try_to_freeze())
+ continue;
+diff -Nurb linux-2.6.22-570/fs/compat_ioctl.c linux-2.6.22-591/fs/compat_ioctl.c
+--- linux-2.6.22-570/fs/compat_ioctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/compat_ioctl.c 2007-12-21 15:36:14.000000000 -0500
+@@ -319,22 +319,21 @@
+
+ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
+ {
+- struct net_device *dev;
+- struct ifreq32 ifr32;
++ struct ifreq __user *uifr;
+ int err;
+
+- if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32)))
++ uifr = compat_alloc_user_space(sizeof(struct ifreq));
++ if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)));
+ return -EFAULT;
+
+- dev = dev_get_by_index(ifr32.ifr_ifindex);
+- if (!dev)
+- return -ENODEV;
++ err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
++ if (err)
++ return err;
+
+- strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name));
+- dev_put(dev);
++ if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
++ return -EFAULT;
+
+- err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
+- return (err ? -EFAULT : 0);
++ return 0;
+ }
+
+ static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
+diff -Nurb linux-2.6.22-570/fs/configfs/configfs_internal.h linux-2.6.22-591/fs/configfs/configfs_internal.h
+--- linux-2.6.22-570/fs/configfs/configfs_internal.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/configfs_internal.h 2007-12-21 15:36:12.000000000 -0500
+@@ -29,6 +29,7 @@
+
+ struct configfs_dirent {
+ atomic_t s_count;
++ int s_dependent_count;
+ struct list_head s_sibling;
+ struct list_head s_children;
+ struct list_head s_links;
+diff -Nurb linux-2.6.22-570/fs/configfs/dir.c linux-2.6.22-591/fs/configfs/dir.c
+--- linux-2.6.22-570/fs/configfs/dir.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/dir.c 2007-12-21 15:36:12.000000000 -0500
+@@ -355,6 +355,10 @@
+ /* Mark that we've taken i_mutex */
+ sd->s_type |= CONFIGFS_USET_DROPPING;
+
++ /*
++ * Yup, recursive. If there's a problem, blame
++ * deep nesting of default_groups
++ */
+ ret = configfs_detach_prep(sd->s_dentry);
+ if (!ret)
+ continue;
+@@ -714,6 +718,28 @@
+ }
+
+ /*
++ * After the item has been detached from the filesystem view, we are
++ * ready to tear it out of the hierarchy. Notify the client before
++ * we do that so they can perform any cleanup that requires
++ * navigating the hierarchy. A client does not need to provide this
++ * callback. The subsystem semaphore MUST be held by the caller, and
++ * references must be valid for both items. It also assumes the
++ * caller has validated ci_type.
++ */
++static void client_disconnect_notify(struct config_item *parent_item,
++ struct config_item *item)
++{
++ struct config_item_type *type;
++
++ type = parent_item->ci_type;
++ BUG_ON(!type);
++
++ if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
++ type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
++ item);
++}
++
++/*
+ * Drop the initial reference from make_item()/make_group()
+ * This function assumes that reference is held on item
+ * and that item holds a valid reference to the parent. Also, it
+@@ -738,6 +764,239 @@
+ config_item_put(item);
+ }
+
++#ifdef DEBUG
++static void configfs_dump_one(struct configfs_dirent *sd, int level)
++{
++ printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
++
++#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
++ type_print(CONFIGFS_ROOT);
++ type_print(CONFIGFS_DIR);
++ type_print(CONFIGFS_ITEM_ATTR);
++ type_print(CONFIGFS_ITEM_LINK);
++ type_print(CONFIGFS_USET_DIR);
++ type_print(CONFIGFS_USET_DEFAULT);
++ type_print(CONFIGFS_USET_DROPPING);
++#undef type_print
++}
++
++static int configfs_dump(struct configfs_dirent *sd, int level)
++{
++ struct configfs_dirent *child_sd;
++ int ret = 0;
++
++ configfs_dump_one(sd, level);
++
++ if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
++ return 0;
++
++ list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
++ ret = configfs_dump(child_sd, level + 2);
++ if (ret)
++ break;
++ }
++
++ return ret;
++}
++#endif
++
++
++/*
++ * configfs_depend_item() and configfs_undepend_item()
++ *
++ * WARNING: Do not call these from a configfs callback!
++ *
++ * This describes these functions and their helpers.
++ *
++ * Allow another kernel system to depend on a config_item. If this
++ * happens, the item cannot go away until the dependent can live without
++ * it. The idea is to give client modules as simple an interface as
++ * possible. When a system asks them to depend on an item, they just
++ * call configfs_depend_item(). If the item is live and the client
++ * driver is in good shape, we'll happily do the work for them.
++ *
++ * Why is the locking complex? Because configfs uses the VFS to handle
++ * all locking, but this function is called outside the normal
++ * VFS->configfs path. So it must take VFS locks to prevent the
++ * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is
++ * why you can't call these functions underneath configfs callbacks.
++ *
++ * Note, btw, that this can be called at *any* time, even when a configfs
++ * subsystem isn't registered, or when configfs is loading or unloading.
++ * Just like configfs_register_subsystem(). So we take the same
++ * precautions. We pin the filesystem. We lock each i_mutex _in_order_
++ * on our way down the tree. If we can find the target item in the
++ * configfs tree, it must be part of the subsystem tree as well, so we
++ * do not need the subsystem semaphore. Holding the i_mutex chain locks
++ * out mkdir() and rmdir(), who might be racing us.
++ */
++
++/*
++ * configfs_depend_prep()
++ *
++ * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
++ * attributes. This is similar to, but not the same as, configfs_detach_prep().
++ * Note that configfs_detach_prep() expects the parent to be locked when it
++ * is called, but we lock the parent *inside* configfs_depend_prep(). We
++ * do that so we can unlock it if we find nothing.
++ *
++ * Here we do a depth-first search of the dentry hierarchy looking for
++ * our object. We take i_mutex on each step of the way down. IT IS
++ * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
++ * we'll drop the i_mutex.
++ *
++ * If the target is not found, -ENOENT is bubbled up and we have released
++ * all locks. If the target was found, the locks will be cleared by
++ * configfs_depend_rollback().
++ *
++ * This adds a requirement that all config_items be unique!
++ *
++ * This is recursive because the locking traversal is tricky. There isn't
++ * much on the stack, though, so folks that need this function - be careful
++ * about your stack! Patches will be accepted to make it iterative.
++ */
++static int configfs_depend_prep(struct dentry *origin,
++ struct config_item *target)
++{
++ struct configfs_dirent *child_sd, *sd = origin->d_fsdata;
++ int ret = 0;
++
++ BUG_ON(!origin || !sd);
++
++ /* Lock this guy on the way down */
++ mutex_lock(&sd->s_dentry->d_inode->i_mutex);
++ if (sd->s_element == target) /* Boo-yah */
++ goto out;
++
++ list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
++ if (child_sd->s_type & CONFIGFS_DIR) {
++ ret = configfs_depend_prep(child_sd->s_dentry,
++ target);
++ if (!ret)
++ goto out; /* Child path boo-yah */
++ }
++ }
++
++ /* We looped all our children and didn't find target */
++ mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
++ ret = -ENOENT;
++
++out:
++ return ret;
++}
++
++/*
++ * This is ONLY called if configfs_depend_prep() did its job. So we can
++ * trust the entire path from item back up to origin.
++ *
++ * We walk backwards from item, unlocking each i_mutex. We finish by
++ * unlocking origin.
++ */
++static void configfs_depend_rollback(struct dentry *origin,
++ struct config_item *item)
++{
++ struct dentry *dentry = item->ci_dentry;
++
++ while (dentry != origin) {
++ mutex_unlock(&dentry->d_inode->i_mutex);
++ dentry = dentry->d_parent;
++ }
++
++ mutex_unlock(&origin->d_inode->i_mutex);
++}
++
++int configfs_depend_item(struct configfs_subsystem *subsys,
++ struct config_item *target)
++{
++ int ret;
++ struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
++ struct config_item *s_item = &subsys->su_group.cg_item;
++
++ /*
++ * Pin the configfs filesystem. This means we can safely access
++ * the root of the configfs filesystem.
++ */
++ ret = configfs_pin_fs();
++ if (ret)
++ return ret;
++
++ /*
++ * Next, lock the root directory. We're going to check that the
++ * subsystem is really registered, and so we need to lock out
++ * configfs_[un]register_subsystem().
++ */
++ mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
++
++ root_sd = configfs_sb->s_root->d_fsdata;
++
++ list_for_each_entry(p, &root_sd->s_children, s_sibling) {
++ if (p->s_type & CONFIGFS_DIR) {
++ if (p->s_element == s_item) {
++ subsys_sd = p;
++ break;
++ }
++ }
++ }
++
++ if (!subsys_sd) {
++ ret = -ENOENT;
++ goto out_unlock_fs;
++ }
++
++ /* Ok, now we can trust subsys/s_item */
++
++ /* Scan the tree, locking i_mutex recursively, return 0 if found */
++ ret = configfs_depend_prep(subsys_sd->s_dentry, target);
++ if (ret)
++ goto out_unlock_fs;
++
++ /* We hold all i_mutexes from the subsystem down to the target */
++ p = target->ci_dentry->d_fsdata;
++ p->s_dependent_count += 1;
++
++ configfs_depend_rollback(subsys_sd->s_dentry, target);
++
++out_unlock_fs:
++ mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
++
++ /*
++ * If we succeeded, the fs is pinned via other methods. If not,
++ * we're done with it anyway. So release_fs() is always right.
++ */
++ configfs_release_fs();
++
++ return ret;
++}
++EXPORT_SYMBOL(configfs_depend_item);
++
++/*
++ * Release the dependent linkage. This is much simpler than
++ * configfs_depend_item() because we know that the client driver is
++ * pinned, thus the subsystem is pinned, and therefore configfs is pinned.
++ */
++void configfs_undepend_item(struct configfs_subsystem *subsys,
++ struct config_item *target)
++{
++ struct configfs_dirent *sd;
++
++ /*
++ * Since we can trust everything is pinned, we just need i_mutex
++ * on the item.
++ */
++ mutex_lock(&target->ci_dentry->d_inode->i_mutex);
++
++ sd = target->ci_dentry->d_fsdata;
++ BUG_ON(sd->s_dependent_count < 1);
++
++ sd->s_dependent_count -= 1;
++
++ /*
++ * After this unlock, we cannot trust the item to stay alive!
++ * DO NOT REFERENCE item after this unlock.
++ */
++ mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
++}
++EXPORT_SYMBOL(configfs_undepend_item);
+
+ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+ {
+@@ -842,11 +1101,14 @@
+ if (ret) {
+ /* Tear down everything we built up */
+ down(&subsys->su_sem);
++
++ client_disconnect_notify(parent_item, item);
+ if (group)
+ unlink_group(group);
+ else
+ unlink_obj(item);
+ client_drop_item(parent_item, item);
++
+ up(&subsys->su_sem);
+
+ if (module_got)
+@@ -881,6 +1143,13 @@
+ if (sd->s_type & CONFIGFS_USET_DEFAULT)
+ return -EPERM;
+
++ /*
++ * Here's where we check for dependents. We're protected by
++ * i_mutex.
++ */
++ if (sd->s_dependent_count)
++ return -EBUSY;
++
+ /* Get a working ref until we have the child */
+ parent_item = configfs_get_config_item(dentry->d_parent);
+ subsys = to_config_group(parent_item)->cg_subsys;
+@@ -911,11 +1180,13 @@
+ configfs_detach_group(item);
+
+ down(&subsys->su_sem);
++ client_disconnect_notify(parent_item, item);
+ unlink_group(to_config_group(item));
+ } else {
+ configfs_detach_item(item);
+
+ down(&subsys->su_sem);
++ client_disconnect_notify(parent_item, item);
+ unlink_obj(item);
+ }
+
+diff -Nurb linux-2.6.22-570/fs/configfs/file.c linux-2.6.22-591/fs/configfs/file.c
+--- linux-2.6.22-570/fs/configfs/file.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/file.c 2007-12-21 15:36:12.000000000 -0500
+@@ -27,19 +27,26 @@
+ #include <linux/fs.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
++#include <linux/mutex.h>
+ #include <asm/uaccess.h>
+-#include <asm/semaphore.h>
+
+ #include <linux/configfs.h>
+ #include "configfs_internal.h"
+
++/*
++ * A simple attribute can only be 4096 characters. Why 4k? Because the
++ * original code limited it to PAGE_SIZE. That's a bad idea, though,
++ * because an attribute of 16k on ia64 won't work on x86. So we limit to
++ * 4k, our minimum common page size.
++ */
++#define SIMPLE_ATTR_SIZE 4096
+
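What the cap means for attribute code: a show() handler still receives one page, but may only rely on the first 4k of it. A hedged sketch (example_attr_show is hypothetical, and SIMPLE_ATTR_SIZE itself is private to fs/configfs/file.c, so code outside this file would have to mirror the 4k assumption):

    static ssize_t example_attr_show(struct config_item *item,
                                     struct configfs_attribute *attr,
                                     char *page)
    {
            /* scnprintf() bounds the output and keeps it NUL-terminated */
            return scnprintf(page, SIMPLE_ATTR_SIZE, "%d\n", 42);
    }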
+ struct configfs_buffer {
+ size_t count;
+ loff_t pos;
+ char * page;
+ struct configfs_item_operations * ops;
+- struct semaphore sem;
++ struct mutex mutex;
+ int needs_read_fill;
+ };
+
+@@ -69,7 +76,7 @@
+
+ count = ops->show_attribute(item,attr,buffer->page);
+ buffer->needs_read_fill = 0;
+- BUG_ON(count > (ssize_t)PAGE_SIZE);
++ BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
+ if (count >= 0)
+ buffer->count = count;
+ else
+@@ -102,7 +109,7 @@
+ struct configfs_buffer * buffer = file->private_data;
+ ssize_t retval = 0;
+
+- down(&buffer->sem);
++ mutex_lock(&buffer->mutex);
+ if (buffer->needs_read_fill) {
+ if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
+ goto out;
+@@ -112,7 +119,7 @@
+ retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
+ buffer->count);
+ out:
+- up(&buffer->sem);
++ mutex_unlock(&buffer->mutex);
+ return retval;
+ }
+
+@@ -137,8 +144,8 @@
+ if (!buffer->page)
+ return -ENOMEM;
+
+- if (count >= PAGE_SIZE)
+- count = PAGE_SIZE - 1;
++ if (count >= SIMPLE_ATTR_SIZE)
++ count = SIMPLE_ATTR_SIZE - 1;
+ error = copy_from_user(buffer->page,buf,count);
+ buffer->needs_read_fill = 1;
+ /* if buf is assumed to contain a string, terminate it by \0,
+@@ -193,13 +200,13 @@
+ struct configfs_buffer * buffer = file->private_data;
+ ssize_t len;
+
+- down(&buffer->sem);
++ mutex_lock(&buffer->mutex);
+ len = fill_write_buffer(buffer, buf, count);
+ if (len > 0)
+ len = flush_write_buffer(file->f_path.dentry, buffer, count);
+ if (len > 0)
+ *ppos += len;
+- up(&buffer->sem);
++ mutex_unlock(&buffer->mutex);
+ return len;
+ }
+
+@@ -253,7 +260,7 @@
+ error = -ENOMEM;
+ goto Enomem;
+ }
+- init_MUTEX(&buffer->sem);
++ mutex_init(&buffer->mutex);
+ buffer->needs_read_fill = 1;
+ buffer->ops = ops;
+ file->private_data = buffer;
+@@ -292,6 +299,7 @@
+ if (buffer) {
+ if (buffer->page)
+ free_page((unsigned long)buffer->page);
++ mutex_destroy(&buffer->mutex);
+ kfree(buffer);
+ }
+ return 0;
+diff -Nurb linux-2.6.22-570/fs/configfs/item.c linux-2.6.22-591/fs/configfs/item.c
+--- linux-2.6.22-570/fs/configfs/item.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/item.c 2007-12-21 15:36:12.000000000 -0500
+@@ -62,7 +62,6 @@
+ * dynamically allocated string that @item->ci_name points to.
+ * Otherwise, use the static @item->ci_namebuf array.
+ */
+-
+ int config_item_set_name(struct config_item * item, const char * fmt, ...)
+ {
+ int error = 0;
+@@ -139,12 +138,7 @@
+ return item;
+ }
+
+-/**
+- * config_item_cleanup - free config_item resources.
+- * @item: item.
+- */
+-
+-void config_item_cleanup(struct config_item * item)
++static void config_item_cleanup(struct config_item * item)
+ {
+ struct config_item_type * t = item->ci_type;
+ struct config_group * s = item->ci_group;
+@@ -179,12 +173,10 @@
+ kref_put(&item->ci_kref, config_item_release);
+ }
+
+-
+ /**
+ * config_group_init - initialize a group for use
+ * @k: group
+ */
+-
+ void config_group_init(struct config_group *group)
+ {
+ config_item_init(&group->cg_item);
+@@ -201,8 +193,8 @@
+ * looking for a matching config_item. If matching item is found
+ * take a reference and return the item.
+ */
+-
+-struct config_item * config_group_find_obj(struct config_group * group, const char * name)
++struct config_item *config_group_find_obj(struct config_group *group,
++ const char * name)
+ {
+ struct list_head * entry;
+ struct config_item * ret = NULL;
+@@ -219,7 +211,6 @@
+ return ret;
+ }
+
+-
+ EXPORT_SYMBOL(config_item_init);
+ EXPORT_SYMBOL(config_group_init);
+ EXPORT_SYMBOL(config_item_get);
+diff -Nurb linux-2.6.22-570/fs/drop_caches.c linux-2.6.22-591/fs/drop_caches.c
+--- linux-2.6.22-570/fs/drop_caches.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/drop_caches.c 2007-12-21 15:36:12.000000000 -0500
+@@ -3,6 +3,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/fs.h>
+ #include <linux/writeback.h>
+@@ -12,7 +13,7 @@
+ /* A global variable is a bit ugly, but it keeps the code simple */
+ int sysctl_drop_caches;
+
+-static void drop_pagecache_sb(struct super_block *sb)
++void drop_pagecache_sb(struct super_block *sb)
+ {
+ struct inode *inode;
+
+@@ -24,6 +25,7 @@
+ }
+ spin_unlock(&inode_lock);
+ }
++EXPORT_SYMBOL(drop_pagecache_sb);
+
+ void drop_pagecache(void)
+ {
+diff -Nurb linux-2.6.22-570/fs/ecryptfs/inode.c linux-2.6.22-591/fs/ecryptfs/inode.c
+--- linux-2.6.22-570/fs/ecryptfs/inode.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ecryptfs/inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -280,7 +280,9 @@
+ int rc = 0;
+ struct dentry *lower_dir_dentry;
+ struct dentry *lower_dentry;
++ struct dentry *dentry_save;
+ struct vfsmount *lower_mnt;
++ struct vfsmount *mnt_save;
+ char *encoded_name;
+ unsigned int encoded_namelen;
+ struct ecryptfs_crypt_stat *crypt_stat = NULL;
+@@ -308,9 +310,13 @@
+ }
+ ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
+ "= [%d]\n", encoded_name, encoded_namelen);
+- lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
+- encoded_namelen - 1);
++ dentry_save = nd->dentry;
++ mnt_save = nd->mnt;
++ lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry,
++ (encoded_namelen - 1), nd);
+ kfree(encoded_name);
++ nd->mnt = mnt_save;
++ nd->dentry = dentry_save;
+ if (IS_ERR(lower_dentry)) {
+ ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
+ rc = PTR_ERR(lower_dentry);
+diff -Nurb linux-2.6.22-570/fs/ecryptfs/main.c linux-2.6.22-591/fs/ecryptfs/main.c
+--- linux-2.6.22-570/fs/ecryptfs/main.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ecryptfs/main.c 2007-12-21 15:36:12.000000000 -0500
+@@ -840,8 +840,6 @@
+ goto out;
+ }
+ kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
+- sysfs_attr_version.attr.owner = THIS_MODULE;
+- sysfs_attr_version_str.attr.owner = THIS_MODULE;
+ rc = do_sysfs_registration();
+ if (rc) {
+ printk(KERN_ERR "sysfs registration failed\n");
+diff -Nurb linux-2.6.22-570/fs/exec.c linux-2.6.22-591/fs/exec.c
+--- linux-2.6.22-570/fs/exec.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/exec.c 2007-12-21 15:36:14.000000000 -0500
+@@ -861,9 +861,9 @@
+ current->sas_ss_sp = current->sas_ss_size = 0;
+
+ if (current->euid == current->uid && current->egid == current->gid)
+- current->mm->dumpable = 1;
++ set_dumpable(current->mm, 1);
+ else
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+
+ name = bprm->filename;
+
+@@ -889,12 +889,12 @@
+
+ if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
+ suid_keys(current);
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ current->pdeath_signal = 0;
+ } else if (file_permission(bprm->file, MAY_READ) ||
+ (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+ suid_keys(current);
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ }
+
+ /* An exec changes our domain. We are no longer part of the thread
+@@ -1486,6 +1486,55 @@
+ return core_waiters;
+ }
+
++/*
++ * set_dumpable converts traditional three-value dumpable to two flags and
++ * stores them into mm->flags. It modifies lower two bits of mm->flags, but
++ * these bits are not changed atomically. So get_dumpable can observe the
++ * intermediate state. To avoid unexpected behavior, get_dumpable returns
++ * either the old dumpable value or the new one by paying attention to the
++ * order in which the bits are modified.
++ *
++ * dumpable | mm->flags (binary)
++ * old new | initial interim final
++ * ---------+-----------------------
++ * 0 1 | 00 01 01
++ * 0 2 | 00 10(*) 11
++ * 1 0 | 01 00 00
++ * 1 2 | 01 11 11
++ * 2 0 | 11 10(*) 00
++ * 2 1 | 11 11 01
++ *
++ * (*) get_dumpable regards interim value of 10 as 11.
++ */
++void set_dumpable(struct mm_struct *mm, int value)
++{
++ switch (value) {
++ case 0:
++ clear_bit(MMF_DUMPABLE, &mm->flags);
++ smp_wmb();
++ clear_bit(MMF_DUMP_SECURELY, &mm->flags);
++ break;
++ case 1:
++ set_bit(MMF_DUMPABLE, &mm->flags);
++ smp_wmb();
++ clear_bit(MMF_DUMP_SECURELY, &mm->flags);
++ break;
++ case 2:
++ set_bit(MMF_DUMP_SECURELY, &mm->flags);
++ smp_wmb();
++ set_bit(MMF_DUMPABLE, &mm->flags);
++ break;
++ }
++}
++
++int get_dumpable(struct mm_struct *mm)
++{
++ int ret;
++
++ ret = mm->flags & 0x3;
++ return (ret >= 2) ? 2 : ret;
++}
++
+ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
+ {
+ char corename[CORENAME_MAX_SIZE + 1];
+@@ -1504,7 +1553,7 @@
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ down_write(&mm->mmap_sem);
+- if (!mm->dumpable) {
++ if (!get_dumpable(mm)) {
+ up_write(&mm->mmap_sem);
+ goto fail;
+ }
+@@ -1514,11 +1563,11 @@
+ * process nor do we know its entire history. We only know it
+ * was tainted so we dump it as root in mode 2.
+ */
+- if (mm->dumpable == 2) { /* Setuid core dump mode */
++ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */
+ flag = O_EXCL; /* Stop rewrite attacks */
+ current->fsuid = 0; /* Dump root private */
+ }
+- mm->dumpable = 0;
++ set_dumpable(mm, 0);
+
+ retval = coredump_wait(exit_code);
+ if (retval < 0)
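The transition table above can be checked mechanically. A small user-space model of the same logic (illustrative only; MMF_DUMPABLE and MMF_DUMP_SECURELY are assumed to be bits 0 and 1, matching the table):

    #include <assert.h>
    #include <stdio.h>

    #define MMF_DUMPABLE      0  /* assumed bit position */
    #define MMF_DUMP_SECURELY 1  /* assumed bit position */

    static int model_get_dumpable(unsigned long flags)
    {
            int ret = flags & 0x3;

            return (ret >= 2) ? 2 : ret;  /* interim 10 reads as 2 */
    }

    int main(void)
    {
            /* Walk the "2 -> 0" row of the table: 11 -> 10 -> 00. */
            unsigned long flags = 0x3;  /* dumpable == 2 */

            flags &= ~(1UL << MMF_DUMPABLE);  /* interim state 10 */
            assert(model_get_dumpable(flags) == 2);  /* still the old value */

            flags &= ~(1UL << MMF_DUMP_SECURELY);  /* final state 00 */
            assert(model_get_dumpable(flags) == 0);  /* now the new value */

            printf("no observer ever saw anything but the old or new value\n");
            return 0;
    }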
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_address.c linux-2.6.22-591/fs/gfs2/ops_address.c
+--- linux-2.6.22-570/fs/gfs2/ops_address.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/gfs2/ops_address.c 2007-12-21 15:36:12.000000000 -0500
+@@ -250,7 +250,7 @@
+ if (file) {
+ gf = file->private_data;
+ if (test_bit(GFF_EXLOCK, &gf->f_flags))
+- /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
++ /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
+ goto skip_lock;
+ }
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_file.c linux-2.6.22-591/fs/gfs2/ops_file.c
+--- linux-2.6.22-570/fs/gfs2/ops_file.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/gfs2/ops_file.c 2007-12-21 15:36:12.000000000 -0500
+@@ -364,6 +364,8 @@
+ else
+ vma->vm_ops = &gfs2_vm_ops_private;
+
++ vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
++
+ gfs2_glock_dq_uninit(&i_gh);
+
+ return error;
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_vm.c linux-2.6.22-591/fs/gfs2/ops_vm.c
+--- linux-2.6.22-570/fs/gfs2/ops_vm.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/gfs2/ops_vm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -27,13 +27,13 @@
+ #include "trans.h"
+ #include "util.h"
+
+-static struct page *gfs2_private_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++static struct page *gfs2_private_fault(struct vm_area_struct *vma,
++ struct fault_data *fdata)
+ {
+- struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
++ struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
+
+ set_bit(GIF_PAGED, &ip->i_flags);
+- return filemap_nopage(area, address, type);
++ return filemap_fault(vma, fdata);
+ }
+
+ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
+@@ -104,16 +104,14 @@
+ return error;
+ }
+
+-static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma,
++ struct fault_data *fdata)
+ {
+- struct file *file = area->vm_file;
++ struct file *file = vma->vm_file;
+ struct gfs2_file *gf = file->private_data;
+ struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+ struct gfs2_holder i_gh;
+ struct page *result = NULL;
+- unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
+- area->vm_pgoff;
+ int alloc_required;
+ int error;
+
+@@ -124,21 +122,27 @@
+ set_bit(GIF_PAGED, &ip->i_flags);
+ set_bit(GIF_SW_PAGED, &ip->i_flags);
+
+- error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
++ error = gfs2_write_alloc_required(ip,
++ (u64)fdata->pgoff << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, &alloc_required);
+- if (error)
++ if (error) {
++ fdata->type = VM_FAULT_OOM; /* XXX: are these right? */
+ goto out;
++ }
+
+ set_bit(GFF_EXLOCK, &gf->f_flags);
+- result = filemap_nopage(area, address, type);
++ result = filemap_fault(vma, fdata);
+ clear_bit(GFF_EXLOCK, &gf->f_flags);
+- if (!result || result == NOPAGE_OOM)
++ if (!result)
+ goto out;
+
+ if (alloc_required) {
+ error = alloc_page_backing(ip, result);
+ if (error) {
++ if (vma->vm_flags & VM_CAN_INVALIDATE)
++ unlock_page(result);
+ page_cache_release(result);
++ fdata->type = VM_FAULT_OOM;
+ result = NULL;
+ goto out;
+ }
+@@ -152,10 +156,10 @@
+ }
+
+ struct vm_operations_struct gfs2_vm_ops_private = {
+- .nopage = gfs2_private_nopage,
++ .fault = gfs2_private_fault,
+ };
+
+ struct vm_operations_struct gfs2_vm_ops_sharewrite = {
+- .nopage = gfs2_sharewrite_nopage,
++ .fault = gfs2_sharewrite_fault,
+ };
+
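Distilled from the two conversions above, the shape of a handler under the transitional ->fault API this series uses (a sketch against fault_data and a page-returning handler, not the final mainline vm_fault signature; example_fault and example_mmap are hypothetical):

    static struct page *example_fault(struct vm_area_struct *vma,
                                      struct fault_data *fdata)
    {
            /* fdata->pgoff replaces the old address arithmetic and
             * fdata->type replaces the old *type out-parameter */
            return filemap_fault(vma, fdata);
    }

    static struct vm_operations_struct example_vm_ops = {
            .fault = example_fault,
    };

    static int example_mmap(struct file *file, struct vm_area_struct *vma)
    {
            vma->vm_ops = &example_vm_ops;
            vma->vm_flags |= VM_CAN_INVALIDATE;  /* the new fault path expects this */
            return 0;
    }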
+diff -Nurb linux-2.6.22-570/fs/inode.c linux-2.6.22-591/fs/inode.c
+--- linux-2.6.22-570/fs/inode.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -149,7 +149,7 @@
+ mapping->a_ops = &empty_aops;
+ mapping->host = inode;
+ mapping->flags = 0;
+- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
++ mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+ mapping->assoc_mapping = NULL;
+ mapping->backing_dev_info = &default_backing_dev_info;
+
+@@ -525,7 +525,13 @@
+ * new_inode - obtain an inode
+ * @sb: superblock
+ *
+- * Allocates a new inode for given superblock.
++ * Allocates a new inode for the given superblock. The default gfp_mask
++ * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.
++ * If HIGHMEM pages are unsuitable, or it is known that pages allocated
++ * for the page cache are not reclaimable or migratable,
++ * mapping_set_gfp_mask() must be called with suitable flags on the
++ * newly created inode's mapping.
++ *
+ */
+ struct inode *new_inode(struct super_block *sb)
+ {
+diff -Nurb linux-2.6.22-570/fs/jbd/journal.c linux-2.6.22-591/fs/jbd/journal.c
+--- linux-2.6.22-570/fs/jbd/journal.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/jbd/journal.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1710,7 +1710,7 @@
+ journal_head_cache = kmem_cache_create("journal_head",
+ sizeof(struct journal_head),
+ 0, /* offset */
+- 0, /* flags */
++ SLAB_TEMPORARY, /* flags */
+ NULL, /* ctor */
+ NULL); /* dtor */
+ retval = 0;
+@@ -2007,7 +2007,7 @@
+ jbd_handle_cache = kmem_cache_create("journal_handle",
+ sizeof(handle_t),
+ 0, /* offset */
+- 0, /* flags */
++ SLAB_TEMPORARY, /* flags */
+ NULL, /* ctor */
+ NULL); /* dtor */
+ if (jbd_handle_cache == NULL) {
+diff -Nurb linux-2.6.22-570/fs/jbd/revoke.c linux-2.6.22-591/fs/jbd/revoke.c
+--- linux-2.6.22-570/fs/jbd/revoke.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/jbd/revoke.c 2007-12-21 15:36:12.000000000 -0500
+@@ -169,13 +169,17 @@
+ {
+ revoke_record_cache = kmem_cache_create("revoke_record",
+ sizeof(struct jbd_revoke_record_s),
+- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ 0,
++ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
++ NULL, NULL);
+ if (revoke_record_cache == 0)
+ return -ENOMEM;
+
+ revoke_table_cache = kmem_cache_create("revoke_table",
+ sizeof(struct jbd_revoke_table_s),
+- 0, 0, NULL, NULL);
++ 0,
++ SLAB_TEMPORARY,
++ NULL, NULL);
+ if (revoke_table_cache == 0) {
+ kmem_cache_destroy(revoke_record_cache);
+ revoke_record_cache = NULL;
+diff -Nurb linux-2.6.22-570/fs/jffs2/background.c linux-2.6.22-591/fs/jffs2/background.c
+--- linux-2.6.22-570/fs/jffs2/background.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/jffs2/background.c 2007-12-21 15:36:12.000000000 -0500
+@@ -81,6 +81,7 @@
+
+ set_user_nice(current, 10);
+
++ set_freezable();
+ for (;;) {
+ allow_signal(SIGHUP);
+
+diff -Nurb linux-2.6.22-570/fs/lockd/host.c linux-2.6.22-591/fs/lockd/host.c
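set_freezable() calls like this one recur throughout the series because kernel threads are now unfreezable unless they opt in. The usual daemon pattern, as a sketch (example_daemon is hypothetical; the freezer and kthread calls are real):

    #include <linux/freezer.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int example_daemon(void *unused)
    {
            set_freezable();  /* opt back in to the freezer */

            while (!kthread_should_stop()) {
                    try_to_freeze();  /* park here across suspend/resume */
                    /* ... one unit of background work ... */
                    schedule_timeout_interruptible(HZ);
            }
            return 0;
    }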
+--- linux-2.6.22-570/fs/lockd/host.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/lockd/host.c 2007-12-21 15:36:12.000000000 -0500
+@@ -161,15 +161,9 @@
+ */
+ nsm_unmonitor(host);
+
+- if ((clnt = host->h_rpcclnt) != NULL) {
+- if (atomic_read(&clnt->cl_users)) {
+- printk(KERN_WARNING
+- "lockd: active RPC handle\n");
+- clnt->cl_dead = 1;
+- } else {
+- rpc_destroy_client(host->h_rpcclnt);
+- }
+- }
++ clnt = host->h_rpcclnt;
++ if (clnt != NULL)
++ rpc_shutdown_client(clnt);
+ kfree(host);
+ }
+
+diff -Nurb linux-2.6.22-570/fs/lockd/mon.c linux-2.6.22-591/fs/lockd/mon.c
+--- linux-2.6.22-570/fs/lockd/mon.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/lockd/mon.c 2007-12-21 15:36:12.000000000 -0500
+@@ -61,6 +61,7 @@
+ status);
+ else
+ status = 0;
++ rpc_shutdown_client(clnt);
+ out:
+ return status;
+ }
+@@ -138,7 +139,6 @@
+ .program = &nsm_program,
+ .version = SM_VERSION,
+ .authflavor = RPC_AUTH_NULL,
+- .flags = (RPC_CLNT_CREATE_ONESHOT),
+ };
+
+ return rpc_create(&args);
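With RPC_CLNT_CREATE_ONESHOT gone, the caller owns the client's lifetime and must shut it down explicitly, as the hunk above now does after rpc_call_sync(). The resulting lifecycle, sketched (example_call and its arguments are hypothetical):

    static int example_call(struct rpc_create_args *args, struct rpc_message *msg)
    {
            struct rpc_clnt *clnt;
            int status;

            clnt = rpc_create(args);
            if (IS_ERR(clnt))
                    return PTR_ERR(clnt);

            status = rpc_call_sync(clnt, msg, 0);
            rpc_shutdown_client(clnt);  /* was implicit with ONESHOT */
            return status;
    }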
+diff -Nurb linux-2.6.22-570/fs/lockd/svc.c linux-2.6.22-591/fs/lockd/svc.c
+--- linux-2.6.22-570/fs/lockd/svc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/lockd/svc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <linux/smp.h>
+ #include <linux/smp_lock.h>
+ #include <linux/mutex.h>
++#include <linux/freezer.h>
+
+ #include <linux/sunrpc/types.h>
+ #include <linux/sunrpc/stats.h>
+@@ -119,13 +120,11 @@
+ complete(&lockd_start_done);
+
+ daemonize("lockd");
++ set_freezable();
+
+ /* Process request with signals blocked, but allow SIGKILL. */
+ allow_signal(SIGKILL);
+
+- /* kick rpciod */
+- rpciod_up();
+-
+ dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
+
+ if (!nlm_timeout)
+@@ -202,9 +201,6 @@
+ /* Exit the RPC thread */
+ svc_exit_thread(rqstp);
+
+- /* release rpciod */
+- rpciod_down();
+-
+ /* Release module */
+ unlock_kernel();
+ module_put_and_exit(0);
+diff -Nurb linux-2.6.22-570/fs/namei.c linux-2.6.22-591/fs/namei.c
+--- linux-2.6.22-570/fs/namei.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/namei.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1386,7 +1386,8 @@
+ return 0;
+ }
+
+-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
++struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
++ int len, struct nameidata *nd)
+ {
+ int err;
+ struct qstr this;
+@@ -1394,7 +1395,7 @@
+ err = __lookup_one_len(name, &this, base, len);
+ if (err)
+ return ERR_PTR(err);
+- return __lookup_hash(&this, base, NULL);
++ return __lookup_hash(&this, base, nd);
+ }
+
+ struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len)
+@@ -3086,7 +3087,7 @@
+ EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
+ EXPORT_SYMBOL(getname);
+ EXPORT_SYMBOL(lock_rename);
+-EXPORT_SYMBOL(lookup_one_len);
++EXPORT_SYMBOL(lookup_one_len_nd);
+ EXPORT_SYMBOL(page_follow_link_light);
+ EXPORT_SYMBOL(page_put_link);
+ EXPORT_SYMBOL(page_readlink);
+diff -Nurb linux-2.6.22-570/fs/namespace.c linux-2.6.22-591/fs/namespace.c
+--- linux-2.6.22-570/fs/namespace.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/namespace.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1538,7 +1538,7 @@
+
+ new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+ if (!new_ns)
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&new_ns->count, 1);
+ INIT_LIST_HEAD(&new_ns->list);
+@@ -1552,7 +1552,7 @@
+ if (!new_ns->root) {
+ up_write(&namespace_sem);
+ kfree(new_ns);
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+ }
+ spin_lock(&vfsmount_lock);
+ list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
+@@ -1597,7 +1597,7 @@
+ return new_ns;
+ }
+
+-struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
++struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+ struct fs_struct *new_fs)
+ {
+ struct mnt_namespace *new_ns;
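Since dup_mnt_ns() now reports failure as ERR_PTR(-ENOMEM) rather than NULL, callers of copy_mnt_ns() have to use the IS_ERR convention. A fragment of a hypothetical call site:

    new_ns = copy_mnt_ns(flags, current->nsproxy->mnt_ns, new_fs);
    if (IS_ERR(new_ns))
            return PTR_ERR(new_ns);  /* a NULL check would now miss the error */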
+diff -Nurb linux-2.6.22-570/fs/ncpfs/mmap.c linux-2.6.22-591/fs/ncpfs/mmap.c
+--- linux-2.6.22-570/fs/ncpfs/mmap.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ncpfs/mmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -25,8 +25,8 @@
+ /*
+ * Fill in the supplied page for mmap
+ */
+-static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
++ struct fault_data *fdata)
+ {
+ struct file *file = area->vm_file;
+ struct dentry *dentry = file->f_path.dentry;
+@@ -40,15 +40,17 @@
+
+ page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
+ as long as recvmsg and memset work on it */
+- if (!page)
+- return page;
++ if (!page) {
++ fdata->type = VM_FAULT_OOM;
++ return NULL;
++ }
+ pg_addr = kmap(page);
+- address &= PAGE_MASK;
+- pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
++ pos = fdata->pgoff << PAGE_SHIFT;
+
+ count = PAGE_SIZE;
+- if (address + PAGE_SIZE > area->vm_end) {
+- count = area->vm_end - address;
++ if (fdata->address + PAGE_SIZE > area->vm_end) {
++ WARN_ON(1); /* shouldn't happen? */
++ count = area->vm_end - fdata->address;
+ }
+ /* what we can read in one go */
+ bufsize = NCP_SERVER(inode)->buffer_size;
+@@ -91,15 +93,14 @@
+ * fetches from the network, here the analogue of disk.
+ * -- wli
+ */
+- if (type)
+- *type = VM_FAULT_MAJOR;
++ fdata->type = VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ return page;
+ }
+
+ static struct vm_operations_struct ncp_file_mmap =
+ {
+- .nopage = ncp_file_mmap_nopage,
++ .fault = ncp_file_mmap_fault,
+ };
+
+
+@@ -123,6 +124,7 @@
+ return -EFBIG;
+
+ vma->vm_ops = &ncp_file_mmap;
++ vma->vm_flags |= VM_CAN_INVALIDATE;
+ file_accessed(file);
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/fs/nfs/callback.c linux-2.6.22-591/fs/nfs/callback.c
+--- linux-2.6.22-570/fs/nfs/callback.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/callback.c 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/sunrpc/svcsock.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/mutex.h>
++#include <linux/freezer.h>
+
+ #include <net/inet_sock.h>
+
+@@ -67,6 +68,7 @@
+ daemonize("nfsv4-svc");
+ /* Process request with signals blocked, but allow SIGKILL. */
+ allow_signal(SIGKILL);
++ set_freezable();
+
+ complete(&nfs_callback_info.started);
+
+diff -Nurb linux-2.6.22-570/fs/nfs/client.c linux-2.6.22-591/fs/nfs/client.c
+--- linux-2.6.22-570/fs/nfs/client.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/nfs/client.c 2007-12-21 15:36:12.000000000 -0500
+@@ -102,19 +102,10 @@
+ int nfsversion)
+ {
+ struct nfs_client *clp;
+- int error;
+
+ if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+ goto error_0;
+
+- error = rpciod_up();
+- if (error < 0) {
+- dprintk("%s: couldn't start rpciod! Error = %d\n",
+- __FUNCTION__, error);
+- goto error_1;
+- }
+- __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-
+ if (nfsversion == 4) {
+ if (nfs_callback_up() < 0)
+ goto error_2;
+@@ -154,9 +145,6 @@
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+ error_2:
+- rpciod_down();
+- __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-error_1:
+ kfree(clp);
+ error_0:
+ return NULL;
+@@ -198,9 +186,6 @@
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+
+- if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+- rpciod_down();
+-
+ kfree(clp->cl_hostname);
+ kfree(clp);
+
+diff -Nurb linux-2.6.22-570/fs/nfs/delegation.c linux-2.6.22-591/fs/nfs/delegation.c
+--- linux-2.6.22-570/fs/nfs/delegation.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/delegation.c 2007-12-21 15:36:12.000000000 -0500
+@@ -74,7 +74,7 @@
+ continue;
+ get_nfs_open_context(ctx);
+ spin_unlock(&inode->i_lock);
+- err = nfs4_open_delegation_recall(ctx->dentry, state);
++ err = nfs4_open_delegation_recall(ctx, state);
+ if (err >= 0)
+ err = nfs_delegation_claim_locks(ctx, state);
+ put_nfs_open_context(ctx);
+diff -Nurb linux-2.6.22-570/fs/nfs/delegation.h linux-2.6.22-591/fs/nfs/delegation.h
+--- linux-2.6.22-570/fs/nfs/delegation.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/delegation.h 2007-12-21 15:36:12.000000000 -0500
+@@ -39,7 +39,7 @@
+
+ /* NFSv4 delegation-related procedures */
+ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state);
+ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+
+diff -Nurb linux-2.6.22-570/fs/nfs/dir.c linux-2.6.22-591/fs/nfs/dir.c
+--- linux-2.6.22-570/fs/nfs/dir.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/nfs/dir.c 2007-12-21 15:36:12.000000000 -0500
+@@ -898,14 +898,13 @@
+ return (nd->intent.open.flags & O_EXCL) != 0;
+ }
+
+-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+- struct nfs_fh *fh, struct nfs_fattr *fattr)
++static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
+ {
+ struct nfs_server *server = NFS_SERVER(dir);
+
+ if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
+- /* Revalidate fsid on root dir */
+- return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
++ /* Revalidate fsid using the parent directory */
++ return __nfs_revalidate_inode(server, dir);
+ return 0;
+ }
+
+@@ -947,7 +946,7 @@
+ res = ERR_PTR(error);
+ goto out_unlock;
+ }
+- error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
++ error = nfs_reval_fsid(dir, &fattr);
+ if (error < 0) {
+ res = ERR_PTR(error);
+ goto out_unlock;
+@@ -1247,7 +1246,7 @@
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
+
+- if (nd && (nd->flags & LOOKUP_CREATE))
++ if ((nd->flags & LOOKUP_CREATE) != 0)
+ open_flags = nd->intent.open.flags;
+
+ lock_kernel();
+@@ -1747,8 +1746,8 @@
+ struct nfs_inode *nfsi;
+ struct nfs_access_entry *cache;
+
+- spin_lock(&nfs_access_lru_lock);
+ restart:
++ spin_lock(&nfs_access_lru_lock);
+ list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+ struct inode *inode;
+
+@@ -1773,6 +1772,7 @@
+ clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ }
+ spin_unlock(&inode->i_lock);
++ spin_unlock(&nfs_access_lru_lock);
+ iput(inode);
+ goto restart;
+ }
+diff -Nurb linux-2.6.22-570/fs/nfs/direct.c linux-2.6.22-591/fs/nfs/direct.c
+--- linux-2.6.22-570/fs/nfs/direct.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/direct.c 2007-12-21 15:36:12.000000000 -0500
+@@ -266,7 +266,7 @@
+ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+ {
+ struct nfs_open_context *ctx = dreq->ctx;
+- struct inode *inode = ctx->dentry->d_inode;
++ struct inode *inode = ctx->path.dentry->d_inode;
+ size_t rsize = NFS_SERVER(inode)->rsize;
+ unsigned int pgbase;
+ int result;
+@@ -295,10 +295,15 @@
+ break;
+ }
+ if ((unsigned)result < data->npages) {
++ bytes = result * PAGE_SIZE;
++ if (bytes <= pgbase) {
+ nfs_direct_release_pages(data->pagevec, result);
+ nfs_readdata_release(data);
+ break;
+ }
++ bytes -= pgbase;
++ data->npages = result;
++ }
+
+ get_dreq(dreq);
+
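The added arithmetic copes with get_user_pages() pinning fewer pages than requested. A worked example with hypothetical numbers (PAGE_SIZE = 4096): if pgbase = 1024 and data->npages = 4 but only result = 2 pages are pinned, then bytes = 2 * 4096 = 8192; since 8192 > pgbase, the transfer is trimmed to bytes - pgbase = 7168 usable bytes and data->npages becomes 2. Only when bytes <= pgbase would no usable data remain, in which case the pinned pages are released and the loop exits.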
+@@ -601,7 +606,7 @@
+ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
+ {
+ struct nfs_open_context *ctx = dreq->ctx;
+- struct inode *inode = ctx->dentry->d_inode;
++ struct inode *inode = ctx->path.dentry->d_inode;
+ size_t wsize = NFS_SERVER(inode)->wsize;
+ unsigned int pgbase;
+ int result;
+@@ -630,10 +635,15 @@
+ break;
+ }
+ if ((unsigned)result < data->npages) {
++ bytes = result * PAGE_SIZE;
++ if (bytes <= pgbase) {
+ nfs_direct_release_pages(data->pagevec, result);
+ nfs_writedata_release(data);
+ break;
+ }
++ bytes -= pgbase;
++ data->npages = result;
++ }
+
+ get_dreq(dreq);
+
+@@ -763,10 +773,8 @@
+ (unsigned long) count, (long long) pos);
+
+ if (nr_segs != 1)
+- return -EINVAL;
+-
+- if (count < 0)
+ goto out;
++
+ retval = -EFAULT;
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ goto out;
+@@ -814,7 +822,7 @@
+ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+ {
+- ssize_t retval;
++ ssize_t retval = -EINVAL;
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ /* XXX: temporary */
+@@ -827,7 +835,7 @@
+ (unsigned long) count, (long long) pos);
+
+ if (nr_segs != 1)
+- return -EINVAL;
++ goto out;
+
+ retval = generic_write_checks(file, &pos, &count, 0);
+ if (retval)
+diff -Nurb linux-2.6.22-570/fs/nfs/inode.c linux-2.6.22-591/fs/nfs/inode.c
+--- linux-2.6.22-570/fs/nfs/inode.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/nfs/inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -466,14 +466,14 @@
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (ctx != NULL) {
+- atomic_set(&ctx->count, 1);
+- ctx->dentry = dget(dentry);
+- ctx->vfsmnt = mntget(mnt);
++ ctx->path.dentry = dget(dentry);
++ ctx->path.mnt = mntget(mnt);
+ ctx->cred = get_rpccred(cred);
+ ctx->state = NULL;
+ ctx->lockowner = current->files;
+ ctx->error = 0;
+ ctx->dir_cookie = 0;
++ kref_init(&ctx->kref);
+ }
+ return ctx;
+ }
+@@ -481,27 +481,33 @@
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ if (ctx != NULL)
+- atomic_inc(&ctx->count);
++ kref_get(&ctx->kref);
+ return ctx;
+ }
+
+-void put_nfs_open_context(struct nfs_open_context *ctx)
++static void nfs_free_open_context(struct kref *kref)
+ {
+- if (atomic_dec_and_test(&ctx->count)) {
++ struct nfs_open_context *ctx = container_of(kref,
++ struct nfs_open_context, kref);
++
+ if (!list_empty(&ctx->list)) {
+- struct inode *inode = ctx->dentry->d_inode;
++ struct inode *inode = ctx->path.dentry->d_inode;
+ spin_lock(&inode->i_lock);
+ list_del(&ctx->list);
+ spin_unlock(&inode->i_lock);
+ }
+ if (ctx->state != NULL)
+- nfs4_close_state(ctx->state, ctx->mode);
++ nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+ if (ctx->cred != NULL)
+ put_rpccred(ctx->cred);
+- dput(ctx->dentry);
+- mntput(ctx->vfsmnt);
++ dput(ctx->path.dentry);
++ mntput(ctx->path.mnt);
+ kfree(ctx);
+- }
++}
++
++void put_nfs_open_context(struct nfs_open_context *ctx)
++{
++ kref_put(&ctx->kref, nfs_free_open_context);
+ }
+
+ /*
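The atomic_t-to-kref conversion above follows a standard recipe, and the same reshaping is applied to nfs4_opendata and nfs_page elsewhere in this patch. Its general form, with an illustrative struct foo (kref_init/kref_get/kref_put and container_of are the real API):

    #include <linux/kernel.h>
    #include <linux/kref.h>
    #include <linux/slab.h>

    struct foo {
            struct kref kref;
            /* ... payload ... */
    };

    static void foo_release(struct kref *kref)
    {
            struct foo *f = container_of(kref, struct foo, kref);

            kfree(f);  /* runs exactly once, when the last reference drops */
    }

    static struct foo *foo_get(struct foo *f)
    {
            kref_get(&f->kref);
            return f;
    }

    static void foo_put(struct foo *f)
    {
            kref_put(&f->kref, foo_release);
    }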
+@@ -977,8 +983,8 @@
+ goto out_changed;
+
+ server = NFS_SERVER(inode);
+- /* Update the fsid if and only if this is the root directory */
+- if (inode == inode->i_sb->s_root->d_inode
++ /* Update the fsid? */
++ if (S_ISDIR(inode->i_mode)
+ && !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+ server->fsid = fattr->fsid;
+
+@@ -1125,27 +1131,10 @@
+ */
+ void nfs4_clear_inode(struct inode *inode)
+ {
+- struct nfs_inode *nfsi = NFS_I(inode);
+-
+ /* If we are holding a delegation, return it! */
+ nfs_inode_return_delegation(inode);
+ /* First call standard NFS clear_inode() code */
+ nfs_clear_inode(inode);
+- /* Now clear out any remaining state */
+- while (!list_empty(&nfsi->open_states)) {
+- struct nfs4_state *state;
+-
+- state = list_entry(nfsi->open_states.next,
+- struct nfs4_state,
+- inode_states);
+- dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
+- __FUNCTION__,
+- inode->i_sb->s_id,
+- (long long)NFS_FILEID(inode),
+- state);
+- BUG_ON(atomic_read(&state->count) != 1);
+- nfs4_close_state(state, state->state);
+- }
+ }
+ #endif
+
+@@ -1188,14 +1177,11 @@
+
+ inode_init_once(&nfsi->vfs_inode);
+ spin_lock_init(&nfsi->req_lock);
+- INIT_LIST_HEAD(&nfsi->dirty);
+- INIT_LIST_HEAD(&nfsi->commit);
+ INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
+ INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+ atomic_set(&nfsi->data_updates, 0);
+- nfsi->ndirty = 0;
+ nfsi->ncommit = 0;
+ nfsi->npages = 0;
+ nfs4_init_once(nfsi);
+diff -Nurb linux-2.6.22-570/fs/nfs/mount_clnt.c linux-2.6.22-591/fs/nfs/mount_clnt.c
+--- linux-2.6.22-570/fs/nfs/mount_clnt.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/mount_clnt.c 2007-12-21 15:36:12.000000000 -0500
+@@ -69,6 +69,7 @@
+ msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+ status = rpc_call_sync(mnt_clnt, &msg, 0);
++ rpc_shutdown_client(mnt_clnt);
+ return status < 0? status : (result.status? -EACCES : 0);
+ }
+
+@@ -84,8 +85,7 @@
+ .program = &mnt_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+- .flags = (RPC_CLNT_CREATE_ONESHOT |
+- RPC_CLNT_CREATE_INTR),
++ .flags = RPC_CLNT_CREATE_INTR,
+ };
+
+ return rpc_create(&args);
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs3proc.c linux-2.6.22-591/fs/nfs/nfs3proc.c
+--- linux-2.6.22-570/fs/nfs/nfs3proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/nfs3proc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -335,9 +335,7 @@
+ * not sure this buys us anything (and I'd have
+ * to revamp the NFSv3 XDR code) */
+ status = nfs3_proc_setattr(dentry, &fattr, sattr);
+- if (status == 0)
+- nfs_setattr_update_inode(dentry->d_inode, sattr);
+- nfs_refresh_inode(dentry->d_inode, &fattr);
++ nfs_post_op_update_inode(dentry->d_inode, &fattr);
+ dprintk("NFS reply setattr (post-create): %d\n", status);
+ }
+ if (status != 0)
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4_fs.h linux-2.6.22-591/fs/nfs/nfs4_fs.h
+--- linux-2.6.22-570/fs/nfs/nfs4_fs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/nfs4_fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -165,7 +165,7 @@
+ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -196,7 +196,7 @@
+ extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+ extern void nfs4_put_open_state(struct nfs4_state *);
+-extern void nfs4_close_state(struct nfs4_state *, mode_t);
++extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
+ extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
+ extern void nfs4_schedule_state_recovery(struct nfs_client *);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+@@ -222,7 +222,7 @@
+
+ #else
+
+-#define nfs4_close_state(a, b) do { } while (0)
++#define nfs4_close_state(a, b, c) do { } while (0)
+
+ #endif /* CONFIG_NFS_V4 */
+ #endif /* __LINUX_FS_NFS_NFS4_FS.H */
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4proc.c linux-2.6.22-591/fs/nfs/nfs4proc.c
+--- linux-2.6.22-570/fs/nfs/nfs4proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/nfs4proc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -214,14 +214,14 @@
+ }
+
+ struct nfs4_opendata {
+- atomic_t count;
++ struct kref kref;
+ struct nfs_openargs o_arg;
+ struct nfs_openres o_res;
+ struct nfs_open_confirmargs c_arg;
+ struct nfs_open_confirmres c_res;
+ struct nfs_fattr f_attr;
+ struct nfs_fattr dir_attr;
+- struct dentry *dentry;
++ struct path path;
+ struct dentry *dir;
+ struct nfs4_state_owner *owner;
+ struct iattr attrs;
+@@ -230,11 +230,11 @@
+ int cancelled;
+ };
+
+-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
++static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ struct nfs4_state_owner *sp, int flags,
+ const struct iattr *attrs)
+ {
+- struct dentry *parent = dget_parent(dentry);
++ struct dentry *parent = dget_parent(path->dentry);
+ struct inode *dir = parent->d_inode;
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs4_opendata *p;
+@@ -245,8 +245,8 @@
+ p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+ if (p->o_arg.seqid == NULL)
+ goto err_free;
+- atomic_set(&p->count, 1);
+- p->dentry = dget(dentry);
++ p->path.mnt = mntget(path->mnt);
++ p->path.dentry = dget(path->dentry);
+ p->dir = parent;
+ p->owner = sp;
+ atomic_inc(&sp->so_count);
+@@ -254,7 +254,7 @@
+ p->o_arg.open_flags = flags,
+ p->o_arg.clientid = server->nfs_client->cl_clientid;
+ p->o_arg.id = sp->so_id;
+- p->o_arg.name = &dentry->d_name;
++ p->o_arg.name = &p->path.dentry->d_name;
+ p->o_arg.server = server;
+ p->o_arg.bitmask = server->attr_bitmask;
+ p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+@@ -274,6 +274,7 @@
+ p->c_arg.fh = &p->o_res.fh;
+ p->c_arg.stateid = &p->o_res.stateid;
+ p->c_arg.seqid = p->o_arg.seqid;
++ kref_init(&p->kref);
+ return p;
+ err_free:
+ kfree(p);
+@@ -282,27 +283,23 @@
+ return NULL;
+ }
+
+-static void nfs4_opendata_free(struct nfs4_opendata *p)
++static void nfs4_opendata_free(struct kref *kref)
+ {
+- if (p != NULL && atomic_dec_and_test(&p->count)) {
++ struct nfs4_opendata *p = container_of(kref,
++ struct nfs4_opendata, kref);
++
+ nfs_free_seqid(p->o_arg.seqid);
+ nfs4_put_state_owner(p->owner);
+ dput(p->dir);
+- dput(p->dentry);
++ dput(p->path.dentry);
++ mntput(p->path.mnt);
+ kfree(p);
+- }
+ }
+
+-/* Helper for asynchronous RPC calls */
+-static int nfs4_call_async(struct rpc_clnt *clnt,
+- const struct rpc_call_ops *tk_ops, void *calldata)
++static void nfs4_opendata_put(struct nfs4_opendata *p)
+ {
+- struct rpc_task *task;
+-
+- if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
+- return -ENOMEM;
+- rpc_execute(task);
+- return 0;
++ if (p != NULL)
++ kref_put(&p->kref, nfs4_opendata_free);
+ }
+
+ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
+@@ -451,7 +448,7 @@
+ opendata->owner->so_cred,
+ &opendata->o_res);
+ }
+- nfs4_close_state(newstate, opendata->o_arg.open_flags);
++ nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags);
+ }
+ if (newstate != state)
+ return -ESTALE;
+@@ -462,7 +459,7 @@
+ * OPEN_RECLAIM:
+ * reclaim state on the server after a reboot.
+ */
+-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+ struct nfs4_opendata *opendata;
+@@ -478,7 +475,7 @@
+ }
+ delegation_type = delegation->type;
+ }
+- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
+@@ -486,17 +483,17 @@
+ nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+ opendata->o_arg.u.delegation_type = delegation_type;
+ status = nfs4_open_recover(opendata, state);
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return status;
+ }
+
+-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+- err = _nfs4_do_open_reclaim(sp, state, dentry);
++ err = _nfs4_do_open_reclaim(ctx, state);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, err, &exception);
+@@ -512,12 +509,12 @@
+ ctx = nfs4_state_find_open_context(state);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+- ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
++ ret = nfs4_do_open_reclaim(ctx, state);
+ put_nfs_open_context(ctx);
+ return ret;
+ }
+
+-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct nfs4_state_owner *sp = state->owner;
+ struct nfs4_opendata *opendata;
+@@ -525,24 +522,24 @@
+
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ return 0;
+- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+ memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+ sizeof(opendata->o_arg.u.delegation.data));
+ ret = nfs4_open_recover(opendata, state);
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return ret;
+ }
+
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct nfs4_exception exception = { };
+- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ int err;
+ do {
+- err = _nfs4_open_delegation_recall(dentry, state);
++ err = _nfs4_open_delegation_recall(ctx, state);
+ switch (err) {
+ case 0:
+ return err;
+@@ -601,9 +598,9 @@
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ state = nfs4_opendata_to_nfs4_state(data);
+ if (state != NULL)
+- nfs4_close_state(state, data->o_arg.open_flags);
++ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+- nfs4_opendata_free(data);
++ nfs4_opendata_put(data);
+ }
+
+ static const struct rpc_call_ops nfs4_open_confirm_ops = {
+@@ -621,7 +618,7 @@
+ struct rpc_task *task;
+ int status;
+
+- atomic_inc(&data->count);
++ kref_get(&data->kref);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+@@ -704,9 +701,9 @@
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ state = nfs4_opendata_to_nfs4_state(data);
+ if (state != NULL)
+- nfs4_close_state(state, data->o_arg.open_flags);
++ nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+- nfs4_opendata_free(data);
++ nfs4_opendata_put(data);
+ }
+
+ static const struct rpc_call_ops nfs4_open_ops = {
+@@ -727,7 +724,7 @@
+ struct rpc_task *task;
+ int status;
+
+- atomic_inc(&data->count);
++ kref_get(&data->kref);
+ /*
+ * If rpc_run_task() ends up calling ->rpc_release(), we
+ * want to ensure that it takes the 'error' code path.
+@@ -811,7 +808,7 @@
+ * reclaim state on the server after a network partition.
+ * Assumes caller holds the appropriate lock
+ */
+-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ struct inode *inode = state->inode;
+ struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+@@ -820,34 +817,34 @@
+ int ret;
+
+ if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+- ret = _nfs4_do_access(inode, sp->so_cred, openflags);
++ ret = _nfs4_do_access(inode, ctx->cred, openflags);
+ if (ret < 0)
+ return ret;
+ memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ return 0;
+ }
+- opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, openflags, NULL);
+ if (opendata == NULL)
+ return -ENOMEM;
+ ret = nfs4_open_recover(opendata, state);
+ if (ret == -ESTALE) {
+ /* Invalidate the state owner so we don't ever use it again */
+- nfs4_drop_state_owner(sp);
+- d_drop(dentry);
++ nfs4_drop_state_owner(state->owner);
++ d_drop(ctx->path.dentry);
+ }
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ return ret;
+ }
+
+-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+- err = _nfs4_open_expired(sp, state, dentry);
++ err = _nfs4_open_expired(ctx, state);
+ if (err == -NFS4ERR_DELAY)
+ nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+@@ -862,7 +859,7 @@
+ ctx = nfs4_state_find_open_context(state);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+- ret = nfs4_do_open_expired(sp, state, ctx->dentry);
++ ret = nfs4_do_open_expired(ctx, state);
+ put_nfs_open_context(ctx);
+ return ret;
+ }
+@@ -953,9 +950,25 @@
+ }
+
+ /*
++ * On an EXCLUSIVE create, the server should send back a bitmask with FATTR4_*
++ * fields corresponding to attributes that were used to store the verifier.
++ * Make sure we clobber those fields in the later setattr call.
++ */
++static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
++{
++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
++ !(sattr->ia_valid & ATTR_ATIME_SET))
++ sattr->ia_valid |= ATTR_ATIME;
++
++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
++ !(sattr->ia_valid & ATTR_MTIME_SET))
++ sattr->ia_valid |= ATTR_MTIME;
++}
++
++/*
+ * Returns a referenced nfs4_state
+ */
+-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
++static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+ {
+ struct nfs4_state_owner *sp;
+ struct nfs4_state *state = NULL;
+@@ -975,27 +988,30 @@
+ goto err_put_state_owner;
+ down_read(&clp->cl_sem);
+ status = -ENOMEM;
+- opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
++ opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
+ if (opendata == NULL)
+ goto err_release_rwsem;
+
+ status = _nfs4_proc_open(opendata);
+ if (status != 0)
+- goto err_opendata_free;
++ goto err_opendata_put;
++
++ if (opendata->o_arg.open_flags & O_EXCL)
++ nfs4_exclusive_attrset(opendata, sattr);
+
+ status = -ENOMEM;
+ state = nfs4_opendata_to_nfs4_state(opendata);
+ if (state == NULL)
+- goto err_opendata_free;
++ goto err_opendata_put;
+ if (opendata->o_res.delegation_type != 0)
+ nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
+- nfs4_opendata_free(opendata);
++ nfs4_opendata_put(opendata);
+ nfs4_put_state_owner(sp);
+ up_read(&clp->cl_sem);
+ *res = state;
+ return 0;
+-err_opendata_free:
+- nfs4_opendata_free(opendata);
++err_opendata_put:
++ nfs4_opendata_put(opendata);
+ err_release_rwsem:
+ up_read(&clp->cl_sem);
+ err_put_state_owner:
+@@ -1006,14 +1022,14 @@
+ }
+
+
+-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
++static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
+ {
+ struct nfs4_exception exception = { };
+ struct nfs4_state *res;
+ int status;
+
+ do {
+- status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
++ status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
+ if (status == 0)
+ break;
+ /* NOTE: BAD_SEQID means the server and client disagree about the
+@@ -1101,6 +1117,7 @@
+ }
+
+ struct nfs4_closedata {
++ struct path path;
+ struct inode *inode;
+ struct nfs4_state *state;
+ struct nfs_closeargs arg;
+@@ -1117,6 +1134,8 @@
+ nfs4_put_open_state(calldata->state);
+ nfs_free_seqid(calldata->arg.seqid);
+ nfs4_put_state_owner(sp);
++ dput(calldata->path.dentry);
++ mntput(calldata->path.mnt);
+ kfree(calldata);
+ }
+
+@@ -1209,18 +1228,20 @@
+ *
+ * NOTE: Caller must be holding the sp->so_owner semaphore!
+ */
+-int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
++int nfs4_do_close(struct path *path, struct nfs4_state *state)
+ {
+- struct nfs_server *server = NFS_SERVER(inode);
++ struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs4_closedata *calldata;
++ struct nfs4_state_owner *sp = state->owner;
++ struct rpc_task *task;
+ int status = -ENOMEM;
+
+ calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+ if (calldata == NULL)
+ goto out;
+- calldata->inode = inode;
++ calldata->inode = state->inode;
+ calldata->state = state;
+- calldata->arg.fh = NFS_FH(inode);
++ calldata->arg.fh = NFS_FH(state->inode);
+ calldata->arg.stateid = &state->stateid;
+ /* Serialization for the sequence id */
+ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+@@ -1229,36 +1250,44 @@
+ calldata->arg.bitmask = server->attr_bitmask;
+ calldata->res.fattr = &calldata->fattr;
+ calldata->res.server = server;
++ calldata->path.mnt = mntget(path->mnt);
++ calldata->path.dentry = dget(path->dentry);
+
+- status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
+- if (status == 0)
+- goto out;
+-
+- nfs_free_seqid(calldata->arg.seqid);
++ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
++ rpc_put_task(task);
++ return 0;
+ out_free_calldata:
+ kfree(calldata);
+ out:
++ nfs4_put_open_state(state);
++ nfs4_put_state_owner(sp);
+ return status;
+ }
+
+-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
++static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
+ {
+ struct file *filp;
+
+- filp = lookup_instantiate_filp(nd, dentry, NULL);
++ filp = lookup_instantiate_filp(nd, path->dentry, NULL);
+ if (!IS_ERR(filp)) {
+ struct nfs_open_context *ctx;
+ ctx = (struct nfs_open_context *)filp->private_data;
+ ctx->state = state;
+ return 0;
+ }
+- nfs4_close_state(state, nd->intent.open.flags);
++ nfs4_close_state(path, state, nd->intent.open.flags);
+ return PTR_ERR(filp);
+ }
+
+ struct dentry *
+ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct iattr attr;
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+@@ -1277,7 +1306,7 @@
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ if (IS_ERR(cred))
+ return (struct dentry *)cred;
+- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ if (PTR_ERR(state) == -ENOENT)
+@@ -1287,13 +1316,17 @@
+ res = d_add_unique(dentry, igrab(state->inode));
+ if (res != NULL)
+ dentry = res;
+- nfs4_intent_set_file(nd, dentry, state);
++ nfs4_intent_set_file(nd, &path, state);
+ return res;
+ }
+
+ int
+ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+
+@@ -1302,7 +1335,7 @@
+ return PTR_ERR(cred);
+ state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+ if (IS_ERR(state))
+- state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
++ state = nfs4_do_open(dir, &path, openflags, NULL, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ switch (PTR_ERR(state)) {
+@@ -1318,10 +1351,10 @@
+ }
+ }
+ if (state->inode == dentry->d_inode) {
+- nfs4_intent_set_file(nd, dentry, state);
++ nfs4_intent_set_file(nd, &path, state);
+ return 1;
+ }
+- nfs4_close_state(state, openflags);
++ nfs4_close_state(&path, state, openflags);
+ out_drop:
+ d_drop(dentry);
+ return 0;
+@@ -1752,6 +1785,10 @@
+ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ int flags, struct nameidata *nd)
+ {
++ struct path path = {
++ .mnt = nd->mnt,
++ .dentry = dentry,
++ };
+ struct nfs4_state *state;
+ struct rpc_cred *cred;
+ int status = 0;
+@@ -1761,7 +1798,7 @@
+ status = PTR_ERR(cred);
+ goto out;
+ }
+- state = nfs4_do_open(dir, dentry, flags, sattr, cred);
++ state = nfs4_do_open(dir, &path, flags, sattr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ status = PTR_ERR(state);
+@@ -1773,11 +1810,12 @@
+ status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
+ if (status == 0)
+ nfs_setattr_update_inode(state->inode, sattr);
++ nfs_post_op_update_inode(state->inode, &fattr);
+ }
+- if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+- status = nfs4_intent_set_file(nd, dentry, state);
++ if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
++ status = nfs4_intent_set_file(nd, &path, state);
+ else
+- nfs4_close_state(state, flags);
++ nfs4_close_state(&path, state, flags);
+ out:
+ return status;
+ }
+@@ -3285,7 +3323,7 @@
+ memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+ sizeof(data->lsp->ls_stateid.data));
+ data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+- renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
++ renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ }
+ nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
+ out:
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4state.c linux-2.6.22-591/fs/nfs/nfs4state.c
+--- linux-2.6.22-570/fs/nfs/nfs4state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/nfs4state.c 2007-12-21 15:36:12.000000000 -0500
+@@ -341,7 +341,7 @@
+ /*
+ * Close the current file.
+ */
+-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
++void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+ {
+ struct inode *inode = state->inode;
+ struct nfs4_state_owner *owner = state->owner;
+@@ -375,10 +375,11 @@
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&owner->so_lock);
+
+- if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+- return;
++ if (oldstate == newstate) {
+ nfs4_put_open_state(state);
+ nfs4_put_state_owner(owner);
++ } else
++ nfs4_do_close(path, state);
+ }
+
+ /*
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4xdr.c linux-2.6.22-591/fs/nfs/nfs4xdr.c
+--- linux-2.6.22-570/fs/nfs/nfs4xdr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/nfs4xdr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -3269,7 +3269,7 @@
+ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+ {
+ __be32 *p;
+- uint32_t bmlen;
++ uint32_t savewords, bmlen, i;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_OPEN);
+@@ -3287,7 +3287,12 @@
+ goto xdr_error;
+
+ READ_BUF(bmlen << 2);
+- p += bmlen;
++ savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
++ for (i = 0; i < savewords; ++i)
++ READ32(res->attrset[i]);
++
++ p += (bmlen - savewords);
++
+ return decode_delegation(xdr, res);
+ xdr_error:
+ dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
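The words captured into res->attrset here are what nfs4_exclusive_attrset(), added to fs/nfs/nfs4proc.c earlier in this patch, goes on to inspect: word 1 carries FATTR4_WORD1_TIME_ACCESS and FATTR4_WORD1_TIME_MODIFY, so an EXCLUSIVE create whose verifier was stored in the timestamps gets them clobbered by the follow-up SETATTR.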
+diff -Nurb linux-2.6.22-570/fs/nfs/pagelist.c linux-2.6.22-591/fs/nfs/pagelist.c
+--- linux-2.6.22-570/fs/nfs/pagelist.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/pagelist.c 2007-12-21 15:36:12.000000000 -0500
+@@ -85,9 +85,8 @@
+ req->wb_offset = offset;
+ req->wb_pgbase = offset;
+ req->wb_bytes = count;
+- atomic_set(&req->wb_count, 1);
+ req->wb_context = get_nfs_open_context(ctx);
+-
++ kref_init(&req->wb_kref);
+ return req;
+ }
+
+@@ -109,29 +108,29 @@
+ }
+
+ /**
+- * nfs_set_page_writeback_locked - Lock a request for writeback
++ * nfs_set_page_tag_locked - Tag a request as locked
+ * @req:
+ */
+-int nfs_set_page_writeback_locked(struct nfs_page *req)
++static int nfs_set_page_tag_locked(struct nfs_page *req)
+ {
+- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
+
+ if (!nfs_lock_request(req))
+ return 0;
+- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
++ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ return 1;
+ }
+
+ /**
+- * nfs_clear_page_writeback - Unlock request and wake up sleepers
++ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
+ */
+-void nfs_clear_page_writeback(struct nfs_page *req)
++void nfs_clear_page_tag_locked(struct nfs_page *req)
+ {
+- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
+
+ if (req->wb_page != NULL) {
+ spin_lock(&nfsi->req_lock);
+- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
++ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ spin_unlock(&nfsi->req_lock);
+ }
+ nfs_unlock_request(req);
+@@ -160,11 +159,9 @@
+ *
+ * Note: Should never be called with the spinlock held!
+ */
+-void
+-nfs_release_request(struct nfs_page *req)
++static void nfs_free_request(struct kref *kref)
+ {
+- if (!atomic_dec_and_test(&req->wb_count))
+- return;
++ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+
+ /* Release struct file or cached credential */
+ nfs_clear_request(req);
+@@ -172,6 +169,11 @@
+ nfs_page_free(req);
+ }
+
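++/* Drop a reference; nfs_free_request() runs once the last one is gone. */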
++void nfs_release_request(struct nfs_page *req)
++{
++ kref_put(&req->wb_kref, nfs_free_request);
++}
++
+ static int nfs_wait_bit_interruptible(void *word)
+ {
+ int ret = 0;
+@@ -193,7 +195,7 @@
+ int
+ nfs_wait_on_request(struct nfs_page *req)
+ {
+- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
++ struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
+ sigset_t oldmask;
+ int ret = 0;
+
+@@ -379,10 +381,10 @@
+ /**
+ * nfs_scan_list - Scan a list for matching requests
+ * @nfsi: NFS inode
+- * @head: One of the NFS inode request lists
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
++ * @tag: tag to scan for
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+@@ -390,9 +392,9 @@
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ */
+-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
++int nfs_scan_list(struct nfs_inode *nfsi,
+ struct list_head *dst, pgoff_t idx_start,
+- unsigned int npages)
++ unsigned int npages, int tag)
+ {
+ struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ struct nfs_page *req;
+@@ -407,9 +409,9 @@
+ idx_end = idx_start + npages - 1;
+
+ for (;;) {
+- found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
++ found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+ (void **)&pgvec[0], idx_start,
+- NFS_SCAN_MAXENTRIES);
++ NFS_SCAN_MAXENTRIES, tag);
+ if (found <= 0)
+ break;
+ for (i = 0; i < found; i++) {
+@@ -417,15 +419,18 @@
+ if (req->wb_index > idx_end)
+ goto out;
+ idx_start = req->wb_index + 1;
+- if (req->wb_list_head != head)
+- continue;
+- if (nfs_set_page_writeback_locked(req)) {
++ if (nfs_set_page_tag_locked(req)) {
+ nfs_list_remove_request(req);
++ radix_tree_tag_clear(&nfsi->nfs_page_tree,
++ req->wb_index, tag);
+ nfs_list_add_request(req, dst);
+ res++;
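++ /* Stop before the signed result counter can overflow. */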
++ if (res == INT_MAX)
++ goto out;
+ }
+ }
+-
++ /* for latency reduction */
++ cond_resched_lock(&nfsi->req_lock);
+ }
+ out:
+ return res;
+diff -Nurb linux-2.6.22-570/fs/nfs/read.c linux-2.6.22-591/fs/nfs/read.c
+--- linux-2.6.22-570/fs/nfs/read.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/read.c 2007-12-21 15:36:12.000000000 -0500
+@@ -145,8 +145,8 @@
+ unlock_page(req->wb_page);
+
+ dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ nfs_clear_request(req);
+@@ -164,7 +164,7 @@
+ int flags;
+
+ data->req = req;
+- data->inode = inode = req->wb_context->dentry->d_inode;
++ data->inode = inode = req->wb_context->path.dentry->d_inode;
+ data->cred = req->wb_context->cred;
+
+ data->args.fh = NFS_FH(inode);
+@@ -483,17 +483,19 @@
+ */
+ error = nfs_wb_page(inode, page);
+ if (error)
+- goto out_error;
++ goto out_unlock;
++ if (PageUptodate(page))
++ goto out_unlock;
+
+ error = -ESTALE;
+ if (NFS_STALE(inode))
+- goto out_error;
++ goto out_unlock;
+
+ if (file == NULL) {
+ error = -EBADF;
+ ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ if (ctx == NULL)
+- goto out_error;
++ goto out_unlock;
+ } else
+ ctx = get_nfs_open_context((struct nfs_open_context *)
+ file->private_data);
+@@ -502,8 +504,7 @@
+
+ put_nfs_open_context(ctx);
+ return error;
+-
+-out_error:
++out_unlock:
+ unlock_page(page);
+ return error;
+ }
+@@ -520,21 +521,32 @@
+ struct inode *inode = page->mapping->host;
+ struct nfs_page *new;
+ unsigned int len;
++ int error;
++
++ error = nfs_wb_page(inode, page);
++ if (error)
++ goto out_unlock;
++ if (PageUptodate(page))
++ goto out_unlock;
+
+- nfs_wb_page(inode, page);
+ len = nfs_page_length(page);
+ if (len == 0)
+ return nfs_return_empty_page(page);
++
+ new = nfs_create_request(desc->ctx, inode, page, 0, len);
+- if (IS_ERR(new)) {
+- SetPageError(page);
+- unlock_page(page);
+- return PTR_ERR(new);
+- }
++ if (IS_ERR(new))
++ goto out_error;
++
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+ nfs_pageio_add_request(desc->pgio, new);
+ return 0;
++out_error:
++ error = PTR_ERR(new);
++ SetPageError(page);
++out_unlock:
++ unlock_page(page);
++ return error;
+ }
+
+ int nfs_readpages(struct file *filp, struct address_space *mapping,
+diff -Nurb linux-2.6.22-570/fs/nfs/super.c linux-2.6.22-591/fs/nfs/super.c
+--- linux-2.6.22-570/fs/nfs/super.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/nfs/super.c 2007-12-21 15:36:12.000000000 -0500
+@@ -292,6 +292,7 @@
+ { NFS_MOUNT_NONLM, ",nolock", "" },
+ { NFS_MOUNT_NOACL, ",noacl", "" },
+ { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
++ { NFS_MOUNT_UNSHARED, ",nosharecache", ""},
+ { NFS_MOUNT_TAGGED, ",tag", "" },
+ { 0, NULL, NULL }
+ };
+@@ -432,7 +433,20 @@
+ */
+ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
+ {
++ struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
++ struct rpc_clnt *rpc;
++
+ shrink_submounts(vfsmnt, &nfs_automount_list);
++
++ if (!(flags & MNT_FORCE))
++ return;
++ /* Cause all pending I/O to fail with -EIO */
++ rpc = server->client_acl;
++ if (!IS_ERR(rpc))
++ rpc_killall_tasks(rpc);
++ rpc = server->client;
++ if (!IS_ERR(rpc))
++ rpc_killall_tasks(rpc);
+ }
+
+ /*
+@@ -602,13 +616,51 @@
+ {
+ struct nfs_server *server = data, *old = NFS_SB(sb);
+
+- if (old->nfs_client != server->nfs_client)
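++ /* Only share a superblock with mounts of the same server address. */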
++ if (memcmp(&old->nfs_client->cl_addr,
++ &server->nfs_client->cl_addr,
++ sizeof(old->nfs_client->cl_addr)) != 0)
++ return 0;
++ /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
++ if (old->flags & NFS_MOUNT_UNSHARED)
+ return 0;
+ if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+ return 0;
+ return 1;
+ }
+
++#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
++
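++/*
++ * A superblock can only be shared when the two mounts agree on all
++ * of these options; otherwise the attempt fails with -EBUSY.
++ */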
++static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
++{
++ const struct nfs_server *a = s->s_fs_info;
++ const struct rpc_clnt *clnt_a = a->client;
++ const struct rpc_clnt *clnt_b = b->client;
++
++ if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
++ goto Ebusy;
++ if (a->nfs_client != b->nfs_client)
++ goto Ebusy;
++ if (a->flags != b->flags)
++ goto Ebusy;
++ if (a->wsize != b->wsize)
++ goto Ebusy;
++ if (a->rsize != b->rsize)
++ goto Ebusy;
++ if (a->acregmin != b->acregmin)
++ goto Ebusy;
++ if (a->acregmax != b->acregmax)
++ goto Ebusy;
++ if (a->acdirmin != b->acdirmin)
++ goto Ebusy;
++ if (a->acdirmax != b->acdirmax)
++ goto Ebusy;
++ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
++ goto Ebusy;
++ return 0;
++Ebusy:
++ return -EBUSY;
++}
++
+ static int nfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+ {
+@@ -617,6 +669,7 @@
+ struct nfs_fh mntfh;
+ struct nfs_mount_data *data = raw_data;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ int error;
+
+ /* Validate the mount data */
+@@ -631,16 +684,22 @@
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+ }
+
+ if (s->s_fs_info != server) {
++ error = nfs_compare_mount_options(s, server, flags);
+ nfs_free_server(server);
+ server = NULL;
++ if (error < 0)
++ goto error_splat_super;
+ }
+
+ if (!s->s_root) {
+@@ -693,6 +752,7 @@
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs_xdev_get_sb()\n");
+@@ -704,8 +764,11 @@
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+@@ -810,6 +873,7 @@
+ struct dentry *mntroot;
+ char *mntpath = NULL, *hostname = NULL, ip_addr[16];
+ void *p;
++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ int error;
+
+ if (data == NULL) {
+@@ -881,16 +945,22 @@
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_free;
+ }
+
+ if (s->s_fs_info != server) {
++ error = nfs_compare_mount_options(s, server, flags);
+ nfs_free_server(server);
+ server = NULL;
++ if (error < 0)
++ goto error_splat_super;
+ }
+
+ if (!s->s_root) {
+@@ -951,6 +1021,7 @@
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs4_xdev_get_sb()\n");
+@@ -962,8 +1033,11 @@
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+@@ -1018,6 +1092,7 @@
+ struct nfs_server *server;
+ struct dentry *mntroot;
+ struct nfs_fh mntfh;
++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ int error;
+
+ dprintk("--> nfs4_referral_get_sb()\n");
+@@ -1029,8 +1104,11 @@
+ goto out_err_noserver;
+ }
+
++ if (server->flags & NFS4_MOUNT_UNSHARED)
++ compare_super = NULL;
++
+ /* Get a superblock - note that we may end up sharing one that already exists */
+- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+diff -Nurb linux-2.6.22-570/fs/nfs/write.c linux-2.6.22-591/fs/nfs/write.c
+--- linux-2.6.22-570/fs/nfs/write.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/write.c 2007-12-21 15:36:12.000000000 -0500
+@@ -117,7 +117,7 @@
+ if (PagePrivate(page)) {
+ req = (struct nfs_page *)page_private(page);
+ if (req != NULL)
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ }
+ return req;
+ }
+@@ -191,8 +191,6 @@
+ }
+ /* Update file length */
+ nfs_grow_file(page, offset, count);
+- /* Set the PG_uptodate flag? */
+- nfs_mark_uptodate(page, offset, count);
+ nfs_unlock_request(req);
+ return 0;
+ }
+@@ -291,7 +289,7 @@
+ BUG();
+ }
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+- NFS_PAGE_TAG_WRITEBACK);
++ NFS_PAGE_TAG_LOCKED);
+ ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+ spin_unlock(req_lock);
+ nfs_pageio_add_request(pgio, req);
+@@ -400,7 +398,7 @@
+ if (PageDirty(req->wb_page))
+ set_bit(PG_NEED_FLUSH, &req->wb_flags);
+ nfsi->npages++;
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ return 0;
+ }
+
+@@ -409,7 +407,7 @@
+ */
+ static void nfs_inode_remove_request(struct nfs_page *req)
+ {
+- struct inode *inode = req->wb_context->dentry->d_inode;
++ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ BUG_ON (!NFS_WBACK_BUSY(req));
+@@ -457,13 +455,15 @@
+ static void
+ nfs_mark_request_commit(struct nfs_page *req)
+ {
+- struct inode *inode = req->wb_context->dentry->d_inode;
++ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ spin_lock(&nfsi->req_lock);
+- nfs_list_add_request(req, &nfsi->commit);
+ nfsi->ncommit++;
+ set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
++ radix_tree_tag_set(&nfsi->nfs_page_tree,
++ req->wb_index,
++ NFS_PAGE_TAG_COMMIT);
+ spin_unlock(&nfsi->req_lock);
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+@@ -526,14 +526,14 @@
+ idx_end = idx_start + npages - 1;
+
+ next = idx_start;
+- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
++ while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
+ if (req->wb_index > idx_end)
+ break;
+
+ next = req->wb_index + 1;
+ BUG_ON(!NFS_WBACK_BUSY(req));
+
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ spin_unlock(&nfsi->req_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+@@ -577,10 +577,9 @@
+ int res = 0;
+
+ if (nfsi->ncommit != 0) {
+- res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
++ res = nfs_scan_list(nfsi, dst, idx_start, npages,
++ NFS_PAGE_TAG_COMMIT);
+ nfsi->ncommit -= res;
+- if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+- printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ }
+ return res;
+ }
+@@ -751,12 +750,17 @@
+ static void nfs_writepage_release(struct nfs_page *req)
+ {
+
+- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
++ if (PageError(req->wb_page)) {
++ nfs_end_page_writeback(req->wb_page);
++ nfs_inode_remove_request(req);
++ } else if (!nfs_reschedule_unstable_write(req)) {
++ /* Set the PG_uptodate flag */
++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
+ nfs_end_page_writeback(req->wb_page);
+ nfs_inode_remove_request(req);
+ } else
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+
+ static inline int flush_task_priority(int how)
+@@ -786,7 +790,7 @@
+ * NB: take care not to mess about with data->commit et al. */
+
+ data->req = req;
+- data->inode = inode = req->wb_context->dentry->d_inode;
++ data->inode = inode = req->wb_context->path.dentry->d_inode;
+ data->cred = req->wb_context->cred;
+
+ data->args.fh = NFS_FH(inode);
+@@ -885,7 +889,7 @@
+ }
+ nfs_redirty_request(req);
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ return -ENOMEM;
+ }
+
+@@ -928,7 +932,7 @@
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ nfs_end_page_writeback(req->wb_page);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ return -ENOMEM;
+ }
+@@ -954,8 +958,8 @@
+ struct page *page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+@@ -1020,8 +1024,8 @@
+ page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+@@ -1039,12 +1043,14 @@
+ dprintk(" marked for commit\n");
+ goto next;
+ }
++ /* Set the PG_uptodate flag? */
++ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ dprintk(" OK\n");
+ remove_request:
+ nfs_end_page_writeback(page);
+ nfs_inode_remove_request(req);
+ next:
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ }
+
+@@ -1157,7 +1163,7 @@
+
+ list_splice_init(head, &data->pages);
+ first = nfs_list_entry(data->pages.next);
+- inode = first->wb_context->dentry->d_inode;
++ inode = first->wb_context->path.dentry->d_inode;
+
+ data->inode = inode;
+ data->cred = first->wb_context->cred;
+@@ -1207,7 +1213,7 @@
+ nfs_list_remove_request(req);
+ nfs_mark_request_commit(req);
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ return -ENOMEM;
+ }
+@@ -1234,8 +1240,8 @@
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+
+ dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+- req->wb_context->dentry->d_inode->i_sb->s_id,
+- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++ req->wb_context->path.dentry->d_inode->i_sb->s_id,
++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ if (task->tk_status < 0) {
+@@ -1249,6 +1255,9 @@
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
++ /* Set the PG_uptodate flag */
++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
++ req->wb_bytes);
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+@@ -1257,7 +1266,7 @@
+ dprintk(" mismatch\n");
+ nfs_redirty_request(req);
+ next:
+- nfs_clear_page_writeback(req);
++ nfs_clear_page_tag_locked(req);
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4callback.c linux-2.6.22-591/fs/nfsd/nfs4callback.c
+--- linux-2.6.22-570/fs/nfsd/nfs4callback.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfsd/nfs4callback.c 2007-12-21 15:36:12.000000000 -0500
+@@ -429,29 +429,23 @@
+ goto out_err;
+ }
+
+- /* Kick rpciod, put the call on the wire. */
+- if (rpciod_up() != 0)
+- goto out_clnt;
+-
+ /* the task holds a reference to the nfs4_client struct */
+ atomic_inc(&clp->cl_count);
+
+ msg.rpc_cred = nfsd4_lookupcred(clp,0);
+ if (IS_ERR(msg.rpc_cred))
+- goto out_rpciod;
++ goto out_release_clp;
+ status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+ put_rpccred(msg.rpc_cred);
+
+ if (status != 0) {
+ dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+- goto out_rpciod;
++ goto out_release_clp;
+ }
+ return;
+
+-out_rpciod:
++out_release_clp:
+ atomic_dec(&clp->cl_count);
+- rpciod_down();
+-out_clnt:
+ rpc_shutdown_client(cb->cb_client);
+ out_err:
+ cb->cb_client = NULL;
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4state.c linux-2.6.22-591/fs/nfsd/nfs4state.c
+--- linux-2.6.22-570/fs/nfsd/nfs4state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfsd/nfs4state.c 2007-12-21 15:36:12.000000000 -0500
+@@ -378,7 +378,6 @@
+ if (clnt) {
+ clp->cl_callback.cb_client = NULL;
+ rpc_shutdown_client(clnt);
+- rpciod_down();
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfssvc.c linux-2.6.22-591/fs/nfsd/nfssvc.c
+--- linux-2.6.22-570/fs/nfsd/nfssvc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfsd/nfssvc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/slab.h>
+ #include <linux/smp.h>
+ #include <linux/smp_lock.h>
++#include <linux/freezer.h>
+ #include <linux/fs_struct.h>
+
+ #include <linux/sunrpc/types.h>
+@@ -432,6 +433,7 @@
+ * dirty pages.
+ */
+ current->flags |= PF_LESS_THROTTLE;
++ set_freezable();
+
+ /*
+ * The main request loop
+diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.c linux-2.6.22-591/fs/ocfs2/alloc.c
+--- linux-2.6.22-570/fs/ocfs2/alloc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/alloc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -50,6 +50,8 @@
+ #include "buffer_head_io.h"
+
+ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
++ struct ocfs2_extent_block *eb);
+
+ /*
+ * Structures which describe a path through a btree, and functions to
+@@ -117,6 +119,31 @@
+ }
+
+ /*
++ * Copy all the elements of src into dest. After this call, src could be freed
++ * without affecting dest.
++ *
++ * Both paths should have the same root. Any non-root elements of dest
++ * will be freed.
++ */
++static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
++{
++ int i;
++
++ BUG_ON(path_root_bh(dest) != path_root_bh(src));
++ BUG_ON(path_root_el(dest) != path_root_el(src));
++
++ ocfs2_reinit_path(dest, 1);
++
++ for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
++ dest->p_node[i].bh = src->p_node[i].bh;
++ dest->p_node[i].el = src->p_node[i].el;
++
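++ /* Take our own reference so src may be freed independently. */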
++ if (dest->p_node[i].bh)
++ get_bh(dest->p_node[i].bh);
++ }
++}
++
++/*
+ * Make the *dest path the same as src and re-initialize src path to
+ * have a root only.
+ */
+@@ -212,10 +239,41 @@
+ return ret;
+ }
+
++/*
++ * Return the index of the extent record which contains cluster #v_cluster.
++ * -1 is returned if it was not found.
++ *
++ * Should work fine on interior and exterior nodes.
++ */
++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
++{
++ int ret = -1;
++ int i;
++ struct ocfs2_extent_rec *rec;
++ u32 rec_end, rec_start, clusters;
++
++ for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
++ rec = &el->l_recs[i];
++
++ rec_start = le32_to_cpu(rec->e_cpos);
++ clusters = ocfs2_rec_clusters(el, rec);
++
++ rec_end = rec_start + clusters;
++
++ if (v_cluster >= rec_start && v_cluster < rec_end) {
++ ret = i;
++ break;
++ }
++ }
++
++ return ret;
++}
++
+ enum ocfs2_contig_type {
+ CONTIG_NONE = 0,
+ CONTIG_LEFT,
+- CONTIG_RIGHT
++ CONTIG_RIGHT,
++ CONTIG_LEFTRIGHT,
+ };
+
+
+@@ -253,6 +311,14 @@
+ {
+ u64 blkno = le64_to_cpu(insert_rec->e_blkno);
+
++ /*
++ * Refuse to coalesce extent records with different flag
++ * fields - we don't want to mix unwritten extents with user
++ * data.
++ */
++ if (ext->e_flags != insert_rec->e_flags)
++ return CONTIG_NONE;
++
+ if (ocfs2_extents_adjacent(ext, insert_rec) &&
+ ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
+ return CONTIG_RIGHT;
+@@ -277,7 +343,14 @@
+ APPEND_TAIL,
+ };
+
++enum ocfs2_split_type {
++ SPLIT_NONE = 0,
++ SPLIT_LEFT,
++ SPLIT_RIGHT,
++};
++
+ struct ocfs2_insert_type {
++ enum ocfs2_split_type ins_split;
+ enum ocfs2_append_type ins_appending;
+ enum ocfs2_contig_type ins_contig;
+ int ins_contig_index;
+@@ -285,6 +358,13 @@
+ int ins_tree_depth;
+ };
+
++struct ocfs2_merge_ctxt {
++ enum ocfs2_contig_type c_contig_type;
++ int c_has_empty_extent;
++ int c_split_covers_rec;
++ int c_used_tail_recs;
++};
++
+ /*
+ * How many free extents have we got before we need more meta data?
+ */
+@@ -384,13 +464,7 @@
+ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+ eb->h_blkno = cpu_to_le64(first_blkno);
+ eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
+-
+-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
+- /* we always use slot zero's suballocator */
+- eb->h_suballoc_slot = 0;
+-#else
+ eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
+-#endif
+ eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+ eb->h_list.l_count =
+ cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
+@@ -461,7 +535,7 @@
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ struct buffer_head *eb_bh,
+- struct buffer_head *last_eb_bh,
++ struct buffer_head **last_eb_bh,
+ struct ocfs2_alloc_context *meta_ac)
+ {
+ int status, new_blocks, i;
+@@ -476,7 +550,7 @@
+
+ mlog_entry_void();
+
+- BUG_ON(!last_eb_bh);
++ BUG_ON(!last_eb_bh || !*last_eb_bh);
+
+ fe = (struct ocfs2_dinode *) fe_bh->b_data;
+
+@@ -507,7 +581,7 @@
+ goto bail;
+ }
+
+- eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
++ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
+ new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
+
+ /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
+@@ -568,7 +642,7 @@
+ * journal_dirty erroring as it won't unless we've aborted the
+ * handle (in which case we would never be here) so reserving
+ * the write with journal_access is all we need to do. */
+- status = ocfs2_journal_access(handle, inode, last_eb_bh,
++ status = ocfs2_journal_access(handle, inode, *last_eb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+@@ -601,10 +675,10 @@
+ * next_leaf on the previously last-extent-block. */
+ fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
+
+- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
++ eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
+ eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
+
+- status = ocfs2_journal_dirty(handle, last_eb_bh);
++ status = ocfs2_journal_dirty(handle, *last_eb_bh);
+ if (status < 0)
+ mlog_errno(status);
+ status = ocfs2_journal_dirty(handle, fe_bh);
+@@ -616,6 +690,14 @@
+ mlog_errno(status);
+ }
+
++ /*
++ * Some callers want to track the rightmost leaf so pass it
++ * back here.
++ */
++ brelse(*last_eb_bh);
++ get_bh(new_eb_bhs[0]);
++ *last_eb_bh = new_eb_bhs[0];
++
+ status = 0;
+ bail:
+ if (new_eb_bhs) {
+@@ -829,6 +911,87 @@
+ }
+
+ /*
++ * Grow a b-tree so that it has more records.
++ *
++ * We might shift the tree depth in which case existing paths should
++ * be considered invalid.
++ *
++ * Tree depth after the grow is returned via *final_depth.
++ *
++ * *last_eb_bh will be updated by ocfs2_add_branch().
++ */
++static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
++ struct buffer_head *di_bh, int *final_depth,
++ struct buffer_head **last_eb_bh,
++ struct ocfs2_alloc_context *meta_ac)
++{
++ int ret, shift;
++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++ int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++ struct buffer_head *bh = NULL;
++
++ BUG_ON(meta_ac == NULL);
++
++ shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
++ if (shift < 0) {
++ ret = shift;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ /* We traveled all the way to the bottom of the allocation tree
++ * and didn't find room for any more extents - we need to add
++ * another tree level */
++ if (shift) {
++ BUG_ON(bh);
++ mlog(0, "need to shift tree depth (current = %d)\n", depth);
++
++ /* ocfs2_shift_tree_depth will return us a buffer with
++ * the new extent block (so we can pass that to
++ * ocfs2_add_branch). */
++ ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
++ meta_ac, &bh);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
++ depth++;
++ if (depth == 1) {
++ /*
++ * Special case: we have room now if we shifted from
++ * tree_depth 0, so no more work needs to be done.
++ *
++ * We won't be calling add_branch, so pass
++ * back *last_eb_bh as the new leaf. At depth
++ * zero, it should always be null so there's
++ * no reason to brelse.
++ */
++ BUG_ON(*last_eb_bh);
++ get_bh(bh);
++ *last_eb_bh = bh;
++ goto out;
++ }
++ }
++
++ /* call ocfs2_add_branch to add the final part of the tree with
++ * the new data. */
++ mlog(0, "add branch. bh = %p\n", bh);
++ ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
++ meta_ac);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++out:
++ if (final_depth)
++ *final_depth = depth;
++ brelse(bh);
++ return ret;
++}
++
++/*
+ * This is only valid for leaf nodes, which are the only ones that can
+ * have empty extents anyway.
+ */
+@@ -934,6 +1097,22 @@
+
+ }
+
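++/*
++ * Strip a leading empty extent from the leaf and close up the gap.
++ */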
++static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
++{
++ int size, num_recs = le16_to_cpu(el->l_next_free_rec);
++
++ BUG_ON(num_recs == 0);
++
++ if (ocfs2_is_empty_extent(&el->l_recs[0])) {
++ num_recs--;
++ size = num_recs * sizeof(struct ocfs2_extent_rec);
++ memmove(&el->l_recs[0], &el->l_recs[1], size);
++ memset(&el->l_recs[num_recs], 0,
++ sizeof(struct ocfs2_extent_rec));
++ el->l_next_free_rec = cpu_to_le16(num_recs);
++ }
++}
++
+ /*
+ * Create an empty extent record .
+ *
+@@ -1211,6 +1390,10 @@
+ * immediately to their right.
+ */
+ left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
++ if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
++ BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
++ left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
++ }
+ left_clusters -= le32_to_cpu(left_rec->e_cpos);
+ left_rec->e_int_clusters = cpu_to_le32(left_clusters);
+
+@@ -1531,10 +1714,16 @@
+ return ret;
+ }
+
++/*
++ * Extend the transaction by enough credits to complete the rotation,
++ * and still leave at least the original number of credits allocated
++ * to this transaction.
++ */
+ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
++ int op_credits,
+ struct ocfs2_path *path)
+ {
+- int credits = (path->p_tree_depth - subtree_depth) * 2 + 1;
++ int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
+
+ if (handle->h_buffer_credits < credits)
+ return ocfs2_extend_trans(handle, credits);
+@@ -1568,6 +1757,29 @@
+ return 0;
+ }
+
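++/*
++ * Check whether the first non-empty record in this leaf covers cpos.
++ */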
++static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
++{
++ int next_free = le16_to_cpu(el->l_next_free_rec);
++ unsigned int range;
++ struct ocfs2_extent_rec *rec;
++
++ if (next_free == 0)
++ return 0;
++
++ rec = &el->l_recs[0];
++ if (ocfs2_is_empty_extent(rec)) {
++ /* Empty list. */
++ if (next_free == 1)
++ return 0;
++ rec = &el->l_recs[1];
++ }
++
++ range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
++ if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
++ return 1;
++ return 0;
++}
++
+ /*
+ * Rotate all the records in a btree right one record, starting at insert_cpos.
+ *
+@@ -1586,11 +1798,12 @@
+ */
+ static int ocfs2_rotate_tree_right(struct inode *inode,
+ handle_t *handle,
++ enum ocfs2_split_type split,
+ u32 insert_cpos,
+ struct ocfs2_path *right_path,
+ struct ocfs2_path **ret_left_path)
+ {
+- int ret, start;
++ int ret, start, orig_credits = handle->h_buffer_credits;
+ u32 cpos;
+ struct ocfs2_path *left_path = NULL;
+
+@@ -1657,9 +1870,9 @@
+ (unsigned long long)
+ path_leaf_bh(left_path)->b_blocknr);
+
+- if (ocfs2_rotate_requires_path_adjustment(left_path,
++ if (split == SPLIT_NONE &&
++ ocfs2_rotate_requires_path_adjustment(left_path,
+ insert_cpos)) {
+- mlog(0, "Path adjustment required\n");
+
+ /*
+ * We've rotated the tree as much as we
+@@ -1687,7 +1900,7 @@
+ right_path->p_tree_depth);
+
+ ret = ocfs2_extend_rotate_transaction(handle, start,
+- right_path);
++ orig_credits, right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+@@ -1700,6 +1913,24 @@
+ goto out;
+ }
+
++ if (split != SPLIT_NONE &&
++ ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
++ insert_cpos)) {
++ /*
++ * A rotate moves the rightmost left leaf
++ * record over to the leftmost right leaf
++ * slot. If we're doing an extent split
++ * instead of a real insert, then we have to
++ * check that the extent to be split wasn't
++ * just moved over. If it was, then we can
++ * exit here, passing left_path back -
++ * ocfs2_split_extent() is smart enough to
++ * search both leaves.
++ */
++ *ret_left_path = left_path;
++ goto out_ret_path;
++ }
++
+ /*
+ * There is no need to re-read the next right path
+ * as we know that it'll be our current left
+@@ -1722,124 +1953,935 @@
+ return ret;
+ }
+
+-/*
+- * Do the final bits of extent record insertion at the target leaf
+- * list. If this leaf is part of an allocation tree, it is assumed
+- * that the tree above has been prepared.
+- */
+-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+- struct ocfs2_extent_list *el,
+- struct ocfs2_insert_type *insert,
+- struct inode *inode)
++static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
++ struct ocfs2_path *path)
+ {
+- int i = insert->ins_contig_index;
+- unsigned int range;
++ int i, idx;
+ struct ocfs2_extent_rec *rec;
++ struct ocfs2_extent_list *el;
++ struct ocfs2_extent_block *eb;
++ u32 range;
+
+- BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
++ /* Path should always be rightmost. */
++ eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
++ BUG_ON(eb->h_next_leaf_blk != 0ULL);
+
+- /*
+- * Contiguous insert - either left or right.
+- */
+- if (insert->ins_contig != CONTIG_NONE) {
+- rec = &el->l_recs[i];
+- if (insert->ins_contig == CONTIG_LEFT) {
+- rec->e_blkno = insert_rec->e_blkno;
+- rec->e_cpos = insert_rec->e_cpos;
+- }
+- le16_add_cpu(&rec->e_leaf_clusters,
+- le16_to_cpu(insert_rec->e_leaf_clusters));
+- return;
+- }
++ el = &eb->h_list;
++ BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
++ idx = le16_to_cpu(el->l_next_free_rec) - 1;
++ rec = &el->l_recs[idx];
++ range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
+
+- /*
+- * Handle insert into an empty leaf.
+- */
+- if (le16_to_cpu(el->l_next_free_rec) == 0 ||
+- ((le16_to_cpu(el->l_next_free_rec) == 1) &&
+- ocfs2_is_empty_extent(&el->l_recs[0]))) {
+- el->l_recs[0] = *insert_rec;
+- el->l_next_free_rec = cpu_to_le16(1);
+- return;
+- }
++ for (i = 0; i < path->p_tree_depth; i++) {
++ el = path->p_node[i].el;
++ idx = le16_to_cpu(el->l_next_free_rec) - 1;
++ rec = &el->l_recs[idx];
+
+- /*
+- * Appending insert.
+- */
+- if (insert->ins_appending == APPEND_TAIL) {
+- i = le16_to_cpu(el->l_next_free_rec) - 1;
+- rec = &el->l_recs[i];
+- range = le32_to_cpu(rec->e_cpos)
+- + le16_to_cpu(rec->e_leaf_clusters);
+- BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
++ rec->e_int_clusters = cpu_to_le32(range);
++ le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));
+
+- mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
+- le16_to_cpu(el->l_count),
+- "inode %lu, depth %u, count %u, next free %u, "
+- "rec.cpos %u, rec.clusters %u, "
+- "insert.cpos %u, insert.clusters %u\n",
+- inode->i_ino,
+- le16_to_cpu(el->l_tree_depth),
+- le16_to_cpu(el->l_count),
+- le16_to_cpu(el->l_next_free_rec),
+- le32_to_cpu(el->l_recs[i].e_cpos),
+- le16_to_cpu(el->l_recs[i].e_leaf_clusters),
+- le32_to_cpu(insert_rec->e_cpos),
+- le16_to_cpu(insert_rec->e_leaf_clusters));
+- i++;
+- el->l_recs[i] = *insert_rec;
+- le16_add_cpu(&el->l_next_free_rec, 1);
+- return;
++ ocfs2_journal_dirty(handle, path->p_node[i].bh);
+ }
+-
+- /*
+- * Ok, we have to rotate.
+- *
+- * At this point, it is safe to assume that inserting into an
+- * empty leaf and appending to a leaf have both been handled
+- * above.
+- *
+- * This leaf needs to have space, either by the empty 1st
+- * extent record, or by virtue of an l_next_rec < l_count.
+- */
+- ocfs2_rotate_leaf(el, insert_rec);
+-}
+-
+-static inline void ocfs2_update_dinode_clusters(struct inode *inode,
+- struct ocfs2_dinode *di,
+- u32 clusters)
+-{
+- le32_add_cpu(&di->i_clusters, clusters);
+- spin_lock(&OCFS2_I(inode)->ip_lock);
+- OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
+- spin_unlock(&OCFS2_I(inode)->ip_lock);
+ }
+
+-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
+- struct ocfs2_extent_rec *insert_rec,
++static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
++ struct ocfs2_path *left_path,
+ struct ocfs2_path *right_path,
+- struct ocfs2_path **ret_left_path)
++ int subtree_index,
++ struct ocfs2_cached_dealloc_ctxt *dealloc)
+ {
+- int ret, i, next_free;
+- struct buffer_head *bh;
++ int ret, i;
++ struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
++ struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
+ struct ocfs2_extent_list *el;
+- struct ocfs2_path *left_path = NULL;
++ struct ocfs2_extent_block *eb;
++ struct buffer_head *bh;
+
+- *ret_left_path = NULL;
++ el = path_leaf_el(left_path);
+
+- /*
+- * This shouldn't happen for non-trees. The extent rec cluster
+- * count manipulation below only works for interior nodes.
+- */
+- BUG_ON(right_path->p_tree_depth == 0);
++ eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
+
+- /*
+- * If our appending insert is at the leftmost edge of a leaf,
+- * then we might need to update the rightmost records of the
+- * neighboring path.
+- */
+- el = path_leaf_el(right_path);
+- next_free = le16_to_cpu(el->l_next_free_rec);
++ for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
++ if (root_el->l_recs[i].e_blkno == eb->h_blkno)
++ break;
++
++ BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));
++
++ memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
++ le16_add_cpu(&root_el->l_next_free_rec, -1);
++
++ eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
++ eb->h_next_leaf_blk = 0;
++
++ ocfs2_journal_dirty(handle, root_bh);
++ ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
++
++ for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
++ bh = right_path->p_node[i].bh;
++
++ eb = (struct ocfs2_extent_block *)bh->b_data;
++ /*
++ * Not all nodes might have had their final count
++ * decremented by the caller - handle this here.
++ */
++ el = &eb->h_list;
++ if (le16_to_cpu(el->l_next_free_rec) > 1) {
++ mlog(ML_ERROR,
++ "Inode %llu, attempted to remove extent block "
++ "%llu with %u records\n",
++ (unsigned long long)OCFS2_I(inode)->ip_blkno,
++ (unsigned long long)le64_to_cpu(eb->h_blkno),
++ le16_to_cpu(el->l_next_free_rec));
++
++ ocfs2_journal_dirty(handle, bh);
++ ocfs2_remove_from_cache(inode, bh);
++ continue;
++ }
++
++ el->l_next_free_rec = 0;
++ memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++
++ ocfs2_journal_dirty(handle, bh);
++
++ ret = ocfs2_cache_extent_block_free(dealloc, eb);
++ if (ret)
++ mlog_errno(ret);
++
++ ocfs2_remove_from_cache(inode, bh);
++ }
++}
++
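++/*
++ * Rotate one record from the right leaf into the left leaf. If that
++ * leaves the rightmost edge empty, unlink the right subtree and
++ * update i_last_eb_blk to point at the new rightmost leaf.
++ */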
++static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
++ struct ocfs2_path *left_path,
++ struct ocfs2_path *right_path,
++ int subtree_index,
++ struct ocfs2_cached_dealloc_ctxt *dealloc,
++ int *deleted)
++{
++ int ret, i, del_right_subtree = 0;
++ struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++ struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
++ struct ocfs2_extent_block *eb;
++
++ *deleted = 0;
++
++ right_leaf_el = path_leaf_el(right_path);
++ left_leaf_el = path_leaf_el(left_path);
++ root_bh = left_path->p_node[subtree_index].bh;
++ BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
++
++ if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
++ return 0;
++
++ if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0]))
++ return -EAGAIN;
++
++ eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
++ if (eb->h_next_leaf_blk == 0ULL &&
++ le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
++ /*
++ * We have to update i_last_eb_blk during the meta
++ * data delete.
++ */
++ ret = ocfs2_journal_access(handle, inode, di_bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ del_right_subtree = 1;
++ }
++
++ ret = ocfs2_journal_access(handle, inode, root_bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
++ ret = ocfs2_journal_access(handle, inode,
++ right_path->p_node[i].bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_journal_access(handle, inode,
++ left_path->p_node[i].bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++ }
++
++ ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
++ memset(&right_leaf_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++ if (eb->h_next_leaf_blk == 0ULL) {
++ /*
++ * XXX: move recs over to get rid of empty extent,
++ * decrease next_free. how does this play with the
++ * delete code below?
++ */
++ ocfs2_remove_empty_extent(right_leaf_el);
++ }
++
++ ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
++ if (ret)
++ mlog_errno(ret);
++ ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
++ if (ret)
++ mlog_errno(ret);
++
++ if (del_right_subtree) {
++ ocfs2_unlink_subtree(inode, handle, left_path, right_path,
++ subtree_index, dealloc);
++ ocfs2_update_edge_lengths(inode, handle, left_path);
++
++ eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
++ di->i_last_eb_blk = eb->h_blkno;
++ ret = ocfs2_journal_dirty(handle, di_bh);
++ if (ret)
++ mlog_errno(ret);
++
++ *deleted = 1;
++ } else
++ ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
++ subtree_index);
++
++out:
++ return ret;
++}
++
++/*
++ * Given a full path, determine what cpos value would return us a path
++ * containing the leaf immediately to the right of the current one.
++ *
++ * Will return zero if the path passed in is already the rightmost path.
++ *
++ * This looks similar, but is subtly different to
++ * ocfs2_find_cpos_for_left_leaf().
++ */
++static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
++ struct ocfs2_path *path, u32 *cpos)
++{
++ int i, j, ret = 0;
++ u64 blkno;
++ struct ocfs2_extent_list *el;
++
++ *cpos = 0;
++
++ if (path->p_tree_depth == 0)
++ return 0;
++
++ blkno = path_leaf_bh(path)->b_blocknr;
++
++ /* Start at the tree node just above the leaf and work our way up. */
++ i = path->p_tree_depth - 1;
++ while (i >= 0) {
++ int next_free;
++
++ el = path->p_node[i].el;
++
++ /*
++ * Find the extent record just after the one in our
++ * path.
++ */
++ next_free = le16_to_cpu(el->l_next_free_rec);
++ for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
++ if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
++ if (j == (next_free - 1)) {
++ if (i == 0) {
++ /*
++ * We've determined that the
++ * path specified is already
++ * the rightmost one - return a
++ * cpos of zero.
++ */
++ goto out;
++ }
++ /*
++ * The rightmost record points to our
++ * leaf - we need to travel up the
++ * tree one level.
++ */
++ goto next_node;
++ }
++
++ *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
++ goto out;
++ }
++ }
++
++ /*
++ * If we got here, we never found a valid node where
++ * the tree indicated one should be.
++ */
++ ocfs2_error(sb,
++ "Invalid extent tree at extent block %llu\n",
++ (unsigned long long)blkno);
++ ret = -EROFS;
++ goto out;
++
++next_node:
++ blkno = path->p_node[i].bh->b_blocknr;
++ i--;
++ }
++
++out:
++ return ret;
++}
++
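++/*
++ * Remove an empty extent at the head of the rightmost leaf. Returns
++ * -EAGAIN if the leaf contains nothing but the empty extent.
++ */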
++static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
++ handle_t *handle,
++ struct buffer_head *bh,
++ struct ocfs2_extent_list *el,
++ int *rotated_any)
++{
++ int ret;
++
++ if (rotated_any)
++ *rotated_any = 0;
++
++ if (!ocfs2_is_empty_extent(&el->l_recs[0]))
++ return 0;
++
++ if (le16_to_cpu(el->l_next_free_rec) == 1)
++ return -EAGAIN;
++
++ ret = ocfs2_journal_access(handle, inode, bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ocfs2_remove_empty_extent(el);
++
++ ret = ocfs2_journal_dirty(handle, bh);
++ if (ret)
++ mlog_errno(ret);
++
++ if (rotated_any)
++ *rotated_any = 1;
++out:
++ return ret;
++}
++
++/*
++ * Left rotation of btree records.
++ *
++ * In many ways, this is (unsurprisingly) the opposite of right
++ * rotation. We start at some non-rightmost path containing an empty
++ * extent in the leaf block. The code works its way to the rightmost
++ * path by rotating records to the left in every subtree.
++ *
++ * There are a few places where we might want to do this:
++ * - merging extent records
++ * - right/left contiguousness during insert
++ * - merging two previously unwritten extents
++ * - truncate
++ * - via ocfs2_truncate, if we ever fix it up to use this code
++ * - via ioctl at the request of user (reverse fallocate)
++ * - "compressing" a tree with empty extents
++ * - as a result of a user defrag request
++ * - perhaps as a preventative measure if we notice a tree needs
++ * this during any of the above operations.
++ *
++ * The major difference between those states above is the ability to
++ * lock one of the meta data allocators so that we can remove unused
++ * extent blocks. It might be unrealistic for us to assume that any
++ * merging cases will want to lock the meta data allocator. Luckily,
++ * the merges are an optimization.
++ *
++ * So, merging won't happen if it would result in an empty rightmost
++ * path (this is illegal).
++ *
++ * This function will move extents left until it runs out of leaves to
++ * rotate, or it hits a right leaf that already contains an empty
++ * extent, in which case it will exit early. This means that we might
++ * never rotate anything if the 1st right leaf contains an empty
++ * extent.
++ *
++ * Truncate cases will have to happen as a second step. I'm not
++ * completely sure how we want to handle those yet.
++ */
++static int ocfs2_rotate_tree_left(struct inode *inode,
++ handle_t *handle,
++ struct ocfs2_path *path,
++ struct ocfs2_cached_dealloc_ctxt *dealloc,
++ int *rotated_any)
++{
++ int ret, subtree_root, deleted, orig_credits = handle->h_buffer_credits;
++ u32 right_cpos;
++ struct ocfs2_path *left_path = NULL;
++ struct ocfs2_path *right_path = NULL;
++
++ BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
++
++ if (rotated_any)
++ *rotated_any = 0;
++
++ ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
++ &right_cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (path->p_tree_depth == 0 || right_cpos == 0) {
++ /*
++ * Two cases where rotation of adjacent leaves isn't
++ * necessary:
++ * - in-inode extents (no btree)
++ * - path passed is already rightmost
++ */
++ ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
++ path_leaf_bh(path),
++ path_leaf_el(path),
++ rotated_any);
++ if (ret)
++ mlog_errno(ret);
++ goto out;
++ }
++
++ left_path = ocfs2_new_path(path_root_bh(path),
++ path_root_el(path));
++ if (!left_path) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ocfs2_cp_path(left_path, path);
++
++ right_path = ocfs2_new_path(path_root_bh(path),
++ path_root_el(path));
++ if (!right_path) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ while (right_cpos) {
++ ret = ocfs2_find_path(inode, right_path, right_cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ subtree_root = ocfs2_find_subtree_root(inode, left_path,
++ right_path);
++
++ mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
++ subtree_root,
++ (unsigned long long)
++ right_path->p_node[subtree_root].bh->b_blocknr,
++ right_path->p_tree_depth);
++
++ ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
++ orig_credits, left_path);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
++ right_path, subtree_root,
++ dealloc, &deleted);
++ if (ret) {
++ if (ret != -EAGAIN)
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (rotated_any)
++ *rotated_any = 1;
++
++ /*
++ * The subtree rotate might have removed records on
++ * the rightmost edge. If so, then rotation is
++ * complete.
++ */
++ if (deleted)
++ break;
++
++ ocfs2_mv_path(left_path, right_path);
++
++ ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
++ &right_cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++ }
++
++out:
++ ocfs2_free_path(right_path);
++ ocfs2_free_path(left_path);
++
++ return ret;
++}
++
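++/*
++ * If a merge consumed the merged-from record entirely, restore the
++ * invariant that an empty extent can only live in slot 0 of a leaf.
++ */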
++static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
++ int index)
++{
++ struct ocfs2_extent_rec *rec = &el->l_recs[index];
++ unsigned int size;
++
++ if (rec->e_leaf_clusters == 0) {
++ /*
++ * We consumed all of the merged-from record. An empty
++ * extent cannot exist anywhere but the 1st array
++ * position, so move things over if the merged-from
++ * record doesn't occupy that position.
++ *
++ * This creates a new empty extent so the caller
++ * should be smart enough to have removed any existing
++ * ones.
++ */
++ if (index > 0) {
++ BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
++ size = index * sizeof(struct ocfs2_extent_rec);
++ memmove(&el->l_recs[1], &el->l_recs[0], size);
++ }
++
++ /*
++ * Always memset - the caller doesn't check whether it
++ * created an empty extent, so there could be junk in
++ * the other fields.
++ */
++ memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++ }
++}
++
++/*
++ * Remove split_rec clusters from the record at index and merge them
++ * onto the beginning of the record at index + 1.
++ */
++static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
++ handle_t *handle,
++ struct ocfs2_extent_rec *split_rec,
++ struct ocfs2_extent_list *el, int index)
++{
++ int ret;
++ unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
++ struct ocfs2_extent_rec *left_rec;
++ struct ocfs2_extent_rec *right_rec;
++
++ BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
++
++ left_rec = &el->l_recs[index];
++ right_rec = &el->l_recs[index + 1];
++
++ ret = ocfs2_journal_access(handle, inode, bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters);
++
++ le32_add_cpu(&right_rec->e_cpos, -split_clusters);
++ le64_add_cpu(&right_rec->e_blkno,
++ -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
++ le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
++
++ ocfs2_cleanup_merge(el, index);
++
++ ret = ocfs2_journal_dirty(handle, bh);
++ if (ret)
++ mlog_errno(ret);
++
++out:
++ return ret;
++}
++
++/*
++ * Remove split_rec clusters from the record at index and merge them
++ * onto the tail of the record at index - 1.
++ */
++static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
++ handle_t *handle,
++ struct ocfs2_extent_rec *split_rec,
++ struct ocfs2_extent_list *el, int index)
++{
++ int ret, has_empty_extent = 0;
++ unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
++ struct ocfs2_extent_rec *left_rec;
++ struct ocfs2_extent_rec *right_rec;
++
++ BUG_ON(index <= 0);
++
++ left_rec = &el->l_recs[index - 1];
++ right_rec = &el->l_recs[index];
++ if (ocfs2_is_empty_extent(&el->l_recs[0]))
++ has_empty_extent = 1;
++
++ ret = ocfs2_journal_access(handle, inode, bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (has_empty_extent && index == 1) {
++ /*
++ * The easy case - we can just plop the record right in.
++ */
++ *left_rec = *split_rec;
++
++ has_empty_extent = 0;
++ } else {
++ le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
++ }
++
++ le32_add_cpu(&right_rec->e_cpos, split_clusters);
++ le64_add_cpu(&right_rec->e_blkno,
++ ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
++ le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
++
++ ocfs2_cleanup_merge(el, index);
++
++ ret = ocfs2_journal_dirty(handle, bh);
++ if (ret)
++ mlog_errno(ret);
++
++out:
++ return ret;
++}
++
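++/*
++ * Try to fold split_rec into the record(s) adjoining it. A
++ * CONTIG_LEFTRIGHT merge absorbs both neighbours; otherwise we merge
++ * one way and rotate away any empty extent that is left behind.
++ */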
++static int ocfs2_try_to_merge_extent(struct inode *inode,
++ handle_t *handle,
++ struct ocfs2_path *left_path,
++ int split_index,
++ struct ocfs2_extent_rec *split_rec,
++ struct ocfs2_cached_dealloc_ctxt *dealloc,
++ struct ocfs2_merge_ctxt *ctxt)
++
++{
++ int ret = 0, rotated, delete_tail_recs = 0;
++ struct ocfs2_extent_list *el = path_leaf_el(left_path);
++ struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
++
++ BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
++
++ if (ctxt->c_split_covers_rec) {
++ delete_tail_recs++;
++
++ if (ctxt->c_contig_type == CONTIG_LEFTRIGHT ||
++ ctxt->c_has_empty_extent)
++ delete_tail_recs++;
++
++ if (ctxt->c_has_empty_extent) {
++ /*
++ * The merge code will need to create an empty
++ * extent to take the place of the newly
++ * emptied slot. Remove any pre-existing empty
++ * extents - having more than one in a leaf is
++ * illegal.
++ */
++ ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++ dealloc, &rotated);
++ if (rotated) {
++ split_index--;
++ rec = &el->l_recs[split_index];
++ }
++ if (ret) {
++ if (ret == -EAGAIN) {
++ ret = 0;
++ goto straight_insert;
++ }
++
++ mlog_errno(ret);
++ goto out;
++ }
++ }
++ }
++
++ if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
++ /*
++ * Left-right contig implies this.
++ */
++ BUG_ON(!ctxt->c_split_covers_rec);
++ BUG_ON(split_index == 0);
++
++ /*
++ * Since the leftright insert always covers the entire
++ * extent, this call will delete the insert record
++ * entirely, resulting in an empty extent record added to
++ * the extent block.
++ *
++ * Since the adding of an empty extent shifts
++ * everything back to the right, there's no need to
++ * update split_index here.
++ */
++ ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
++ handle, split_rec, el, split_index);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ /*
++ * We can only get this from logic error above.
++ */
++ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
++
++ /*
++ * The left merge left us with an empty extent, remove
++ * it.
++ */
++ ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++ dealloc, &rotated);
++ if (rotated) {
++ split_index--;
++ rec = &el->l_recs[split_index];
++ }
++ if (ret) {
++ if (ret == -EAGAIN) {
++ ret = 0;
++ goto straight_insert;
++ }
++
++ mlog_errno(ret);
++ goto out;
++ }
++
++ /*
++ * Note that we don't pass split_rec here on purpose -
++ * we've merged it into the left side.
++ */
++ ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
++ handle, rec, el, split_index);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
++
++ ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++ dealloc, NULL);
++ /*
++ * Error from this last rotate is not critical, so
++ * print but don't bubble it up.
++ */
++ if (ret && ret != -EAGAIN)
++ mlog_errno(ret);
++ ret = 0;
++ } else {
++ /*
++ * Merge a record to the left or right.
++ *
++ * 'contig_type' is relative to the existing record,
++ * so for example, if we're "right contig", it's to
++ * the record on the left (hence the left merge).
++ */
++ if (ctxt->c_contig_type == CONTIG_RIGHT) {
++ ret = ocfs2_merge_rec_left(inode,
++ path_leaf_bh(left_path),
++ handle, split_rec, el,
++ split_index);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++ } else {
++ ret = ocfs2_merge_rec_right(inode,
++ path_leaf_bh(left_path),
++ handle, split_rec, el,
++ split_index);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++ }
++
++ if (ctxt->c_split_covers_rec) {
++ /*
++ * The merge may have left an empty extent in
++ * our leaf. Try to rotate it away.
++ */
++ ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++ dealloc, &rotated);
++ if (ret)
++ mlog_errno(ret);
++ ret = 0;
++ }
++ }
++
++out:
++ return ret;
++
++straight_insert:
++ el->l_recs[split_index] = *split_rec;
++ goto out;
++}
++
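++/*
++ * Shrink rec by the clusters in split_rec, trimming from whichever
++ * edge the split type selects.
++ */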
++static void ocfs2_subtract_from_rec(struct super_block *sb,
++ enum ocfs2_split_type split,
++ struct ocfs2_extent_rec *rec,
++ struct ocfs2_extent_rec *split_rec)
++{
++ u64 len_blocks;
++
++ len_blocks = ocfs2_clusters_to_blocks(sb,
++ le16_to_cpu(split_rec->e_leaf_clusters));
++
++ if (split == SPLIT_LEFT) {
++ /*
++ * Region is on the left edge of the existing
++ * record.
++ */
++ le32_add_cpu(&rec->e_cpos,
++ le16_to_cpu(split_rec->e_leaf_clusters));
++ le64_add_cpu(&rec->e_blkno, len_blocks);
++ le16_add_cpu(&rec->e_leaf_clusters,
++ -le16_to_cpu(split_rec->e_leaf_clusters));
++ } else {
++ /*
++ * Region is on the right edge of the existing
++ * record.
++ */
++ le16_add_cpu(&rec->e_leaf_clusters,
++ -le16_to_cpu(split_rec->e_leaf_clusters));
++ }
++}
++
++/*
++ * Do the final bits of extent record insertion at the target leaf
++ * list. If this leaf is part of an allocation tree, it is assumed
++ * that the tree above has been prepared.
++ */
++static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
++ struct ocfs2_extent_list *el,
++ struct ocfs2_insert_type *insert,
++ struct inode *inode)
++{
++ int i = insert->ins_contig_index;
++ unsigned int range;
++ struct ocfs2_extent_rec *rec;
++
++ BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
++
++ if (insert->ins_split != SPLIT_NONE) {
++ i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
++ BUG_ON(i == -1);
++ rec = &el->l_recs[i];
++ ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
++ insert_rec);
++ goto rotate;
++ }
++
++ /*
++ * Contiguous insert - either left or right.
++ */
++ if (insert->ins_contig != CONTIG_NONE) {
++ rec = &el->l_recs[i];
++ if (insert->ins_contig == CONTIG_LEFT) {
++ rec->e_blkno = insert_rec->e_blkno;
++ rec->e_cpos = insert_rec->e_cpos;
++ }
++ le16_add_cpu(&rec->e_leaf_clusters,
++ le16_to_cpu(insert_rec->e_leaf_clusters));
++ return;
++ }
++
++ /*
++ * Handle insert into an empty leaf.
++ */
++ if (le16_to_cpu(el->l_next_free_rec) == 0 ||
++ ((le16_to_cpu(el->l_next_free_rec) == 1) &&
++ ocfs2_is_empty_extent(&el->l_recs[0]))) {
++ el->l_recs[0] = *insert_rec;
++ el->l_next_free_rec = cpu_to_le16(1);
++ return;
++ }
++
++ /*
++ * Appending insert.
++ */
++ if (insert->ins_appending == APPEND_TAIL) {
++ i = le16_to_cpu(el->l_next_free_rec) - 1;
++ rec = &el->l_recs[i];
++ range = le32_to_cpu(rec->e_cpos)
++ + le16_to_cpu(rec->e_leaf_clusters);
++ BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
++
++ mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
++ le16_to_cpu(el->l_count),
++ "inode %lu, depth %u, count %u, next free %u, "
++ "rec.cpos %u, rec.clusters %u, "
++ "insert.cpos %u, insert.clusters %u\n",
++ inode->i_ino,
++ le16_to_cpu(el->l_tree_depth),
++ le16_to_cpu(el->l_count),
++ le16_to_cpu(el->l_next_free_rec),
++ le32_to_cpu(el->l_recs[i].e_cpos),
++ le16_to_cpu(el->l_recs[i].e_leaf_clusters),
++ le32_to_cpu(insert_rec->e_cpos),
++ le16_to_cpu(insert_rec->e_leaf_clusters));
++ i++;
++ el->l_recs[i] = *insert_rec;
++ le16_add_cpu(&el->l_next_free_rec, 1);
++ return;
++ }
++
++rotate:
++ /*
++ * Ok, we have to rotate.
++ *
++ * At this point, it is safe to assume that inserting into an
++ * empty leaf and appending to a leaf have both been handled
++ * above.
++ *
++ * This leaf needs to have space, either by the empty 1st
++ * extent record, or by virtue of l_next_free_rec < l_count.
++ */
++ ocfs2_rotate_leaf(el, insert_rec);
++}
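++
++/*
++ * For example (hypothetical leaf): inserting { cpos 5, len 1 } into
++ * [ empty ][ 2,2 ][ 8,4 ] is neither contiguous nor an append, so it
++ * falls through to the rotate case - ocfs2_rotate_leaf() shifts
++ * records over the empty slot to open a hole at the sorted position.
++ */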
++
++static inline void ocfs2_update_dinode_clusters(struct inode *inode,
++ struct ocfs2_dinode *di,
++ u32 clusters)
++{
++ le32_add_cpu(&di->i_clusters, clusters);
++ spin_lock(&OCFS2_I(inode)->ip_lock);
++ OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
++ spin_unlock(&OCFS2_I(inode)->ip_lock);
++}
++
++static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
++ struct ocfs2_extent_rec *insert_rec,
++ struct ocfs2_path *right_path,
++ struct ocfs2_path **ret_left_path)
++{
++ int ret, i, next_free;
++ struct buffer_head *bh;
++ struct ocfs2_extent_list *el;
++ struct ocfs2_path *left_path = NULL;
++
++ *ret_left_path = NULL;
++
++ /*
++ * This shouldn't happen for non-trees. The extent rec cluster
++ * count manipulation below only works for interior nodes.
++ */
++ BUG_ON(right_path->p_tree_depth == 0);
++
++ /*
++ * If our appending insert is at the leftmost edge of a leaf,
++ * then we might need to update the rightmost records of the
++ * neighboring path.
++ */
++ el = path_leaf_el(right_path);
++ next_free = le16_to_cpu(el->l_next_free_rec);
+ if (next_free == 0 ||
+ (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
+ u32 left_cpos;
+@@ -1931,6 +2973,83 @@
+ return ret;
+ }
+
++static void ocfs2_split_record(struct inode *inode,
++ struct ocfs2_path *left_path,
++ struct ocfs2_path *right_path,
++ struct ocfs2_extent_rec *split_rec,
++ enum ocfs2_split_type split)
++{
++ int index;
++ u32 cpos = le32_to_cpu(split_rec->e_cpos);
++ struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
++ struct ocfs2_extent_rec *rec, *tmprec;
++
++ right_el = path_leaf_el(right_path);
++ if (left_path)
++ left_el = path_leaf_el(left_path);
++
++ el = right_el;
++ insert_el = right_el;
++ index = ocfs2_search_extent_list(el, cpos);
++ if (index != -1) {
++ if (index == 0 && left_path) {
++ BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
++
++ /*
++ * This typically means that the record
++ * started in the left path but moved to the
++ * right as a result of rotation. We either
++ * move the existing record to the left, or we
++ * do the later insert there.
++ *
++ * In this case, the left path should always
++ * exist as the rotate code will have passed
++ * it back for a post-insert update.
++ */
++
++ if (split == SPLIT_LEFT) {
++ /*
++ * It's a left split. Since we know
++ * that the rotate code gave us an
++ * empty extent in the left path, we
++ * can just do the insert there.
++ */
++ insert_el = left_el;
++ } else {
++ /*
++ * Right split - we have to move the
++ * existing record over to the left
++ * leaf. The insert will be into the
++ * newly created empty extent in the
++ * right leaf.
++ */
++ tmprec = &right_el->l_recs[index];
++ ocfs2_rotate_leaf(left_el, tmprec);
++ el = left_el;
++
++ memset(tmprec, 0, sizeof(*tmprec));
++ index = ocfs2_search_extent_list(left_el, cpos);
++ BUG_ON(index == -1);
++ }
++ }
++ } else {
++ BUG_ON(!left_path);
++ BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0]));
++ /*
++ * Left path is easy - we can just allow the insert to
++ * happen.
++ */
++ el = left_el;
++ insert_el = left_el;
++ index = ocfs2_search_extent_list(el, cpos);
++ BUG_ON(index == -1);
++ }
++
++ rec = &el->l_recs[index];
++ ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
++ ocfs2_rotate_leaf(insert_el, split_rec);
++}
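++
++/*
++ * Sketch of the subtlest case above (index == 0 with a left path and
++ * SPLIT_RIGHT), using made-up records. After rotation the leaves
++ * might look like
++ *
++ *   left: [ empty ][ 50,10 ]    right: [ 60,20 ] ...
++ *
++ * with the record to split at index 0 of the right leaf. That record
++ * is rotated into the empty slot on the left, trimmed there, and
++ * split_rec is then rotated into the hole it left behind in the
++ * right leaf.
++ */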
++
+ /*
+ * This function only does inserts on an allocation b-tree. For dinode
+ * lists, ocfs2_insert_at_leaf() is called directly.
+@@ -1948,7 +3067,6 @@
+ {
+ int ret, subtree_index;
+ struct buffer_head *leaf_bh = path_leaf_bh(right_path);
+- struct ocfs2_extent_list *el;
+
+ /*
+ * Pass both paths to the journal. The majority of inserts
+@@ -1984,9 +3102,18 @@
+ }
+ }
+
+- el = path_leaf_el(right_path);
++ if (insert->ins_split != SPLIT_NONE) {
++ /*
++ * We could call ocfs2_insert_at_leaf() for some types
++ * of splits, but it's easier to just let one seperate
++ * function sort it all out.
++ */
++ ocfs2_split_record(inode, left_path, right_path,
++ insert_rec, insert->ins_split);
++ } else
++ ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
++ insert, inode);
+
+- ocfs2_insert_at_leaf(insert_rec, el, insert, inode);
+ ret = ocfs2_journal_dirty(handle, leaf_bh);
+ if (ret)
+ mlog_errno(ret);
+@@ -2075,7 +3202,7 @@
+ * can wind up skipping both of these two special cases...
+ */
+ if (rotate) {
+- ret = ocfs2_rotate_tree_right(inode, handle,
++ ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
+ le32_to_cpu(insert_rec->e_cpos),
+ right_path, &left_path);
+ if (ret) {
+@@ -2100,6 +3227,7 @@
+ }
+
+ out_update_clusters:
++ if (type->ins_split == SPLIT_NONE)
+ ocfs2_update_dinode_clusters(inode, di,
+ le16_to_cpu(insert_rec->e_leaf_clusters));
+
+@@ -2114,6 +3242,44 @@
+ return ret;
+ }
+
++static enum ocfs2_contig_type
++ocfs2_figure_merge_contig_type(struct inode *inode,
++ struct ocfs2_extent_list *el, int index,
++ struct ocfs2_extent_rec *split_rec)
++{
++ struct ocfs2_extent_rec *rec;
++ enum ocfs2_contig_type ret = CONTIG_NONE;
++
++ /*
++ * We're careful to check for an empty extent record here -
++ * the merge code will know what to do if it sees one.
++ */
++
++ if (index > 0) {
++ rec = &el->l_recs[index - 1];
++ if (index == 1 && ocfs2_is_empty_extent(rec)) {
++ if (split_rec->e_cpos == el->l_recs[index].e_cpos)
++ ret = CONTIG_RIGHT;
++ } else {
++ ret = ocfs2_extent_contig(inode, rec, split_rec);
++ }
++ }
++
++ if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
++ enum ocfs2_contig_type contig_type;
++
++ rec = &el->l_recs[index + 1];
++ contig_type = ocfs2_extent_contig(inode, rec, split_rec);
++
++ if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
++ ret = CONTIG_LEFTRIGHT;
++ else if (ret == CONTIG_NONE)
++ ret = contig_type;
++ }
++
++ return ret;
++}
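++
++/*
++ * For illustration, with a hypothetical leaf of (cpos,clusters)
++ * records and a split_rec that lines up against both neighbours:
++ *
++ *   l_recs: [ 0,4 ][ 4,2 ][ 6,4 ],  split_rec = { 4,2 }, index = 1
++ *
++ * If split_rec is block-contiguous with both neighbours this returns
++ * CONTIG_LEFTRIGHT; if it only abuts the record on its left, it
++ * returns CONTIG_RIGHT (the type names are relative to the existing
++ * record, as ocfs2_try_to_merge_extent() notes).
++ */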
++
+ static void ocfs2_figure_contig_type(struct inode *inode,
+ struct ocfs2_insert_type *insert,
+ struct ocfs2_extent_list *el,
+@@ -2205,6 +3371,8 @@
+ struct ocfs2_path *path = NULL;
+ struct buffer_head *bh = NULL;
+
++ insert->ins_split = SPLIT_NONE;
++
+ el = &di->id2.i_list;
+ insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
+
+@@ -2305,130 +3473,425 @@
+ ocfs2_figure_appending_type(insert, el, insert_rec);
+ }
+
+-out:
+- ocfs2_free_path(path);
++out:
++ ocfs2_free_path(path);
++
++ if (ret == 0)
++ *last_eb_bh = bh;
++ else
++ brelse(bh);
++ return ret;
++}
++
++/*
++ * Insert an extent into an inode btree.
++ *
++ * The caller needs to update fe->i_clusters
++ */
++int ocfs2_insert_extent(struct ocfs2_super *osb,
++ handle_t *handle,
++ struct inode *inode,
++ struct buffer_head *fe_bh,
++ u32 cpos,
++ u64 start_blk,
++ u32 new_clusters,
++ u8 flags,
++ struct ocfs2_alloc_context *meta_ac)
++{
++ int status;
++ struct buffer_head *last_eb_bh = NULL;
++ struct buffer_head *bh = NULL;
++ struct ocfs2_insert_type insert = {0, };
++ struct ocfs2_extent_rec rec;
++
++ mlog(0, "add %u clusters at position %u to inode %llu\n",
++ new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
++
++ mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
++ (OCFS2_I(inode)->ip_clusters != cpos),
++ "Device %s, asking for sparse allocation: inode %llu, "
++ "cpos %u, clusters %u\n",
++ osb->dev_str,
++ (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
++ OCFS2_I(inode)->ip_clusters);
++
++ memset(&rec, 0, sizeof(rec));
++ rec.e_cpos = cpu_to_le32(cpos);
++ rec.e_blkno = cpu_to_le64(start_blk);
++ rec.e_leaf_clusters = cpu_to_le16(new_clusters);
++ rec.e_flags = flags;
++
++ status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
++ &insert);
++ if (status < 0) {
++ mlog_errno(status);
++ goto bail;
++ }
++
++ mlog(0, "Insert.appending: %u, Insert.Contig: %u, "
++ "Insert.contig_index: %d, Insert.free_records: %d, "
++ "Insert.tree_depth: %d\n",
++ insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
++ insert.ins_free_records, insert.ins_tree_depth);
++
++ if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) {
++ status = ocfs2_grow_tree(inode, handle, fe_bh,
++ &insert.ins_tree_depth, &last_eb_bh,
++ meta_ac);
++ if (status) {
++ mlog_errno(status);
++ goto bail;
++ }
++ }
++
++ /* Finally, we can add clusters. This might rotate the tree for us. */
++ status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
++ if (status < 0)
++ mlog_errno(status);
++ else
++ ocfs2_extent_map_insert_rec(inode, &rec);
++
++bail:
++ if (bh)
++ brelse(bh);
++
++ if (last_eb_bh)
++ brelse(last_eb_bh);
++
++ mlog_exit(status);
++ return status;
++}
++
++static int ocfs2_split_and_insert(struct inode *inode,
++ handle_t *handle,
++ struct ocfs2_path *path,
++ struct buffer_head *di_bh,
++ struct buffer_head **last_eb_bh,
++ int split_index,
++ struct ocfs2_extent_rec *orig_split_rec,
++ struct ocfs2_alloc_context *meta_ac)
++{
++ int ret = 0, depth;
++ unsigned int insert_range, rec_range, do_leftright = 0;
++ struct ocfs2_extent_rec tmprec;
++ struct ocfs2_extent_list *rightmost_el;
++ struct ocfs2_extent_rec rec;
++ struct ocfs2_extent_rec split_rec = *orig_split_rec;
++ struct ocfs2_insert_type insert;
++ struct ocfs2_extent_block *eb;
++ struct ocfs2_dinode *di;
++
++leftright:
++ /*
++ * Store a copy of the record on the stack - it might move
++ * around as the tree is manipulated below.
++ */
++ rec = path_leaf_el(path)->l_recs[split_index];
++
++ di = (struct ocfs2_dinode *)di_bh->b_data;
++ rightmost_el = &di->id2.i_list;
++
++ depth = le16_to_cpu(rightmost_el->l_tree_depth);
++ if (depth) {
++ BUG_ON(!(*last_eb_bh));
++ eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
++ rightmost_el = &eb->h_list;
++ }
++
++ if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
++ le16_to_cpu(rightmost_el->l_count)) {
++ int old_depth = depth;
++
++ ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
++ meta_ac);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (old_depth != depth) {
++ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
++ rightmost_el = &eb->h_list;
++ }
++ }
++
++ memset(&insert, 0, sizeof(struct ocfs2_insert_type));
++ insert.ins_appending = APPEND_NONE;
++ insert.ins_contig = CONTIG_NONE;
++ insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
++ - le16_to_cpu(rightmost_el->l_next_free_rec);
++ insert.ins_tree_depth = depth;
++
++ insert_range = le32_to_cpu(split_rec.e_cpos) +
++ le16_to_cpu(split_rec.e_leaf_clusters);
++ rec_range = le32_to_cpu(rec.e_cpos) +
++ le16_to_cpu(rec.e_leaf_clusters);
++
++ if (split_rec.e_cpos == rec.e_cpos) {
++ insert.ins_split = SPLIT_LEFT;
++ } else if (insert_range == rec_range) {
++ insert.ins_split = SPLIT_RIGHT;
++ } else {
++ /*
++ * Left/right split. We fake this as a right split
++ * first and then make a second pass as a left split.
++ */
++ insert.ins_split = SPLIT_RIGHT;
++
++ memset(&tmprec, 0, sizeof(tmprec));
++
++ tmprec.e_cpos = cpu_to_le32(insert_range);
++ tmprec.e_leaf_clusters = cpu_to_le16(rec_range - insert_range);
++ tmprec.e_flags = rec.e_flags;
++ tmprec.e_blkno = split_rec.e_blkno;
++ le64_add_cpu(&tmprec.e_blkno,
++ ocfs2_clusters_to_blocks(inode->i_sb,
++ le16_to_cpu(split_rec.e_leaf_clusters)));
++ split_rec = tmprec;
++
++ BUG_ON(do_leftright);
++ do_leftright = 1;
++ }
++
++ ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
++ &insert);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (do_leftright == 1) {
++ u32 cpos;
++ struct ocfs2_extent_list *el;
++
++ do_leftright++;
++ split_rec = *orig_split_rec;
++
++ ocfs2_reinit_path(path, 1);
++
++ cpos = le32_to_cpu(split_rec.e_cpos);
++ ret = ocfs2_find_path(inode, path, cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ el = path_leaf_el(path);
++ split_index = ocfs2_search_extent_list(el, cpos);
++ goto leftright;
++ }
++out:
++
++ return ret;
++}
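++
++/*
++ * A rough trace of the two-pass path above, with hypothetical
++ * numbers: splitting { cpos 10, len 2 } out of the middle of
++ * { cpos 8, len 8 }. Pass one right-splits tmprec (clusters 12-15,
++ * everything past the target) off the tail; pass two then loops back
++ * via 'leftright' and splits the target itself (clusters 10-11) off
++ * the now-shortened record covering clusters 8-11.
++ */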
++
++/*
++ * Mark part or all of the extent record at split_index in the leaf
++ * pointed to by path as written. This removes the unwritten
++ * extent flag.
++ *
++ * Care is taken to handle contiguousness so as to not grow the tree.
++ *
++ * meta_ac is not strictly necessary - we only truly need it if growth
++ * of the tree is required. All other cases will degrade into a less
++ * optimal tree layout.
++ *
++ * last_eb_bh should be the rightmost leaf block for any inode with a
++ * btree. Since a split may grow the tree or a merge might shrink
++ * it, the caller cannot trust the contents of that buffer after
++ * this call.
++ *
++ * This code is optimized for readability - several passes might be
++ * made over certain portions of the tree. All of those blocks will
++ * have been brought into cache (and pinned via the journal), so the
++ * extra overhead is not expressed in terms of disk reads.
++ */
++static int __ocfs2_mark_extent_written(struct inode *inode,
++ struct buffer_head *di_bh,
++ handle_t *handle,
++ struct ocfs2_path *path,
++ int split_index,
++ struct ocfs2_extent_rec *split_rec,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_cached_dealloc_ctxt *dealloc)
++{
++ int ret = 0;
++ struct ocfs2_extent_list *el = path_leaf_el(path);
++ struct buffer_head *eb_bh, *last_eb_bh = NULL;
++ struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
++ struct ocfs2_merge_ctxt ctxt;
++ struct ocfs2_extent_list *rightmost_el;
++
++ if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
++ ret = -EIO;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
++ ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
++ (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
++ ret = -EIO;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ eb_bh = path_leaf_bh(path);
++ ret = ocfs2_journal_access(handle, inode, eb_bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
++ split_index,
++ split_rec);
++
++ /*
++ * The core merge / split code wants to know how much room is
++ * left in this inode's allocation tree, so we pass the
++ * rightmost extent list.
++ */
++ if (path->p_tree_depth) {
++ struct ocfs2_extent_block *eb;
++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++
++ ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
++ le64_to_cpu(di->i_last_eb_blk),
++ &last_eb_bh, OCFS2_BH_CACHED, inode);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
++ if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
++ OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
++ ret = -EROFS;
++ goto out;
++ }
+
+- if (ret == 0)
+- *last_eb_bh = bh;
++ rightmost_el = &eb->h_list;
++ } else
++ rightmost_el = path_root_el(path);
++
++ ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
++ if (ctxt.c_used_tail_recs > 0 &&
++ ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
++ ctxt.c_used_tail_recs--;
++
++ if (rec->e_cpos == split_rec->e_cpos &&
++ rec->e_leaf_clusters == split_rec->e_leaf_clusters)
++ ctxt.c_split_covers_rec = 1;
+ else
+- brelse(bh);
++ ctxt.c_split_covers_rec = 0;
++
++ ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
++
++ mlog(0, "index: %d, contig: %u, used_tail_recs: %u, "
++ "has_empty: %u, split_covers: %u\n", split_index,
++ ctxt.c_contig_type, ctxt.c_used_tail_recs,
++ ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
++
++ if (ctxt.c_contig_type == CONTIG_NONE) {
++ if (ctxt.c_split_covers_rec)
++ el->l_recs[split_index] = *split_rec;
++ else
++ ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
++ &last_eb_bh, split_index,
++ split_rec, meta_ac);
++ if (ret)
++ mlog_errno(ret);
++ } else {
++ ret = ocfs2_try_to_merge_extent(inode, handle, path,
++ split_index, split_rec,
++ dealloc, &ctxt);
++ if (ret)
++ mlog_errno(ret);
++ }
++
++ ocfs2_journal_dirty(handle, eb_bh);
++
++out:
++ brelse(last_eb_bh);
+ return ret;
+ }
+
+ /*
+- * Insert an extent into an inode btree.
++ * Mark the already-existing extent at cpos as written for len clusters.
+ *
+- * The caller needs to update fe->i_clusters
++ * If the existing extent is larger than the request, initiate a
++ * split. An attempt will be made at merging with adjacent extents.
++ *
++ * The caller is responsible for passing down meta_ac if we'll need it.
+ */
+-int ocfs2_insert_extent(struct ocfs2_super *osb,
+- handle_t *handle,
+- struct inode *inode,
+- struct buffer_head *fe_bh,
+- u32 cpos,
+- u64 start_blk,
+- u32 new_clusters,
+- struct ocfs2_alloc_context *meta_ac)
++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
++ handle_t *handle, u32 cpos, u32 len, u32 phys,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_cached_dealloc_ctxt *dealloc)
+ {
+- int status, shift;
+- struct buffer_head *last_eb_bh = NULL;
+- struct buffer_head *bh = NULL;
+- struct ocfs2_insert_type insert = {0, };
+- struct ocfs2_extent_rec rec;
+-
+- mlog(0, "add %u clusters at position %u to inode %llu\n",
+- new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+-
+- mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
+- (OCFS2_I(inode)->ip_clusters != cpos),
+- "Device %s, asking for sparse allocation: inode %llu, "
+- "cpos %u, clusters %u\n",
+- osb->dev_str,
+- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
+- OCFS2_I(inode)->ip_clusters);
++ int ret, index;
++ u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
++ struct ocfs2_extent_rec split_rec;
++ struct ocfs2_path *left_path = NULL;
++ struct ocfs2_extent_list *el;
+
+- memset(&rec, 0, sizeof(rec));
+- rec.e_cpos = cpu_to_le32(cpos);
+- rec.e_blkno = cpu_to_le64(start_blk);
+- rec.e_leaf_clusters = cpu_to_le16(new_clusters);
++ mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
++ inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
+
+- status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
+- &insert);
+- if (status < 0) {
+- mlog_errno(status);
+- goto bail;
++ if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
++ ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
++ "that are being written to, but the feature bit "
++ "is not set in the super block.",
++ (unsigned long long)OCFS2_I(inode)->ip_blkno);
++ ret = -EROFS;
++ goto out;
+ }
+
+- mlog(0, "Insert.appending: %u, Insert.Contig: %u, "
+- "Insert.contig_index: %d, Insert.free_records: %d, "
+- "Insert.tree_depth: %d\n",
+- insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
+- insert.ins_free_records, insert.ins_tree_depth);
+-
+ /*
+- * Avoid growing the tree unless we're out of records and the
+- * insert type requres one.
++ * XXX: This should be fixed up so that we just re-insert the
++ * next extent records.
+ */
+- if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records)
+- goto out_add;
++ ocfs2_extent_map_trunc(inode, 0);
+
+- shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
+- if (shift < 0) {
+- status = shift;
+- mlog_errno(status);
+- goto bail;
++ left_path = ocfs2_new_inode_path(di_bh);
++ if (!left_path) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
+ }
+
+- /* We traveled all the way to the bottom of the allocation tree
+- * and didn't find room for any more extents - we need to add
+- * another tree level */
+- if (shift) {
+- BUG_ON(bh);
+- mlog(0, "need to shift tree depth "
+- "(current = %d)\n", insert.ins_tree_depth);
+-
+- /* ocfs2_shift_tree_depth will return us a buffer with
+- * the new extent block (so we can pass that to
+- * ocfs2_add_branch). */
+- status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
+- meta_ac, &bh);
+- if (status < 0) {
+- mlog_errno(status);
+- goto bail;
+- }
+- insert.ins_tree_depth++;
+- /* Special case: we have room now if we shifted from
+- * tree_depth 0 */
+- if (insert.ins_tree_depth == 1)
+- goto out_add;
++ ret = ocfs2_find_path(inode, left_path, cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
+ }
++ el = path_leaf_el(left_path);
+
+- /* call ocfs2_add_branch to add the final part of the tree with
+- * the new data. */
+- mlog(0, "add branch. bh = %p\n", bh);
+- status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
+- meta_ac);
+- if (status < 0) {
+- mlog_errno(status);
+- goto bail;
++ index = ocfs2_search_extent_list(el, cpos);
++ if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
++ ocfs2_error(inode->i_sb,
++ "Inode %llu has an extent at cpos %u which can no "
++ "longer be found.\n",
++ (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
++ ret = -EROFS;
++ goto out;
+ }
+
+-out_add:
+- /* Finally, we can add clusters. This might rotate the tree for us. */
+- status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
+- if (status < 0)
+- mlog_errno(status);
+- else
+- ocfs2_extent_map_insert_rec(inode, &rec);
+-
+-bail:
+- if (bh)
+- brelse(bh);
++ memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
++ split_rec.e_cpos = cpu_to_le32(cpos);
++ split_rec.e_leaf_clusters = cpu_to_le16(len);
++ split_rec.e_blkno = cpu_to_le64(start_blkno);
++ split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
++ split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
+
+- if (last_eb_bh)
+- brelse(last_eb_bh);
++ ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
++ index, &split_rec, meta_ac, dealloc);
++ if (ret)
++ mlog_errno(ret);
+
+- mlog_exit(status);
+- return status;
++out:
++ ocfs2_free_path(left_path);
++ return ret;
+ }
+
+ static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
+@@ -2957,6 +4420,219 @@
+ return status;
+ }
+
++/*
++ * Delayed de-allocation of suballocator blocks.
++ *
++ * Some sets of block de-allocations might involve multiple suballocator inodes.
++ *
++ * The locking for this can get extremely complicated, especially when
++ * the suballocator inodes to delete from aren't known until deep
++ * within an unrelated codepath.
++ *
++ * ocfs2_extent_block structures are a good example of this - an inode
++ * btree could have been grown by any number of nodes each allocating
++ * out of their own suballoc inode.
++ *
++ * These structures allow the delay of block de-allocation until a
++ * later time, when locking of multiple cluster inodes won't cause
++ * deadlock.
++ */
++
++/*
++ * Describes a single block free from a suballocator
++ */
++struct ocfs2_cached_block_free {
++ struct ocfs2_cached_block_free *free_next;
++ u64 free_blk;
++ unsigned int free_bit;
++};
++
++struct ocfs2_per_slot_free_list {
++ struct ocfs2_per_slot_free_list *f_next_suballocator;
++ int f_inode_type;
++ int f_slot;
++ struct ocfs2_cached_block_free *f_first;
++};
++
++static int ocfs2_free_cached_items(struct ocfs2_super *osb,
++ int sysfile_type,
++ int slot,
++ struct ocfs2_cached_block_free *head)
++{
++ int ret;
++ u64 bg_blkno;
++ handle_t *handle;
++ struct inode *inode;
++ struct buffer_head *di_bh = NULL;
++ struct ocfs2_cached_block_free *tmp;
++
++ inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
++ if (!inode) {
++ ret = -EINVAL;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ mutex_lock(&inode->i_mutex);
++
++ ret = ocfs2_meta_lock(inode, &di_bh, 1);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_mutex;
++ }
++
++ handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
++ if (IS_ERR(handle)) {
++ ret = PTR_ERR(handle);
++ mlog_errno(ret);
++ goto out_unlock;
++ }
++
++ while (head) {
++ bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
++ head->free_bit);
++ mlog(0, "Free bit: (bit %u, blkno %llu)\n",
++ head->free_bit, (unsigned long long)head->free_blk);
++
++ ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
++ head->free_bit, bg_blkno, 1);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_journal;
++ }
++
++ ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_journal;
++ }
++
++ tmp = head;
++ head = head->free_next;
++ kfree(tmp);
++ }
++
++out_journal:
++ ocfs2_commit_trans(osb, handle);
++
++out_unlock:
++ ocfs2_meta_unlock(inode, 1);
++ brelse(di_bh);
++out_mutex:
++ mutex_unlock(&inode->i_mutex);
++ iput(inode);
++out:
++ while(head) {
++ /* Premature exit may have left some dangling items. */
++ tmp = head;
++ head = head->free_next;
++ kfree(tmp);
++ }
++
++ return ret;
++}
++
++int ocfs2_run_deallocs(struct ocfs2_super *osb,
++ struct ocfs2_cached_dealloc_ctxt *ctxt)
++{
++ int ret = 0, ret2;
++ struct ocfs2_per_slot_free_list *fl;
++
++ if (!ctxt)
++ return 0;
++
++ while (ctxt->c_first_suballocator) {
++ fl = ctxt->c_first_suballocator;
++
++ if (fl->f_first) {
++ mlog(0, "Free items: (type %u, slot %d)\n",
++ fl->f_inode_type, fl->f_slot);
++ ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
++ fl->f_slot, fl->f_first);
++ if (ret2)
++ mlog_errno(ret2);
++ if (!ret)
++ ret = ret2;
++ }
++
++ ctxt->c_first_suballocator = fl->f_next_suballocator;
++ kfree(fl);
++ }
++
++ return ret;
++}
++
++static struct ocfs2_per_slot_free_list *
++ocfs2_find_per_slot_free_list(int type,
++ int slot,
++ struct ocfs2_cached_dealloc_ctxt *ctxt)
++{
++ struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
++
++ while (fl) {
++ if (fl->f_inode_type == type && fl->f_slot == slot)
++ return fl;
++
++ fl = fl->f_next_suballocator;
++ }
++
++ fl = kmalloc(sizeof(*fl), GFP_NOFS);
++ if (fl) {
++ fl->f_inode_type = type;
++ fl->f_slot = slot;
++ fl->f_first = NULL;
++ fl->f_next_suballocator = ctxt->c_first_suballocator;
++
++ ctxt->c_first_suballocator = fl;
++ }
++ return fl;
++}
++
++static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
++ int type, int slot, u64 blkno,
++ unsigned int bit)
++{
++ int ret;
++ struct ocfs2_per_slot_free_list *fl;
++ struct ocfs2_cached_block_free *item;
++
++ fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
++ if (fl == NULL) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ item = kmalloc(sizeof(*item), GFP_NOFS);
++ if (item == NULL) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
++ type, slot, bit, (unsigned long long)blkno);
++
++ item->free_blk = blkno;
++ item->free_bit = bit;
++ item->free_next = fl->f_first;
++
++ fl->f_first = item;
++
++ ret = 0;
++out:
++ return ret;
++}
++
++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
++ struct ocfs2_extent_block *eb)
++{
++ return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
++ le16_to_cpu(eb->h_suballoc_slot),
++ le64_to_cpu(eb->h_blkno),
++ le16_to_cpu(eb->h_suballoc_bit));
++}
++
+ /* This function will figure out whether the currently last extent
+ * block will be deleted, and if it will, what the new last extent
+ * block will be so we can update his h_next_leaf_blk field, as well
+@@ -3238,27 +4914,10 @@
+ BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
+ BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
+
+- if (le16_to_cpu(eb->h_suballoc_slot) == 0) {
+- /*
+- * This code only understands how to
+- * lock the suballocator in slot 0,
+- * which is fine because allocation is
+- * only ever done out of that
+- * suballocator too. A future version
+- * might change that however, so avoid
+- * a free if we don't know how to
+- * handle it. This way an fs incompat
+- * bit will not be necessary.
+- */
+- ret = ocfs2_free_extent_block(handle,
+- tc->tc_ext_alloc_inode,
+- tc->tc_ext_alloc_bh,
+- eb);
+-
++ ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
+ /* An error here is not fatal. */
+ if (ret < 0)
+ mlog_errno(ret);
+- }
+ } else {
+ deleted_eb = 0;
+ }
+@@ -3631,8 +5290,6 @@
+
+ mlog_entry_void();
+
+- down_write(&OCFS2_I(inode)->ip_alloc_sem);
+-
+ new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
+ i_size_read(inode));
+
+@@ -3754,7 +5411,6 @@
+ goto start;
+
+ bail:
+- up_write(&OCFS2_I(inode)->ip_alloc_sem);
+
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+
+@@ -3764,6 +5420,8 @@
+ if (handle)
+ ocfs2_commit_trans(osb, handle);
+
++ ocfs2_run_deallocs(osb, &tc->tc_dealloc);
++
+ ocfs2_free_path(path);
+
+ /* This will drop the ext_alloc cluster lock for us */
+@@ -3774,23 +5432,18 @@
+ }
+
+ /*
+- * Expects the inode to already be locked. This will figure out which
+- * inodes need to be locked and will put them on the returned truncate
+- * context.
++ * Expects the inode to already be locked.
+ */
+ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
+ struct inode *inode,
+ struct buffer_head *fe_bh,
+ struct ocfs2_truncate_context **tc)
+ {
+- int status, metadata_delete, i;
++ int status;
+ unsigned int new_i_clusters;
+ struct ocfs2_dinode *fe;
+ struct ocfs2_extent_block *eb;
+- struct ocfs2_extent_list *el;
+ struct buffer_head *last_eb_bh = NULL;
+- struct inode *ext_alloc_inode = NULL;
+- struct buffer_head *ext_alloc_bh = NULL;
+
+ mlog_entry_void();
+
+@@ -3810,12 +5463,9 @@
+ mlog_errno(status);
+ goto bail;
+ }
++ ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
+
+- metadata_delete = 0;
+ if (fe->id2.i_list.l_tree_depth) {
+- /* If we have a tree, then the truncate may result in
+- * metadata deletes. Figure this out from the
+- * rightmost leaf block.*/
+ status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
+ &last_eb_bh, OCFS2_BH_CACHED, inode);
+ if (status < 0) {
+@@ -3830,43 +5480,10 @@
+ status = -EIO;
+ goto bail;
+ }
+- el = &(eb->h_list);
+-
+- i = 0;
+- if (ocfs2_is_empty_extent(&el->l_recs[0]))
+- i = 1;
+- /*
+- * XXX: Should we check that next_free_rec contains
+- * the extent?
+- */
+- if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters)
+- metadata_delete = 1;
+ }
+
+ (*tc)->tc_last_eb_bh = last_eb_bh;
+
+- if (metadata_delete) {
+- mlog(0, "Will have to delete metadata for this trunc. "
+- "locking allocator.\n");
+- ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
+- if (!ext_alloc_inode) {
+- status = -ENOMEM;
+- mlog_errno(status);
+- goto bail;
+- }
+-
+- mutex_lock(&ext_alloc_inode->i_mutex);
+- (*tc)->tc_ext_alloc_inode = ext_alloc_inode;
+-
+- status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1);
+- if (status < 0) {
+- mlog_errno(status);
+- goto bail;
+- }
+- (*tc)->tc_ext_alloc_bh = ext_alloc_bh;
+- (*tc)->tc_ext_alloc_locked = 1;
+- }
+-
+ status = 0;
+ bail:
+ if (status < 0) {
+@@ -3880,16 +5497,13 @@
+
+ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
+ {
+- if (tc->tc_ext_alloc_inode) {
+- if (tc->tc_ext_alloc_locked)
+- ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
+-
+- mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
+- iput(tc->tc_ext_alloc_inode);
+- }
+-
+- if (tc->tc_ext_alloc_bh)
+- brelse(tc->tc_ext_alloc_bh);
++ /*
++ * The caller is responsible for completing deallocation
++ * before freeing the context.
++ */
++ if (tc->tc_dealloc.c_first_suballocator != NULL)
++ mlog(ML_NOTICE,
++ "Truncate completion has non-empty dealloc context\n");
+
+ if (tc->tc_last_eb_bh)
+ brelse(tc->tc_last_eb_bh);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.h linux-2.6.22-591/fs/ocfs2/alloc.h
+--- linux-2.6.22-570/fs/ocfs2/alloc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/alloc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -34,7 +34,13 @@
+ u32 cpos,
+ u64 start_blk,
+ u32 new_clusters,
++ u8 flags,
+ struct ocfs2_alloc_context *meta_ac);
++struct ocfs2_cached_dealloc_ctxt;
++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
++ handle_t *handle, u32 cpos, u32 len, u32 phys,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_cached_dealloc_ctxt *dealloc);
+ int ocfs2_num_free_extents(struct ocfs2_super *osb,
+ struct inode *inode,
+ struct ocfs2_dinode *fe);
+@@ -63,9 +69,27 @@
+ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
+ struct ocfs2_dinode *tl_copy);
+
++/*
++ * Process-local structure which describes the block unlinks done
++ * during an operation. This is populated via
++ * ocfs2_cache_block_dealloc().
++ *
++ * ocfs2_run_deallocs() should be called after the potentially
++ * de-allocating routines. No journal handles should be open, and most
++ * locks should have been dropped.
++ */
++struct ocfs2_cached_dealloc_ctxt {
++ struct ocfs2_per_slot_free_list *c_first_suballocator;
++};
++static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
++{
++ c->c_first_suballocator = NULL;
++}
++int ocfs2_run_deallocs(struct ocfs2_super *osb,
++ struct ocfs2_cached_dealloc_ctxt *ctxt);
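++
++/*
++ * A minimal usage sketch (hypothetical caller, error handling and
++ * locking details omitted):
++ *
++ *   struct ocfs2_cached_dealloc_ctxt dealloc;
++ *
++ *   ocfs2_init_dealloc_ctxt(&dealloc);
++ *   ... tree operations queue block frees into &dealloc ...
++ *   ocfs2_commit_trans(osb, handle);
++ *   ocfs2_meta_unlock(inode, 1);
++ *   ocfs2_run_deallocs(osb, &dealloc);
++ */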
++
+ struct ocfs2_truncate_context {
+- struct inode *tc_ext_alloc_inode;
+- struct buffer_head *tc_ext_alloc_bh;
++ struct ocfs2_cached_dealloc_ctxt tc_dealloc;
+ int tc_ext_alloc_locked; /* is it cluster locked? */
+ /* these get destroyed once it's passed to ocfs2_commit_truncate. */
+ struct buffer_head *tc_last_eb_bh;
+@@ -84,6 +108,7 @@
+
+ int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
+ u32 cpos, struct buffer_head **leaf_bh);
++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
+
+ /*
+ * Helper function to look at the # of clusters in an extent record.
+diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.c linux-2.6.22-591/fs/ocfs2/aops.c
+--- linux-2.6.22-570/fs/ocfs2/aops.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/aops.c 2007-12-21 15:36:12.000000000 -0500
+@@ -232,7 +232,7 @@
+ * might now be discovering a truncate that hit on another node.
+ * block_read_full_page->get_block freaks out if it is asked to read
+ * beyond the end of a file, so we check here. Callers
+- * (generic_file_read, fault->nopage) are clever enough to check i_size
++ * (generic_file_read, vm_ops->fault) are clever enough to check i_size
+ * and notice that the page they just read isn't needed.
+ *
+ * XXX sys_readahead() seems to get that wrong?
+@@ -705,6 +705,8 @@
+ bh = bh->b_this_page, block_start += bsize) {
+ block_end = block_start + bsize;
+
++ clear_buffer_new(bh);
++
+ /*
+ * Ignore blocks outside of our i/o range -
+ * they may belong to unallocated clusters.
+@@ -719,9 +721,8 @@
+ * For an allocating write with cluster size >= page
+ * size, we always write the entire page.
+ */
+-
+- if (buffer_new(bh))
+- clear_buffer_new(bh);
++ if (new)
++ set_buffer_new(bh);
+
+ if (!buffer_mapped(bh)) {
+ map_bh(bh, inode->i_sb, *p_blkno);
+@@ -760,18 +761,13 @@
+ bh = head;
+ block_start = 0;
+ do {
+- void *kaddr;
+-
+ block_end = block_start + bsize;
+ if (block_end <= from)
+ goto next_bh;
+ if (block_start >= to)
+ break;
+
+- kaddr = kmap_atomic(page, KM_USER0);
+- memset(kaddr+block_start, 0, bh->b_size);
+- flush_dcache_page(page);
+- kunmap_atomic(kaddr, KM_USER0);
++ zero_user_page(page, block_start, bh->b_size, KM_USER0);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+
+@@ -783,217 +779,240 @@
+ return ret;
+ }
+
++#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
++#define OCFS2_MAX_CTXT_PAGES 1
++#else
++#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
++#endif
++
++#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
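++
++/*
++ * Worked example of the two bounds above, assuming the usual
++ * OCFS2_MIN_CLUSTERSIZE of 4k and OCFS2_MAX_CLUSTERSIZE of 1M: with
++ * 4k pages, OCFS2_MAX_CTXT_PAGES is 1M/4k = 256 while
++ * OCFS2_MAX_CLUSTERS_PER_PAGE is 4k/4k = 1. Only one of the two can
++ * exceed 1, which is what allows both w_pages[] and w_desc[] below
++ * to be fixed-size arrays.
++ */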
++
+ /*
+- * This will copy user data from the buffer page in the splice
+- * context.
+- *
+- * For now, we ignore SPLICE_F_MOVE as that would require some extra
+- * communication out all the way to ocfs2_write().
++ * Describe the state of a single cluster to be written to.
+ */
+-int ocfs2_map_and_write_splice_data(struct inode *inode,
+- struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+- unsigned int *ret_from, unsigned int *ret_to)
++struct ocfs2_write_cluster_desc {
++ u32 c_cpos;
++ u32 c_phys;
++ /*
++ * Give this a unique field because c_phys eventually gets
++ * filled.
++ */
++ unsigned c_new;
++ unsigned c_unwritten;
++};
++
++static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
+ {
+- int ret;
+- unsigned int to, from, cluster_start, cluster_end;
+- char *src, *dst;
+- struct ocfs2_splice_write_priv *sp = wc->w_private;
+- struct pipe_buffer *buf = sp->s_buf;
+- unsigned long bytes, src_from;
+- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++ return d->c_new || d->c_unwritten;
++}
+
+- ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+- &cluster_end);
++struct ocfs2_write_ctxt {
++ /* Logical cluster position / len of write */
++ u32 w_cpos;
++ u32 w_clen;
+
+- from = sp->s_offset;
+- src_from = sp->s_buf_offset;
+- bytes = wc->w_count;
++ struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
+
+- if (wc->w_large_pages) {
+ /*
+- * For cluster size < page size, we have to
+- * calculate pos within the cluster and obey
+- * the rightmost boundary.
+- */
+- bytes = min(bytes, (unsigned long)(osb->s_clustersize
+- - (wc->w_pos & (osb->s_clustersize - 1))));
+- }
+- to = from + bytes;
+-
+- BUG_ON(from > PAGE_CACHE_SIZE);
+- BUG_ON(to > PAGE_CACHE_SIZE);
+- BUG_ON(from < cluster_start);
+- BUG_ON(to > cluster_end);
+-
+- if (wc->w_this_page_new)
+- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+- cluster_start, cluster_end, 1);
+- else
+- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+- from, to, 0);
+- if (ret) {
+- mlog_errno(ret);
+- goto out;
++ * This is true if page_size > cluster_size.
++ *
++ * It triggers a set of special cases during write which might
++ * have to deal with allocating writes to partial pages.
++ */
++ unsigned int w_large_pages;
++
++ /*
++ * Pages involved in this write.
++ *
++ * w_target_page is the page being written to by the user.
++ *
++ * w_pages is an array of pages which always contains
++ * w_target_page, and in the case of an allocating write with
++ * page_size < cluster size, it will contain zero'd and mapped
++ * pages adjacent to w_target_page which need to be written
++ * out so that future reads from that region will get
++ * zeros.
++ */
++ struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
++ unsigned int w_num_pages;
++ struct page *w_target_page;
++
++ /*
++ * ocfs2_write_end() uses this to know what the real range to
++ * write in the target should be.
++ */
++ unsigned int w_target_from;
++ unsigned int w_target_to;
++
++ /*
++ * We could use journal_current_handle() but this is cleaner,
++ * IMHO -Mark
++ */
++ handle_t *w_handle;
++
++ struct buffer_head *w_di_bh;
++
++ struct ocfs2_cached_dealloc_ctxt w_dealloc;
++};
++
++static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
++{
++ int i;
++
++ for(i = 0; i < wc->w_num_pages; i++) {
++ if (wc->w_pages[i] == NULL)
++ continue;
++
++ unlock_page(wc->w_pages[i]);
++ mark_page_accessed(wc->w_pages[i]);
++ page_cache_release(wc->w_pages[i]);
+ }
+
+- src = buf->ops->map(sp->s_pipe, buf, 1);
+- dst = kmap_atomic(wc->w_this_page, KM_USER1);
+- memcpy(dst + from, src + src_from, bytes);
+- kunmap_atomic(wc->w_this_page, KM_USER1);
+- buf->ops->unmap(sp->s_pipe, buf, src);
++ brelse(wc->w_di_bh);
++ kfree(wc);
++}
++
++static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
++ struct ocfs2_super *osb, loff_t pos,
++ unsigned len, struct buffer_head *di_bh)
++{
++ struct ocfs2_write_ctxt *wc;
++
++ wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
++ if (!wc)
++ return -ENOMEM;
++
++ wc->w_cpos = pos >> osb->s_clustersize_bits;
++ wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
++ get_bh(di_bh);
++ wc->w_di_bh = di_bh;
+
+- wc->w_finished_copy = 1;
++ if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
++ wc->w_large_pages = 1;
++ else
++ wc->w_large_pages = 0;
+
+- *ret_from = from;
+- *ret_to = to;
+-out:
++ ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+
+- return bytes ? (unsigned int)bytes : ret;
++ *wcp = wc;
++
++ return 0;
+ }
+
+ /*
+- * This will copy user data from the iovec in the buffered write
+- * context.
++ * If a page has any new buffers, zero them out here, and mark them uptodate
++ * and dirty so they'll be written out (in order to prevent uninitialised
++ * block data from leaking). And clear the new bit.
+ */
+-int ocfs2_map_and_write_user_data(struct inode *inode,
+- struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+- unsigned int *ret_from, unsigned int *ret_to)
++static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+ {
+- int ret;
+- unsigned int to, from, cluster_start, cluster_end;
+- unsigned long bytes, src_from;
+- char *dst;
+- struct ocfs2_buffered_write_priv *bp = wc->w_private;
+- const struct iovec *cur_iov = bp->b_cur_iov;
+- char __user *buf;
+- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+-
+- ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+- &cluster_end);
++ unsigned int block_start, block_end;
++ struct buffer_head *head, *bh;
+
+- buf = cur_iov->iov_base + bp->b_cur_off;
+- src_from = (unsigned long)buf & ~PAGE_CACHE_MASK;
++ BUG_ON(!PageLocked(page));
++ if (!page_has_buffers(page))
++ return;
+
+- from = wc->w_pos & (PAGE_CACHE_SIZE - 1);
++ bh = head = page_buffers(page);
++ block_start = 0;
++ do {
++ block_end = block_start + bh->b_size;
+
+- /*
+- * This is a lot of comparisons, but it reads quite
+- * easily, which is important here.
+- */
+- /* Stay within the src page */
+- bytes = PAGE_SIZE - src_from;
+- /* Stay within the vector */
+- bytes = min(bytes,
+- (unsigned long)(cur_iov->iov_len - bp->b_cur_off));
+- /* Stay within count */
+- bytes = min(bytes, (unsigned long)wc->w_count);
+- /*
+- * For clustersize > page size, just stay within
+- * target page, otherwise we have to calculate pos
+- * within the cluster and obey the rightmost
+- * boundary.
+- */
+- if (wc->w_large_pages) {
+- /*
+- * For cluster size < page size, we have to
+- * calculate pos within the cluster and obey
+- * the rightmost boundary.
+- */
+- bytes = min(bytes, (unsigned long)(osb->s_clustersize
+- - (wc->w_pos & (osb->s_clustersize - 1))));
+- } else {
+- /*
+- * cluster size > page size is the most common
+- * case - we just stay within the target page
+- * boundary.
+- */
+- bytes = min(bytes, PAGE_CACHE_SIZE - from);
+- }
++ if (buffer_new(bh)) {
++ if (block_end > from && block_start < to) {
++ if (!PageUptodate(page)) {
++ unsigned start, end;
+
+- to = from + bytes;
++ start = max(from, block_start);
++ end = min(to, block_end);
+
+- BUG_ON(from > PAGE_CACHE_SIZE);
+- BUG_ON(to > PAGE_CACHE_SIZE);
+- BUG_ON(from < cluster_start);
+- BUG_ON(to > cluster_end);
++ zero_user_page(page, start, end - start, KM_USER0);
++ set_buffer_uptodate(bh);
++ }
+
+- if (wc->w_this_page_new)
+- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+- cluster_start, cluster_end, 1);
+- else
+- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+- from, to, 0);
+- if (ret) {
+- mlog_errno(ret);
+- goto out;
++ clear_buffer_new(bh);
++ mark_buffer_dirty(bh);
++ }
+ }
+
+- dst = kmap(wc->w_this_page);
+- memcpy(dst + from, bp->b_src_buf + src_from, bytes);
+- kunmap(wc->w_this_page);
++ block_start = block_end;
++ bh = bh->b_this_page;
++ } while (bh != head);
++}
+
+- /*
+- * XXX: This is slow, but simple. The caller of
+- * ocfs2_buffered_write_cluster() is responsible for
+- * passing through the iovecs, so it's difficult to
+- * predict what our next step is in here after our
+- * initial write. A future version should be pushing
+- * that iovec manipulation further down.
+- *
+- * By setting this, we indicate that a copy from user
+- * data was done, and subsequent calls for this
+- * cluster will skip copying more data.
++/*
++ * Only called when we have a failure during an allocating write, in
++ * which case we write zeros to the newly allocated region.
+ */
+- wc->w_finished_copy = 1;
++static void ocfs2_write_failure(struct inode *inode,
++ struct ocfs2_write_ctxt *wc,
++ loff_t user_pos, unsigned user_len)
++{
++ int i;
++ unsigned from, to;
++ struct page *tmppage;
+
+- *ret_from = from;
+- *ret_to = to;
+-out:
++ ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
++
++ if (wc->w_large_pages) {
++ from = wc->w_target_from;
++ to = wc->w_target_to;
++ } else {
++ from = 0;
++ to = PAGE_CACHE_SIZE;
++ }
+
+- return bytes ? (unsigned int)bytes : ret;
++ for(i = 0; i < wc->w_num_pages; i++) {
++ tmppage = wc->w_pages[i];
++
++ if (ocfs2_should_order_data(inode))
++ walk_page_buffers(wc->w_handle, page_buffers(tmppage),
++ from, to, NULL,
++ ocfs2_journal_dirty_data);
++
++ block_commit_write(tmppage, from, to);
++ }
+ }
+
+-/*
+- * Map, fill and write a page to disk.
+- *
+- * The work of copying data is done via callback. Newly allocated
+- * pages which don't take user data will be zero'd (set 'new' to
+- * indicate an allocating write)
+- *
+- * Returns a negative error code or the number of bytes copied into
+- * the page.
+- */
+-static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
+- u64 *p_blkno, struct page *page,
+- struct ocfs2_write_ctxt *wc, int new)
++static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
++ struct ocfs2_write_ctxt *wc,
++ struct page *page, u32 cpos,
++ loff_t user_pos, unsigned user_len,
++ int new)
+ {
+- int ret, copied = 0;
+- unsigned int from = 0, to = 0;
++ int ret;
++ unsigned int map_from = 0, map_to = 0;
+ unsigned int cluster_start, cluster_end;
+- unsigned int zero_from = 0, zero_to = 0;
++ unsigned int user_data_from = 0, user_data_to = 0;
+
+- ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos,
++ ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
+ &cluster_start, &cluster_end);
+
+- if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index
+- && !wc->w_finished_copy) {
++ if (page == wc->w_target_page) {
++ map_from = user_pos & (PAGE_CACHE_SIZE - 1);
++ map_to = map_from + user_len;
+
+- wc->w_this_page = page;
+- wc->w_this_page_new = new;
+- ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to);
+- if (ret < 0) {
++ if (new)
++ ret = ocfs2_map_page_blocks(page, p_blkno, inode,
++ cluster_start, cluster_end,
++ new);
++ else
++ ret = ocfs2_map_page_blocks(page, p_blkno, inode,
++ map_from, map_to, new);
++ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+- copied = ret;
+-
+- zero_from = from;
+- zero_to = to;
++ user_data_from = map_from;
++ user_data_to = map_to;
+ if (new) {
+- from = cluster_start;
+- to = cluster_end;
++ map_from = cluster_start;
++ map_to = cluster_end;
+ }
++
++ wc->w_target_from = map_from;
++ wc->w_target_to = map_to;
+ } else {
+ /*
+ * If we haven't allocated the new page yet, we
+@@ -1002,11 +1021,11 @@
+ */
+ BUG_ON(!new);
+
+- from = cluster_start;
+- to = cluster_end;
++ map_from = cluster_start;
++ map_to = cluster_end;
+
+ ret = ocfs2_map_page_blocks(page, p_blkno, inode,
+- cluster_start, cluster_end, 1);
++ cluster_start, cluster_end, new);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+@@ -1025,108 +1044,113 @@
+ */
+ if (new && !PageUptodate(page))
+ ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
+- wc->w_cpos, zero_from, zero_to);
++ cpos, user_data_from, user_data_to);
+
+ flush_dcache_page(page);
+
+- if (ocfs2_should_order_data(inode)) {
+- ret = walk_page_buffers(handle,
+- page_buffers(page),
+- from, to, NULL,
+- ocfs2_journal_dirty_data);
+- if (ret < 0)
+- mlog_errno(ret);
+- }
+-
+- /*
+- * We don't use generic_commit_write() because we need to
+- * handle our own i_size update.
+- */
+- ret = block_commit_write(page, from, to);
+- if (ret)
+- mlog_errno(ret);
+ out:
+-
+- return copied ? copied : ret;
++ return ret;
+ }
+
+ /*
+- * Do the actual write of some data into an inode. Optionally allocate
+- * in order to fulfill the write.
+- *
+- * cpos is the logical cluster offset within the file to write at
+- *
+- * 'phys' is the physical mapping of that offset. a 'phys' value of
+- * zero indicates that allocation is required. In this case, data_ac
+- * and meta_ac should be valid (meta_ac can be null if metadata
+- * allocation isn't required).
++ * This function will only grab one cluster's worth of pages.
+ */
+-static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
+- struct buffer_head *di_bh,
+- struct ocfs2_alloc_context *data_ac,
+- struct ocfs2_alloc_context *meta_ac,
+- struct ocfs2_write_ctxt *wc)
++static int ocfs2_grab_pages_for_write(struct address_space *mapping,
++ struct ocfs2_write_ctxt *wc,
++ u32 cpos, loff_t user_pos, int new,
++ struct page *mmap_page)
+ {
+- int ret, i, numpages = 1, new;
+- unsigned int copied = 0;
+- u32 tmp_pos;
+- u64 v_blkno, p_blkno;
+- struct address_space *mapping = file->f_mapping;
++ int ret = 0, i;
++ unsigned long start, target_index, index;
+ struct inode *inode = mapping->host;
+- unsigned long index, start;
+- struct page **cpages;
+
+- new = phys == 0 ? 1 : 0;
++ target_index = user_pos >> PAGE_CACHE_SHIFT;
+
+ /*
+ * Figure out how many pages we'll be manipulating here. For
+ * non allocating write, we just change the one
+ * page. Otherwise, we'll need a whole clusters worth.
+ */
+- if (new)
+- numpages = ocfs2_pages_per_cluster(inode->i_sb);
+-
+- cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
+- if (!cpages) {
+- ret = -ENOMEM;
+- mlog_errno(ret);
+- return ret;
+- }
+-
+- /*
+- * Fill our page array first. That way we've grabbed enough so
+- * that we can zero and flush if we error after adding the
+- * extent.
+- */
+ if (new) {
+- start = ocfs2_align_clusters_to_page_index(inode->i_sb,
+- wc->w_cpos);
+- v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos);
++ wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
++ start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+ } else {
+- start = wc->w_pos >> PAGE_CACHE_SHIFT;
+- v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits;
++ wc->w_num_pages = 1;
++ start = target_index;
+ }
+
+- for(i = 0; i < numpages; i++) {
++ for(i = 0; i < wc->w_num_pages; i++) {
+ index = start + i;
+
+- cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
+- if (!cpages[i]) {
++ if (index == target_index && mmap_page) {
++ /*
++ * ocfs2_pagemkwrite() is a little different
++ * and wants us to directly use the page
++ * passed in.
++ */
++ lock_page(mmap_page);
++
++ if (mmap_page->mapping != mapping) {
++ unlock_page(mmap_page);
++ /*
++ * Sanity check - the locking in
++ * ocfs2_pagemkwrite() should ensure
++ * that this code doesn't trigger.
++ */
++ ret = -EINVAL;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ page_cache_get(mmap_page);
++ wc->w_pages[i] = mmap_page;
++ } else {
++ wc->w_pages[i] = find_or_create_page(mapping, index,
++ GFP_NOFS);
++ if (!wc->w_pages[i]) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
++ if (index == target_index)
++ wc->w_target_page = wc->w_pages[i];
++ }
++out:
++ return ret;
++}
++
++/*
++ * Prepare a single cluster for writing into the file.
++ */
++static int ocfs2_write_cluster(struct address_space *mapping,
++ u32 phys, unsigned int unwritten,
++ struct ocfs2_alloc_context *data_ac,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_write_ctxt *wc, u32 cpos,
++ loff_t user_pos, unsigned user_len)
++{
++ int ret, i, new, should_zero = 0;
++ u64 v_blkno, p_blkno;
++ struct inode *inode = mapping->host;
++
++ new = phys == 0 ? 1 : 0;
++ if (new || unwritten)
++ should_zero = 1;
++
+ if (new) {
++ u32 tmp_pos;
++
+ /*
+ * This is safe to call with the page locks - it won't take
+ * any additional semaphores or cluster locks.
+ */
+- tmp_pos = wc->w_cpos;
++ tmp_pos = cpos;
+ ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
+- &tmp_pos, 1, di_bh, handle,
+- data_ac, meta_ac, NULL);
++ &tmp_pos, 1, 0, wc->w_di_bh,
++ wc->w_handle, data_ac,
++ meta_ac, NULL);
+ /*
+ * This shouldn't happen because we must have already
+ * calculated the correct meta data allocation required. The
+@@ -1143,159 +1167,433 @@
+ mlog_errno(ret);
+ goto out;
+ }
++ } else if (unwritten) {
++ ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
++ wc->w_handle, cpos, 1, phys,
++ meta_ac, &wc->w_dealloc);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
+ }
+
++ if (should_zero)
++ v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
++ else
++ v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
++
++ /*
++ * The only reason this should fail is due to an inability to
++ * find the extent added.
++ */
+ ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+ NULL);
+ if (ret < 0) {
++ ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "
++ "at logical block %llu",
++ (unsigned long long)OCFS2_I(inode)->ip_blkno,
++ (unsigned long long)v_blkno);
++ goto out;
++ }
++
++ BUG_ON(p_blkno == 0);
++
++ for(i = 0; i < wc->w_num_pages; i++) {
++ int tmpret;
++
++ tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
++ wc->w_pages[i], cpos,
++ user_pos, user_len,
++ should_zero);
++ if (tmpret) {
++ mlog_errno(tmpret);
++ if (ret == 0)
++ ret = tmpret;
++ }
++ }
+
+ /*
+- * XXX: Should we go readonly here?
++ * We only have cleanup to do in case of allocating write.
+ */
++ if (ret && new)
++ ocfs2_write_failure(inode, wc, user_pos, user_len);
+
+- mlog_errno(ret);
+- goto out;
+- }
++out:
+
+- BUG_ON(p_blkno == 0);
++ return ret;
++}
+
+- for(i = 0; i < numpages; i++) {
+- ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i],
+- wc, new);
+- if (ret < 0) {
++static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
++ struct ocfs2_alloc_context *data_ac,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_write_ctxt *wc,
++ loff_t pos, unsigned len)
++{
++ int ret, i;
++ struct ocfs2_write_cluster_desc *desc;
++
++ for (i = 0; i < wc->w_clen; i++) {
++ desc = &wc->w_desc[i];
++
++ ret = ocfs2_write_cluster(mapping, desc->c_phys,
++ desc->c_unwritten, data_ac, meta_ac,
++ wc, desc->c_cpos, pos, len);
++ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+-
+- copied += ret;
+ }
+
++ ret = 0;
+ out:
+- for(i = 0; i < numpages; i++) {
+- unlock_page(cpages[i]);
+- mark_page_accessed(cpages[i]);
+- page_cache_release(cpages[i]);
+- }
+- kfree(cpages);
+-
+- return copied ? copied : ret;
++ return ret;
+ }
+
+-static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc,
+- struct ocfs2_super *osb, loff_t pos,
+- size_t count, ocfs2_page_writer *cb,
+- void *cb_priv)
++/*
++ * ocfs2_write_end() wants to know which parts of the target page it
++ * should complete the write on. It's easiest to compute them ahead of
++ * time when a more complete view of the write is available.
++ */
++static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
++ struct ocfs2_write_ctxt *wc,
++ loff_t pos, unsigned len, int alloc)
+ {
+- wc->w_count = count;
+- wc->w_pos = pos;
+- wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits;
+- wc->w_finished_copy = 0;
++ struct ocfs2_write_cluster_desc *desc;
+
+- if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
+- wc->w_large_pages = 1;
+- else
+- wc->w_large_pages = 0;
++ wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
++ wc->w_target_to = wc->w_target_from + len;
++
++ if (alloc == 0)
++ return;
++
++ /*
++ * Allocating write - we may have different boundaries based
++ * on page size and cluster size.
++ *
++ * NOTE: We can no longer compute one value from the other as
++ * the actual write length and user provided length may be
++ * different.
++ */
+
+- wc->w_write_data_page = cb;
+- wc->w_private = cb_priv;
++ if (wc->w_large_pages) {
++ /*
++ * We only care about the 1st and last cluster within
++ * our range and whether they should be zero'd or not. Either
++ * value may be extended out to the start/end of a
++ * newly allocated cluster.
++ */
++ desc = &wc->w_desc[0];
++ if (ocfs2_should_zero_cluster(desc))
++ ocfs2_figure_cluster_boundaries(osb,
++ desc->c_cpos,
++ &wc->w_target_from,
++ NULL);
++
++ desc = &wc->w_desc[wc->w_clen - 1];
++ if (ocfs2_should_zero_cluster(desc))
++ ocfs2_figure_cluster_boundaries(osb,
++ desc->c_cpos,
++ NULL,
++ &wc->w_target_to);
++ } else {
++ wc->w_target_from = 0;
++ wc->w_target_to = PAGE_CACHE_SIZE;
++ }
+ }
+
+ /*
+- * Write a cluster to an inode. The cluster may not be allocated yet,
+- * in which case it will be. This only exists for buffered writes -
+- * O_DIRECT takes a more "traditional" path through the kernel.
++ * Populate each single-cluster write descriptor in the write context
++ * with information about the i/o to be done.
+ *
+- * The caller is responsible for incrementing pos, written counts, etc
+- *
+- * For file systems that don't support sparse files, pre-allocation
+- * and page zeroing up until cpos should be done prior to this
+- * function call.
+- *
+- * Callers should be holding i_sem, and the rw cluster lock.
++ * Returns the number of clusters that will have to be allocated, as
++ * well as a worst case estimate of the number of extent records that
++ * would have to be created during a write to an unwritten region.
++ */
++static int ocfs2_populate_write_desc(struct inode *inode,
++ struct ocfs2_write_ctxt *wc,
++ unsigned int *clusters_to_alloc,
++ unsigned int *extents_to_split)
++{
++ int ret;
++ struct ocfs2_write_cluster_desc *desc;
++ unsigned int num_clusters = 0;
++ unsigned int ext_flags = 0;
++ u32 phys = 0;
++ int i;
++
++ *clusters_to_alloc = 0;
++ *extents_to_split = 0;
++
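++	/*
++	 * Walk the write region one cluster at a time. A single
++	 * ocfs2_get_clusters() lookup describes a whole extent, so
++	 * num_clusters caches how far that record carries us before
++	 * another lookup is needed.
++	 */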
++ for (i = 0; i < wc->w_clen; i++) {
++ desc = &wc->w_desc[i];
++ desc->c_cpos = wc->w_cpos + i;
++
++ if (num_clusters == 0) {
++ /*
++ * Need to look up the next extent record.
++ */
++ ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
++ &num_clusters, &ext_flags);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ /*
++ * Assume worst case - that we're writing in
++ * the middle of the extent.
+ *
+- * Returns the number of user bytes written, or less than zero for
+- * error.
++ * We can assume that the write proceeds from
++ * left to right, in which case the extent
++ * insert code is smart enough to coalesce the
++ * next splits into the previous records created.
+ */
+-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
+- size_t count, ocfs2_page_writer *actor,
+- void *priv)
++ if (ext_flags & OCFS2_EXT_UNWRITTEN)
++ *extents_to_split = *extents_to_split + 2;
++ } else if (phys) {
++ /*
++ * Only increment phys if it doesn't describe
++ * a hole.
++ */
++ phys++;
++ }
++
++ desc->c_phys = phys;
++ if (phys == 0) {
++ desc->c_new = 1;
++ *clusters_to_alloc = *clusters_to_alloc + 1;
++ }
++ if (ext_flags & OCFS2_EXT_UNWRITTEN)
++ desc->c_unwritten = 1;
++
++ num_clusters--;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
++
++int ocfs2_write_begin_nolock(struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata,
++ struct buffer_head *di_bh, struct page *mmap_page)
+ {
+ int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+- ssize_t written = 0;
+- u32 phys;
+- struct inode *inode = file->f_mapping->host;
++ unsigned int clusters_to_alloc, extents_to_split;
++ struct ocfs2_write_ctxt *wc;
++ struct inode *inode = mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+- struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di;
+ struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle;
+- struct ocfs2_write_ctxt wc;
+
+- ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv);
++ ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
++ if (ret) {
++ mlog_errno(ret);
++ return ret;
++ }
+
+- ret = ocfs2_meta_lock(inode, &di_bh, 1);
++ ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
++ &extents_to_split);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+- di = (struct ocfs2_dinode *)di_bh->b_data;
++
++ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+
+ /*
+- * Take alloc sem here to prevent concurrent lookups. That way
+- * the mapping, zeroing and tree manipulation within
+- * ocfs2_write() will be safe against ->readpage(). This
+- * should also serve to lock out allocation from a shared
+- * writeable region.
++ * We set w_target_from, w_target_to here so that
++ * ocfs2_write_end() knows which range in the target page to
++ * write out. An allocation requires that we write the entire
++ * cluster range.
+ */
+- down_write(&OCFS2_I(inode)->ip_alloc_sem);
+-
+- ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL);
++ if (clusters_to_alloc || extents_to_split) {
++ /*
++ * XXX: We are stretching the limits of
++ * ocfs2_lock_allocators(). It greatly over-estimates
++ * the work to be done.
++ */
++ ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
++ extents_to_split, &data_ac, &meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+- goto out_meta;
++ goto out;
+ }
+
+- /* phys == 0 means that allocation is required. */
+- if (phys == 0) {
+- ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac);
++ credits = ocfs2_calc_extend_credits(inode->i_sb, di,
++ clusters_to_alloc);
++
++ }
++
++ ocfs2_set_target_boundaries(osb, wc, pos, len,
++ clusters_to_alloc + extents_to_split);
++
++ handle = ocfs2_start_trans(osb, credits);
++ if (IS_ERR(handle)) {
++ ret = PTR_ERR(handle);
++ mlog_errno(ret);
++ goto out;
++ }
++
++ wc->w_handle = handle;
++
++ /*
++ * We don't want this to fail in ocfs2_write_end(), so do it
++ * here.
++ */
++ ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+- goto out_meta;
++ goto out_commit;
+ }
+
+- credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1);
++ /*
++ * Fill our page array first. That way we've grabbed enough so
++ * that we can zero and flush if we error after adding the
++ * extent.
++ */
++ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
++ clusters_to_alloc + extents_to_split,
++ mmap_page);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_commit;
+ }
+
+- ret = ocfs2_data_lock(inode, 1);
++ ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
++ len);
+ if (ret) {
+ mlog_errno(ret);
+- goto out_meta;
++ goto out_commit;
+ }
+
+- handle = ocfs2_start_trans(osb, credits);
+- if (IS_ERR(handle)) {
+- ret = PTR_ERR(handle);
++ if (data_ac)
++ ocfs2_free_alloc_context(data_ac);
++ if (meta_ac)
++ ocfs2_free_alloc_context(meta_ac);
++
++ *pagep = wc->w_target_page;
++ *fsdata = wc;
++ return 0;
++out_commit:
++ ocfs2_commit_trans(osb, handle);
++
++out:
++ ocfs2_free_write_ctxt(wc);
++
++ if (data_ac)
++ ocfs2_free_alloc_context(data_ac);
++ if (meta_ac)
++ ocfs2_free_alloc_context(meta_ac);
++ return ret;
++}
++
++int ocfs2_write_begin(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata)
++{
++ int ret;
++ struct buffer_head *di_bh = NULL;
++ struct inode *inode = mapping->host;
++
++ ret = ocfs2_meta_lock(inode, &di_bh, 1);
++ if (ret) {
+ mlog_errno(ret);
+- goto out_data;
++ return ret;
+ }
+
+- written = ocfs2_write(file, phys, handle, di_bh, data_ac,
+- meta_ac, &wc);
+- if (written < 0) {
+- ret = written;
++ /*
++ * Take alloc sem here to prevent concurrent lookups. That way
++ * the mapping, zeroing and tree manipulation within
++ * ocfs2_write() will be safe against ->readpage(). This
++ * should also serve to lock out allocation from a shared
++ * writeable region.
++ */
++ down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++ ret = ocfs2_data_lock(inode, 1);
++ if (ret) {
+ mlog_errno(ret);
+- goto out_commit;
++ goto out_fail;
+ }
+
+- ret = ocfs2_journal_access(handle, inode, di_bh,
+- OCFS2_JOURNAL_ACCESS_WRITE);
++ ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
++ fsdata, di_bh, NULL);
+ if (ret) {
+ mlog_errno(ret);
+- goto out_commit;
++ goto out_fail_data;
++ }
++
++ brelse(di_bh);
++
++ return 0;
++
++out_fail_data:
++ ocfs2_data_unlock(inode, 1);
++out_fail:
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++ brelse(di_bh);
++ ocfs2_meta_unlock(inode, 1);
++
++ return ret;
++}
++
++int ocfs2_write_end_nolock(struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata)
++{
++ int i;
++ unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
++ struct inode *inode = mapping->host;
++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++ struct ocfs2_write_ctxt *wc = fsdata;
++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
++ handle_t *handle = wc->w_handle;
++ struct page *tmppage;
++
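++	/*
++	 * A short copy can leave freshly allocated buffers with
++	 * uninitialized contents - zero everything past what the
++	 * user actually gave us so no stale data is exposed.
++	 */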
++ if (unlikely(copied < len)) {
++ if (!PageUptodate(wc->w_target_page))
++ copied = 0;
++
++ ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
++ start+len);
++ }
++ flush_dcache_page(wc->w_target_page);
++
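++	/*
++	 * Commit buffers page by page. The target page only commits
++	 * the range computed during write_begin.
++	 */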
++ for(i = 0; i < wc->w_num_pages; i++) {
++ tmppage = wc->w_pages[i];
++
++ if (tmppage == wc->w_target_page) {
++ from = wc->w_target_from;
++ to = wc->w_target_to;
++
++ BUG_ON(from > PAGE_CACHE_SIZE ||
++ to > PAGE_CACHE_SIZE ||
++ to < from);
++ } else {
++ /*
++ * Pages adjacent to the target (if any) imply
++ * a hole-filling write in which case we want
++ * to flush their entire range.
++ */
++ from = 0;
++ to = PAGE_CACHE_SIZE;
++ }
++
++ if (ocfs2_should_order_data(inode))
++ walk_page_buffers(wc->w_handle, page_buffers(tmppage),
++ from, to, NULL,
++ ocfs2_journal_dirty_data);
++
++ block_commit_write(tmppage, from, to);
+ }
+
+- pos += written;
++ pos += copied;
+ if (pos > inode->i_size) {
+ i_size_write(inode, pos);
+ mark_inode_dirty(inode);
+@@ -1306,28 +1604,31 @@
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+
+- ret = ocfs2_journal_dirty(handle, di_bh);
+- if (ret)
+- mlog_errno(ret);
++ ocfs2_journal_dirty(handle, wc->w_di_bh);
+
+-out_commit:
+ ocfs2_commit_trans(osb, handle);
+
+-out_data:
+- ocfs2_data_unlock(inode, 1);
++ ocfs2_run_deallocs(osb, &wc->w_dealloc);
++
++ ocfs2_free_write_ctxt(wc);
++
++ return copied;
++}
++
++int ocfs2_write_end(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata)
++{
++ int ret;
++ struct inode *inode = mapping->host;
++
++ ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+
+-out_meta:
++ ocfs2_data_unlock(inode, 1);
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ ocfs2_meta_unlock(inode, 1);
+
+-out:
+- brelse(di_bh);
+- if (data_ac)
+- ocfs2_free_alloc_context(data_ac);
+- if (meta_ac)
+- ocfs2_free_alloc_context(meta_ac);
+-
+- return written ? written : ret;
++ return ret;
+ }
+
+ const struct address_space_operations ocfs2_aops = {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.h linux-2.6.22-591/fs/ocfs2/aops.h
+--- linux-2.6.22-570/fs/ocfs2/aops.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/aops.h 2007-12-21 15:36:12.000000000 -0500
+@@ -42,57 +42,22 @@
+ int (*fn)( handle_t *handle,
+ struct buffer_head *bh));
+
+-struct ocfs2_write_ctxt;
+-typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
+- u64 *, unsigned int *, unsigned int *);
+-
+-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
+- size_t count, ocfs2_page_writer *actor,
+- void *priv);
+-
+-struct ocfs2_write_ctxt {
+- size_t w_count;
+- loff_t w_pos;
+- u32 w_cpos;
+- unsigned int w_finished_copy;
+-
+- /* This is true if page_size > cluster_size */
+- unsigned int w_large_pages;
+-
+- /* Filler callback and private data */
+- ocfs2_page_writer *w_write_data_page;
+- void *w_private;
+-
+- /* Only valid for the filler callback */
+- struct page *w_this_page;
+- unsigned int w_this_page_new;
+-};
+-
+-struct ocfs2_buffered_write_priv {
+- char *b_src_buf;
+- const struct iovec *b_cur_iov; /* Current iovec */
+- size_t b_cur_off; /* Offset in the
+- * current iovec */
+-};
+-int ocfs2_map_and_write_user_data(struct inode *inode,
+- struct ocfs2_write_ctxt *wc,
+- u64 *p_blkno,
+- unsigned int *ret_from,
+- unsigned int *ret_to);
+-
+-struct ocfs2_splice_write_priv {
+- struct splice_desc *s_sd;
+- struct pipe_buffer *s_buf;
+- struct pipe_inode_info *s_pipe;
+- /* Neither offset value is ever larger than one page */
+- unsigned int s_offset;
+- unsigned int s_buf_offset;
+-};
+-int ocfs2_map_and_write_splice_data(struct inode *inode,
+- struct ocfs2_write_ctxt *wc,
+- u64 *p_blkno,
+- unsigned int *ret_from,
+- unsigned int *ret_to);
++int ocfs2_write_begin(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata);
++
++int ocfs2_write_end(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata);
++
++int ocfs2_write_end_nolock(struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata);
++
++int ocfs2_write_begin_nolock(struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata,
++ struct buffer_head *di_bh, struct page *mmap_page);
+
+ /* all ocfs2_dio_end_io()'s fault */
+ #define ocfs2_iocb_is_rw_locked(iocb) \
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1335,6 +1335,7 @@
+ ret = wait_event_interruptible(o2hb_steady_queue,
+ 				atomic_read(&reg->hr_steady_iterations) == 0);
+ if (ret) {
++ /* We got interrupted (hello ptrace!). Clean up */
+ spin_lock(&o2hb_live_lock);
+ hb_task = reg->hr_task;
+ reg->hr_task = NULL;
+@@ -1345,7 +1346,16 @@
+ goto out;
+ }
+
++ /* Ok, we were woken. Make sure it wasn't by drop_item() */
++ spin_lock(&o2hb_live_lock);
++ hb_task = reg->hr_task;
++ spin_unlock(&o2hb_live_lock);
++
++ if (hb_task)
+ ret = count;
++ else
++ ret = -EIO;
++
+ out:
+ if (filp)
+ fput(filp);
+@@ -1523,6 +1533,15 @@
+ if (hb_task)
+ kthread_stop(hb_task);
+
++ /*
++ * If we're racing a dev_write(), we need to wake them. They will
++ * check reg->hr_task
++ */
++	if (atomic_read(&reg->hr_steady_iterations) != 0) {
++		atomic_set(&reg->hr_steady_iterations, 0);
++ wake_up(&o2hb_steady_queue);
++ }
++
+ config_item_put(item);
+ }
+
+@@ -1665,7 +1684,67 @@
+ }
+ EXPORT_SYMBOL_GPL(o2hb_setup_callback);
+
+-int o2hb_register_callback(struct o2hb_callback_func *hc)
++static struct o2hb_region *o2hb_find_region(const char *region_uuid)
++{
++ struct o2hb_region *p, *reg = NULL;
++
++ assert_spin_locked(&o2hb_live_lock);
++
++ list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
++ if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
++ reg = p;
++ break;
++ }
++ }
++
++ return reg;
++}
++
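++/*
++ * Pin the local node and the given region in configfs so that
++ * neither can be removed while the filesystem depends on this
++ * heartbeat region.
++ */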
++static int o2hb_region_get(const char *region_uuid)
++{
++ int ret = 0;
++ struct o2hb_region *reg;
++
++ spin_lock(&o2hb_live_lock);
++
++ reg = o2hb_find_region(region_uuid);
++ if (!reg)
++ ret = -ENOENT;
++ spin_unlock(&o2hb_live_lock);
++
++ if (ret)
++ goto out;
++
++ ret = o2nm_depend_this_node();
++ if (ret)
++ goto out;
++
++	ret = o2nm_depend_item(&reg->hr_item);
++ if (ret)
++ o2nm_undepend_this_node();
++
++out:
++ return ret;
++}
++
++static void o2hb_region_put(const char *region_uuid)
++{
++ struct o2hb_region *reg;
++
++ spin_lock(&o2hb_live_lock);
++
++ reg = o2hb_find_region(region_uuid);
++
++ spin_unlock(&o2hb_live_lock);
++
++ if (reg) {
++		o2nm_undepend_item(&reg->hr_item);
++ o2nm_undepend_this_node();
++ }
++}
++
++int o2hb_register_callback(const char *region_uuid,
++ struct o2hb_callback_func *hc)
+ {
+ struct o2hb_callback_func *tmp;
+ struct list_head *iter;
+@@ -1681,6 +1760,12 @@
+ goto out;
+ }
+
++ if (region_uuid) {
++ ret = o2hb_region_get(region_uuid);
++ if (ret)
++ goto out;
++ }
++
+ down_write(&o2hb_callback_sem);
+
+ list_for_each(iter, &hbcall->list) {
+@@ -1702,16 +1787,21 @@
+ }
+ EXPORT_SYMBOL_GPL(o2hb_register_callback);
+
+-void o2hb_unregister_callback(struct o2hb_callback_func *hc)
++void o2hb_unregister_callback(const char *region_uuid,
++ struct o2hb_callback_func *hc)
+ {
+ BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
+
+ mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ __builtin_return_address(0), hc);
+
++ /* XXX Can this happen _with_ a region reference? */
+ if (list_empty(&hc->hc_item))
+ return;
+
++ if (region_uuid)
++ o2hb_region_put(region_uuid);
++
+ down_write(&o2hb_callback_sem);
+
+ list_del_init(&hc->hc_item);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h
+--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h 2007-12-21 15:36:12.000000000 -0500
+@@ -69,8 +69,10 @@
+ o2hb_cb_func *func,
+ void *data,
+ int priority);
+-int o2hb_register_callback(struct o2hb_callback_func *hc);
+-void o2hb_unregister_callback(struct o2hb_callback_func *hc);
++int o2hb_register_callback(const char *region_uuid,
++ struct o2hb_callback_func *hc);
++void o2hb_unregister_callback(const char *region_uuid,
++ struct o2hb_callback_func *hc);
+ void o2hb_fill_node_map(unsigned long *map,
+ unsigned bytes);
+ void o2hb_init(void);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/masklog.c linux-2.6.22-591/fs/ocfs2/cluster/masklog.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/masklog.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/masklog.c 2007-12-21 15:36:12.000000000 -0500
+@@ -74,7 +74,6 @@
+ #define define_mask(_name) { \
+ .attr = { \
+ .name = #_name, \
+- .owner = THIS_MODULE, \
+ .mode = S_IRUGO | S_IWUSR, \
+ }, \
+ .mask = ML_##_name, \
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c 2007-12-21 15:36:12.000000000 -0500
+@@ -900,6 +900,46 @@
+ },
+ };
+
++int o2nm_depend_item(struct config_item *item)
++{
++ return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
++}
++
++void o2nm_undepend_item(struct config_item *item)
++{
++ configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
++}
++
++int o2nm_depend_this_node(void)
++{
++ int ret = 0;
++ struct o2nm_node *local_node;
++
++ local_node = o2nm_get_node_by_num(o2nm_this_node());
++ if (!local_node) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ ret = o2nm_depend_item(&local_node->nd_item);
++ o2nm_node_put(local_node);
++
++out:
++ return ret;
++}
++
++void o2nm_undepend_this_node(void)
++{
++ struct o2nm_node *local_node;
++
++ local_node = o2nm_get_node_by_num(o2nm_this_node());
++ BUG_ON(!local_node);
++
++ o2nm_undepend_item(&local_node->nd_item);
++ o2nm_node_put(local_node);
++}
++
+ static void __exit exit_o2nm(void)
+ {
+ if (ocfs2_table_header)
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h
+--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h 2007-12-21 15:36:12.000000000 -0500
+@@ -77,4 +77,9 @@
+ void o2nm_node_get(struct o2nm_node *node);
+ void o2nm_node_put(struct o2nm_node *node);
+
++int o2nm_depend_item(struct config_item *item);
++void o2nm_undepend_item(struct config_item *item);
++int o2nm_depend_this_node(void);
++void o2nm_undepend_this_node(void);
++
+ #endif /* O2CLUSTER_NODEMANAGER_H */
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/tcp.c linux-2.6.22-591/fs/ocfs2/cluster/tcp.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/tcp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/cluster/tcp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -261,14 +261,12 @@
+
+ static void o2net_complete_nodes_nsw(struct o2net_node *nn)
+ {
+- struct list_head *iter, *tmp;
++ struct o2net_status_wait *nsw, *tmp;
+ unsigned int num_kills = 0;
+- struct o2net_status_wait *nsw;
+
+ assert_spin_locked(&nn->nn_lock);
+
+- list_for_each_safe(iter, tmp, &nn->nn_status_list) {
+- nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
++ list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
+ o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
+ num_kills++;
+ }
+@@ -764,13 +762,10 @@
+
+ void o2net_unregister_handler_list(struct list_head *list)
+ {
+- struct list_head *pos, *n;
+- struct o2net_msg_handler *nmh;
++ struct o2net_msg_handler *nmh, *n;
+
+ write_lock(&o2net_handler_lock);
+- list_for_each_safe(pos, n, list) {
+- nmh = list_entry(pos, struct o2net_msg_handler,
+- nh_unregister_item);
++ list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
+ mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
+ nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
+ rb_erase(&nmh->nh_node, &o2net_handler_tree);
+@@ -1638,8 +1633,8 @@
+
+ void o2net_unregister_hb_callbacks(void)
+ {
+- o2hb_unregister_callback(&o2net_hb_up);
+- o2hb_unregister_callback(&o2net_hb_down);
++ o2hb_unregister_callback(NULL, &o2net_hb_up);
++ o2hb_unregister_callback(NULL, &o2net_hb_down);
+ }
+
+ int o2net_register_hb_callbacks(void)
+@@ -1651,9 +1646,9 @@
+ o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
+ o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
+
+- ret = o2hb_register_callback(&o2net_hb_up);
++ ret = o2hb_register_callback(NULL, &o2net_hb_up);
+ if (ret == 0)
+- ret = o2hb_register_callback(&o2net_hb_down);
++ ret = o2hb_register_callback(NULL, &o2net_hb_down);
+
+ if (ret)
+ o2net_unregister_hb_callbacks();
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dir.c linux-2.6.22-591/fs/ocfs2/dir.c
+--- linux-2.6.22-570/fs/ocfs2/dir.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/dir.c 2007-12-21 15:36:12.000000000 -0500
+@@ -368,7 +368,7 @@
+ u32 offset = OCFS2_I(dir)->ip_clusters;
+
+ status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
+- 1, parent_fe_bh, handle,
++ 1, 0, parent_fe_bh, handle,
+ data_ac, meta_ac, NULL);
+ BUG_ON(status == -EAGAIN);
+ if (status < 0) {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1128,8 +1128,8 @@
+
+ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
+ {
+- o2hb_unregister_callback(&dlm->dlm_hb_up);
+- o2hb_unregister_callback(&dlm->dlm_hb_down);
++ o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
++ o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+ o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
+ }
+
+@@ -1141,13 +1141,13 @@
+
+ o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
+ dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
+- status = o2hb_register_callback(&dlm->dlm_hb_down);
++ status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+ if (status)
+ goto bail;
+
+ o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
+ dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
+- status = o2hb_register_callback(&dlm->dlm_hb_up);
++ status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+ if (status)
+ goto bail;
+
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c 2007-12-21 15:36:12.000000000 -0500
+@@ -192,25 +192,20 @@
+ static void dlm_dump_mles(struct dlm_ctxt *dlm)
+ {
+ struct dlm_master_list_entry *mle;
+- struct list_head *iter;
+
+ mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
+ spin_lock(&dlm->master_lock);
+- list_for_each(iter, &dlm->master_list) {
+- mle = list_entry(iter, struct dlm_master_list_entry, list);
++ list_for_each_entry(mle, &dlm->master_list, list)
+ dlm_print_one_mle(mle);
+- }
+ spin_unlock(&dlm->master_lock);
+ }
+
+ int dlm_dump_all_mles(const char __user *data, unsigned int len)
+ {
+- struct list_head *iter;
+ struct dlm_ctxt *dlm;
+
+ spin_lock(&dlm_domain_lock);
+- list_for_each(iter, &dlm_domains) {
+- dlm = list_entry (iter, struct dlm_ctxt, list);
++ list_for_each_entry(dlm, &dlm_domains, list) {
+ mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
+ dlm_dump_mles(dlm);
+ }
+@@ -454,12 +449,10 @@
+ char *name, unsigned int namelen)
+ {
+ struct dlm_master_list_entry *tmpmle;
+- struct list_head *iter;
+
+ assert_spin_locked(&dlm->master_lock);
+
+- list_for_each(iter, &dlm->master_list) {
+- tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
++ list_for_each_entry(tmpmle, &dlm->master_list, list) {
+ if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
+ continue;
+ dlm_get_mle(tmpmle);
+@@ -472,13 +465,10 @@
+ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
+ {
+ struct dlm_master_list_entry *mle;
+- struct list_head *iter;
+
+ assert_spin_locked(&dlm->spinlock);
+
+- list_for_each(iter, &dlm->mle_hb_events) {
+- mle = list_entry(iter, struct dlm_master_list_entry,
+- hb_events);
++ list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
+ if (node_up)
+ dlm_mle_node_up(dlm, mle, NULL, idx);
+ else
+@@ -2434,7 +2424,7 @@
+ int ret;
+ int i;
+ int count = 0;
+- struct list_head *queue, *iter;
++ struct list_head *queue;
+ struct dlm_lock *lock;
+
+ assert_spin_locked(&res->spinlock);
+@@ -2453,8 +2443,7 @@
+ ret = 0;
+ queue = &res->granted;
+ for (i = 0; i < 3; i++) {
+- list_for_each(iter, queue) {
+- lock = list_entry(iter, struct dlm_lock, list);
++ list_for_each_entry(lock, queue, list) {
+ ++count;
+ if (lock->ml.node == dlm->node_num) {
+ mlog(0, "found a lock owned by this node still "
+@@ -2923,18 +2912,16 @@
+ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+ {
+- struct list_head *iter, *iter2;
+ struct list_head *queue = &res->granted;
+ int i, bit;
+- struct dlm_lock *lock;
++ struct dlm_lock *lock, *next;
+
+ assert_spin_locked(&res->spinlock);
+
+ BUG_ON(res->owner == dlm->node_num);
+
+ for (i=0; i<3; i++) {
+- list_for_each_safe(iter, iter2, queue) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry_safe(lock, next, queue, list) {
+ if (lock->ml.node != dlm->node_num) {
+ mlog(0, "putting lock for node %u\n",
+ lock->ml.node);
+@@ -2976,7 +2963,6 @@
+ {
+ int i;
+ struct list_head *queue = &res->granted;
+- struct list_head *iter;
+ struct dlm_lock *lock;
+ int nodenum;
+
+@@ -2984,10 +2970,9 @@
+
+ spin_lock(&res->spinlock);
+ for (i=0; i<3; i++) {
+- list_for_each(iter, queue) {
++ list_for_each_entry(lock, queue, list) {
+ /* up to the caller to make sure this node
+ * is alive */
+- lock = list_entry (iter, struct dlm_lock, list);
+ if (lock->ml.node != dlm->node_num) {
+ spin_unlock(&res->spinlock);
+ return lock->ml.node;
+@@ -3234,8 +3219,7 @@
+
+ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
+ {
+- struct list_head *iter, *iter2;
+- struct dlm_master_list_entry *mle;
++ struct dlm_master_list_entry *mle, *next;
+ struct dlm_lock_resource *res;
+ unsigned int hash;
+
+@@ -3245,9 +3229,7 @@
+
+ /* clean the master list */
+ spin_lock(&dlm->master_lock);
+- list_for_each_safe(iter, iter2, &dlm->master_list) {
+- mle = list_entry(iter, struct dlm_master_list_entry, list);
+-
++ list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
+ BUG_ON(mle->type != DLM_MLE_BLOCK &&
+ mle->type != DLM_MLE_MASTER &&
+ mle->type != DLM_MLE_MIGRATION);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c 2007-12-21 15:36:12.000000000 -0500
+@@ -158,8 +158,7 @@
+ struct dlm_ctxt *dlm =
+ container_of(work, struct dlm_ctxt, dispatched_work);
+ LIST_HEAD(tmp_list);
+- struct list_head *iter, *iter2;
+- struct dlm_work_item *item;
++ struct dlm_work_item *item, *next;
+ dlm_workfunc_t *workfunc;
+ int tot=0;
+
+@@ -167,13 +166,12 @@
+ list_splice_init(&dlm->work_list, &tmp_list);
+ spin_unlock(&dlm->work_lock);
+
+- list_for_each_safe(iter, iter2, &tmp_list) {
++ list_for_each_entry(item, &tmp_list, list) {
+ tot++;
+ }
+ mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
+
+- list_for_each_safe(iter, iter2, &tmp_list) {
+- item = list_entry(iter, struct dlm_work_item, list);
++ list_for_each_entry_safe(item, next, &tmp_list, list) {
+ workfunc = item->func;
+ list_del_init(&item->list);
+
+@@ -549,7 +547,6 @@
+ {
+ int status = 0;
+ struct dlm_reco_node_data *ndata;
+- struct list_head *iter;
+ int all_nodes_done;
+ int destroy = 0;
+ int pass = 0;
+@@ -567,8 +564,7 @@
+
+ /* safe to access the node data list without a lock, since this
+ * process is the only one to change the list */
+- list_for_each(iter, &dlm->reco.node_data) {
+- ndata = list_entry (iter, struct dlm_reco_node_data, list);
++ list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
+ ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
+
+@@ -655,9 +651,7 @@
+ * done, or if anyone died */
+ all_nodes_done = 1;
+ spin_lock(&dlm_reco_state_lock);
+- list_for_each(iter, &dlm->reco.node_data) {
+- ndata = list_entry (iter, struct dlm_reco_node_data, list);
+-
++ list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ mlog(0, "checking recovery state of node %u\n",
+ ndata->node_num);
+ switch (ndata->state) {
+@@ -774,16 +768,14 @@
+
+ static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
+ {
+- struct list_head *iter, *iter2;
+- struct dlm_reco_node_data *ndata;
++ struct dlm_reco_node_data *ndata, *next;
+ LIST_HEAD(tmplist);
+
+ spin_lock(&dlm_reco_state_lock);
+ list_splice_init(&dlm->reco.node_data, &tmplist);
+ spin_unlock(&dlm_reco_state_lock);
+
+- list_for_each_safe(iter, iter2, &tmplist) {
+- ndata = list_entry (iter, struct dlm_reco_node_data, list);
++ list_for_each_entry_safe(ndata, next, &tmplist, list) {
+ list_del_init(&ndata->list);
+ kfree(ndata);
+ }
+@@ -876,7 +868,6 @@
+ struct dlm_lock_resource *res;
+ struct dlm_ctxt *dlm;
+ LIST_HEAD(resources);
+- struct list_head *iter;
+ int ret;
+ u8 dead_node, reco_master;
+ int skip_all_done = 0;
+@@ -920,8 +911,7 @@
+
+ /* any errors returned will be due to the new_master dying,
+ * the dlm_reco_thread should detect this */
+- list_for_each(iter, &resources) {
+- res = list_entry (iter, struct dlm_lock_resource, recovering);
++ list_for_each_entry(res, &resources, recovering) {
+ ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
+ DLM_MRES_RECOVERY);
+ if (ret < 0) {
+@@ -983,7 +973,6 @@
+ {
+ struct dlm_ctxt *dlm = data;
+ struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
+- struct list_head *iter;
+ struct dlm_reco_node_data *ndata = NULL;
+ int ret = -EINVAL;
+
+@@ -1000,8 +989,7 @@
+ dlm->reco.dead_node, done->node_idx, dlm->node_num);
+
+ spin_lock(&dlm_reco_state_lock);
+- list_for_each(iter, &dlm->reco.node_data) {
+- ndata = list_entry (iter, struct dlm_reco_node_data, list);
++ list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ if (ndata->node_num != done->node_idx)
+ continue;
+
+@@ -1049,13 +1037,11 @@
+ struct list_head *list,
+ u8 dead_node)
+ {
+- struct dlm_lock_resource *res;
+- struct list_head *iter, *iter2;
++ struct dlm_lock_resource *res, *next;
+ struct dlm_lock *lock;
+
+ spin_lock(&dlm->spinlock);
+- list_for_each_safe(iter, iter2, &dlm->reco.resources) {
+- res = list_entry (iter, struct dlm_lock_resource, recovering);
++ list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
+ /* always prune any $RECOVERY entries for dead nodes,
+ * otherwise hangs can occur during later recovery */
+ if (dlm_is_recovery_lock(res->lockname.name,
+@@ -1169,7 +1155,7 @@
+ u8 flags, u8 master)
+ {
+ /* mres here is one full page */
+- memset(mres, 0, PAGE_SIZE);
++ clear_page(mres);
+ mres->lockname_len = namelen;
+ memcpy(mres->lockname, lockname, namelen);
+ mres->num_locks = 0;
+@@ -1252,7 +1238,7 @@
+ struct dlm_migratable_lockres *mres,
+ u8 send_to, u8 flags)
+ {
+- struct list_head *queue, *iter;
++ struct list_head *queue;
+ int total_locks, i;
+ u64 mig_cookie = 0;
+ struct dlm_lock *lock;
+@@ -1278,9 +1264,7 @@
+ total_locks = 0;
+ for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
+ queue = dlm_list_idx_to_ptr(res, i);
+- list_for_each(iter, queue) {
+- lock = list_entry (iter, struct dlm_lock, list);
+-
++ list_for_each_entry(lock, queue, list) {
+ /* add another lock. */
+ total_locks++;
+ if (!dlm_add_lock_to_array(lock, mres, i))
+@@ -1717,7 +1701,6 @@
+ struct dlm_lockstatus *lksb = NULL;
+ int ret = 0;
+ int i, j, bad;
+- struct list_head *iter;
+ struct dlm_lock *lock = NULL;
+ u8 from = O2NM_MAX_NODES;
+ unsigned int added = 0;
+@@ -1755,8 +1738,7 @@
+ spin_lock(&res->spinlock);
+ for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+ tmpq = dlm_list_idx_to_ptr(res, j);
+- list_for_each(iter, tmpq) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry(lock, tmpq, list) {
+ if (lock->ml.cookie != ml->cookie)
+ lock = NULL;
+ else
+@@ -1930,8 +1912,8 @@
+ struct dlm_lock_resource *res)
+ {
+ int i;
+- struct list_head *queue, *iter, *iter2;
+- struct dlm_lock *lock;
++ struct list_head *queue;
++ struct dlm_lock *lock, *next;
+
+ res->state |= DLM_LOCK_RES_RECOVERING;
+ if (!list_empty(&res->recovering)) {
+@@ -1947,8 +1929,7 @@
+ /* find any pending locks and put them back on proper list */
+ for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
+ queue = dlm_list_idx_to_ptr(res, i);
+- list_for_each_safe(iter, iter2, queue) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry_safe(lock, next, queue, list) {
+ dlm_lock_get(lock);
+ if (lock->convert_pending) {
+ /* move converting lock back to granted */
+@@ -2013,18 +1994,15 @@
+ u8 dead_node, u8 new_master)
+ {
+ int i;
+- struct list_head *iter, *iter2;
+ struct hlist_node *hash_iter;
+ struct hlist_head *bucket;
+-
+- struct dlm_lock_resource *res;
++ struct dlm_lock_resource *res, *next;
+
+ mlog_entry_void();
+
+ assert_spin_locked(&dlm->spinlock);
+
+- list_for_each_safe(iter, iter2, &dlm->reco.resources) {
+- res = list_entry (iter, struct dlm_lock_resource, recovering);
++ list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
+ if (res->owner == dead_node) {
+ list_del_init(&res->recovering);
+ spin_lock(&res->spinlock);
+@@ -2099,7 +2077,7 @@
+ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res, u8 dead_node)
+ {
+- struct list_head *iter, *queue;
++ struct list_head *queue;
+ struct dlm_lock *lock;
+ int blank_lvb = 0, local = 0;
+ int i;
+@@ -2121,8 +2099,7 @@
+
+ for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
+ queue = dlm_list_idx_to_ptr(res, i);
+- list_for_each(iter, queue) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry(lock, queue, list) {
+ if (lock->ml.node == search_node) {
+ if (dlm_lvb_needs_invalidation(lock, local)) {
+ /* zero the lksb lvb and lockres lvb */
+@@ -2143,8 +2120,7 @@
+ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res, u8 dead_node)
+ {
+- struct list_head *iter, *tmpiter;
+- struct dlm_lock *lock;
++ struct dlm_lock *lock, *next;
+ unsigned int freed = 0;
+
+ /* this node is the lockres master:
+@@ -2155,24 +2131,21 @@
+ assert_spin_locked(&res->spinlock);
+
+ /* TODO: check pending_asts, pending_basts here */
+- list_for_each_safe(iter, tmpiter, &res->granted) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry_safe(lock, next, &res->granted, list) {
+ if (lock->ml.node == dead_node) {
+ list_del_init(&lock->list);
+ dlm_lock_put(lock);
+ freed++;
+ }
+ }
+- list_for_each_safe(iter, tmpiter, &res->converting) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry_safe(lock, next, &res->converting, list) {
+ if (lock->ml.node == dead_node) {
+ list_del_init(&lock->list);
+ dlm_lock_put(lock);
+ freed++;
+ }
+ }
+- list_for_each_safe(iter, tmpiter, &res->blocked) {
+- lock = list_entry (iter, struct dlm_lock, list);
++ list_for_each_entry_safe(lock, next, &res->blocked, list) {
+ if (lock->ml.node == dead_node) {
+ list_del_init(&lock->list);
+ dlm_lock_put(lock);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlmglue.c linux-2.6.22-591/fs/ocfs2/dlmglue.c
+--- linux-2.6.22-570/fs/ocfs2/dlmglue.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/dlmglue.c 2007-12-21 15:36:12.000000000 -0500
+@@ -600,15 +600,13 @@
+ static void lockres_set_flags(struct ocfs2_lock_res *lockres,
+ unsigned long newflags)
+ {
+- struct list_head *pos, *tmp;
+- struct ocfs2_mask_waiter *mw;
++ struct ocfs2_mask_waiter *mw, *tmp;
+
+ assert_spin_locked(&lockres->l_lock);
+
+ lockres->l_flags = newflags;
+
+- list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
+- mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
++ list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
+ if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
+ continue;
+
+diff -Nurb linux-2.6.22-570/fs/ocfs2/endian.h linux-2.6.22-591/fs/ocfs2/endian.h
+--- linux-2.6.22-570/fs/ocfs2/endian.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/endian.h 2007-12-21 15:36:12.000000000 -0500
+@@ -32,6 +32,11 @@
+ *var = cpu_to_le32(le32_to_cpu(*var) + val);
+ }
+
++static inline void le64_add_cpu(__le64 *var, u64 val)
++{
++ *var = cpu_to_le64(le64_to_cpu(*var) + val);
++}
++
+ static inline void le32_and_cpu(__le32 *var, u32 val)
+ {
+ *var = cpu_to_le32(le32_to_cpu(*var) & val);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/extent_map.c linux-2.6.22-591/fs/ocfs2/extent_map.c
+--- linux-2.6.22-570/fs/ocfs2/extent_map.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/extent_map.c 2007-12-21 15:36:12.000000000 -0500
+@@ -109,17 +109,14 @@
+ */
+ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
+ {
+- struct list_head *p, *n;
+- struct ocfs2_extent_map_item *emi;
++ struct ocfs2_extent_map_item *emi, *n;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_extent_map *em = &oi->ip_extent_map;
+ LIST_HEAD(tmp_list);
+ unsigned int range;
+
+ spin_lock(&oi->ip_lock);
+- list_for_each_safe(p, n, &em->em_list) {
+- emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
+-
++ list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
+ if (emi->ei_cpos >= cpos) {
+ /* Full truncate of this record. */
+ list_move(&emi->ei_list, &tmp_list);
+@@ -136,8 +133,7 @@
+ }
+ spin_unlock(&oi->ip_lock);
+
+- list_for_each_safe(p, n, &tmp_list) {
+- emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
++ list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
+ list_del(&emi->ei_list);
+ kfree(emi);
+ }
+@@ -377,37 +373,6 @@
+ return ret;
+ }
+
+-/*
+- * Return the index of the extent record which contains cluster #v_cluster.
+- * -1 is returned if it was not found.
+- *
+- * Should work fine on interior and exterior nodes.
+- */
+-static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
+- u32 v_cluster)
+-{
+- int ret = -1;
+- int i;
+- struct ocfs2_extent_rec *rec;
+- u32 rec_end, rec_start, clusters;
+-
+- for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
+- rec = &el->l_recs[i];
+-
+- rec_start = le32_to_cpu(rec->e_cpos);
+- clusters = ocfs2_rec_clusters(el, rec);
+-
+- rec_end = rec_start + clusters;
+-
+- if (v_cluster >= rec_start && v_cluster < rec_end) {
+- ret = i;
+- break;
+- }
+- }
+-
+- return ret;
+-}
+-
+ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
+ u32 *p_cluster, u32 *num_clusters,
+ unsigned int *extent_flags)
+diff -Nurb linux-2.6.22-570/fs/ocfs2/file.c linux-2.6.22-591/fs/ocfs2/file.c
+--- linux-2.6.22-570/fs/ocfs2/file.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/file.c 2007-12-21 15:36:12.000000000 -0500
+@@ -326,9 +326,6 @@
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)new_i_size);
+
+- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+- truncate_inode_pages(inode->i_mapping, new_i_size);
+-
+ fe = (struct ocfs2_dinode *) di_bh->b_data;
+ if (!OCFS2_IS_VALID_DINODE(fe)) {
+ OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+@@ -363,16 +360,23 @@
+ if (new_i_size == le64_to_cpu(fe->i_size))
+ goto bail;
+
++ down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ /* This forces other nodes to sync and drop their pages. Do
+ * this even if we have a truncate without allocation change -
+ * ocfs2 cluster sizes can be much greater than page size, so
+ * we have to truncate them anyway. */
+ status = ocfs2_data_lock(inode, 1);
+ if (status < 0) {
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ mlog_errno(status);
+ goto bail;
+ }
+
++ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
++ truncate_inode_pages(inode->i_mapping, new_i_size);
++
+ /* alright, we're going to need to do a full blown alloc size
+ * change. Orphan the inode so that recovery can complete the
+ * truncate if necessary. This does the task of marking
+@@ -399,6 +403,8 @@
+ bail_unlock_data:
+ ocfs2_data_unlock(inode, 1);
+
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ bail:
+
+ mlog_exit(status);
+@@ -419,6 +425,7 @@
+ struct inode *inode,
+ u32 *logical_offset,
+ u32 clusters_to_add,
++ int mark_unwritten,
+ struct buffer_head *fe_bh,
+ handle_t *handle,
+ struct ocfs2_alloc_context *data_ac,
+@@ -431,9 +438,13 @@
+ enum ocfs2_alloc_restarted reason = RESTART_NONE;
+ u32 bit_off, num_bits;
+ u64 block;
++ u8 flags = 0;
+
+ BUG_ON(!clusters_to_add);
+
++ if (mark_unwritten)
++ flags = OCFS2_EXT_UNWRITTEN;
++
+ free_extents = ocfs2_num_free_extents(osb, inode, fe);
+ if (free_extents < 0) {
+ status = free_extents;
+@@ -483,7 +494,7 @@
+ num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
+ *logical_offset, block, num_bits,
+- meta_ac);
++ flags, meta_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+@@ -516,25 +527,28 @@
+ * For a given allocation, determine which allocators will need to be
+ * accessed, and lock them, reserving the appropriate number of bits.
+ *
+- * Called from ocfs2_extend_allocation() for file systems which don't
+- * support holes, and from ocfs2_write() for file systems which
+- * understand sparse inodes.
++ * Sparse file systems call this from ocfs2_write_begin_nolock()
++ * and ocfs2_allocate_unwritten_extents().
++ *
++ * File systems which don't support holes call this from
++ * ocfs2_extend_allocation().
+ */
+ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
+- u32 clusters_to_add,
++ u32 clusters_to_add, u32 extents_to_split,
+ struct ocfs2_alloc_context **data_ac,
+ struct ocfs2_alloc_context **meta_ac)
+ {
+ int ret, num_free_extents;
++ unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ *meta_ac = NULL;
+ *data_ac = NULL;
+
+ mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
+- "clusters_to_add = %u\n",
++ "clusters_to_add = %u, extents_to_split = %u\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+- le32_to_cpu(di->i_clusters), clusters_to_add);
++ le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
+
+ num_free_extents = ocfs2_num_free_extents(osb, inode, di);
+ if (num_free_extents < 0) {
+@@ -552,9 +566,12 @@
+ *
+ * Most of the time we'll only be seeing this 1 cluster at a time
+ * anyway.
++ *
++ * Always lock for any unwritten extents - we might want to
++ * remove blocks for a merge.
+ */
+ if (!num_free_extents ||
+- (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) {
++ (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
+ ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+@@ -585,14 +602,13 @@
+ return ret;
+ }
+
+-static int ocfs2_extend_allocation(struct inode *inode,
+- u32 clusters_to_add)
++static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
++ u32 clusters_to_add, int mark_unwritten)
+ {
+ int status = 0;
+ int restart_func = 0;
+- int drop_alloc_sem = 0;
+ int credits;
+- u32 prev_clusters, logical_start;
++ u32 prev_clusters;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_dinode *fe = NULL;
+ handle_t *handle = NULL;
+@@ -607,7 +623,7 @@
+ * This function only exists for file systems which don't
+ * support holes.
+ */
+- BUG_ON(ocfs2_sparse_alloc(osb));
++ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
+
+ status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
+ OCFS2_BH_CACHED, inode);
+@@ -623,19 +639,10 @@
+ goto leave;
+ }
+
+- logical_start = OCFS2_I(inode)->ip_clusters;
+-
+ restart_all:
+ BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
+
+- /* blocks peope in read/write from reading our allocation
+- * until we're done changing it. We depend on i_mutex to block
+- * other extend/truncate calls while we're here. Ordering wrt
+- * start_trans is important here -- always do it before! */
+- down_write(&OCFS2_I(inode)->ip_alloc_sem);
+- drop_alloc_sem = 1;
+-
+- status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
++ status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
+ &meta_ac);
+ if (status) {
+ mlog_errno(status);
+@@ -668,6 +675,7 @@
+ inode,
+ &logical_start,
+ clusters_to_add,
++ mark_unwritten,
+ bh,
+ handle,
+ data_ac,
+@@ -720,10 +728,6 @@
+ OCFS2_I(inode)->ip_clusters, i_size_read(inode));
+
+ leave:
+- if (drop_alloc_sem) {
+- up_write(&OCFS2_I(inode)->ip_alloc_sem);
+- drop_alloc_sem = 0;
+- }
+ if (handle) {
+ ocfs2_commit_trans(osb, handle);
+ handle = NULL;
+@@ -749,6 +753,25 @@
+ return status;
+ }
+
++static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
++ u32 clusters_to_add, int mark_unwritten)
++{
++ int ret;
++
++ /*
++	 * The alloc sem blocks people in read/write from reading our
++ * allocation until we're done changing it. We depend on
++ * i_mutex to block other extend/truncate calls while we're
++ * here.
++ */
++ down_write(&OCFS2_I(inode)->ip_alloc_sem);
++ ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
++ mark_unwritten);
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++ return ret;
++}
++
+ /* Some parts of this taken from generic_cont_expand, which turned out
+ * to be too fragile to do exactly what we need without us having to
+ * worry about recursive locking in ->prepare_write() and
+@@ -890,7 +913,9 @@
+ }
+
+ if (clusters_to_add) {
+- ret = ocfs2_extend_allocation(inode, clusters_to_add);
++ ret = ocfs2_extend_allocation(inode,
++ OCFS2_I(inode)->ip_clusters,
++ clusters_to_add, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out_unlock;
+@@ -997,6 +1022,13 @@
+ goto bail_unlock;
+ }
+
++ /*
++ * This will intentionally not wind up calling vmtruncate(),
++ * since all the work for a size change has been done above.
++ * Otherwise, we could get into problems with truncate as
++ * ip_alloc_sem is used there to protect against i_size
++ * changes.
++ */
+ status = inode_setattr(inode, attr);
+ if (status < 0) {
+ mlog_errno(status);
+@@ -1072,17 +1104,16 @@
+ return ret;
+ }
+
+-static int ocfs2_write_remove_suid(struct inode *inode)
++static int __ocfs2_write_remove_suid(struct inode *inode,
++ struct buffer_head *bh)
+ {
+ int ret;
+- struct buffer_head *bh = NULL;
+- struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ handle_t *handle;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_dinode *di;
+
+ mlog_entry("(Inode %llu, mode 0%o)\n",
+- (unsigned long long)oi->ip_blkno, inode->i_mode);
++ (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
+
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (handle == NULL) {
+@@ -1091,17 +1122,11 @@
+ goto out;
+ }
+
+- ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
+- if (ret < 0) {
+- mlog_errno(ret);
+- goto out_trans;
+- }
+-
+ ret = ocfs2_journal_access(handle, inode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
+ mlog_errno(ret);
+- goto out_bh;
++ goto out_trans;
+ }
+
+ inode->i_mode &= ~S_ISUID;
+@@ -1114,8 +1139,7 @@
+ ret = ocfs2_journal_dirty(handle, bh);
+ if (ret < 0)
+ mlog_errno(ret);
+-out_bh:
+- brelse(bh);
++
+ out_trans:
+ ocfs2_commit_trans(osb, handle);
+ out:
+@@ -1161,6 +1185,211 @@
+ return ret;
+ }
+
++static int ocfs2_write_remove_suid(struct inode *inode)
++{
++ int ret;
++ struct buffer_head *bh = NULL;
++ struct ocfs2_inode_info *oi = OCFS2_I(inode);
++
++ ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
++ oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = __ocfs2_write_remove_suid(inode, bh);
++out:
++ brelse(bh);
++ return ret;
++}
++
++/*
++ * Allocate enough extents to cover the region starting at byte offset
++ * start for len bytes. Existing extents are skipped, any extents
++ * added are marked as "unwritten".
++ */
++static int ocfs2_allocate_unwritten_extents(struct inode *inode,
++ u64 start, u64 len)
++{
++ int ret;
++ u32 cpos, phys_cpos, clusters, alloc_size;
++
++ /*
++ * We consider both start and len to be inclusive.
++ */
++ cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
++ clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
++ clusters -= cpos;
++
++ while (clusters) {
++ ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
++ &alloc_size, NULL);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ /*
++ * Hole or existing extent len can be arbitrary, so
++ * cap it to our own allocation request.
++ */
++ if (alloc_size > clusters)
++ alloc_size = clusters;
++
++ if (phys_cpos) {
++ /*
++ * We already have an allocation at this
++ * region so we can safely skip it.
++ */
++ goto next;
++ }
++
++ ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
++ if (ret) {
++ if (ret != -ENOSPC)
++ mlog_errno(ret);
++ goto out;
++ }
++
++next:
++ cpos += alloc_size;
++ clusters -= alloc_size;
++ }
++
++ ret = 0;
++out:
++ return ret;
++}
++
++/*
++ * Parts of this function taken from xfs_change_file_space()
++ */
++int ocfs2_change_file_space(struct file *file, unsigned int cmd,
++ struct ocfs2_space_resv *sr)
++{
++ int ret;
++ s64 llen;
++ struct inode *inode = file->f_path.dentry->d_inode;
++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++ struct buffer_head *di_bh = NULL;
++ handle_t *handle;
++ unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
++
++ if (!ocfs2_writes_unwritten_extents(osb))
++ return -ENOTTY;
++
++ if (!S_ISREG(inode->i_mode))
++ return -EINVAL;
++
++ if (!(file->f_mode & FMODE_WRITE))
++ return -EBADF;
++
++ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
++ return -EROFS;
++
++ mutex_lock(&inode->i_mutex);
++
++ /*
++ * This prevents concurrent writes on other nodes
++ */
++ ret = ocfs2_rw_lock(inode, 1);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_meta_lock(inode, &di_bh, 1);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_rw_unlock;
++ }
++
++ if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
++ ret = -EPERM;
++ goto out_meta_unlock;
++ }
++
++ switch (sr->l_whence) {
++ case 0: /*SEEK_SET*/
++ break;
++ case 1: /*SEEK_CUR*/
++ sr->l_start += file->f_pos;
++ break;
++ case 2: /*SEEK_END*/
++ sr->l_start += i_size_read(inode);
++ break;
++ default:
++ ret = -EINVAL;
++ goto out_meta_unlock;
++ }
++ sr->l_whence = 0;
++
++ llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
++
++ if (sr->l_start < 0
++ || sr->l_start > max_off
++ || (sr->l_start + llen) < 0
++ || (sr->l_start + llen) > max_off) {
++ ret = -EINVAL;
++ goto out_meta_unlock;
++ }
++
++ if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
++ if (sr->l_len <= 0) {
++ ret = -EINVAL;
++ goto out_meta_unlock;
++ }
++ }
++
++ if (should_remove_suid(file->f_path.dentry)) {
++ ret = __ocfs2_write_remove_suid(inode, di_bh);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_meta_unlock;
++ }
++ }
++
++ down_write(&OCFS2_I(inode)->ip_alloc_sem);
++ /*
++ * This takes unsigned offsets, but the signed ones we pass
++ * have been checked against overflow above.
++ */
++ ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, sr->l_len);
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++ if (ret) {
++ mlog_errno(ret);
++ goto out_meta_unlock;
++ }
++
++ /*
++ * We update c/mtime for these changes
++ */
++ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
++ if (IS_ERR(handle)) {
++ ret = PTR_ERR(handle);
++ mlog_errno(ret);
++ goto out_meta_unlock;
++ }
++
++ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
++ ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
++ if (ret < 0)
++ mlog_errno(ret);
++
++ ocfs2_commit_trans(osb, handle);
++
++out_meta_unlock:
++ brelse(di_bh);
++ ocfs2_meta_unlock(inode, 1);
++out_rw_unlock:
++ ocfs2_rw_unlock(inode, 1);
++
++ mutex_unlock(&inode->i_mutex);
++out:
++ return ret;
++}
++
+ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+ loff_t *ppos,
+ size_t count,
+@@ -1331,15 +1560,16 @@
+ *basep = base;
+ }
+
+-static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp,
++static struct page * ocfs2_get_write_source(char **ret_src_buf,
+ const struct iovec *cur_iov,
+ size_t iov_offset)
+ {
+ int ret;
+- char *buf;
++ char *buf = cur_iov->iov_base + iov_offset;
+ struct page *src_page = NULL;
++ unsigned long off;
+
+- buf = cur_iov->iov_base + iov_offset;
++ off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
+
+ if (!segment_eq(get_fs(), KERNEL_DS)) {
+ /*
+@@ -1378,10 +1608,12 @@
+ {
+ int ret = 0;
+ ssize_t copied, total = 0;
+- size_t iov_offset = 0;
++ size_t iov_offset = 0, bytes;
++ loff_t pos;
+ const struct iovec *cur_iov = iov;
+- struct ocfs2_buffered_write_priv bp;
+- struct page *page;
++ struct page *user_page, *page;
++ char *buf, *dst;
++ void *fsdata;
+
+ /*
+ * handle partial DIO write. Adjust cur_iov if needed.
+@@ -1389,21 +1621,38 @@
+ ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
+
+ do {
+- bp.b_cur_off = iov_offset;
+- bp.b_cur_iov = cur_iov;
++ pos = *ppos;
+
+- page = ocfs2_get_write_source(&bp, cur_iov, iov_offset);
+- if (IS_ERR(page)) {
+- ret = PTR_ERR(page);
++ user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
++ if (IS_ERR(user_page)) {
++ ret = PTR_ERR(user_page);
+ goto out;
+ }
+
+- copied = ocfs2_buffered_write_cluster(file, *ppos, count,
+- ocfs2_map_and_write_user_data,
+- &bp);
++ /* Stay within our page boundaries */
++ bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
++ (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
++ /* Stay within the vector boundary */
++ bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
++ /* Stay within count */
++ bytes = min(bytes, count);
++
++ page = NULL;
++ ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
++ &page, &fsdata);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
+
+- ocfs2_put_write_source(&bp, page);
++ dst = kmap_atomic(page, KM_USER0);
++ memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
++ kunmap_atomic(dst, KM_USER0);
++ flush_dcache_page(page);
++ ocfs2_put_write_source(user_page);
+
++ copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
++ bytes, page, fsdata);
+ if (copied < 0) {
+ mlog_errno(copied);
+ ret = copied;
+@@ -1411,7 +1660,7 @@
+ }
+
+ total += copied;
+- *ppos = *ppos + copied;
++ *ppos = pos + copied;
+ count -= copied;
+
+ ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
+@@ -1581,52 +1830,46 @@
+ struct pipe_buffer *buf,
+ struct splice_desc *sd)
+ {
+- int ret, count, total = 0;
++ int ret, count;
+ ssize_t copied = 0;
+- struct ocfs2_splice_write_priv sp;
++ struct file *file = sd->file;
++ unsigned int offset;
++ struct page *page = NULL;
++ void *fsdata;
++ char *src, *dst;
+
+ ret = buf->ops->pin(pipe, buf);
+ if (ret)
+ goto out;
+
+- sp.s_sd = sd;
+- sp.s_buf = buf;
+- sp.s_pipe = pipe;
+- sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
+- sp.s_buf_offset = buf->offset;
+-
++ offset = sd->pos & ~PAGE_CACHE_MASK;
+ count = sd->len;
+- if (count + sp.s_offset > PAGE_CACHE_SIZE)
+- count = PAGE_CACHE_SIZE - sp.s_offset;
++ if (count + offset > PAGE_CACHE_SIZE)
++ count = PAGE_CACHE_SIZE - offset;
+
+- do {
+- /*
+- * splice wants us to copy up to one page at a
+- * time. For pagesize > cluster size, this means we
+- * might enter ocfs2_buffered_write_cluster() more
+- * than once, so keep track of our progress here.
+- */
+- copied = ocfs2_buffered_write_cluster(sd->file,
+- (loff_t)sd->pos + total,
+- count,
+- ocfs2_map_and_write_splice_data,
+- &sp);
++ ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
++ &page, &fsdata);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
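++	/*
++	 * Two atomic kmap slots are live across the copy: the pipe
++	 * buffer maps with KM_USER0, so the target page takes
++	 * KM_USER1.
++	 */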
++ src = buf->ops->map(pipe, buf, 1);
++ dst = kmap_atomic(page, KM_USER1);
++ memcpy(dst + offset, src + buf->offset, count);
++	kunmap_atomic(dst, KM_USER1);
++ buf->ops->unmap(pipe, buf, src);
++
++ copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
++ page, fsdata);
+ if (copied < 0) {
+ mlog_errno(copied);
+ ret = copied;
+ goto out;
+ }
+-
+- count -= copied;
+- sp.s_offset += copied;
+- sp.s_buf_offset += copied;
+- total += copied;
+- } while (count);
+-
+- ret = 0;
+ out:
+
+- return total ? total : ret;
++ return copied ? copied : ret;
+ }
+
+ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
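+
+The copy loops above bound each write_begin/write_end cycle with a chain
+of min() calls: a single step may not cross a page boundary in either the
+target file position or the source buffer, nor run past the current iovec
+or the remaining count. A minimal standalone sketch of that clamping
+arithmetic (PAGE_SIZE standing in for PAGE_CACHE_SIZE; the function name
+is hypothetical):
+
+#include <stdio.h>
+#include <stddef.h>
+
+#define PAGE_SIZE 4096UL
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+static size_t clamp_write_bytes(unsigned long pos, unsigned long buf,
+                                size_t iov_remaining, size_t count)
+{
+	size_t bytes;
+
+	/* Stay within the page holding pos and the page holding buf */
+	bytes = PAGE_SIZE - (pos & ~PAGE_MASK);
+	if (bytes > PAGE_SIZE - (buf & ~PAGE_MASK))
+		bytes = PAGE_SIZE - (buf & ~PAGE_MASK);
+	/* Stay within the vector boundary */
+	if (bytes > iov_remaining)
+		bytes = iov_remaining;
+	/* Stay within count */
+	if (bytes > count)
+		bytes = count;
+	return bytes;
+}
+
+int main(void)
+{
+	/* pos is 4000 bytes into its page, buf is 100 bytes into its page */
+	printf("%zu\n", clamp_write_bytes(4000, 100, 8192, 65536)); /* 96 */
+	return 0;
+}
+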
+diff -Nurb linux-2.6.22-570/fs/ocfs2/file.h linux-2.6.22-591/fs/ocfs2/file.h
+--- linux-2.6.22-570/fs/ocfs2/file.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/file.h 2007-12-21 15:36:12.000000000 -0500
+@@ -39,15 +39,16 @@
+ };
+ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
+ struct inode *inode,
+- u32 *cluster_start,
++ u32 *logical_offset,
+ u32 clusters_to_add,
++ int mark_unwritten,
+ struct buffer_head *fe_bh,
+ handle_t *handle,
+ struct ocfs2_alloc_context *data_ac,
+ struct ocfs2_alloc_context *meta_ac,
+- enum ocfs2_alloc_restarted *reason);
++ enum ocfs2_alloc_restarted *reason_ret);
+ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
+- u32 clusters_to_add,
++ u32 clusters_to_add, u32 extents_to_split,
+ struct ocfs2_alloc_context **data_ac,
+ struct ocfs2_alloc_context **meta_ac);
+ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
+@@ -61,4 +62,7 @@
+ int ocfs2_update_inode_atime(struct inode *inode,
+ struct buffer_head *bh);
+
++int ocfs2_change_file_space(struct file *file, unsigned int cmd,
++ struct ocfs2_space_resv *sr);
++
+ #endif /* OCFS2_FILE_H */
+diff -Nurb linux-2.6.22-570/fs/ocfs2/heartbeat.c linux-2.6.22-591/fs/ocfs2/heartbeat.c
+--- linux-2.6.22-570/fs/ocfs2/heartbeat.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/heartbeat.c 2007-12-21 15:36:12.000000000 -0500
+@@ -157,16 +157,16 @@
+ if (ocfs2_mount_local(osb))
+ return 0;
+
+- status = o2hb_register_callback(&osb->osb_hb_down);
++ status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+- status = o2hb_register_callback(&osb->osb_hb_up);
++ status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
+ if (status < 0) {
+ mlog_errno(status);
+- o2hb_unregister_callback(&osb->osb_hb_down);
++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
+ }
+
+ bail:
+@@ -178,8 +178,8 @@
+ if (ocfs2_mount_local(osb))
+ return;
+
+- o2hb_unregister_callback(&osb->osb_hb_down);
+- o2hb_unregister_callback(&osb->osb_hb_up);
++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
+ }
+
+ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
+@@ -209,7 +209,7 @@
+ envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+ envp[2] = NULL;
+
+- ret = call_usermodehelper(argv[0], argv, envp, 1);
++ ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ioctl.c linux-2.6.22-591/fs/ocfs2/ioctl.c
+--- linux-2.6.22-570/fs/ocfs2/ioctl.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/ioctl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include "ocfs2.h"
+ #include "alloc.h"
+ #include "dlmglue.h"
++#include "file.h"
+ #include "inode.h"
+ #include "journal.h"
+
+@@ -115,6 +116,7 @@
+ {
+ unsigned int flags;
+ int status;
++ struct ocfs2_space_resv sr;
+
+ switch (cmd) {
+ case OCFS2_IOC_GETFLAGS:
+@@ -130,6 +132,12 @@
+
+ return ocfs2_set_inode_attr(inode, flags,
+ OCFS2_FL_MODIFIABLE);
++ case OCFS2_IOC_RESVSP:
++ case OCFS2_IOC_RESVSP64:
++ if (copy_from_user(&sr, (int __user *) arg, sizeof(sr)))
++ return -EFAULT;
++
++ return ocfs2_change_file_space(filp, cmd, &sr);
+ default:
+ return -ENOTTY;
+ }
+@@ -148,6 +156,9 @@
+ case OCFS2_IOC32_SETFLAGS:
+ cmd = OCFS2_IOC_SETFLAGS;
+ break;
++ case OCFS2_IOC_RESVSP:
++ case OCFS2_IOC_RESVSP64:
++ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+diff -Nurb linux-2.6.22-570/fs/ocfs2/journal.c linux-2.6.22-591/fs/ocfs2/journal.c
+--- linux-2.6.22-570/fs/ocfs2/journal.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/journal.c 2007-12-21 15:36:12.000000000 -0500
+@@ -722,8 +722,7 @@
+ container_of(work, struct ocfs2_journal, j_recovery_work);
+ struct ocfs2_super *osb = journal->j_osb;
+ struct ocfs2_dinode *la_dinode, *tl_dinode;
+- struct ocfs2_la_recovery_item *item;
+- struct list_head *p, *n;
++ struct ocfs2_la_recovery_item *item, *n;
+ LIST_HEAD(tmp_la_list);
+
+ mlog_entry_void();
+@@ -734,8 +733,7 @@
+ list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
+ spin_unlock(&journal->j_lock);
+
+- list_for_each_safe(p, n, &tmp_la_list) {
+- item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
++ list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
+ list_del_init(&item->lri_list);
+
+ mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
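+
+The journal.c hunk above swaps an open-coded list_for_each_safe() walk for
+list_for_each_entry_safe(), which folds the list_entry() step into the
+iterator while still caching the next node so the current entry can be
+freed mid-walk. A userspace sketch of the idiom, with a minimal
+re-implementation of the kernel's intrusive list (illustration only):
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+
+struct list_head { struct list_head *next, *prev; };
+
+#define container_of(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+/* "safe" variant: n holds the next entry before the body can delete pos */
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_entry((head)->next, typeof(*pos), member),	\
+	     n = list_entry(pos->member.next, typeof(*pos), member);	\
+	     &pos->member != (head);					\
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+static void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	new->prev = head->prev; new->next = head;
+	head->prev->next = new; head->prev = new;
+}
+
+static void list_del(struct list_head *e)
+{
+	e->prev->next = e->next; e->next->prev = e->prev;
+}
+
+struct item { int slot; struct list_head list; };
+
+int main(void)
+{
+	struct list_head head = { &head, &head };
+	struct item *it, *n;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		it = malloc(sizeof(*it));
+		it->slot = i;
+		list_add_tail(&it->list, &head);
+	}
+	/* Deleting while iterating is safe: n was saved up front */
+	list_for_each_entry_safe(it, n, &head, list) {
+		printf("complete recovery for slot %d\n", it->slot);
+		list_del(&it->list);
+		free(it);
+	}
+	return 0;
+}
+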
+diff -Nurb linux-2.6.22-570/fs/ocfs2/mmap.c linux-2.6.22-591/fs/ocfs2/mmap.c
+--- linux-2.6.22-570/fs/ocfs2/mmap.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/mmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -37,38 +37,48 @@
+
+ #include "ocfs2.h"
+
++#include "aops.h"
+ #include "dlmglue.h"
+ #include "file.h"
+ #include "inode.h"
+ #include "mmap.h"
+
+-static struct page *ocfs2_nopage(struct vm_area_struct * area,
+- unsigned long address,
+- int *type)
++static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
+ {
+- struct page *page = NOPAGE_SIGBUS;
+- sigset_t blocked, oldset;
+- int ret;
+-
+- mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
+- type);
+-
+- /* The best way to deal with signals in this path is
++ /* The best way to deal with signals in the vm path is
+ * to block them upfront, rather than allowing the
+ * locking paths to return -ERESTARTSYS. */
+- sigfillset(&blocked);
++ sigfillset(blocked);
+
+- /* We should technically never get a bad ret return
++ /* We should technically never get a bad return value
+ * from sigprocmask */
+- ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
++ return sigprocmask(SIG_BLOCK, blocked, oldset);
++}
++
++static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
++{
++ return sigprocmask(SIG_SETMASK, oldset, NULL);
++}
++
++static struct page *ocfs2_fault(struct vm_area_struct *area,
++ struct fault_data *fdata)
++{
++ struct page *page = NULL;
++ sigset_t blocked, oldset;
++ int ret;
++
++ mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff);
++
++ ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
+ if (ret < 0) {
++ fdata->type = VM_FAULT_SIGBUS;
+ mlog_errno(ret);
+ goto out;
+ }
+
+- page = filemap_nopage(area, address, type);
++ page = filemap_fault(area, fdata);
+
+- ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
++ ret = ocfs2_vm_op_unblock_sigs(&oldset);
+ if (ret < 0)
+ mlog_errno(ret);
+ out:
+@@ -76,28 +86,136 @@
+ return page;
+ }
+
+-static struct vm_operations_struct ocfs2_file_vm_ops = {
+- .nopage = ocfs2_nopage,
+-};
++static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
++ struct page *page)
++{
++ int ret;
++ struct address_space *mapping = inode->i_mapping;
++ loff_t pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
++ unsigned int len = PAGE_CACHE_SIZE;
++ pgoff_t last_index;
++ struct page *locked_page = NULL;
++ void *fsdata;
++ loff_t size = i_size_read(inode);
+
+-int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
++ /*
++ * Another node might have truncated while we were waiting on
++ * cluster locks.
++ */
++ last_index = size >> PAGE_CACHE_SHIFT;
++ if (page->index > last_index) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ /*
++ * The i_size check above doesn't catch the case where nodes
++ * truncated and then re-extended the file. We'll re-check the
++ * page mapping after taking the page lock inside of
++ * ocfs2_write_begin_nolock().
++ */
++ if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ /*
++ * Call ocfs2_write_begin() and ocfs2_write_end() to take
++ * advantage of the allocation code there. We pass a write
++ * length of the whole page (chopped to i_size) to make sure
++ * the whole thing is allocated.
++ *
++ * Since we know the page is up to date, we don't have to
++ * worry about ocfs2_write_begin() skipping some buffer reads
++ * because the "write" would invalidate their data.
++ */
++ if (page->index == last_index)
++ len = size & ~PAGE_CACHE_MASK;
++
++ ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
++ &fsdata, di_bh, page);
++ if (ret) {
++ if (ret != -ENOSPC)
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
++ fsdata);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
++ BUG_ON(ret != len);
++ ret = 0;
++out:
++ return ret;
++}
++
++static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+ {
+- int ret = 0, lock_level = 0;
+- struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
++ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
++ struct buffer_head *di_bh = NULL;
++ sigset_t blocked, oldset;
++ int ret, ret2;
++
++ ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
++ if (ret < 0) {
++ mlog_errno(ret);
++ return ret;
++ }
+
+ /*
+- * Only support shared writeable mmap for local mounts which
+- * don't know about holes.
++ * The cluster locks taken will block a truncate from another
++ * node. Taking the data lock will also ensure that we don't
++ * attempt page truncation as part of a downconvert.
+ */
+- if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) &&
+- ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
+- ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
+- mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
+- /* This is -EINVAL because generic_file_readonly_mmap
+- * returns it in a similar situation. */
+- return -EINVAL;
++ ret = ocfs2_meta_lock(inode, &di_bh, 1);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
+ }
+
++ /*
++ * The alloc sem should be enough to serialize with
++ * ocfs2_truncate_file() changing i_size as well as any thread
++ * modifying the inode btree.
++ */
++ down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++ ret = ocfs2_data_lock(inode, 1);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out_meta_unlock;
++ }
++
++ ret = __ocfs2_page_mkwrite(inode, di_bh, page);
++
++ ocfs2_data_unlock(inode, 1);
++
++out_meta_unlock:
++ up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++ brelse(di_bh);
++ ocfs2_meta_unlock(inode, 1);
++
++out:
++ ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
++ if (ret2 < 0)
++ mlog_errno(ret2);
++
++ return ret;
++}
++
++static struct vm_operations_struct ocfs2_file_vm_ops = {
++ .fault = ocfs2_fault,
++ .page_mkwrite = ocfs2_page_mkwrite,
++};
++
++int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ int ret = 0, lock_level = 0;
++
+ ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
+ file->f_vfsmnt, &lock_level);
+ if (ret < 0) {
+@@ -107,6 +225,7 @@
+ ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
+ out:
+ vma->vm_ops = &ocfs2_file_vm_ops;
++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ return 0;
+ }
+
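+
+Both vm ops above wrap their work in ocfs2_vm_op_block_sigs() and
+ocfs2_vm_op_unblock_sigs() so cluster-lock waits cannot be interrupted
+into -ERESTARTSYS. The pattern is plain sigprocmask() bracketing; a
+runnable userspace analogue (the kernel helpers use the in-kernel
+sigprocmask, not this libc call):
+
+#include <stdio.h>
+#include <signal.h>
+
+int main(void)
+{
+	sigset_t blocked, oldset;
+
+	/* Block every signal before entering the critical section */
+	sigfillset(&blocked);
+	if (sigprocmask(SIG_BLOCK, &blocked, &oldset) < 0) {
+		perror("sigprocmask");
+		return 1;
+	}
+	/* ... critical section: delivery is deferred here ... */
+	puts("signals blocked");
+	/* Restore the caller's original mask */
+	if (sigprocmask(SIG_SETMASK, &oldset, NULL) < 0) {
+		perror("sigprocmask restore");
+		return 1;
+	}
+	return 0;
+}
+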
+diff -Nurb linux-2.6.22-570/fs/ocfs2/namei.c linux-2.6.22-591/fs/ocfs2/namei.c
+--- linux-2.6.22-570/fs/ocfs2/namei.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/namei.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1684,7 +1684,7 @@
+ u32 offset = 0;
+
+ inode->i_op = &ocfs2_symlink_inode_operations;
+- status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
++ status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
+ new_fe_bh,
+ handle, data_ac, NULL,
+ NULL);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2.h linux-2.6.22-591/fs/ocfs2/ocfs2.h
+--- linux-2.6.22-570/fs/ocfs2/ocfs2.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/ocfs2.h 2007-12-21 15:36:12.000000000 -0500
+@@ -220,6 +220,7 @@
+ u16 max_slots;
+ s16 node_num;
+ s16 slot_num;
++ s16 preferred_slot;
+ int s_sectsize_bits;
+ int s_clustersize;
+ int s_clustersize_bits;
+@@ -306,6 +307,19 @@
+ return 0;
+ }
+
++static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
++{
++ /*
++ * Support for sparse files is a prerequisite
++ */
++ if (!ocfs2_sparse_alloc(osb))
++ return 0;
++
++ if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
++ return 1;
++ return 0;
++}
++
+ /* set / clear functions because cluster events can make these happen
+ * in parallel so we want the transitions to be atomic. this also
+ * means that any future flags osb_flags must be protected by spinlock
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h
+--- linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -88,7 +88,7 @@
+ #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
+ #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
+ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
+-#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
++#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+
+ /*
+ * Heartbeat-only devices are missing journals and other files. The
+@@ -116,6 +116,11 @@
+ */
+ #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
+
++/*
++ * Unwritten extents support.
++ */
++#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
++
+ /* The byte offset of the first backup block will be 1G.
+ * The following will be 4G, 16G, 64G, 256G and 1T.
+ */
+@@ -174,6 +179,32 @@
+ #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
+
+ /*
++ * Space reservation / allocation / free ioctls and argument structure
++ * are designed to be compatible with XFS.
++ *
++ * ALLOCSP* and FREESP* are not and will never be supported, but are
++ * included here for completeness.
++ */
++struct ocfs2_space_resv {
++ __s16 l_type;
++ __s16 l_whence;
++ __s64 l_start;
++ __s64 l_len; /* len == 0 means until end of file */
++ __s32 l_sysid;
++ __u32 l_pid;
++ __s32 l_pad[4]; /* reserve area */
++};
++
++#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv)
++#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv)
++#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv)
++#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv)
++#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv)
++#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv)
++#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
++#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
++
++/*
+ * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
+ */
+ #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */
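+
+With the ioctl plumbing from ioctl.c and these definitions, userspace can
+reserve space the same way it would on XFS. A hedged sketch (the file
+path is hypothetical; error handling trimmed to the essentials):
+
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+
+/* Mirrors the ocfs2_fs.h definitions above */
+struct ocfs2_space_resv {
+	__s16 l_type;
+	__s16 l_whence;
+	__s64 l_start;
+	__s64 l_len;
+	__s32 l_sysid;
+	__u32 l_pid;
+	__s32 l_pad[4];
+};
+
+#define OCFS2_IOC_RESVSP64 _IOW('X', 42, struct ocfs2_space_resv)
+
+int main(void)
+{
+	struct ocfs2_space_resv sr;
+	int fd = open("testfile", O_RDWR);	/* hypothetical ocfs2 file */
+
+	if (fd < 0) { perror("open"); return 1; }
+	memset(&sr, 0, sizeof(sr));
+	sr.l_whence = 0;		/* SEEK_SET: l_start is absolute */
+	sr.l_start = 0;
+	sr.l_len = 1024 * 1024;		/* reserve 1MB past l_start */
+	if (ioctl(fd, OCFS2_IOC_RESVSP64, &sr) < 0)
+		perror("ioctl(OCFS2_IOC_RESVSP64)");
+	close(fd);
+	return 0;
+}
+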
+diff -Nurb linux-2.6.22-570/fs/ocfs2/slot_map.c linux-2.6.22-591/fs/ocfs2/slot_map.c
+--- linux-2.6.22-570/fs/ocfs2/slot_map.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/slot_map.c 2007-12-21 15:36:12.000000000 -0500
+@@ -121,17 +121,25 @@
+ return ret;
+ }
+
+-static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si)
++static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
+ {
+ int i;
+ s16 ret = OCFS2_INVALID_SLOT;
+
++ if (preferred >= 0 && preferred < si->si_num_slots) {
++ if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
++ ret = preferred;
++ goto out;
++ }
++ }
++
+ for(i = 0; i < si->si_num_slots; i++) {
+ if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
+ ret = (s16) i;
+ break;
+ }
+ }
++out:
+ return ret;
+ }
+
+@@ -248,7 +256,7 @@
+ if (slot == OCFS2_INVALID_SLOT) {
+ /* if no slot yet, then just take 1st available
+ * one. */
+- slot = __ocfs2_find_empty_slot(si);
++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ if (slot == OCFS2_INVALID_SLOT) {
+ spin_unlock(&si->si_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
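+
+The slot map change above is small enough to restate standalone: try the
+mount-time preferred slot first, then fall back to the first free entry.
+A sketch with INVALID in place of OCFS2_INVALID_SLOT:
+
+#include <stdio.h>
+
+#define INVALID (-1)
+
+static int find_empty_slot(const int *nodes, int num_slots, int preferred)
+{
+	int i;
+
+	/* Preferred slot wins if it is in range and unoccupied */
+	if (preferred >= 0 && preferred < num_slots &&
+	    nodes[preferred] == INVALID)
+		return preferred;
+
+	/* Otherwise take the first available slot */
+	for (i = 0; i < num_slots; i++)
+		if (nodes[i] == INVALID)
+			return i;
+	return INVALID;
+}
+
+int main(void)
+{
+	int nodes[4] = { 7, INVALID, INVALID, 9 };
+
+	printf("%d\n", find_empty_slot(nodes, 4, 2));	/* 2: preferred free */
+	printf("%d\n", find_empty_slot(nodes, 4, 0));	/* 1: first free */
+	return 0;
+}
+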
+diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.c linux-2.6.22-591/fs/ocfs2/suballoc.c
+--- linux-2.6.22-570/fs/ocfs2/suballoc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/suballoc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -98,14 +98,6 @@
+ u16 chain);
+ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
+ u32 wanted);
+-static int ocfs2_free_suballoc_bits(handle_t *handle,
+- struct inode *alloc_inode,
+- struct buffer_head *alloc_bh,
+- unsigned int start_bit,
+- u64 bg_blkno,
+- unsigned int count);
+-static inline u64 ocfs2_which_suballoc_group(u64 block,
+- unsigned int bit);
+ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
+ u64 bg_blkno,
+ u16 bg_bit_off);
+@@ -496,13 +488,7 @@
+
+ (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
+ (*ac)->ac_which = OCFS2_AC_USE_META;
+-
+-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
+- slot = 0;
+-#else
+ slot = osb->slot_num;
+-#endif
+-
+ (*ac)->ac_group_search = ocfs2_block_group_search;
+
+ status = ocfs2_reserve_suballoc_bits(osb, (*ac),
+@@ -1626,7 +1612,7 @@
+ /*
+ * expects the suballoc inode to already be locked.
+ */
+-static int ocfs2_free_suballoc_bits(handle_t *handle,
++int ocfs2_free_suballoc_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+@@ -1703,13 +1689,6 @@
+ return status;
+ }
+
+-static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
+-{
+- u64 group = block - (u64) bit;
+-
+- return group;
+-}
+-
+ int ocfs2_free_dinode(handle_t *handle,
+ struct inode *inode_alloc_inode,
+ struct buffer_head *inode_alloc_bh,
+@@ -1723,19 +1702,6 @@
+ inode_alloc_bh, bit, bg_blkno, 1);
+ }
+
+-int ocfs2_free_extent_block(handle_t *handle,
+- struct inode *eb_alloc_inode,
+- struct buffer_head *eb_alloc_bh,
+- struct ocfs2_extent_block *eb)
+-{
+- u64 blk = le64_to_cpu(eb->h_blkno);
+- u16 bit = le16_to_cpu(eb->h_suballoc_bit);
+- u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
+-
+- return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
+- bit, bg_blkno, 1);
+-}
+-
+ int ocfs2_free_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.h linux-2.6.22-591/fs/ocfs2/suballoc.h
+--- linux-2.6.22-570/fs/ocfs2/suballoc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/suballoc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -86,20 +86,29 @@
+ u32 *cluster_start,
+ u32 *num_clusters);
+
++int ocfs2_free_suballoc_bits(handle_t *handle,
++ struct inode *alloc_inode,
++ struct buffer_head *alloc_bh,
++ unsigned int start_bit,
++ u64 bg_blkno,
++ unsigned int count);
+ int ocfs2_free_dinode(handle_t *handle,
+ struct inode *inode_alloc_inode,
+ struct buffer_head *inode_alloc_bh,
+ struct ocfs2_dinode *di);
+-int ocfs2_free_extent_block(handle_t *handle,
+- struct inode *eb_alloc_inode,
+- struct buffer_head *eb_alloc_bh,
+- struct ocfs2_extent_block *eb);
+ int ocfs2_free_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters);
+
++static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
++{
++ u64 group = block - (u64) bit;
++
++ return group;
++}
++
+ static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
+ u64 bg_blkno)
+ {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/super.c linux-2.6.22-591/fs/ocfs2/super.c
+--- linux-2.6.22-570/fs/ocfs2/super.c 2007-12-21 15:36:07.000000000 -0500
++++ linux-2.6.22-591/fs/ocfs2/super.c 2007-12-21 15:36:12.000000000 -0500
+@@ -82,7 +82,8 @@
+ MODULE_LICENSE("GPL");
+
+ static int ocfs2_parse_options(struct super_block *sb, char *options,
+- unsigned long *mount_opt, int is_remount);
++ unsigned long *mount_opt, s16 *slot,
++ int is_remount);
+ static void ocfs2_put_super(struct super_block *sb);
+ static int ocfs2_mount_volume(struct super_block *sb);
+ static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
+@@ -114,8 +115,6 @@
+ static struct inode *ocfs2_alloc_inode(struct super_block *sb);
+ static void ocfs2_destroy_inode(struct inode *inode);
+
+-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
+-
+ static const struct super_operations ocfs2_sops = {
+ .statfs = ocfs2_statfs,
+ .alloc_inode = ocfs2_alloc_inode,
+@@ -323,7 +322,7 @@
+ /* From xfs_super.c:xfs_max_file_offset
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.
+ */
+-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+ {
+ unsigned int pagefactor = 1;
+ unsigned int bitshift = BITS_PER_LONG - 1;
+@@ -360,9 +359,10 @@
+ int incompat_features;
+ int ret = 0;
+ unsigned long parsed_options;
++ s16 slot;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+- if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
++ if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -546,6 +546,7 @@
+ struct dentry *root;
+ int status, sector_size;
+ unsigned long parsed_opt;
++ s16 slot;
+ struct inode *inode = NULL;
+ struct ocfs2_super *osb = NULL;
+ struct buffer_head *bh = NULL;
+@@ -553,7 +554,7 @@
+
+ mlog_entry("%p, %p, %i", sb, data, silent);
+
+- if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
++ if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
+ status = -EINVAL;
+ goto read_super_error;
+ }
+@@ -583,6 +584,7 @@
+ brelse(bh);
+ bh = NULL;
+ osb->s_mount_opt = parsed_opt;
++ osb->preferred_slot = slot;
+
+ sb->s_magic = OCFS2_SUPER_MAGIC;
+
+@@ -728,6 +730,7 @@
+ static int ocfs2_parse_options(struct super_block *sb,
+ char *options,
+ unsigned long *mount_opt,
++ s16 *slot,
+ int is_remount)
+ {
+ int status;
+@@ -737,6 +740,7 @@
+ options ? options : "(none)");
+
+ *mount_opt = 0;
++ *slot = OCFS2_INVALID_SLOT;
+
+ if (!options) {
+ status = 1;
+diff -Nurb linux-2.6.22-570/fs/ocfs2/super.h linux-2.6.22-591/fs/ocfs2/super.h
+--- linux-2.6.22-570/fs/ocfs2/super.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/super.h 2007-12-21 15:36:12.000000000 -0500
+@@ -45,4 +45,6 @@
+
+ #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
+
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
++
+ #endif /* OCFS2_SUPER_H */
+diff -Nurb linux-2.6.22-570/fs/open.c linux-2.6.22-591/fs/open.c
+--- linux-2.6.22-570/fs/open.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/open.c 2007-12-21 15:36:12.000000000 -0500
+@@ -362,6 +362,92 @@
+ #endif
+
+ /*
++ * sys_fallocate - preallocate blocks or free preallocated blocks
++ * @fd: the file descriptor
++ * @mode: mode specifies if fallocate should preallocate blocks OR free
++ * (deallocate) preallocated blocks. Currently only FA_ALLOCATE and
++ * FA_DEALLOCATE modes are supported.
++ * @offset: The offset within file, from where (un)allocation is being
++ * requested. It should not have a negative value.
++ * @len: The amount (in bytes) of space to be (un)allocated, from the offset.
++ *
++ * This system call, depending on the mode, preallocates or unallocates blocks
++ * for a file. The range of blocks depends on the value of offset and len
++ * arguments provided by the user/application. For FA_ALLOCATE mode, if this
++ * system call succeeds, subsequent writes to the file in the given range
++ * (specified by offset & len) should not fail - even if the file system
++ * later becomes full. Hence the preallocation done is persistent (valid
++ * even after reopen of the file and remount/reboot).
++ *
++ * It is expected that the ->fallocate() inode operation implemented by the
++ * individual file systems will update the file size and/or ctime/mtime
++ * depending on the mode and also on the success of the operation.
++ *
++ * Note: In case the file system does not support preallocation,
++ * posix_fallocate() should fall back to the library implementation (i.e.
++ * allocating zero-filled new blocks to the file).
++ *
++ * Return Values
++ * 0 : On SUCCESS a value of zero is returned.
++ * error : On Failure, an error code will be returned.
++ * An error code of -ENOSYS or -EOPNOTSUPP should make posix_fallocate()
++ * fall back on library implementation of fallocate.
++ *
++ * <TBD> Generic fallocate to be added for file systems that do not
++ * support it.
++ */
++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
++{
++ struct file *file;
++ struct inode *inode;
++ long ret = -EINVAL;
++
++ if (offset < 0 || len <= 0)
++ goto out;
++
++ /* Return error if mode is not supported */
++ ret = -EOPNOTSUPP;
++ if (mode != FA_ALLOCATE && mode != FA_DEALLOCATE)
++ goto out;
++
++ ret = -EBADF;
++ file = fget(fd);
++ if (!file)
++ goto out;
++ if (!(file->f_mode & FMODE_WRITE))
++ goto out_fput;
++
++ inode = file->f_path.dentry->d_inode;
++
++ ret = -ESPIPE;
++ if (S_ISFIFO(inode->i_mode))
++ goto out_fput;
++
++ ret = -ENODEV;
++ /*
++ * Let individual file system decide if it supports preallocation
++ * for directories or not.
++ */
++ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
++ goto out_fput;
++
++ ret = -EFBIG;
++ /* Check for wrap through zero too */
++ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
++ goto out_fput;
++
++ if (inode->i_op && inode->i_op->fallocate)
++ ret = inode->i_op->fallocate(inode, mode, offset, len);
++ else
++ ret = -ENOSYS;
++
++out_fput:
++ fput(file);
++out:
++ return ret;
++}
++
++/*
+ * access() needs to use the real uid/gid, not the effective uid/gid.
+ * We do this by temporarily clearing all FS-related capabilities and
+ * switching the fsuid/fsgid around to the real ones.
+diff -Nurb linux-2.6.22-570/fs/partitions/check.c linux-2.6.22-591/fs/partitions/check.c
+--- linux-2.6.22-570/fs/partitions/check.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/partitions/check.c 2007-12-21 15:36:12.000000000 -0500
+@@ -397,7 +397,6 @@
+ static struct attribute addpartattr = {
+ .name = "whole_disk",
+ .mode = S_IRUSR | S_IRGRP | S_IROTH,
+- .owner = THIS_MODULE,
+ };
+
+ sysfs_create_file(&p->kobj, &addpartattr);
+diff -Nurb linux-2.6.22-570/fs/proc/Makefile linux-2.6.22-591/fs/proc/Makefile
+--- linux-2.6.22-570/fs/proc/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/proc/Makefile 2007-12-21 15:36:14.000000000 -0500
+@@ -11,6 +11,7 @@
+ proc_tty.o proc_misc.o
+
+ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
++proc-$(CONFIG_NET) += proc_net.o
+ proc-$(CONFIG_PROC_KCORE) += kcore.o
+ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
+ proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
+diff -Nurb linux-2.6.22-570/fs/proc/array.c linux-2.6.22-591/fs/proc/array.c
+--- linux-2.6.22-570/fs/proc/array.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/array.c 2007-12-21 15:36:12.000000000 -0500
+@@ -291,6 +291,15 @@
+ return buffer;
+ }
+
++static inline char *task_context_switch_counts(struct task_struct *p,
++ char *buffer)
++{
++ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
++ "nonvoluntary_ctxt_switches:\t%lu\n",
++ p->nvcsw,
++ p->nivcsw);
++}
++
+ static inline char *task_cap(struct task_struct *p, char *buffer)
+ {
+ struct vx_info *vxi = p->vx_info;
+@@ -328,6 +337,7 @@
+ #if defined(CONFIG_S390)
+ buffer = task_show_regs(task, buffer);
+ #endif
++ buffer = task_context_switch_counts(task, buffer);
+ return buffer - orig;
+ }
+
+@@ -426,8 +436,9 @@
+
+ /* Temporary variable needed for gcc-2.96 */
+ /* convert timespec -> nsec*/
+- start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+- + task->start_time.tv_nsec;
++ start_time =
++ (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
++ + task->real_start_time.tv_nsec;
+ /* convert nsec -> ticks */
+ start_time = nsec_to_clock_t(start_time);
+
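+
+The two counters added to /proc/<pid>/status above can be read back with
+nothing more than a line scan; a quick self-check:
+
+#include <stdio.h>
+#include <string.h>
+
+int main(void)
+{
+	char line[256];
+	FILE *f = fopen("/proc/self/status", "r");
+
+	if (!f) { perror("fopen"); return 1; }
+	/* Print only the voluntary/nonvoluntary context switch lines */
+	while (fgets(line, sizeof(line), f))
+		if (strstr(line, "ctxt_switches"))
+			fputs(line, stdout);
+	fclose(f);
+	return 0;
+}
+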
+diff -Nurb linux-2.6.22-570/fs/proc/base.c linux-2.6.22-591/fs/proc/base.c
+--- linux-2.6.22-570/fs/proc/base.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/base.c 2007-12-21 15:36:12.000000000 -0500
+@@ -67,7 +67,7 @@
+ #include <linux/mount.h>
+ #include <linux/security.h>
+ #include <linux/ptrace.h>
+-#include <linux/seccomp.h>
++#include <linux/container.h>
+ #include <linux/cpuset.h>
+ #include <linux/audit.h>
+ #include <linux/poll.h>
+@@ -490,7 +490,7 @@
+ count = PROC_BLOCK_SIZE;
+
+ length = -ENOMEM;
+- if (!(page = __get_free_page(GFP_KERNEL)))
++ if (!(page = __get_free_page(GFP_TEMPORARY)))
+ goto out;
+
+ length = PROC_I(inode)->op.proc_read(task, (char*)page);
+@@ -530,7 +530,7 @@
+ goto out;
+
+ ret = -ENOMEM;
+- page = (char *)__get_free_page(GFP_USER);
++ page = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!page)
+ goto out;
+
+@@ -600,7 +600,7 @@
+ goto out;
+
+ copied = -ENOMEM;
+- page = (char *)__get_free_page(GFP_USER);
++ page = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!page)
+ goto out;
+
+@@ -633,7 +633,7 @@
+ }
+ #endif
+
+-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
++loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+ {
+ switch (orig) {
+ case 0:
+@@ -711,42 +711,6 @@
+ .write = oom_adjust_write,
+ };
+
+-#ifdef CONFIG_MMU
+-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct task_struct *task;
+- char buffer[PROC_NUMBUF], *end;
+- struct mm_struct *mm;
+-
+- memset(buffer, 0, sizeof(buffer));
+- if (count > sizeof(buffer) - 1)
+- count = sizeof(buffer) - 1;
+- if (copy_from_user(buffer, buf, count))
+- return -EFAULT;
+- if (!simple_strtol(buffer, &end, 0))
+- return -EINVAL;
+- if (*end == '\n')
+- end++;
+- task = get_proc_task(file->f_path.dentry->d_inode);
+- if (!task)
+- return -ESRCH;
+- mm = get_task_mm(task);
+- if (mm) {
+- clear_refs_smap(mm);
+- mmput(mm);
+- }
+- put_task_struct(task);
+- if (end - buffer == 0)
+- return -EIO;
+- return end - buffer;
+-}
+-
+-static struct file_operations proc_clear_refs_operations = {
+- .write = clear_refs_write,
+-};
+-#endif
+-
+ #ifdef CONFIG_AUDITSYSCALL
+ #define TMPBUFLEN 21
+ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
+@@ -786,7 +750,7 @@
+ /* No partial writes. */
+ return -EINVAL;
+ }
+- page = (char*)__get_free_page(GFP_USER);
++ page = (char*)__get_free_page(GFP_TEMPORARY);
+ if (!page)
+ return -ENOMEM;
+ length = -EFAULT;
+@@ -815,71 +779,6 @@
+ };
+ #endif
+
+-#ifdef CONFIG_SECCOMP
+-static ssize_t seccomp_read(struct file *file, char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
+- char __buf[20];
+- size_t len;
+-
+- if (!tsk)
+- return -ESRCH;
+- /* no need to print the trailing zero, so use only len */
+- len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
+- put_task_struct(tsk);
+-
+- return simple_read_from_buffer(buf, count, ppos, __buf, len);
+-}
+-
+-static ssize_t seccomp_write(struct file *file, const char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
+- char __buf[20], *end;
+- unsigned int seccomp_mode;
+- ssize_t result;
+-
+- result = -ESRCH;
+- if (!tsk)
+- goto out_no_task;
+-
+- /* can set it only once to be even more secure */
+- result = -EPERM;
+- if (unlikely(tsk->seccomp.mode))
+- goto out;
+-
+- result = -EFAULT;
+- memset(__buf, 0, sizeof(__buf));
+- count = min(count, sizeof(__buf) - 1);
+- if (copy_from_user(__buf, buf, count))
+- goto out;
+-
+- seccomp_mode = simple_strtoul(__buf, &end, 0);
+- if (*end == '\n')
+- end++;
+- result = -EINVAL;
+- if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
+- tsk->seccomp.mode = seccomp_mode;
+- set_tsk_thread_flag(tsk, TIF_SECCOMP);
+- } else
+- goto out;
+- result = -EIO;
+- if (unlikely(!(end - __buf)))
+- goto out;
+- result = end - __buf;
+-out:
+- put_task_struct(tsk);
+-out_no_task:
+- return result;
+-}
+-
+-static const struct file_operations proc_seccomp_operations = {
+- .read = seccomp_read,
+- .write = seccomp_write,
+-};
+-#endif /* CONFIG_SECCOMP */
+-
+ #ifdef CONFIG_FAULT_INJECTION
+ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+@@ -954,7 +853,8 @@
+ char __user *buffer, int buflen)
+ {
+ struct inode * inode;
+- char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
++ char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
++ char *path;
+ int len;
+
+ if (!tmp)
+@@ -1015,7 +915,7 @@
+ task_lock(task);
+ mm = task->mm;
+ if (mm)
+- dumpable = mm->dumpable;
++ dumpable = get_dumpable(mm);
+ task_unlock(task);
+ if(dumpable == 1)
+ return 1;
+@@ -1744,7 +1644,7 @@
+ goto out;
+
+ length = -ENOMEM;
+- page = (char*)__get_free_page(GFP_USER);
++ page = (char*)__get_free_page(GFP_TEMPORARY);
+ if (!page)
+ goto out;
+
+@@ -1804,6 +1704,91 @@
+
+ #endif
+
++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
++static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
++ struct mm_struct *mm;
++ char buffer[PROC_NUMBUF];
++ size_t len;
++ int ret;
++
++ if (!task)
++ return -ESRCH;
++
++ ret = 0;
++ mm = get_task_mm(task);
++ if (mm) {
++ len = snprintf(buffer, sizeof(buffer), "%08lx\n",
++ ((mm->flags & MMF_DUMP_FILTER_MASK) >>
++ MMF_DUMP_FILTER_SHIFT));
++ mmput(mm);
++ ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
++ }
++
++ put_task_struct(task);
++
++ return ret;
++}
++
++static ssize_t proc_coredump_filter_write(struct file *file,
++ const char __user *buf,
++ size_t count,
++ loff_t *ppos)
++{
++ struct task_struct *task;
++ struct mm_struct *mm;
++ char buffer[PROC_NUMBUF], *end;
++ unsigned int val;
++ int ret;
++ int i;
++ unsigned long mask;
++
++ ret = -EFAULT;
++ memset(buffer, 0, sizeof(buffer));
++ if (count > sizeof(buffer) - 1)
++ count = sizeof(buffer) - 1;
++ if (copy_from_user(buffer, buf, count))
++ goto out_no_task;
++
++ ret = -EINVAL;
++ val = (unsigned int)simple_strtoul(buffer, &end, 0);
++ if (*end == '\n')
++ end++;
++ if (end - buffer == 0)
++ goto out_no_task;
++
++ ret = -ESRCH;
++ task = get_proc_task(file->f_dentry->d_inode);
++ if (!task)
++ goto out_no_task;
++
++ ret = end - buffer;
++ mm = get_task_mm(task);
++ if (!mm)
++ goto out_no_mm;
++
++ for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
++ if (val & mask)
++ set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
++ else
++ clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
++ }
++
++ mmput(mm);
++ out_no_mm:
++ put_task_struct(task);
++ out_no_task:
++ return ret;
++}
++
++static const struct file_operations proc_coredump_filter_operations = {
++ .read = proc_coredump_filter_read,
++ .write = proc_coredump_filter_write,
++};
++#endif
++
+ /*
+ * /proc/self:
+ */
+@@ -1995,18 +1980,22 @@
+ REG("numa_maps", S_IRUGO, numa_maps),
+ #endif
+ REG("mem", S_IRUSR|S_IWUSR, mem),
+-#ifdef CONFIG_SECCOMP
+- REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
+-#endif
+ LNK("cwd", cwd),
+ LNK("root", root),
+ LNK("exe", exe),
+ REG("mounts", S_IRUGO, mounts),
+ REG("mountstats", S_IRUSR, mountstats),
+ #ifdef CONFIG_MMU
++#ifdef CONFIG_PROC_CLEAR_REFS
+ REG("clear_refs", S_IWUSR, clear_refs),
++#endif
++#ifdef CONFIG_PROC_SMAPS
+ REG("smaps", S_IRUGO, smaps),
+ #endif
++#ifdef CONFIG_PROC_PAGEMAP
++ REG("pagemap", S_IRUSR, pagemap),
++#endif
++#endif
+ #ifdef CONFIG_SECURITY
+ DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
+ #endif
+@@ -2016,7 +2005,7 @@
+ #ifdef CONFIG_SCHEDSTATS
+ INF("schedstat", S_IRUGO, pid_schedstat),
+ #endif
+-#ifdef CONFIG_CPUSETS
++#ifdef CONFIG_PROC_PID_CPUSET
+ REG("cpuset", S_IRUGO, cpuset),
+ #endif
+ INF("vinfo", S_IRUGO, pid_vx_info),
+@@ -2029,6 +2018,9 @@
+ #ifdef CONFIG_FAULT_INJECTION
+ REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+ #endif
++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
++ REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
++#endif
+ #ifdef CONFIG_TASK_IO_ACCOUNTING
+ INF("io", S_IRUGO, pid_io_accounting),
+ #endif
+@@ -2285,17 +2277,21 @@
+ REG("numa_maps", S_IRUGO, numa_maps),
+ #endif
+ REG("mem", S_IRUSR|S_IWUSR, mem),
+-#ifdef CONFIG_SECCOMP
+- REG("seccomp", S_IRUSR|S_IWUSR, seccomp),
+-#endif
+ LNK("cwd", cwd),
+ LNK("root", root),
+ LNK("exe", exe),
+ REG("mounts", S_IRUGO, mounts),
+ #ifdef CONFIG_MMU
++#ifdef CONFIG_PROC_CLEAR_REFS
+ REG("clear_refs", S_IWUSR, clear_refs),
++#endif
++#ifdef CONFIG_PROC_SMAPS
+ REG("smaps", S_IRUGO, smaps),
+ #endif
++#ifdef CONFIG_PROC_PAGEMAP
++ REG("pagemap", S_IRUSR, pagemap),
++#endif
++#endif
+ #ifdef CONFIG_SECURITY
+ DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
+ #endif
+@@ -2305,9 +2301,12 @@
+ #ifdef CONFIG_SCHEDSTATS
+ INF("schedstat", S_IRUGO, pid_schedstat),
+ #endif
+-#ifdef CONFIG_CPUSETS
++#ifdef CONFIG_PROC_PID_CPUSET
+ REG("cpuset", S_IRUGO, cpuset),
+ #endif
++#ifdef CONFIG_CONTAINERS
++ REG("container", S_IRUGO, container),
++#endif
+ INF("oom_score", S_IRUGO, oom_score),
+ REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust),
+ #ifdef CONFIG_AUDITSYSCALL
+diff -Nurb linux-2.6.22-570/fs/proc/generic.c linux-2.6.22-591/fs/proc/generic.c
+--- linux-2.6.22-570/fs/proc/generic.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/generic.c 2007-12-21 15:36:12.000000000 -0500
+@@ -74,7 +74,7 @@
+ nbytes = MAX_NON_LFS - pos;
+
+ dp = PDE(inode);
+- if (!(page = (char*) __get_free_page(GFP_KERNEL)))
++ if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
+ return -ENOMEM;
+
+ while ((nbytes > 0) && !eof) {
+diff -Nurb linux-2.6.22-570/fs/proc/internal.h linux-2.6.22-591/fs/proc/internal.h
+--- linux-2.6.22-570/fs/proc/internal.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/internal.h 2007-12-21 15:36:14.000000000 -0500
+@@ -17,6 +17,11 @@
+ #else
+ static inline void proc_sys_init(void) { }
+ #endif
++#ifdef CONFIG_NET
++extern int proc_net_init(void);
++#else
++static inline int proc_net_init(void) { return 0; }
++#endif
+
+ struct vmalloc_info {
+ unsigned long used;
+@@ -46,15 +51,13 @@
+ extern int proc_tgid_stat(struct task_struct *, char *);
+ extern int proc_pid_status(struct task_struct *, char *);
+ extern int proc_pid_statm(struct task_struct *, char *);
++extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
+
+ extern const struct file_operations proc_maps_operations;
+ extern const struct file_operations proc_numa_maps_operations;
+ extern const struct file_operations proc_smaps_operations;
+-
+-extern const struct file_operations proc_maps_operations;
+-extern const struct file_operations proc_numa_maps_operations;
+-extern const struct file_operations proc_smaps_operations;
+-
++extern const struct file_operations proc_clear_refs_operations;
++extern const struct file_operations proc_pagemap_operations;
+
+ void free_proc_entry(struct proc_dir_entry *de);
+
+diff -Nurb linux-2.6.22-570/fs/proc/proc_misc.c linux-2.6.22-591/fs/proc/proc_misc.c
+--- linux-2.6.22-570/fs/proc/proc_misc.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/proc_misc.c 2007-12-21 15:36:14.000000000 -0500
+@@ -122,6 +122,7 @@
+ cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+
+ do_posix_clock_monotonic_gettime(&uptime);
++ monotonic_to_bootbased(&uptime);
+ cputime_to_timespec(idletime, &idle);
+ if (vx_flags(VXF_VIRT_UPTIME, 0))
+ vx_vsi_uptime(&uptime, &idle);
+@@ -463,12 +464,14 @@
+ unsigned long jif;
+ cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+ u64 sum = 0;
++ struct timespec boottime;
+
+ user = nice = system = idle = iowait =
+ irq = softirq = steal = cputime64_zero;
+- jif = - wall_to_monotonic.tv_sec;
+- if (wall_to_monotonic.tv_nsec)
+- --jif;
++ getboottime(&boottime);
++ jif = boottime.tv_sec;
++ if (boottime.tv_nsec)
++ ++jif;
+
+ for_each_possible_cpu(i) {
+ int j;
+diff -Nurb linux-2.6.22-570/fs/proc/proc_net.c linux-2.6.22-591/fs/proc/proc_net.c
+--- linux-2.6.22-570/fs/proc/proc_net.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/proc/proc_net.c 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,154 @@
++/*
++ * linux/fs/proc/net.c
++ *
++ * Copyright (C) 2007
++ *
++ * Author: Eric Biederman <ebiederm@xmission.com>
++ *
++ * proc net directory handling functions
++ */
++
++#include <asm/uaccess.h>
++
++#include <linux/errno.h>
++#include <linux/time.h>
++#include <linux/proc_fs.h>
++#include <linux/stat.h>
++#include <linux/init.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/bitops.h>
++#include <linux/smp_lock.h>
++#include <linux/mount.h>
++#include <linux/nsproxy.h>
++#include <net/net_namespace.h>
++
++#include "internal.h"
++
++static struct proc_dir_entry *proc_net_shadow;
++
++static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
++ struct proc_dir_entry *de)
++{
++ struct dentry *shadow = NULL;
++ struct inode *inode;
++ if (!de)
++ goto out;
++ de_get(de);
++ inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de);
++ if (!inode)
++ goto out_de_put;
++ shadow = d_alloc_name(parent, de->name);
++ if (!shadow)
++ goto out_iput;
++ shadow->d_op = parent->d_op; /* proc_dentry_operations */
++ d_instantiate(shadow, inode);
++out:
++ return shadow;
++out_iput:
++ iput(inode);
++out_de_put:
++ de_put(de);
++ goto out;
++}
++
++static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd)
++{
++ struct net *net = current->nsproxy->net_ns;
++ struct dentry *shadow;
++ shadow = proc_net_shadow_dentry(parent, net->proc_net);
++ if (!shadow)
++ return ERR_PTR(-ENOENT);
++
++ dput(nd->dentry);
++ /* My dentry count is 1 and that should be enough as the
++ * shadow dentry is thrown away immediately.
++ */
++ nd->dentry = shadow;
++ return NULL;
++}
++
++static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct net *net = current->nsproxy->net_ns;
++ struct dentry *shadow;
++
++ shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net);
++ if (!shadow)
++ return ERR_PTR(-ENOENT);
++
++ dput(nd->dentry);
++ nd->dentry = shadow;
++
++ return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd);
++}
++
++static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr)
++{
++ struct net *net = current->nsproxy->net_ns;
++ struct dentry *shadow;
++ int ret;
++
++ shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net);
++ if (!shadow)
++ return -ENOENT;
++ ret = shadow->d_inode->i_op->setattr(shadow, iattr);
++ dput(shadow);
++ return ret;
++}
++
++static const struct file_operations proc_net_dir_operations = {
++ .read = generic_read_dir,
++};
++
++static struct inode_operations proc_net_dir_inode_operations = {
++ .follow_link = proc_net_follow_link,
++ .lookup = proc_net_lookup,
++ .setattr = proc_net_setattr,
++};
++
++
++static int proc_net_ns_init(struct net *net)
++{
++ struct proc_dir_entry *netd, *net_statd;
++
++ netd = proc_mkdir("net", &net->proc_net_root);
++ if (!netd)
++ return -EEXIST;
++
++ net_statd = proc_mkdir("stat", netd);
++ if (!net_statd) {
++ remove_proc_entry("net", &net->proc_net_root);
++ return -EEXIST;
++ }
++
++ netd->data = net;
++ net_statd->data = net;
++ net->proc_net_root.data = net;
++ net->proc_net = netd;
++ net->proc_net_stat = net_statd;
++
++ return 0;
++}
++
++static void proc_net_ns_exit(struct net *net)
++{
++ remove_proc_entry("stat", net->proc_net);
++ remove_proc_entry("net", &net->proc_net_root);
++
++}
++
++struct pernet_operations proc_net_ns_ops = {
++ .init = proc_net_ns_init,
++ .exit = proc_net_ns_exit,
++};
++
++int proc_net_init(void)
++{
++ proc_net_shadow = proc_mkdir("net", NULL);
++ proc_net_shadow->proc_iops = &proc_net_dir_inode_operations;
++ proc_net_shadow->proc_fops = &proc_net_dir_operations;
++
++ return register_pernet_subsys(&proc_net_ns_ops);
++}
+diff -Nurb linux-2.6.22-570/fs/proc/root.c linux-2.6.22-591/fs/proc/root.c
+--- linux-2.6.22-570/fs/proc/root.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/root.c 2007-12-21 15:36:14.000000000 -0500
+@@ -21,11 +21,11 @@
+
+ #include "internal.h"
+
+-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+ struct proc_dir_entry *proc_virtual;
+
+ extern void proc_vx_init(void);
+
++struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
+ static int proc_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+ {
+@@ -64,8 +64,8 @@
+ return;
+ }
+ proc_misc_init();
+- proc_net = proc_mkdir("net", NULL);
+- proc_net_stat = proc_mkdir("net/stat", NULL);
++
++ proc_net_init();
+
+ #ifdef CONFIG_SYSVIPC
+ proc_mkdir("sysvipc", NULL);
+@@ -163,7 +163,5 @@
+ EXPORT_SYMBOL(remove_proc_entry);
+ EXPORT_SYMBOL(proc_root);
+ EXPORT_SYMBOL(proc_root_fs);
+-EXPORT_SYMBOL(proc_net);
+-EXPORT_SYMBOL(proc_net_stat);
+ EXPORT_SYMBOL(proc_bus);
+ EXPORT_SYMBOL(proc_root_driver);
+diff -Nurb linux-2.6.22-570/fs/proc/task_mmu.c linux-2.6.22-591/fs/proc/task_mmu.c
+--- linux-2.6.22-570/fs/proc/task_mmu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/proc/task_mmu.c 2007-12-21 15:36:12.000000000 -0500
+@@ -5,6 +5,7 @@
+ #include <linux/highmem.h>
+ #include <linux/ptrace.h>
+ #include <linux/pagemap.h>
++#include <linux/ptrace.h>
+ #include <linux/mempolicy.h>
+
+ #include <asm/elf.h>
+@@ -114,24 +115,123 @@
+ seq_printf(m, "%*c", len, ' ');
+ }
+
+-struct mem_size_stats
++static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+ {
+- unsigned long resident;
+- unsigned long shared_clean;
+- unsigned long shared_dirty;
+- unsigned long private_clean;
+- unsigned long private_dirty;
+- unsigned long referenced;
+-};
++ if (vma && vma != priv->tail_vma) {
++ struct mm_struct *mm = vma->vm_mm;
++ up_read(&mm->mmap_sem);
++ mmput(mm);
++ }
++}
+
+-struct pmd_walker {
+- struct vm_area_struct *vma;
+- void *private;
+- void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
+- unsigned long, void *);
+-};
++static void *m_start(struct seq_file *m, loff_t *pos)
++{
++ struct proc_maps_private *priv = m->private;
++ unsigned long last_addr = m->version;
++ struct mm_struct *mm;
++ struct vm_area_struct *vma, *tail_vma = NULL;
++ loff_t l = *pos;
++
++ /* Clear the per syscall fields in priv */
++ priv->task = NULL;
++ priv->tail_vma = NULL;
++
++ /*
++ * We remember last_addr rather than next_addr to hit with
++ * mmap_cache most of the time. We have zero last_addr at
++ * the beginning and also after lseek. We will have -1 last_addr
++ * after the end of the vmas.
++ */
++
++ if (last_addr == -1UL)
++ return NULL;
++
++ priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
++ if (!priv->task)
++ return NULL;
++
++ mm = get_task_mm(priv->task);
++ if (!mm)
++ return NULL;
++
++ priv->tail_vma = tail_vma = get_gate_vma(priv->task);
++ down_read(&mm->mmap_sem);
++
++ /* Start with last addr hint */
++ if (last_addr && (vma = find_vma(mm, last_addr))) {
++ vma = vma->vm_next;
++ goto out;
++ }
++
++ /*
++ * Check the vma index is within the range and do
++ * sequential scan until m_index.
++ */
++ vma = NULL;
++ if ((unsigned long)l < mm->map_count) {
++ vma = mm->mmap;
++ while (l-- && vma)
++ vma = vma->vm_next;
++ goto out;
++ }
++
++ if (l != mm->map_count)
++ tail_vma = NULL; /* After gate vma */
++
++out:
++ if (vma)
++ return vma;
++
++ /* End of vmas has been reached */
++ m->version = (tail_vma != NULL)? 0: -1UL;
++ up_read(&mm->mmap_sem);
++ mmput(mm);
++ return tail_vma;
++}
++
++static void *m_next(struct seq_file *m, void *v, loff_t *pos)
++{
++ struct proc_maps_private *priv = m->private;
++ struct vm_area_struct *vma = v;
++ struct vm_area_struct *tail_vma = priv->tail_vma;
++
++ (*pos)++;
++ if (vma && (vma != tail_vma) && vma->vm_next)
++ return vma->vm_next;
++ vma_stop(priv, vma);
++ return (vma != tail_vma)? tail_vma: NULL;
++}
++
++static void m_stop(struct seq_file *m, void *v)
++{
++ struct proc_maps_private *priv = m->private;
++ struct vm_area_struct *vma = v;
++
++ vma_stop(priv, vma);
++ if (priv->task)
++ put_task_struct(priv->task);
++}
++
++static int do_maps_open(struct inode *inode, struct file *file,
++ struct seq_operations *ops)
++{
++ struct proc_maps_private *priv;
++ int ret = -ENOMEM;
++ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
++ if (priv) {
++ priv->pid = proc_pid(inode);
++ ret = seq_open(file, ops);
++ if (!ret) {
++ struct seq_file *m = file->private_data;
++ m->private = priv;
++ } else {
++ kfree(priv);
++ }
++ }
++ return ret;
++}
+
+-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
++static int show_map(struct seq_file *m, void *v)
+ {
+ struct proc_maps_private *priv = m->private;
+ struct task_struct *task = priv->task;
+@@ -191,38 +291,47 @@
+ }
+ seq_putc(m, '\n');
+
+- if (mss)
+- seq_printf(m,
+- "Size: %8lu kB\n"
+- "Rss: %8lu kB\n"
+- "Shared_Clean: %8lu kB\n"
+- "Shared_Dirty: %8lu kB\n"
+- "Private_Clean: %8lu kB\n"
+- "Private_Dirty: %8lu kB\n"
+- "Referenced: %8lu kB\n",
+- (vma->vm_end - vma->vm_start) >> 10,
+- mss->resident >> 10,
+- mss->shared_clean >> 10,
+- mss->shared_dirty >> 10,
+- mss->private_clean >> 10,
+- mss->private_dirty >> 10,
+- mss->referenced >> 10);
+-
+ if (m->count < m->size) /* vma is copied successfully */
+ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
+ return 0;
+ }
+
+-static int show_map(struct seq_file *m, void *v)
++static struct seq_operations proc_pid_maps_op = {
++ .start = m_start,
++ .next = m_next,
++ .stop = m_stop,
++ .show = show_map
++};
++
++static int maps_open(struct inode *inode, struct file *file)
+ {
+- return show_map_internal(m, v, NULL);
++ return do_maps_open(inode, file, &proc_pid_maps_op);
+ }
+
+-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+- unsigned long addr, unsigned long end,
++const struct file_operations proc_maps_operations = {
++ .open = maps_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release_private,
++};
++
++#ifdef CONFIG_PROC_SMAPS
++struct mem_size_stats
++{
++ struct vm_area_struct *vma;
++ unsigned long resident;
++ unsigned long shared_clean;
++ unsigned long shared_dirty;
++ unsigned long private_clean;
++ unsigned long private_dirty;
++ unsigned long referenced;
++};
++
++static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ void *private)
+ {
+ struct mem_size_stats *mss = private;
++ struct vm_area_struct *vma = mss->vma;
+ pte_t *pte, ptent;
+ spinlock_t *ptl;
+ struct page *page;
+@@ -256,12 +365,71 @@
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
++ return 0;
+ }
+
+-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+- unsigned long addr, unsigned long end,
+- void *private)
++static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
++
++static int show_smap(struct seq_file *m, void *v)
+ {
++ struct vm_area_struct *vma = v;
++ struct mem_size_stats mss;
++ int ret;
++
++ memset(&mss, 0, sizeof mss);
++ mss.vma = vma;
++ if (vma->vm_mm && !is_vm_hugetlb_page(vma))
++ walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
++ &smaps_walk, &mss);
++
++ ret = show_map(m, v);
++ if (ret)
++ return ret;
++
++ seq_printf(m,
++ "Size: %8lu kB\n"
++ "Rss: %8lu kB\n"
++ "Shared_Clean: %8lu kB\n"
++ "Shared_Dirty: %8lu kB\n"
++ "Private_Clean: %8lu kB\n"
++ "Private_Dirty: %8lu kB\n"
++ "Referenced: %8lu kB\n",
++ (vma->vm_end - vma->vm_start) >> 10,
++ mss.resident >> 10,
++ mss.shared_clean >> 10,
++ mss.shared_dirty >> 10,
++ mss.private_clean >> 10,
++ mss.private_dirty >> 10,
++ mss.referenced >> 10);
++
++ return ret;
++}
++
++static struct seq_operations proc_pid_smaps_op = {
++ .start = m_start,
++ .next = m_next,
++ .stop = m_stop,
++ .show = show_smap
++};
++
++static int smaps_open(struct inode *inode, struct file *file)
++{
++ return do_maps_open(inode, file, &proc_pid_smaps_op);
++}
++
++const struct file_operations proc_smaps_operations = {
++ .open = smaps_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release_private,
++};
++#endif
++
++#ifdef CONFIG_PROC_CLEAR_REFS
++static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
++ unsigned long end, void *private)
++{
++ struct vm_area_struct *vma = private;
+ pte_t *pte, ptent;
+ spinlock_t *ptl;
+ struct page *page;
+@@ -282,236 +450,52 @@
+ }
+ pte_unmap_unlock(pte - 1, ptl);
+ cond_resched();
++ return 0;
+ }
+
+-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
+- unsigned long addr, unsigned long end)
+-{
+- pmd_t *pmd;
+- unsigned long next;
+-
+- for (pmd = pmd_offset(pud, addr); addr != end;
+- pmd++, addr = next) {
+- next = pmd_addr_end(addr, end);
+- if (pmd_none_or_clear_bad(pmd))
+- continue;
+- walker->action(walker->vma, pmd, addr, next, walker->private);
+- }
+-}
+-
+-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
+- unsigned long addr, unsigned long end)
+-{
+- pud_t *pud;
+- unsigned long next;
+-
+- for (pud = pud_offset(pgd, addr); addr != end;
+- pud++, addr = next) {
+- next = pud_addr_end(addr, end);
+- if (pud_none_or_clear_bad(pud))
+- continue;
+- walk_pmd_range(walker, pud, addr, next);
+- }
+-}
+-
+-/*
+- * walk_page_range - walk the page tables of a VMA with a callback
+- * @vma - VMA to walk
+- * @action - callback invoked for every bottom-level (PTE) page table
+- * @private - private data passed to the callback function
+- *
+- * Recursively walk the page table for the memory area in a VMA, calling
+- * a callback for every bottom-level (PTE) page table.
+- */
+-static inline void walk_page_range(struct vm_area_struct *vma,
+- void (*action)(struct vm_area_struct *,
+- pmd_t *, unsigned long,
+- unsigned long, void *),
+- void *private)
+-{
+- unsigned long addr = vma->vm_start;
+- unsigned long end = vma->vm_end;
+- struct pmd_walker walker = {
+- .vma = vma,
+- .private = private,
+- .action = action,
+- };
+- pgd_t *pgd;
+- unsigned long next;
+-
+- for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
+- pgd++, addr = next) {
+- next = pgd_addr_end(addr, end);
+- if (pgd_none_or_clear_bad(pgd))
+- continue;
+- walk_pud_range(&walker, pgd, addr, next);
+- }
+-}
+-
+-static int show_smap(struct seq_file *m, void *v)
+-{
+- struct vm_area_struct *vma = v;
+- struct mem_size_stats mss;
++static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+- memset(&mss, 0, sizeof mss);
+- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+- walk_page_range(vma, smaps_pte_range, &mss);
+- return show_map_internal(m, v, &mss);
+-}
+-
+-void clear_refs_smap(struct mm_struct *mm)
++static ssize_t clear_refs_write(struct file *file, const char __user *buf,
++ size_t count, loff_t *ppos)
+ {
++ struct task_struct *task;
++ char buffer[13], *end;
++ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+
++ memset(buffer, 0, sizeof(buffer));
++ if (count > sizeof(buffer) - 1)
++ count = sizeof(buffer) - 1;
++ if (copy_from_user(buffer, buf, count))
++ return -EFAULT;
++ if (!simple_strtol(buffer, &end, 0))
++ return -EINVAL;
++ if (*end == '\n')
++ end++;
++ task = get_proc_task(file->f_path.dentry->d_inode);
++ if (!task)
++ return -ESRCH;
++ mm = get_task_mm(task);
++ if (mm) {
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+- walk_page_range(vma, clear_refs_pte_range, NULL);
++ if (!is_vm_hugetlb_page(vma))
++ walk_page_range(mm, vma->vm_start, vma->vm_end,
++ &clear_refs_walk, vma);
+ flush_tlb_mm(mm);
+ up_read(&mm->mmap_sem);
+-}
+-
+-static void *m_start(struct seq_file *m, loff_t *pos)
+-{
+- struct proc_maps_private *priv = m->private;
+- unsigned long last_addr = m->version;
+- struct mm_struct *mm;
+- struct vm_area_struct *vma, *tail_vma = NULL;
+- loff_t l = *pos;
+-
+- /* Clear the per syscall fields in priv */
+- priv->task = NULL;
+- priv->tail_vma = NULL;
+-
+- /*
+- * We remember last_addr rather than next_addr to hit with
+- * mmap_cache most of the time. We have zero last_addr at
+- * the beginning and also after lseek. We will have -1 last_addr
+- * after the end of the vmas.
+- */
+-
+- if (last_addr == -1UL)
+- return NULL;
+-
+- priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+- if (!priv->task)
+- return NULL;
+-
+- mm = get_task_mm(priv->task);
+- if (!mm)
+- return NULL;
+-
+- priv->tail_vma = tail_vma = get_gate_vma(priv->task);
+- down_read(&mm->mmap_sem);
+-
+- /* Start with last addr hint */
+- if (last_addr && (vma = find_vma(mm, last_addr))) {
+- vma = vma->vm_next;
+- goto out;
+- }
+-
+- /*
+- * Check the vma index is within the range and do
+- * sequential scan until m_index.
+- */
+- vma = NULL;
+- if ((unsigned long)l < mm->map_count) {
+- vma = mm->mmap;
+- while (l-- && vma)
+- vma = vma->vm_next;
+- goto out;
+- }
+-
+- if (l != mm->map_count)
+- tail_vma = NULL; /* After gate vma */
+-
+-out:
+- if (vma)
+- return vma;
+-
+- /* End of vmas has been reached */
+- m->version = (tail_vma != NULL)? 0: -1UL;
+- up_read(&mm->mmap_sem);
+- mmput(mm);
+- return tail_vma;
+-}
+-
+-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+-{
+- if (vma && vma != priv->tail_vma) {
+- struct mm_struct *mm = vma->vm_mm;
+- up_read(&mm->mmap_sem);
+ mmput(mm);
+ }
++ put_task_struct(task);
++ if (end - buffer == 0)
++ return -EIO;
++ return end - buffer;
+ }
+
+-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+-{
+- struct proc_maps_private *priv = m->private;
+- struct vm_area_struct *vma = v;
+- struct vm_area_struct *tail_vma = priv->tail_vma;
+-
+- (*pos)++;
+- if (vma && (vma != tail_vma) && vma->vm_next)
+- return vma->vm_next;
+- vma_stop(priv, vma);
+- return (vma != tail_vma)? tail_vma: NULL;
+-}
+-
+-static void m_stop(struct seq_file *m, void *v)
+-{
+- struct proc_maps_private *priv = m->private;
+- struct vm_area_struct *vma = v;
+-
+- vma_stop(priv, vma);
+- if (priv->task)
+- put_task_struct(priv->task);
+-}
+-
+-static struct seq_operations proc_pid_maps_op = {
+- .start = m_start,
+- .next = m_next,
+- .stop = m_stop,
+- .show = show_map
+-};
+-
+-static struct seq_operations proc_pid_smaps_op = {
+- .start = m_start,
+- .next = m_next,
+- .stop = m_stop,
+- .show = show_smap
+-};
+-
+-static int do_maps_open(struct inode *inode, struct file *file,
+- struct seq_operations *ops)
+-{
+- struct proc_maps_private *priv;
+- int ret = -ENOMEM;
+- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+- if (priv) {
+- priv->pid = proc_pid(inode);
+- ret = seq_open(file, ops);
+- if (!ret) {
+- struct seq_file *m = file->private_data;
+- m->private = priv;
+- } else {
+- kfree(priv);
+- }
+- }
+- return ret;
+-}
+-
+-static int maps_open(struct inode *inode, struct file *file)
+-{
+- return do_maps_open(inode, file, &proc_pid_maps_op);
+-}
+-
+-const struct file_operations proc_maps_operations = {
+- .open = maps_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = seq_release_private,
++const struct file_operations proc_clear_refs_operations = {
++ .write = clear_refs_write,
+ };
++#endif
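
A usage sketch for the interface above: clear_refs_write() accepts any integer that simple_strtol() parses as non-zero, so resetting the referenced bits of a process comes down to a single write. A minimal userspace sketch (hypothetical helper name, error handling trimmed):

	#include <stdio.h>

	/* clear the Referenced bits of every page mapped by `pid` */
	static int clear_refs(int pid)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path), "/proc/%d/clear_refs", pid);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fputs("1\n", f);	/* any non-zero integer works */
		return fclose(f);
	}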
+
+ #ifdef CONFIG_NUMA
+ extern int show_numa_map(struct seq_file *m, void *v);
+@@ -547,14 +531,211 @@
+ };
+ #endif
+
+-static int smaps_open(struct inode *inode, struct file *file)
++#ifdef CONFIG_PROC_PAGEMAP
++struct pagemapread {
++ struct mm_struct *mm;
++ unsigned long next;
++ unsigned long *buf;
++ pte_t *ptebuf;
++ unsigned long pos;
++ size_t count;
++ int index;
++ char __user *out;
++};
++
++static int flush_pagemap(struct pagemapread *pm)
+ {
+- return do_maps_open(inode, file, &proc_pid_smaps_op);
++ int n = min(pm->count, pm->index * sizeof(unsigned long));
++ if (copy_to_user(pm->out, pm->buf, n))
++ return -EFAULT;
++ pm->out += n;
++ pm->pos += n;
++ pm->count -= n;
++ pm->index = 0;
++ cond_resched();
++ return 0;
+ }
+
+-const struct file_operations proc_smaps_operations = {
+- .open = smaps_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = seq_release_private,
++static int add_to_pagemap(unsigned long addr, unsigned long pfn,
++ struct pagemapread *pm)
++{
++ pm->buf[pm->index++] = pfn;
++ pm->next = addr + PAGE_SIZE;
++ if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
++ pm->index * sizeof(unsigned long) >= pm->count)
++ return flush_pagemap(pm);
++ return 0;
++}
++
++static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
++ void *private)
++{
++ struct pagemapread *pm = private;
++ pte_t *pte;
++ int err;
++
++ pte = pte_offset_map(pmd, addr);
++
++#ifdef CONFIG_HIGHPTE
++	/* copy the PTE table into a temporary buffer and unmap it */
++ memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte);
++ pte_unmap(pte);
++ pte = pm->ptebuf;
++#endif
++
++ for (; addr != end; pte++, addr += PAGE_SIZE) {
++ if (addr < pm->next)
++ continue;
++ if (!pte_present(*pte))
++ err = add_to_pagemap(addr, -1, pm);
++ else
++ err = add_to_pagemap(addr, pte_pfn(*pte), pm);
++ if (err)
++ return err;
++ }
++
++#ifndef CONFIG_HIGHPTE
++ pte_unmap(pte - 1);
++#endif
++
++ return 0;
++}
++
++static int pagemap_fill(struct pagemapread *pm, unsigned long end)
++{
++ int ret;
++
++ while (pm->next != end) {
++ ret = add_to_pagemap(pm->next, -1UL, pm);
++ if (ret)
++ return ret;
++ }
++ return 0;
++}
++
++static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
++
++/*
++ * /proc/pid/pagemap - an array mapping virtual pages to pfns
++ *
++ * For each page in the address space, this file contains one long
++ * representing the corresponding physical page frame number (PFN) or
++ * -1 if the page isn't present. This allows determining precisely
++ * which pages are mapped and comparing mapped pages between
++ * processes.
++ *
++ * Efficient users of this interface will use /proc/pid/maps to
++ * determine which areas of memory are actually mapped and llseek to
++ * skip over unmapped regions.
++ *
++ * The first 4 bytes of this file form a simple header:
++ *
++ * first byte: 0 for big endian, 1 for little
++ * second byte: page shift (eg 12 for 4096 byte pages)
++ * third byte: entry size in bytes (currently either 4 or 8)
++ * fourth byte: header size
++ */
++static ssize_t pagemap_read(struct file *file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
++ unsigned long src = *ppos;
++ unsigned long *page;
++ unsigned long addr, end, vend, svpfn, evpfn;
++ struct mm_struct *mm;
++ struct vm_area_struct *vma;
++ struct pagemapread pm;
++ int ret = -ESRCH;
++
++ if (!task)
++ goto out_no_task;
++
++ ret = -EACCES;
++ if (!ptrace_may_attach(task))
++ goto out;
++
++ ret = -EIO;
++ svpfn = src / sizeof(unsigned long) - 1;
++ addr = PAGE_SIZE * svpfn;
++ if ((svpfn + 1) * sizeof(unsigned long) != src)
++ goto out;
++ evpfn = min((src + count) / sizeof(unsigned long),
++ ((~0UL) >> PAGE_SHIFT) + 1);
++ count = (evpfn - svpfn) * sizeof(unsigned long);
++ end = PAGE_SIZE * evpfn;
++
++ ret = -ENOMEM;
++ page = kzalloc(PAGE_SIZE, GFP_USER);
++ if (!page)
++ goto out;
++
++#ifdef CONFIG_HIGHPTE
++ pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
++ if (!pm.ptebuf)
++ goto out_free;
++#endif
++
++ ret = 0;
++ mm = get_task_mm(task);
++ if (!mm)
++ goto out_freepte;
++
++ pm.mm = mm;
++ pm.next = addr;
++ pm.buf = page;
++ pm.pos = src;
++ pm.count = count;
++ pm.index = 0;
++ pm.out = buf;
++
++ if (svpfn == -1) {
++ add_to_pagemap(pm.next, 0, &pm);
++ ((char *)page)[0] = (ntohl(1) != 1);
++ ((char *)page)[1] = PAGE_SHIFT;
++ ((char *)page)[2] = sizeof(unsigned long);
++ ((char *)page)[3] = sizeof(unsigned long);
++ }
++
++ down_read(&mm->mmap_sem);
++ vma = find_vma(mm, pm.next);
++ while (pm.count > 0 && vma) {
++		if (!ptrace_may_attach(task)) {
++			/* don't leak mmap_sem on the way out */
++			up_read(&mm->mmap_sem);
++			ret = -EIO;
++			goto out_mm;
++		}
++ vend = min(vma->vm_start - 1, end - 1) + 1;
++ ret = pagemap_fill(&pm, vend);
++ if (ret || !pm.count)
++ break;
++ vend = min(vma->vm_end - 1, end - 1) + 1;
++ ret = walk_page_range(mm, vma->vm_start, vend,
++ &pagemap_walk, &pm);
++ vma = vma->vm_next;
++ }
++ up_read(&mm->mmap_sem);
++
++ ret = pagemap_fill(&pm, end);
++
++ *ppos = pm.pos;
++ if (!ret)
++ ret = pm.pos - src;
++
++out_mm:
++ mmput(mm);
++out_freepte:
++#ifdef CONFIG_HIGHPTE
++ kfree(pm.ptebuf);
++out_free:
++#endif
++ kfree(page);
++out:
++ put_task_struct(task);
++out_no_task:
++ return ret;
++}
++
++const struct file_operations proc_pagemap_operations = {
++ .llseek = mem_lseek, /* borrow this */
++ .read = pagemap_read,
+ };
++#endif
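
A consumer-side sketch of the format documented above. Slot 0 of the file is one entry-sized record whose first four bytes carry the header, and the entry for virtual page N lives at offset (N + 1) * entry_size; reads must be entry-aligned, per the -EIO check in pagemap_read(). The sketch assumes the reader's sizeof(unsigned long) matches the kernel's entry size, as it does when both share an ABI:

	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		unsigned long first, entry, vpage = 0;	/* query virtual page 0 */
		unsigned char hdr[4];
		int fd = open("/proc/self/pagemap", O_RDONLY);

		/* slot 0 is one full entry; its first four bytes are the header */
		if (fd < 0 || read(fd, &first, sizeof(first)) != sizeof(first))
			return 1;
		memcpy(hdr, &first, 4);
		printf("%s endian, page shift %u, entry size %u, header %u\n",
		       hdr[0] ? "little" : "big", hdr[1], hdr[2], hdr[3]);

		/* entry for virtual page N sits at offset (N + 1) * entry size */
		if (pread(fd, &entry, sizeof(entry),
			  (vpage + 1) * sizeof(entry)) == sizeof(entry))
			printf("vpage %lu -> pfn %#lx\n", vpage, entry);
			/* an all-ones entry means the page is not present */

		close(fd);
		return 0;
	}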
+diff -Nurb linux-2.6.22-570/fs/ramfs/inode.c linux-2.6.22-591/fs/ramfs/inode.c
+--- linux-2.6.22-570/fs/ramfs/inode.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ramfs/inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -60,6 +60,7 @@
+ inode->i_blocks = 0;
+ inode->i_mapping->a_ops = &ramfs_aops;
+ inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
++ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ switch (mode & S_IFMT) {
+ default:
+diff -Nurb linux-2.6.22-570/fs/revoke.c linux-2.6.22-591/fs/revoke.c
+--- linux-2.6.22-570/fs/revoke.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/revoke.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,777 @@
++/*
++ * fs/revoke.c - Invalidate all current open file descriptors of an inode.
++ *
++ * Copyright (C) 2006-2007 Pekka Enberg
++ *
++ * This file is released under the GPLv2.
++ */
++
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/magic.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/revoked_fs_i.h>
++#include <linux/syscalls.h>
++
++/**
++ * fileset - an array of file pointers.
++ * @files: the array of file pointers
++ * @nr: number of elements in the array
++ * @end: index of the next unused file pointer
++ */
++struct fileset {
++ struct file **files;
++ unsigned long nr;
++ unsigned long end;
++};
++
++/**
++ * revoke_details - details of the revoke operation
++ * @inode: invalidate open file descriptors of this inode
++ * @fset: set of files that point to a revoked inode
++ * @restore_start: index of the first file pointer that is still in use
++ * by a file descriptor but whose real file has not yet
++ * been revoked
++ */
++struct revoke_details {
++ struct fileset *fset;
++ unsigned long restore_start;
++};
++
++static struct kmem_cache *revokefs_inode_cache;
++
++static inline bool fset_is_full(struct fileset *set)
++{
++ return set->nr == set->end;
++}
++
++static inline struct file *fset_get_filp(struct fileset *set)
++{
++ return set->files[set->end++];
++}
++
++static struct fileset *alloc_fset(unsigned long size)
++{
++ struct fileset *fset;
++
++ fset = kzalloc(sizeof *fset, GFP_KERNEL);
++ if (!fset)
++ return NULL;
++
++ fset->files = kcalloc(size, sizeof(struct file *), GFP_KERNEL);
++ if (!fset->files) {
++ kfree(fset);
++ return NULL;
++ }
++ fset->nr = size;
++ return fset;
++}
++
++static void free_fset(struct fileset *fset)
++{
++ int i;
++
++ for (i = fset->end; i < fset->nr; i++)
++ fput(fset->files[i]);
++
++ kfree(fset->files);
++ kfree(fset);
++}
++
++/*
++ * Revoked file descriptors point to inodes in the revokefs filesystem.
++ */
++static struct vfsmount *revokefs_mnt;
++
++static struct file *get_revoked_file(void)
++{
++ struct dentry *dentry;
++ struct inode *inode;
++ struct file *filp;
++ struct qstr name;
++
++ filp = get_empty_filp();
++ if (!filp)
++ goto err;
++
++ inode = new_inode(revokefs_mnt->mnt_sb);
++ if (!inode)
++ goto err_inode;
++
++ name.name = "revoked_file";
++ name.len = strlen(name.name);
++ dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name);
++ if (!dentry)
++ goto err_dentry;
++
++ d_instantiate(dentry, inode);
++
++ filp->f_mapping = inode->i_mapping;
++ filp->f_dentry = dget(dentry);
++ filp->f_vfsmnt = mntget(revokefs_mnt);
++ filp->f_op = fops_get(inode->i_fop);
++ filp->f_pos = 0;
++
++ return filp;
++
++ err_dentry:
++ iput(inode);
++ err_inode:
++ fput(filp);
++ err:
++ return NULL;
++}
++
++static inline bool can_revoke_file(struct file *file, struct inode *inode,
++ struct file *to_exclude)
++{
++ if (!file || file == to_exclude)
++ return false;
++
++ return file->f_dentry->d_inode == inode;
++}
++
++/*
++ * LOCKING: read_lock(&tasklist_lock)
++ */
++static int revoke_fds(struct task_struct *owner,
++ struct inode *inode,
++ struct file *to_exclude, struct fileset *fset)
++{
++ struct files_struct *files;
++ struct fdtable *fdt;
++ unsigned int fd;
++ int err = 0;
++
++ files = get_files_struct(owner);
++ if (!files)
++ goto out;
++
++ spin_lock(&files->file_lock);
++ fdt = files_fdtable(files);
++
++ for (fd = 0; fd < fdt->max_fds; fd++) {
++ struct revokefs_inode_info *info;
++ struct file *filp, *new_filp;
++ struct inode *new_inode;
++
++ filp = fcheck_files(files, fd);
++ if (!can_revoke_file(filp, inode, to_exclude))
++ continue;
++
++ if (!filp->f_op->revoke) {
++ err = -EOPNOTSUPP;
++ goto failed;
++ }
++
++ if (fset_is_full(fset)) {
++ err = -ENOMEM;
++ goto failed;
++ }
++
++ new_filp = fset_get_filp(fset);
++
++ /*
++ * Replace original struct file pointer with a pointer to
++ * a 'revoked file.' After this point, we don't need to worry
++ * about racing with sys_close or sys_dup.
++ */
++ rcu_assign_pointer(fdt->fd[fd], new_filp);
++
++ /*
++ * Hold on to task until we can take down the file and its
++ * mmap.
++ */
++ get_task_struct(owner);
++
++ new_inode = new_filp->f_dentry->d_inode;
++ make_revoked_inode(new_inode, inode->i_mode & S_IFMT);
++
++ info = revokefs_i(new_inode);
++ info->fd = fd;
++ info->file = filp;
++ info->owner = owner;
++ }
++ failed:
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++ out:
++ return err;
++}
++
++static inline bool can_revoke_vma(struct vm_area_struct *vma,
++ struct inode *inode, struct file *to_exclude)
++{
++ struct file *file = vma->vm_file;
++
++ if (vma->vm_flags & VM_REVOKED)
++ return false;
++
++ if (!file || file == to_exclude)
++ return false;
++
++ return file->f_path.dentry->d_inode == inode;
++}
++
++static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode,
++ struct file *to_exclude)
++{
++ struct mm_struct *mm = tsk->mm;
++ struct vm_area_struct *vma;
++ int err = 0;
++
++ down_read(&mm->mmap_sem);
++ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
++ int ret;
++
++ if (vma->vm_flags & VM_SHARED)
++ continue;
++
++ if (!can_revoke_vma(vma, inode, to_exclude))
++ continue;
++
++ ret = get_user_pages(tsk, tsk->mm, vma->vm_start,
++ vma_pages(vma), 1, 1, NULL, NULL);
++ if (ret < 0) {
++ err = ret;
++ break;
++ }
++
++ unlink_file_vma(vma);
++ fput(vma->vm_file);
++ vma->vm_file = NULL;
++ }
++ up_read(&mm->mmap_sem);
++ return err;
++}
++
++static int revoke_break_cow(struct fileset *fset, struct inode *inode,
++ struct file *to_exclude)
++{
++ unsigned long i;
++ int err = 0;
++
++ for (i = 0; i < fset->end; i++) {
++ struct revokefs_inode_info *info;
++ struct file *this;
++
++ this = fset->files[i];
++ info = revokefs_i(this->f_dentry->d_inode);
++
++ err = __revoke_break_cow(info->owner, inode, to_exclude);
++ if (err)
++ break;
++ }
++ return err;
++}
++
++/*
++ * LOCKING: down_write(&mm->mmap_sem)
++ * -> spin_lock(&mapping->i_mmap_lock)
++ */
++static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details)
++{
++ unsigned long restart_addr, start_addr, end_addr;
++ int need_break;
++
++ start_addr = vma->vm_start;
++ end_addr = vma->vm_end;
++
++ again:
++ restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr,
++ details);
++
++ need_break = need_resched() || need_lockbreak(details->i_mmap_lock);
++ if (need_break)
++ goto out_need_break;
++
++ if (restart_addr < end_addr) {
++ start_addr = restart_addr;
++ goto again;
++ }
++ vma->vm_flags |= VM_REVOKED;
++ return 0;
++
++ out_need_break:
++ spin_unlock(details->i_mmap_lock);
++ cond_resched();
++ spin_lock(details->i_mmap_lock);
++ return -EINTR;
++}
++
++/*
++ * LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static int revoke_mm(struct mm_struct *mm, struct address_space *mapping,
++ struct file *to_exclude)
++{
++ struct vm_area_struct *vma;
++ struct zap_details details;
++ int err = 0;
++
++ details.i_mmap_lock = &mapping->i_mmap_lock;
++
++ /*
++ * If ->mmap_sem is under contention, we continue scanning other
++ * mms and try again later.
++ */
++ if (!down_write_trylock(&mm->mmap_sem)) {
++ err = -EAGAIN;
++ goto out;
++ }
++ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
++ if (!(vma->vm_flags & VM_SHARED))
++ continue;
++
++ if (!can_revoke_vma(vma, mapping->host, to_exclude))
++ continue;
++
++ err = revoke_vma(vma, &details);
++ if (err)
++ break;
++
++ __unlink_file_vma(vma);
++ fput(vma->vm_file);
++ vma->vm_file = NULL;
++ }
++ up_write(&mm->mmap_sem);
++ out:
++ return err;
++}
++
++/*
++ * LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static void revoke_mapping_tree(struct address_space *mapping,
++ struct file *to_exclude)
++{
++ struct vm_area_struct *vma;
++ struct prio_tree_iter iter;
++ int try_again = 0;
++
++ restart:
++ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
++ int err;
++
++ if (!(vma->vm_flags & VM_SHARED))
++ continue;
++
++ if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
++ continue;
++
++ err = revoke_mm(vma->vm_mm, mapping, to_exclude);
++ if (err == -EAGAIN)
++ try_again = 1;
++
++ goto restart;
++ }
++ if (try_again) {
++ cond_resched();
++ goto restart;
++ }
++}
++
++/*
++ * LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static void revoke_mapping_list(struct address_space *mapping,
++ struct file *to_exclude)
++{
++ struct vm_area_struct *vma;
++ int try_again = 0;
++
++ restart:
++ list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
++ int err;
++
++ if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
++ continue;
++
++ err = revoke_mm(vma->vm_mm, mapping, to_exclude);
++ if (err == -EAGAIN) {
++ try_again = 1;
++ continue;
++ }
++ if (err == -EINTR)
++ goto restart;
++ }
++ if (try_again) {
++ cond_resched();
++ goto restart;
++ }
++}
++
++static void revoke_mapping(struct address_space *mapping, struct file *to_exclude)
++{
++ spin_lock(&mapping->i_mmap_lock);
++ if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
++ revoke_mapping_tree(mapping, to_exclude);
++ if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
++ revoke_mapping_list(mapping, to_exclude);
++ spin_unlock(&mapping->i_mmap_lock);
++}
++
++static void restore_file(struct revokefs_inode_info *info)
++{
++ struct files_struct *files;
++
++ files = get_files_struct(info->owner);
++ if (files) {
++ struct fdtable *fdt;
++ struct file *filp;
++
++ spin_lock(&files->file_lock);
++ fdt = files_fdtable(files);
++
++ filp = fdt->fd[info->fd];
++ if (filp)
++ fput(filp);
++
++ rcu_assign_pointer(fdt->fd[info->fd], info->file);
++ FD_SET(info->fd, fdt->close_on_exec);
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++ }
++ put_task_struct(info->owner);
++ info->owner = NULL; /* To avoid double-restore. */
++}
++
++static void restore_files(struct revoke_details *details)
++{
++ unsigned long i;
++
++ for (i = details->restore_start; i < details->fset->end; i++) {
++ struct revokefs_inode_info *info;
++ struct file *filp;
++
++ filp = details->fset->files[i];
++ info = revokefs_i(filp->f_dentry->d_inode);
++
++ restore_file(info);
++ }
++}
++
++static int revoke_files(struct revoke_details *details)
++{
++ unsigned long i;
++ int err = 0;
++
++ for (i = 0; i < details->fset->end; i++) {
++ struct revokefs_inode_info *info;
++ struct file *this, *filp;
++ struct inode *inode;
++
++ this = details->fset->files[i];
++ inode = this->f_dentry->d_inode;
++ info = revokefs_i(inode);
++
++ /*
++		 * Increase the count before attempting to close the file, as
++		 * a partially closed file can no longer be restored.
++ */
++ details->restore_start++;
++ filp = info->file;
++ err = filp->f_op->revoke(filp, inode->i_mapping);
++ put_task_struct(info->owner);
++ info->owner = NULL; /* To avoid restoring closed file. */
++ if (err)
++ goto out;
++ }
++ out:
++ return err;
++}
++
++/*
++ * Returns an upper bound on the number of file descriptors pointing to an inode.
++ *
++ * LOCKING: read_lock(&tasklist_lock)
++ */
++static unsigned long inode_fds(struct inode *inode, struct file *to_exclude)
++{
++ struct task_struct *g, *p;
++ unsigned long nr_fds = 0;
++
++ do_each_thread(g, p) {
++ struct files_struct *files;
++ struct fdtable *fdt;
++ unsigned int fd;
++
++ files = get_files_struct(p);
++ if (!files)
++ continue;
++
++ spin_lock(&files->file_lock);
++ fdt = files_fdtable(files);
++ for (fd = 0; fd < fdt->max_fds; fd++) {
++ struct file *file;
++
++ file = fcheck_files(files, fd);
++ if (can_revoke_file(file, inode, to_exclude)) {
++ nr_fds += fdt->max_fds;
++ break;
++ }
++ }
++ spin_unlock(&files->file_lock);
++ put_files_struct(files);
++ }
++ while_each_thread(g, p);
++ return nr_fds;
++}
++
++static struct fileset *__alloc_revoke_fset(unsigned long size)
++{
++ struct fileset *fset;
++ int i;
++
++ fset = alloc_fset(size);
++ if (!fset)
++ return NULL;
++
++ for (i = 0; i < fset->nr; i++) {
++ struct file *filp;
++
++ filp = get_revoked_file();
++ if (!filp)
++ goto err;
++
++ fset->files[i] = filp;
++ }
++ return fset;
++ err:
++ free_fset(fset);
++ return NULL;
++}
++
++static struct fileset *alloc_revoke_fset(struct inode *inode, struct file *to_exclude)
++{
++ unsigned long nr_fds;
++
++ read_lock(&tasklist_lock);
++ nr_fds = inode_fds(inode, to_exclude);
++ read_unlock(&tasklist_lock);
++
++ return __alloc_revoke_fset(nr_fds);
++}
++
++static int do_revoke(struct inode *inode, struct file *to_exclude)
++{
++ struct revoke_details details;
++ struct fileset *fset = NULL;
++ struct task_struct *g, *p;
++ int err = 0;
++
++ if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
++ err = -EPERM;
++ goto out;
++ }
++
++ retry:
++ if (signal_pending(current)) {
++ err = -ERESTARTSYS;
++ goto out;
++ }
++
++ /*
++ * Pre-allocate memory because the first pass is done under
++ * tasklist_lock.
++ */
++ fset = alloc_revoke_fset(inode, to_exclude);
++ if (!fset) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ read_lock(&tasklist_lock);
++
++ /*
++ * If someone forked while we were allocating memory, try again.
++ */
++ if (inode_fds(inode, to_exclude) > fset->nr) {
++ read_unlock(&tasklist_lock);
++ free_fset(fset);
++ goto retry;
++ }
++
++ details.fset = fset;
++ details.restore_start = 0;
++
++ /*
++ * First revoke the descriptors. After we are done, no one can start
++ * new operations on them.
++ */
++ do_each_thread(g, p) {
++ err = revoke_fds(p, inode, to_exclude, fset);
++ if (err)
++ goto exit_loop;
++ }
++ while_each_thread(g, p);
++ exit_loop:
++ read_unlock(&tasklist_lock);
++
++ if (err)
++ goto out_restore;
++
++ /*
++ * Take down shared memory mappings.
++ */
++ revoke_mapping(inode->i_mapping, to_exclude);
++
++ /*
++ * Break COW for private mappings.
++ */
++ err = revoke_break_cow(fset, inode, to_exclude);
++ if (err)
++ goto out_restore;
++
++ /*
++ * Now, revoke the files for good.
++ */
++ err = revoke_files(&details);
++ if (err)
++ goto out_restore;
++
++ out_free_table:
++ free_fset(fset);
++ out:
++ return err;
++
++ out_restore:
++ restore_files(&details);
++ goto out_free_table;
++}
++
++asmlinkage long sys_revokeat(int dfd, const char __user * filename)
++{
++ struct nameidata nd;
++ int err;
++
++ err = __user_walk_fd(dfd, filename, 0, &nd);
++ if (!err) {
++ err = do_revoke(nd.dentry->d_inode, NULL);
++ path_release(&nd);
++ }
++ return err;
++}
++
++asmlinkage long sys_frevoke(unsigned int fd)
++{
++ struct file *file = fget(fd);
++ int err = -EBADF;
++
++ if (file) {
++ err = do_revoke(file->f_dentry->d_inode, file);
++ fput(file);
++ }
++ return err;
++}
++
++int generic_file_revoke(struct file *file, struct address_space *new_mapping)
++{
++ struct address_space *mapping = file->f_mapping;
++ int err;
++
++ /*
++ * Flush pending writes.
++ */
++ err = do_fsync(file, 1);
++ if (err)
++ goto out;
++
++ file->f_mapping = new_mapping;
++
++ /*
++ * Make pending reads fail.
++ */
++ err = invalidate_inode_pages2(mapping);
++
++ out:
++ return err;
++}
++EXPORT_SYMBOL(generic_file_revoke);
++
++/*
++ * Filesystem for revoked files.
++ */
++
++static struct inode *revokefs_alloc_inode(struct super_block *sb)
++{
++ struct revokefs_inode_info *info;
++
++ info = kmem_cache_alloc(revokefs_inode_cache, GFP_KERNEL);
++ if (!info)
++ return NULL;
++
++ return &info->vfs_inode;
++}
++
++static void revokefs_destroy_inode(struct inode *inode)
++{
++ kmem_cache_free(revokefs_inode_cache, revokefs_i(inode));
++}
++
++static struct super_operations revokefs_super_ops = {
++ .alloc_inode = revokefs_alloc_inode,
++ .destroy_inode = revokefs_destroy_inode,
++ .drop_inode = generic_delete_inode,
++};
++
++static int revokefs_get_sb(struct file_system_type *fs_type,
++ int flags, const char *dev_name, void *data,
++ struct vfsmount *mnt)
++{
++ return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops,
++ REVOKEFS_MAGIC, mnt);
++}
++
++static struct file_system_type revokefs_fs_type = {
++ .name = "revokefs",
++ .get_sb = revokefs_get_sb,
++ .kill_sb = kill_anon_super
++};
++
++static void revokefs_init_inode(void *obj, struct kmem_cache *cache,
++ unsigned long flags)
++{
++ struct revokefs_inode_info *info = obj;
++
++ info->owner = NULL;
++ inode_init_once(&info->vfs_inode);
++}
++
++static int __init revokefs_init(void)
++{
++ int err = -ENOMEM;
++
++ revokefs_inode_cache =
++ kmem_cache_create("revokefs_inode_cache",
++ sizeof(struct revokefs_inode_info),
++ 0,
++ (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
++ SLAB_MEM_SPREAD), revokefs_init_inode, NULL);
++ if (!revokefs_inode_cache)
++ goto out;
++
++ err = register_filesystem(&revokefs_fs_type);
++ if (err)
++ goto err_register;
++
++ revokefs_mnt = kern_mount(&revokefs_fs_type);
++ if (IS_ERR(revokefs_mnt)) {
++ err = PTR_ERR(revokefs_mnt);
++ goto err_mnt;
++ }
++ out:
++ return err;
++ err_mnt:
++ unregister_filesystem(&revokefs_fs_type);
++ err_register:
++ kmem_cache_destroy(revokefs_inode_cache);
++ return err;
++}
++
++late_initcall(revokefs_init);
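
Userspace exercises this through the two new system calls. The syscall numbers are assigned elsewhere in the patchset (not in this file), so the sketch below hard-codes a hypothetical number purely for illustration. Note that sys_frevoke() passes the caller's own struct file as to_exclude, so the calling descriptor survives while every other descriptor and mapping of the inode is taken down:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef __NR_frevoke
	#define __NR_frevoke 325	/* hypothetical; see the arch syscall table */
	#endif

	int main(int argc, char *argv[])
	{
		int fd = open(argc > 1 ? argv[1] : "/tmp/shared", O_RDWR);

		if (fd < 0)
			return 1;
		/* revoke every other descriptor and mapping of this inode;
		 * our own fd is excluded and keeps working */
		if (syscall(__NR_frevoke, fd))
			perror("frevoke");
		close(fd);
		return 0;
	}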
+diff -Nurb linux-2.6.22-570/fs/revoked_inode.c linux-2.6.22-591/fs/revoked_inode.c
+--- linux-2.6.22-570/fs/revoked_inode.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/revoked_inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,417 @@
++/*
++ * fs/revoked_inode.c
++ *
++ * Copyright (C) 2007 Pekka Enberg
++ *
++ * Provide stub functions for revoked inodes. Based on fs/bad_inode.c which is
++ *
++ * Copyright (C) 1997 Stephen Tweedie
++ *
++ * This file is released under the GPLv2.
++ */
++
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/stat.h>
++#include <linux/time.h>
++#include <linux/smp_lock.h>
++#include <linux/namei.h>
++#include <linux/poll.h>
++#include <linux/revoked_fs_i.h>
++
++static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_read(struct file *filp, char __user * buf,
++ size_t size, loff_t * ppos)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_special_file_read(struct file *filp, char __user * buf,
++ size_t size, loff_t * ppos)
++{
++ return 0;
++}
++
++static ssize_t revoked_file_write(struct file *filp, const char __user * buf,
++ size_t siz, loff_t * ppos)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_aio_read(struct kiocb *iocb,
++ const struct iovec *iov,
++ unsigned long nr_segs, loff_t pos)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_aio_write(struct kiocb *iocb,
++ const struct iovec *iov,
++ unsigned long nr_segs, loff_t pos)
++{
++ return -EBADF;
++}
++
++static int revoked_file_readdir(struct file *filp, void *dirent,
++ filldir_t filldir)
++{
++ return -EBADF;
++}
++
++static unsigned int revoked_file_poll(struct file *filp, poll_table * wait)
++{
++ return POLLERR;
++}
++
++static int revoked_file_ioctl(struct inode *inode, struct file *filp,
++ unsigned int cmd, unsigned long arg)
++{
++ return -EBADF;
++}
++
++static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd,
++ unsigned long arg)
++{
++ return -EBADF;
++}
++
++static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ return -EBADF;
++}
++
++static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ return -EBADF;
++}
++
++static int revoked_file_open(struct inode *inode, struct file *filp)
++{
++ return -EBADF;
++}
++
++static int revoked_file_flush(struct file *file, fl_owner_t id)
++{
++ return filp_close(file, id);
++}
++
++static int revoked_file_release(struct inode *inode, struct file *filp)
++{
++ return -EBADF;
++}
++
++static int revoked_file_fsync(struct file *file, struct dentry *dentry,
++ int datasync)
++{
++ return -EBADF;
++}
++
++static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync)
++{
++ return -EBADF;
++}
++
++static int revoked_file_fasync(int fd, struct file *filp, int on)
++{
++ return -EBADF;
++}
++
++static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos,
++ size_t count, read_actor_t actor,
++ void *target)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_sendpage(struct file *file, struct page *page,
++ int off, size_t len, loff_t * pos,
++ int more)
++{
++ return -EBADF;
++}
++
++static unsigned long revoked_file_get_unmapped_area(struct file *file,
++ unsigned long addr,
++ unsigned long len,
++ unsigned long pgoff,
++ unsigned long flags)
++{
++ return -EBADF;
++}
++
++static int revoked_file_check_flags(int flags)
++{
++ return -EBADF;
++}
++
++static int revoked_file_dir_notify(struct file *file, unsigned long arg)
++{
++ return -EBADF;
++}
++
++static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe,
++ struct file *out, loff_t * ppos,
++ size_t len, unsigned int flags)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos,
++ struct pipe_inode_info *pipe,
++ size_t len, unsigned int flags)
++{
++ return -EBADF;
++}
++
++static const struct file_operations revoked_file_ops = {
++ .llseek = revoked_file_llseek,
++ .read = revoked_file_read,
++ .write = revoked_file_write,
++ .aio_read = revoked_file_aio_read,
++ .aio_write = revoked_file_aio_write,
++ .readdir = revoked_file_readdir,
++ .poll = revoked_file_poll,
++ .ioctl = revoked_file_ioctl,
++ .unlocked_ioctl = revoked_file_unlocked_ioctl,
++ .compat_ioctl = revoked_file_compat_ioctl,
++ .mmap = revoked_file_mmap,
++ .open = revoked_file_open,
++ .flush = revoked_file_flush,
++ .release = revoked_file_release,
++ .fsync = revoked_file_fsync,
++ .aio_fsync = revoked_file_aio_fsync,
++ .fasync = revoked_file_fasync,
++ .lock = revoked_file_lock,
++ .sendfile = revoked_file_sendfile,
++ .sendpage = revoked_file_sendpage,
++ .get_unmapped_area = revoked_file_get_unmapped_area,
++ .check_flags = revoked_file_check_flags,
++ .dir_notify = revoked_file_dir_notify,
++ .flock = revoked_file_flock,
++ .splice_write = revoked_file_splice_write,
++ .splice_read = revoked_file_splice_read,
++};
++
++static const struct file_operations revoked_special_file_ops = {
++ .llseek = revoked_file_llseek,
++ .read = revoked_special_file_read,
++ .write = revoked_file_write,
++ .aio_read = revoked_file_aio_read,
++ .aio_write = revoked_file_aio_write,
++ .readdir = revoked_file_readdir,
++ .poll = revoked_file_poll,
++ .ioctl = revoked_file_ioctl,
++ .unlocked_ioctl = revoked_file_unlocked_ioctl,
++ .compat_ioctl = revoked_file_compat_ioctl,
++ .mmap = revoked_file_mmap,
++ .open = revoked_file_open,
++ .flush = revoked_file_flush,
++ .release = revoked_file_release,
++ .fsync = revoked_file_fsync,
++ .aio_fsync = revoked_file_aio_fsync,
++ .fasync = revoked_file_fasync,
++ .lock = revoked_file_lock,
++ .sendfile = revoked_file_sendfile,
++ .sendpage = revoked_file_sendpage,
++ .get_unmapped_area = revoked_file_get_unmapped_area,
++ .check_flags = revoked_file_check_flags,
++ .dir_notify = revoked_file_dir_notify,
++ .flock = revoked_file_flock,
++ .splice_write = revoked_file_splice_write,
++ .splice_read = revoked_file_splice_read,
++};
++
++static int revoked_inode_create(struct inode *dir, struct dentry *dentry,
++ int mode, struct nameidata *nd)
++{
++ return -EBADF;
++}
++
++static struct dentry *revoked_inode_lookup(struct inode *dir,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ return ERR_PTR(-EBADF);
++}
++
++static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir,
++ struct dentry *dentry)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry,
++ const char *symname)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry,
++ int mode)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry,
++ int mode, dev_t rdev)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_rename(struct inode *old_dir,
++ struct dentry *old_dentry,
++ struct inode *new_dir,
++ struct dentry *new_dentry)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer,
++ int buflen)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_permission(struct inode *inode, int mask,
++ struct nameidata *nd)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_setxattr(struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name,
++ void *buffer, size_t size)
++{
++ return -EBADF;
++}
++
++static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer,
++ size_t buffer_size)
++{
++ return -EBADF;
++}
++
++static int revoked_inode_removexattr(struct dentry *dentry, const char *name)
++{
++ return -EBADF;
++}
++
++static struct inode_operations revoked_inode_ops = {
++ .create = revoked_inode_create,
++ .lookup = revoked_inode_lookup,
++ .link = revoked_inode_link,
++ .unlink = revoked_inode_unlink,
++ .symlink = revoked_inode_symlink,
++ .mkdir = revoked_inode_mkdir,
++ .rmdir = revoked_inode_rmdir,
++ .mknod = revoked_inode_mknod,
++ .rename = revoked_inode_rename,
++ .readlink = revoked_inode_readlink,
++ /* follow_link must be no-op, otherwise unmounting this inode
++ won't work */
++ /* put_link returns void */
++ /* truncate returns void */
++ .permission = revoked_inode_permission,
++ .getattr = revoked_inode_getattr,
++ .setattr = revoked_inode_setattr,
++ .setxattr = revoked_inode_setxattr,
++ .getxattr = revoked_inode_getxattr,
++ .listxattr = revoked_inode_listxattr,
++ .removexattr = revoked_inode_removexattr,
++ /* truncate_range returns void */
++};
++
++static int revoked_readpage(struct file *file, struct page *page)
++{
++ return -EIO;
++}
++
++static int revoked_writepage(struct page *page, struct writeback_control *wbc)
++{
++ return -EIO;
++}
++
++static int revoked_prepare_write(struct file *file, struct page *page,
++ unsigned from, unsigned to)
++{
++ return -EIO;
++}
++
++static int revoked_commit_write(struct file *file, struct page *page,
++ unsigned from, unsigned to)
++{
++ return -EIO;
++}
++
++static ssize_t revoked_direct_IO(int rw, struct kiocb *iocb,
++ const struct iovec *iov, loff_t offset,
++ unsigned long nr_segs)
++{
++ return -EIO;
++}
++
++static const struct address_space_operations revoked_aops = {
++ .readpage = revoked_readpage,
++ .writepage = revoked_writepage,
++ .prepare_write = revoked_prepare_write,
++ .commit_write = revoked_commit_write,
++ .direct_IO = revoked_direct_IO,
++};
++
++void make_revoked_inode(struct inode *inode, int mode)
++{
++ remove_inode_hash(inode);
++
++ inode->i_mode = mode;
++ inode->i_atime = inode->i_mtime = inode->i_ctime =
++ current_fs_time(inode->i_sb);
++ inode->i_op = &revoked_inode_ops;
++
++ if (special_file(mode))
++ inode->i_fop = &revoked_special_file_ops;
++ else
++ inode->i_fop = &revoked_file_ops;
++
++ inode->i_mapping->a_ops = &revoked_aops;
++}
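
For a filesystem to be revocable at all, revoke_fds() requires f_op->revoke to be non-NULL, and generic_file_revoke() is exported for exactly that purpose. A sketch of the wiring, assuming the patchset's new .revoke member in struct file_operations (the member itself is added outside this excerpt, and examplefs is hypothetical; only the .revoke line is the point):

	static const struct file_operations examplefs_file_ops = {
		.llseek	= generic_file_llseek,
		.read	= do_sync_read,
		.write	= do_sync_write,
		.mmap	= generic_file_mmap,
		.revoke	= generic_file_revoke,	/* fsync, swap mapping, invalidate */
	};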
+diff -Nurb linux-2.6.22-570/fs/splice.c linux-2.6.22-591/fs/splice.c
+--- linux-2.6.22-570/fs/splice.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/fs/splice.c 2007-12-21 15:36:14.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/module.h>
+ #include <linux/syscalls.h>
+ #include <linux/uio.h>
++#include <linux/security.h>
+
+ struct partial_page {
+ unsigned int offset;
+@@ -932,6 +933,10 @@
+ if (unlikely(ret < 0))
+ return ret;
+
++ ret = security_file_permission(out, MAY_WRITE);
++ if (unlikely(ret < 0))
++ return ret;
++
+ return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ }
+
+@@ -954,6 +959,10 @@
+ if (unlikely(ret < 0))
+ return ret;
+
++ ret = security_file_permission(in, MAY_READ);
++ if (unlikely(ret < 0))
++ return ret;
++
+ return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ }
+
+@@ -1272,6 +1281,7 @@
+ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
+ {
++ long err;
+ struct pipe_inode_info *pipe;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+@@ -1290,6 +1300,10 @@
+ else if (unlikely(!nr_segs))
+ return 0;
+
++ err = security_file_permission(file, MAY_WRITE);
++ if (unlikely(err < 0))
++ return err;
++
+ spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+ flags & SPLICE_F_GIFT);
+ if (spd.nr_pages <= 0)
+diff -Nurb linux-2.6.22-570/fs/stack.c linux-2.6.22-591/fs/stack.c
+--- linux-2.6.22-570/fs/stack.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/stack.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,20 @@
++/*
++ * Copyright (c) 2006-2007 Erez Zadok
++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2006-2007 Stony Brook University
++ * Copyright (c) 2006-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/fs_stack.h>
+
+-/* does _NOT_ require i_mutex to be held.
++/*
++ * does _NOT_ require i_mutex to be held.
+ *
+ * This function cannot be inlined since i_size_{read,write} is rather
+ * heavy-weight on 32-bit systems
+@@ -14,7 +26,8 @@
+ }
+ EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
+
+-/* copy all attributes; get_nlinks is optional way to override the i_nlink
++/*
++ * copy all attributes; get_nlinks is optional way to override the i_nlink
+ * copying
+ */
+ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
+diff -Nurb linux-2.6.22-570/fs/sync.c linux-2.6.22-591/fs/sync.c
+--- linux-2.6.22-570/fs/sync.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sync.c 2007-12-21 15:36:14.000000000 -0500
+@@ -174,6 +174,9 @@
+ * already-instantiated disk blocks, there are no guarantees here that the data
+ * will be available after a crash.
+ */
++/* It would be nice if people remembered that not all the world's an i386
++   when they introduce new system calls */
++
+ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
+ unsigned int flags)
+ {
+diff -Nurb linux-2.6.22-570/fs/sysfs/bin.c linux-2.6.22-591/fs/sysfs/bin.c
+--- linux-2.6.22-570/fs/sysfs/bin.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/bin.c 2007-12-22 02:12:47.000000000 -0500
+@@ -20,29 +20,41 @@
+
+ #include "sysfs.h"
+
++struct bin_buffer {
++ struct mutex mutex;
++ void *buffer;
++ int mmapped;
++};
++
+ static int
+ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
+ {
+- struct bin_attribute * attr = to_bin_attr(dentry);
+- struct kobject * kobj = to_kobj(dentry->d_parent);
++ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++ int rc;
++
++ /* need attr_sd for attr, its parent for kobj */
++ if (!sysfs_get_active_two(attr_sd))
++ return -ENODEV;
++
++ rc = -EIO;
++ if (attr->read)
++ rc = attr->read(kobj, attr, buffer, off, count);
+
+- if (!attr->read)
+- return -EIO;
++ sysfs_put_active_two(attr_sd);
+
+- return attr->read(kobj, buffer, off, count);
++ return rc;
+ }
+
+ static ssize_t
+-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
++read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
+ {
+- char *buffer = file->private_data;
++ struct bin_buffer *bb = file->private_data;
+ struct dentry *dentry = file->f_path.dentry;
+ int size = dentry->d_inode->i_size;
+ loff_t offs = *off;
+- int ret;
+-
+- if (count > PAGE_SIZE)
+- count = PAGE_SIZE;
++ int count = min_t(size_t, bytes, PAGE_SIZE);
+
+ if (size) {
+ if (offs > size)
+@@ -51,43 +63,56 @@
+ count = size - offs;
+ }
+
+- ret = fill_read(dentry, buffer, offs, count);
+- if (ret < 0)
+- return ret;
+- count = ret;
++ mutex_lock(&bb->mutex);
+
+- if (copy_to_user(userbuf, buffer, count))
+- return -EFAULT;
++ count = fill_read(dentry, bb->buffer, offs, count);
++ if (count < 0)
++ goto out_unlock;
++
++ if (copy_to_user(userbuf, bb->buffer, count)) {
++ count = -EFAULT;
++ goto out_unlock;
++ }
+
+- pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
++ pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
+
+ *off = offs + count;
+
++ out_unlock:
++ mutex_unlock(&bb->mutex);
+ return count;
+ }
+
+ static int
+ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
+ {
+- struct bin_attribute *attr = to_bin_attr(dentry);
+- struct kobject *kobj = to_kobj(dentry->d_parent);
++ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++ int rc;
+
+- if (!attr->write)
+- return -EIO;
++ /* need attr_sd for attr, its parent for kobj */
++ if (!sysfs_get_active_two(attr_sd))
++ return -ENODEV;
+
+- return attr->write(kobj, buffer, offset, count);
++ rc = -EIO;
++ if (attr->write)
++ rc = attr->write(kobj, attr, buffer, offset, count);
++
++ sysfs_put_active_two(attr_sd);
++
++ return rc;
+ }
+
+-static ssize_t write(struct file * file, const char __user * userbuf,
+- size_t count, loff_t * off)
++static ssize_t write(struct file *file, const char __user *userbuf,
++ size_t bytes, loff_t *off)
+ {
+- char *buffer = file->private_data;
++ struct bin_buffer *bb = file->private_data;
+ struct dentry *dentry = file->f_path.dentry;
+ int size = dentry->d_inode->i_size;
+ loff_t offs = *off;
++ int count = min_t(size_t, bytes, PAGE_SIZE);
+
+- if (count > PAGE_SIZE)
+- count = PAGE_SIZE;
+ if (size) {
+ if (offs > size)
+ return 0;
+@@ -95,72 +120,100 @@
+ count = size - offs;
+ }
+
+- if (copy_from_user(buffer, userbuf, count))
+- return -EFAULT;
++ mutex_lock(&bb->mutex);
+
+- count = flush_write(dentry, buffer, offs, count);
++ if (copy_from_user(bb->buffer, userbuf, count)) {
++ count = -EFAULT;
++ goto out_unlock;
++ }
++
++ count = flush_write(dentry, bb->buffer, offs, count);
+ if (count > 0)
+ *off = offs + count;
++
++ out_unlock:
++ mutex_unlock(&bb->mutex);
+ return count;
+ }
+
+ static int mmap(struct file *file, struct vm_area_struct *vma)
+ {
+- struct dentry *dentry = file->f_path.dentry;
+- struct bin_attribute *attr = to_bin_attr(dentry);
+- struct kobject *kobj = to_kobj(dentry->d_parent);
++ struct bin_buffer *bb = file->private_data;
++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++ int rc;
++
++ mutex_lock(&bb->mutex);
++
++	/* need attr_sd for attr, its parent for kobj */
++	rc = -ENODEV;
++	if (!sysfs_get_active_two(attr_sd))
++		goto out_unlock;	/* don't return with bb->mutex held */
++
++ rc = -EINVAL;
++ if (attr->mmap)
++ rc = attr->mmap(kobj, attr, vma);
++
++ if (rc == 0 && !bb->mmapped)
++ bb->mmapped = 1;
++ else
++ sysfs_put_active_two(attr_sd);
+
+- if (!attr->mmap)
+- return -EINVAL;
++ out_unlock:
++	mutex_unlock(&bb->mutex);
+
+- return attr->mmap(kobj, attr, vma);
++ return rc;
+ }
+
+ static int open(struct inode * inode, struct file * file)
+ {
+- struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+- struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+- int error = -EINVAL;
+-
+- if (!kobj || !attr)
+- goto Done;
+-
+- /* Grab the module reference for this attribute if we have one */
+- error = -ENODEV;
+- if (!try_module_get(attr->attr.owner))
+- goto Done;
++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++ struct bin_buffer *bb = NULL;
++ int error;
++
++ /* need attr_sd for attr */
++ if (!sysfs_get_active(attr_sd))
++ return -ENODEV;
+
+ error = -EACCES;
+ if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
+- goto Error;
++ goto err_out;
+ if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
+- goto Error;
++ goto err_out;
+
+ error = -ENOMEM;
+- file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+- if (!file->private_data)
+- goto Error;
+-
+- error = 0;
+- goto Done;
+-
+- Error:
+- module_put(attr->attr.owner);
+- Done:
+- if (error)
+- kobject_put(kobj);
++ bb = kzalloc(sizeof(*bb), GFP_KERNEL);
++ if (!bb)
++ goto err_out;
++
++ bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!bb->buffer)
++ goto err_out;
++
++ mutex_init(&bb->mutex);
++ file->private_data = bb;
++
++ /* open succeeded, put active reference and pin attr_sd */
++ sysfs_put_active(attr_sd);
++ sysfs_get(attr_sd);
++ return 0;
++
++ err_out:
++ sysfs_put_active(attr_sd);
++ kfree(bb);
+ return error;
+ }
+
+ static int release(struct inode * inode, struct file * file)
+ {
+- struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
+- struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+- u8 * buffer = file->private_data;
+-
+- kobject_put(kobj);
+- module_put(attr->attr.owner);
+- kfree(buffer);
++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++ struct bin_buffer *bb = file->private_data;
++
++ if (bb->mmapped)
++ sysfs_put_active_two(attr_sd);
++ sysfs_put(attr_sd);
++ kfree(bb->buffer);
++ kfree(bb);
+ return 0;
+ }
+
+@@ -181,9 +234,9 @@
+
+ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
+- BUG_ON(!kobj || !kobj->dentry || !attr);
++ BUG_ON(!kobj || !kobj->sd || !attr);
+
+- return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
++ return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+ }
+
+
+@@ -195,7 +248,7 @@
+
+ void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
+- if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
++ if (sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name) < 0) {
+ printk(KERN_ERR "%s: "
+ "bad dentry or inode or no such file: \"%s\"\n",
+ __FUNCTION__, attr->attr.name);
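
Note the prototype change this hunk depends on: bin_attribute read/write/mmap callbacks now receive the struct bin_attribute pointer alongside the kobject. A provider sketch against the new prototypes (the names and the attr->private stash are illustrative assumptions, not part of this patch):

	static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *attr,
				   char *buf, loff_t off, size_t count)
	{
		/* attr->private is assumed to hold a 256-byte snapshot; the
		 * sysfs read() path has already clamped off/count to .size */
		memcpy(buf, (char *)attr->private + off, count);
		return count;
	}

	static struct bin_attribute eeprom_attr = {
		.attr	= { .name = "eeprom", .mode = S_IRUGO },
		.size	= 256,
		.read	= eeprom_read,
	};

	/* registration is unchanged: sysfs_create_bin_file(kobj, &eeprom_attr); */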
+diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c linux-2.6.22-591/fs/sysfs/dir.c
+--- linux-2.6.22-570/fs/sysfs/dir.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/dir.c 2007-12-23 01:58:30.000000000 -0500
+@@ -9,21 +9,442 @@
+ #include <linux/module.h>
+ #include <linux/kobject.h>
+ #include <linux/namei.h>
++#include <linux/idr.h>
++#include <linux/completion.h>
+ #include <asm/semaphore.h>
+ #include "sysfs.h"
+
+-DECLARE_RWSEM(sysfs_rename_sem);
+-spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED;
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd);
++
++DEFINE_MUTEX(sysfs_mutex);
++spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
++
++static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
++static DEFINE_IDA(sysfs_ino_ida);
++
++static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target)
++{
++ /* Find the shadow directory for the specified tag */
++ struct sysfs_dirent *sd;
++
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++ if (sd->s_name != target)
++ continue;
++ break;
++ }
++ return sd;
++}
++
++static const void *find_shadow_tag(struct kobject *kobj)
++{
++ /* Find the tag the current kobj is cached with */
++ return kobj->sd->s_parent->s_name;
++}
++
++/**
++ * sysfs_link_sibling - link sysfs_dirent into sibling list
++ * @sd: sysfs_dirent of interest
++ *
++ * Link @sd into its sibling list which starts from
++ * sd->s_parent->s_children.
++ *
++ * Locking:
++ * mutex_lock(sysfs_mutex)
++ */
++
++/**
++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
++ * @sd: sysfs_dirent of interest
++ *
++ * Unlink @sd from its sibling list which starts from
++ * sd->s_parent->s_children.
++ *
++ * Locking:
++ * mutex_lock(sysfs_mutex)
++ */
++
++void sysfs_link_sibling(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent *parent_sd = sd->s_parent;
++
++ BUG_ON(sd->s_sibling);
++ sd->s_sibling = parent_sd->s_children;
++ parent_sd->s_children = sd;
++}
++
++/**
++ * sysfs_get_dentry - get dentry for the given sysfs_dirent
++ * @sd: sysfs_dirent of interest
++ *
++ * Get dentry for @sd. Dentry is looked up if currently not
++ * present. This function climbs sysfs_dirent tree till it
++ * reaches a sysfs_dirent with valid dentry attached and descends
++ * down from there looking up dentry for each step.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep)
++ *
++ * RETURNS:
++ * Pointer to found dentry on success, ERR_PTR() value on error.
++ */
++struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent *cur;
++ struct dentry *parent_dentry, *dentry;
++ int i, depth;
++
++ /* Find the first parent which has valid s_dentry and get the
++ * dentry.
++ */
++ mutex_lock(&sysfs_mutex);
++ restart0:
++ spin_lock(&sysfs_assoc_lock);
++ restart1:
++ spin_lock(&dcache_lock);
++
++ dentry = NULL;
++ depth = 0;
++ cur = sd;
++ while (!cur->s_dentry || !cur->s_dentry->d_inode) {
++ if (cur->s_flags & SYSFS_FLAG_REMOVED) {
++ dentry = ERR_PTR(-ENOENT);
++ depth = 0;
++ break;
++ }
++ cur = cur->s_parent;
++ depth++;
++ }
++ if (!IS_ERR(dentry))
++ dentry = dget_locked(cur->s_dentry);
++
++ spin_unlock(&dcache_lock);
++ spin_unlock(&sysfs_assoc_lock);
++
++ /* from the found dentry, look up depth times */
++ while (depth--) {
++ /* find and get depth'th ancestor */
++ for (cur = sd, i = 0; cur && i < depth; i++)
++ cur = cur->s_parent;
++
++ /* This can happen if tree structure was modified due
++ * to move/rename. Restart.
++ */
++ if (i != depth) {
++ dput(dentry);
++ goto restart0;
++ }
++
++ sysfs_get(cur);
++
++ mutex_unlock(&sysfs_mutex);
++
++ /* look it up */
++ parent_dentry = dentry;
++ dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
++ strlen(cur->s_name));
++ dput(parent_dentry);
++
++ if (IS_ERR(dentry)) {
++ sysfs_put(cur);
++ return dentry;
++ }
++
++ mutex_lock(&sysfs_mutex);
++ spin_lock(&sysfs_assoc_lock);
++
++ /* This, again, can happen if tree structure has
++ * changed and we looked up the wrong thing. Restart.
++ */
++ if (cur->s_dentry != dentry) {
++ dput(dentry);
++ sysfs_put(cur);
++ goto restart1;
++ }
++
++ spin_unlock(&sysfs_assoc_lock);
++
++ sysfs_put(cur);
++ }
++
++ mutex_unlock(&sysfs_mutex);
++ return dentry;
++}
++
++void sysfs_unlink_sibling(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent **pos;
++
++ for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
++ if (*pos == sd) {
++ *pos = sd->s_sibling;
++ sd->s_sibling = NULL;
++ break;
++ }
++ }
++}
++
++/**
++ * sysfs_put_active - put an active reference to sysfs_dirent
++ * @sd: sysfs_dirent to put an active reference to
++ *
++ * Put an active reference to @sd. This function is noop if @sd
++ * is NULL.
++ */
++void sysfs_put_active(struct sysfs_dirent *sd)
++{
++ struct completion *cmpl;
++ int v;
++
++ if (unlikely(!sd))
++ return;
++
++ v = atomic_dec_return(&sd->s_active);
++ if (likely(v != SD_DEACTIVATED_BIAS))
++ return;
++
++ /* atomic_dec_return() is a mb(), we'll always see the updated
++ * sd->s_sibling.
++ */
++ cmpl = (void *)sd->s_sibling;
++ complete(cmpl);
++}
++
++/**
++ * sysfs_get_active_two - get active references to sysfs_dirent and parent
++ * @sd: sysfs_dirent of interest
++ *
++ * Get active reference to @sd and its parent. Parent's active
++ * reference is grabbed first. This function is noop if @sd is
++ * NULL.
++ *
++ * RETURNS:
++ * Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
++{
++ if (sd) {
++ if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
++ return NULL;
++ if (unlikely(!sysfs_get_active(sd))) {
++ sysfs_put_active(sd->s_parent);
++ return NULL;
++ }
++ }
++ return sd;
++}
++
++/**
++ * sysfs_put_active_two - put active references to sysfs_dirent and parent
++ * @sd: sysfs_dirent of interest
++ *
++ * Put active references to @sd and its parent. This function is
++ * noop if @sd is NULL.
++ */
++void sysfs_put_active_two(struct sysfs_dirent *sd)
++{
++ if (sd) {
++ sysfs_put_active(sd);
++ sysfs_put_active(sd->s_parent);
++ }
++}
++
++/**
++ * sysfs_deactivate - deactivate sysfs_dirent
++ * @sd: sysfs_dirent to deactivate
++ *
++ * Deny new active references and drain existing ones.
++ */
++static void sysfs_deactivate(struct sysfs_dirent *sd)
++{
++ DECLARE_COMPLETION_ONSTACK(wait);
++ int v;
++
++ BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
++ sd->s_sibling = (void *)&wait;
++
++ /* atomic_add_return() is a mb(), put_active() will always see
++ * the updated sd->s_sibling.
++ */
++ v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
++
++ if (v != SD_DEACTIVATED_BIAS)
++ wait_for_completion(&wait);
++
++ sd->s_sibling = NULL;
++}
++
++/**
++ * sysfs_get_active - get an active reference to sysfs_dirent
++ * @sd: sysfs_dirent to get an active reference to
++ *
++ * Get an active reference of @sd. This function is noop if @sd
++ * is NULL.
++ *
++ * RETURNS:
++ * Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
++{
++ if (unlikely(!sd))
++ return NULL;
++
++ while (1) {
++ int v, t;
++
++ v = atomic_read(&sd->s_active);
++ if (unlikely(v < 0))
++ return NULL;
++
++ t = atomic_cmpxchg(&sd->s_active, v, v + 1);
++ if (likely(t == v))
++ return sd;
++ if (t < 0)
++ return NULL;
++
++ cpu_relax();
++ }
++}
++
++static int sysfs_alloc_ino(ino_t *pino)
++{
++ int ino, rc;
++
++ retry:
++ spin_lock(&sysfs_ino_lock);
++ rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
++ spin_unlock(&sysfs_ino_lock);
++
++ if (rc == -EAGAIN) {
++ if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
++ goto retry;
++ rc = -ENOMEM;
++ }
++
++ *pino = ino;
++ return rc;
++}
++
++static void sysfs_free_ino(ino_t ino)
++{
++ spin_lock(&sysfs_ino_lock);
++ ida_remove(&sysfs_ino_ida, ino);
++ spin_unlock(&sysfs_ino_lock);
++}
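++
++/* Usage sketch (illustrative): sysfs_new_dirent() below takes every
++ * inode number from sysfs_alloc_ino() and release_sysfs_dirent()
++ * returns it:
++ *
++ *        ino_t ino;
++ *
++ *        if (sysfs_alloc_ino(&ino))
++ *                return NULL;
++ *        ...
++ *        sysfs_free_ino(ino);
++ *
++ * The retry loop above is the usual ida idiom: ida_get_new_above()
++ * needs preallocated memory, so -EAGAIN routes through ida_pre_get()
++ * and tries again.
++ */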
++
++void release_sysfs_dirent(struct sysfs_dirent * sd)
++{
++ struct sysfs_dirent *parent_sd;
++
++ repeat:
++ /* Moving/renaming is always done while holding reference.
++ * sd->s_parent won't change beneath us.
++ */
++ parent_sd = sd->s_parent;
++
++ if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
++ sysfs_put(sd->s_elem.symlink.target_sd);
++ if (sysfs_type(sd) & SYSFS_COPY_NAME)
++ kfree(sd->s_name);
++ kfree(sd->s_iattr);
++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
++ sysfs_free_ino(sd->s_ino);
++ kmem_cache_free(sysfs_dir_cachep, sd);
++
++ sd = parent_sd;
++ if (sd && atomic_dec_and_test(&sd->s_count))
++ goto repeat;
++}
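++
++/* Note: the repeat loop above releases the ancestor chain
++ * iteratively; dropping the last reference on a deeply nested dirent
++ * would otherwise recurse through sysfs_put() once per ancestor.
++ */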
+
+ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
+ {
+ struct sysfs_dirent * sd = dentry->d_fsdata;
+
+ if (sd) {
+- /* sd->s_dentry is protected with sysfs_lock. This
+- * allows sysfs_drop_dentry() to dereference it.
++ /* sd->s_dentry is protected with sysfs_assoc_lock.
++ * This allows sysfs_drop_dentry() to dereference it.
+ */
+- spin_lock(&sysfs_lock);
++ spin_lock(&sysfs_assoc_lock);
+
+ /* The dentry might have been deleted or another
+ * lookup could have happened updating sd->s_dentry to
+@@ -32,7 +453,7 @@
+ */
+ if (sd->s_dentry == dentry)
+ sd->s_dentry = NULL;
+- spin_unlock(&sysfs_lock);
++ spin_unlock(&sysfs_assoc_lock);
+ sysfs_put(sd);
+ }
+ iput(inode);
+@@ -42,344 +463,594 @@
+ .d_iput = sysfs_d_iput,
+ };
+
+-static unsigned int sysfs_inode_counter;
+-ino_t sysfs_get_inum(void)
++struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
+ {
+- if (unlikely(sysfs_inode_counter < 3))
+- sysfs_inode_counter = 3;
+- return sysfs_inode_counter++;
+-}
++ char *dup_name = NULL;
++ struct sysfs_dirent *sd = NULL;
+
+-/*
+- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
+- */
+-static struct sysfs_dirent * __sysfs_new_dirent(void * element)
+-{
+- struct sysfs_dirent * sd;
++ if (type & SYSFS_COPY_NAME) {
++ name = dup_name = kstrdup(name, GFP_KERNEL);
++ if (!name)
++ goto err_out;
++ }
+
+ sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
+ if (!sd)
+- return NULL;
++ goto err_out;
++
++ if (sysfs_alloc_ino(&sd->s_ino))
++ goto err_out;
+
+- sd->s_ino = sysfs_get_inum();
+ atomic_set(&sd->s_count, 1);
++ atomic_set(&sd->s_active, 0);
+ atomic_set(&sd->s_event, 1);
+- INIT_LIST_HEAD(&sd->s_children);
+- INIT_LIST_HEAD(&sd->s_sibling);
+- sd->s_element = element;
++
++ sd->s_name = name;
++ sd->s_mode = mode;
++ sd->s_flags = type;
+
+ return sd;
++
++ err_out:
++ kfree(dup_name);
++ if (sd)
++ kmem_cache_free(sysfs_dir_cachep, sd);
++ return NULL;
+ }
+
+-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
+- struct sysfs_dirent *sd)
++/**
++ * sysfs_attach_dentry - associate sysfs_dirent with dentry
++ * @sd: target sysfs_dirent
++ * @dentry: dentry to associate
++ *
++ * Associate @sd with @dentry. This is protected by
++ * sysfs_assoc_lock to avoid race with sysfs_d_iput().
++ *
++ * LOCKING:
++ * mutex_lock(sysfs_mutex)
++ */
++static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
+ {
+- if (sd)
+- list_add(&sd->s_sibling, &parent_sd->s_children);
++ dentry->d_op = &sysfs_dentry_ops;
++ dentry->d_fsdata = sysfs_get(sd);
++
++ /* protect sd->s_dentry against sysfs_d_iput */
++ spin_lock(&sysfs_assoc_lock);
++ sd->s_dentry = dentry;
++ spin_unlock(&sysfs_assoc_lock);
++
++ if (dentry->d_flags & DCACHE_UNHASHED)
++ d_rehash(dentry);
+ }
+
+-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
+- void * element)
++static int sysfs_ilookup_test(struct inode *inode, void *arg)
+ {
+- struct sysfs_dirent *sd;
+- sd = __sysfs_new_dirent(element);
+- __sysfs_list_dirent(parent_sd, sd);
+- return sd;
++ struct sysfs_dirent *sd = arg;
++ return inode->i_ino == sd->s_ino;
+ }
+
+-/*
++/**
++ * sysfs_addrm_start - prepare for sysfs_dirent add/remove
++ * @acxt: pointer to sysfs_addrm_cxt to be used
++ * @parent_sd: parent sysfs_dirent
+ *
+- * Return -EEXIST if there is already a sysfs element with the same name for
+- * the same parent.
++ * This function is called when the caller is about to add or
++ * remove a sysfs_dirent under @parent_sd. This function acquires
++ * sysfs_mutex, grabs the inode for @parent_sd if available and
++ * locks its i_mutex. @acxt is used to keep and pass context to the
++ * other addrm functions.
+ *
+- * called with parent inode's i_mutex held
++ * LOCKING:
++ * Kernel thread context (may sleep). sysfs_mutex is locked on
++ * return. i_mutex of parent inode is locked on return if
++ * available.
+ */
+-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
+- const unsigned char *new)
++void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *parent_sd)
+ {
+- struct sysfs_dirent * sd;
++ struct inode *inode;
+
+- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+- if (sd->s_element) {
+- const unsigned char *existing = sysfs_get_name(sd);
+- if (strcmp(existing, new))
+- continue;
+- else
+- return -EEXIST;
+- }
++ memset(acxt, 0, sizeof(*acxt));
++ acxt->parent_sd = parent_sd;
++
++ /* Lookup parent inode. inode initialization and I_NEW
++ * clearing are protected by sysfs_mutex. By grabbing it and
++ * looking up with _nowait variant, inode state can be
++ * determined reliably.
++ */
++ mutex_lock(&sysfs_mutex);
++
++ inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
++ parent_sd);
++
++ if (inode && !(inode->i_state & I_NEW)) {
++ /* parent inode available */
++ acxt->parent_inode = inode;
++
++ /* sysfs_mutex is below i_mutex in lock hierarchy.
++ * First, trylock i_mutex. If that fails, unlock
++ * sysfs_mutex and take both locks in order.
++ */
++ if (!mutex_trylock(&inode->i_mutex)) {
++ mutex_unlock(&sysfs_mutex);
++ mutex_lock(&inode->i_mutex);
++ mutex_lock(&sysfs_mutex);
+ }
++ } else
++ iput(inode);
++}
+
+- return 0;
++/**
++ * sysfs_add_one - add sysfs_dirent to parent
++ * @acxt: addrm context to use
++ * @sd: sysfs_dirent to be added
++ *
++ * Grab a reference on @acxt->parent_sd, set sd->s_parent to it, and
++ * increment the nlink count of the parent inode if @sd is a
++ * directory. @sd is NOT
++ * linked into the children list of the parent. The caller
++ * should invoke sysfs_link_sibling() after this function
++ * completes if @sd needs to be on the children list.
++ *
++ * This function should be called between calls to
++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ * passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ * LOCKING:
++ * Determined by sysfs_addrm_start().
++ */
++void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++ sd->s_parent = sysfs_get(acxt->parent_sd);
++
++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++ inc_nlink(acxt->parent_inode);
++
++ acxt->cnt++;
+ }
+
++/**
++ * sysfs_remove_one - remove sysfs_dirent from parent
++ * @acxt: addrm context to use
++ * @sd: sysfs_dirent to be removed
++ *
++ * Mark @sd removed and drop the nlink count of the parent inode if
++ * @sd is a directory. @sd is NOT unlinked from the children list of
++ * the parent. The caller is responsible for removing @sd from the
++ * children list before calling this function.
++ *
++ * This function should be called between calls to
++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ * passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ * LOCKING:
++ * Determined by sysfs_addrm_start().
++ */
++void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++ BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
++
++ sd->s_flags |= SYSFS_FLAG_REMOVED;
++ sd->s_sibling = acxt->removed;
++ acxt->removed = sd;
++
++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++ drop_nlink(acxt->parent_inode);
++
++ acxt->cnt++;
++}
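++
++/* Usage sketch (illustrative): the canonical removal sequence, as
++ * __remove_dir() does later in this patch; the caller unlinks first,
++ * as required by the comment above.
++ *
++ *        struct sysfs_addrm_cxt acxt;
++ *
++ *        sysfs_addrm_start(&acxt, sd->s_parent);
++ *        sysfs_unlink_sibling(sd);
++ *        sysfs_remove_one(&acxt, sd);
++ *        sysfs_addrm_finish(&acxt);
++ */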
+
+-static struct sysfs_dirent *
+-__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
++/**
++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
++ * @sd: target sysfs_dirent
++ *
++ * Drop dentry for @sd. @sd must have been unlinked from its
++ * parent on entry to this function such that it can't be looked
++ * up anymore.
++ *
++ * @sd->s_dentry which is protected with sysfs_assoc_lock points
++ * to the currently associated dentry, but we're not holding a
++ * reference to it, so we may be racing with dput(). Grab
++ * dcache_lock and verify the dentry before dropping it. If
++ * @sd->s_dentry is NULL or dput() beats us, there is no need to
++ * bother.
++ */
++static void sysfs_drop_dentry(struct sysfs_dirent *sd)
+ {
+- struct sysfs_dirent * sd;
++ struct dentry *dentry = NULL;
++ struct inode *inode;
+
+- sd = __sysfs_new_dirent(element);
+- if (!sd)
+- goto out;
++ /* We're not holding a reference to ->s_dentry dentry but the
++ * field will stay valid as long as sysfs_assoc_lock is held.
++ */
++ spin_lock(&sysfs_assoc_lock);
++ spin_lock(&dcache_lock);
+
+- sd->s_mode = mode;
+- sd->s_type = type;
+- sd->s_dentry = dentry;
+- if (dentry) {
+- dentry->d_fsdata = sysfs_get(sd);
+- dentry->d_op = &sysfs_dentry_ops;
+- }
++ /* drop dentry if it's there and dput() didn't kill it yet */
++ if (sd->s_dentry && sd->s_dentry->d_inode) {
++ dentry = dget_locked(sd->s_dentry);
++ spin_lock(&dentry->d_lock);
++ __d_drop(dentry);
++ spin_unlock(&dentry->d_lock);
++ }
++
++ spin_unlock(&dcache_lock);
++ spin_unlock(&sysfs_assoc_lock);
++
++ /* dentries for shadowed directories are pinned, unpin */
++ if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) ||
++ (sd->s_flags & SYSFS_FLAG_SHADOWED))
++ dput(dentry);
++ dput(dentry);
+
+-out:
+- return sd;
++ /* adjust nlink and update timestamp */
++ inode = ilookup(sysfs_sb, sd->s_ino);
++ if (inode) {
++ mutex_lock(&inode->i_mutex);
++
++ inode->i_ctime = CURRENT_TIME;
++ drop_nlink(inode);
++ if (sysfs_type(sd) == SYSFS_DIR)
++ drop_nlink(inode);
++
++ mutex_unlock(&inode->i_mutex);
++ iput(inode);
++ }
+ }
+
+-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
+- void * element, umode_t mode, int type)
++
++/**
++ * sysfs_addrm_finish - finish up sysfs_dirent add/remove
++ * @acxt: addrm context to finish up
++ *
++ * Finish up sysfs_dirent add/remove. Resources acquired by
++ * sysfs_addrm_start() are released and removed sysfs_dirents are
++ * cleaned up. Timestamps on the parent inode are updated.
++ *
++ * LOCKING:
++ * All mutexes acquired by sysfs_addrm_start() are released.
++ *
++ * RETURNS:
++ * Number of added/removed sysfs_dirents since sysfs_addrm_start().
++ */
++int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
+ {
+- struct sysfs_dirent *sd;
++ /* release resources acquired by sysfs_addrm_start() */
++ mutex_unlock(&sysfs_mutex);
++ if (acxt->parent_inode) {
++ struct inode *inode = acxt->parent_inode;
++
++ /* if added/removed, update timestamps on the parent */
++ if (acxt->cnt)
++ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+- sd = __sysfs_make_dirent(dentry, element, mode, type);
+- __sysfs_list_dirent(parent_sd, sd);
++ mutex_unlock(&inode->i_mutex);
++ iput(inode);
++ }
+
+- return sd ? 0 : -ENOMEM;
++ /* kill removed sysfs_dirents */
++ while (acxt->removed) {
++ struct sysfs_dirent *sd = acxt->removed;
++
++ acxt->removed = sd->s_sibling;
++ sd->s_sibling = NULL;
++
++ sysfs_prune_shadow_sd(sd->s_parent);
++ sysfs_drop_dentry(sd);
++ sysfs_deactivate(sd);
++ sysfs_put(sd);
++ }
++
++ return acxt->cnt;
+ }
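++
++/* Usage sketch (illustrative): the canonical add sequence, as used by
++ * create_dir() below; error handling and shadow resolution are
++ * elided.
++ *
++ *        struct sysfs_addrm_cxt acxt;
++ *
++ *        sysfs_addrm_start(&acxt, parent_sd);
++ *        if (!sysfs_find_dirent(acxt.parent_sd, name)) {
++ *                sysfs_add_one(&acxt, sd);
++ *                sysfs_link_sibling(sd);
++ *        }
++ *        sysfs_addrm_finish(&acxt);
++ */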
+
+-static int init_dir(struct inode * inode)
++/**
++ * sysfs_find_dirent - find sysfs_dirent with the given name
++ * @parent_sd: sysfs_dirent to search under
++ * @name: name to look for
++ *
++ * Look for sysfs_dirent with name @name under @parent_sd.
++ *
++ * LOCKING:
++ * mutex_lock(sysfs_mutex)
++ *
++ * RETURNS:
++ * Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name)
+ {
+- inode->i_op = &sysfs_dir_inode_operations;
+- inode->i_fop = &sysfs_dir_operations;
++ struct sysfs_dirent *sd;
+
+- /* directory inodes start off with i_nlink == 2 (for "." entry) */
+- inc_nlink(inode);
+- return 0;
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
++ if (sysfs_type(sd) && !strcmp(sd->s_name, name))
++ return sd;
++ return NULL;
+ }
+
+-static int init_file(struct inode * inode)
++/**
++ * sysfs_get_dirent - find and get sysfs_dirent with the given name
++ * @parent_sd: sysfs_dirent to search under
++ * @name: name to look for
++ *
++ * Look for sysfs_dirent with name @name under @parent_sd and get
++ * it if found.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep). Grabs sysfs_mutex.
++ *
++ * RETURNS:
++ * Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name)
+ {
+- inode->i_size = PAGE_SIZE;
+- inode->i_fop = &sysfs_file_operations;
+- return 0;
+-}
++ struct sysfs_dirent *sd;
+
+-static int init_symlink(struct inode * inode)
+-{
+- inode->i_op = &sysfs_symlink_inode_operations;
+- return 0;
++ mutex_lock(&sysfs_mutex);
++ sd = sysfs_find_dirent(parent_sd, name);
++ sysfs_get(sd);
++ mutex_unlock(&sysfs_mutex);
++
++ return sd;
+ }
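++
++/* Usage sketch (illustrative; "power" is just an example name):
++ *
++ *        struct sysfs_dirent *sd;
++ *
++ *        sd = sysfs_get_dirent(parent_sd, "power");
++ *        if (sd) {
++ *                ...
++ *                sysfs_put(sd);
++ *        }
++ */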
+
+-static int create_dir(struct kobject * k, struct dentry * p,
+- const char * n, struct dentry ** d)
++static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
++ const char *name, struct sysfs_dirent **p_sd)
+ {
+- int error;
+ umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent *sd;
++ int err;
+
+- mutex_lock(&p->d_inode->i_mutex);
+- *d = lookup_one_len(n, p, strlen(n));
+- if (!IS_ERR(*d)) {
+- if (sysfs_dirent_exist(p->d_fsdata, n))
+- error = -EEXIST;
+- else
+- error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
+- SYSFS_DIR);
+- if (!error) {
+- error = sysfs_create(*d, mode, init_dir);
+- if (!error) {
+- inc_nlink(p->d_inode);
+- (*d)->d_op = &sysfs_dentry_ops;
+- d_rehash(*d);
+- }
++ /* allocate */
++ sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
++ if (!sd)
++ return -ENOMEM;
++ sd->s_elem.dir.kobj = kobj;
++
++ /* link in */
++ sysfs_addrm_start(&acxt, parent_sd);
++ err = -ENOENT;
++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++ goto addrm_finish;
++
++ err = -EEXIST;
++ if (!sysfs_find_dirent(acxt.parent_sd, name)) {
++ sysfs_add_one(&acxt, sd);
++ sysfs_link_sibling(sd);
++ err = 0;
++ }
++addrm_finish:
++ if (sysfs_addrm_finish(&acxt)) {
++ *p_sd = sd;
++ return 0;
+ }
+- if (error && (error != -EEXIST)) {
+- struct sysfs_dirent *sd = (*d)->d_fsdata;
+- if (sd) {
+- list_del_init(&sd->s_sibling);
++
+ sysfs_put(sd);
+- }
+- d_drop(*d);
+- }
+- dput(*d);
+- } else
+- error = PTR_ERR(*d);
+- mutex_unlock(&p->d_inode->i_mutex);
+- return error;
++ return err;
+ }
+
+-
+-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
++int sysfs_create_subdir(struct kobject *kobj, const char *name,
++ struct sysfs_dirent **p_sd)
+ {
+- return create_dir(k,k->dentry,n,d);
++ return create_dir(kobj, kobj->sd, name, p_sd);
+ }
+
+ /**
+ * sysfs_create_dir - create a directory for an object.
+ * @kobj: object we're creating directory for.
+- * @shadow_parent: parent parent object.
+ */
+-
+-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
++int sysfs_create_dir(struct kobject * kobj)
+ {
+- struct dentry * dentry = NULL;
+- struct dentry * parent;
++ struct sysfs_dirent *parent_sd, *sd;
+ int error = 0;
+
+ BUG_ON(!kobj);
+
+- if (shadow_parent)
+- parent = shadow_parent;
+- else if (kobj->parent)
+- parent = kobj->parent->dentry;
++ if (kobj->parent)
++ parent_sd = kobj->parent->sd;
+ else if (sysfs_mount && sysfs_mount->mnt_sb)
+- parent = sysfs_mount->mnt_sb->s_root;
++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
+ else
+ return -EFAULT;
+
+- error = create_dir(kobj,parent,kobject_name(kobj),&dentry);
++ error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
+ if (!error)
+- kobj->dentry = dentry;
++ kobj->sd = sd;
+ return error;
+ }
+
+-/* attaches attribute's sysfs_dirent to the dentry corresponding to the
+- * attribute file
+- */
+-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
++static int sysfs_count_nlink(struct sysfs_dirent *sd)
+ {
+- struct attribute * attr = NULL;
+- struct bin_attribute * bin_attr = NULL;
+- int (* init) (struct inode *) = NULL;
+- int error = 0;
++ struct sysfs_dirent *child;
++ int nr = 0;
+
+- if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
+- bin_attr = sd->s_element;
+- attr = &bin_attr->attr;
+- } else {
+- attr = sd->s_element;
+- init = init_file;
+- }
++ for (child = sd->s_children; child; child = child->s_sibling)
++ if (sysfs_type(child) == SYSFS_DIR)
++ nr++;
++ return nr + 2;
++}
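++
++/* Example: a directory with three subdirectories reports nlink 5,
++ * i.e. "." and ".." plus one link per child directory.
++ */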
+
+- dentry->d_fsdata = sysfs_get(sd);
+- /* protect sd->s_dentry against sysfs_d_iput */
+- spin_lock(&sysfs_lock);
+- sd->s_dentry = dentry;
+- spin_unlock(&sysfs_lock);
+- error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
+- if (error) {
+- sysfs_put(sd);
+- return error;
+- }
++static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
++ struct sysfs_dirent * sd;
++ struct bin_attribute *bin_attr;
++ struct inode *inode;
++ int found = 0;
+
+- if (bin_attr) {
+- dentry->d_inode->i_size = bin_attr->size;
+- dentry->d_inode->i_fop = &bin_fops;
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++ if (sysfs_type(sd) &&
++ !strcmp(sd->s_name, dentry->d_name.name)) {
++ found = 1;
++ break;
++ }
+ }
+- dentry->d_op = &sysfs_dentry_ops;
+- d_rehash(dentry);
+
+- return 0;
+-}
++ /* no such entry */
++ if (!found)
++ return NULL;
+
+-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
+-{
+- int err = 0;
++ /* attach dentry and inode */
++ inode = sysfs_get_inode(sd);
++ if (!inode)
++ return ERR_PTR(-ENOMEM);
++
++ mutex_lock(&sysfs_mutex);
++
++ if (inode->i_state & I_NEW) {
++ /* initialize inode according to type */
++ switch (sysfs_type(sd)) {
++ case SYSFS_DIR:
++ inode->i_op = &sysfs_dir_inode_operations;
++ inode->i_fop = &sysfs_dir_operations;
++ inode->i_nlink = sysfs_count_nlink(sd);
++ break;
++ case SYSFS_KOBJ_ATTR:
++ inode->i_size = PAGE_SIZE;
++ inode->i_fop = &sysfs_file_operations;
++ break;
++ case SYSFS_KOBJ_BIN_ATTR:
++ bin_attr = sd->s_elem.bin_attr.bin_attr;
++ inode->i_size = bin_attr->size;
++ inode->i_fop = &bin_fops;
++ break;
++ case SYSFS_KOBJ_LINK:
++ inode->i_op = &sysfs_symlink_inode_operations;
++ break;
++ default:
++ BUG();
++ }
++ }
+
+- dentry->d_fsdata = sysfs_get(sd);
+- /* protect sd->s_dentry against sysfs_d_iput */
+- spin_lock(&sysfs_lock);
+- sd->s_dentry = dentry;
+- spin_unlock(&sysfs_lock);
+- err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
+- if (!err) {
+- dentry->d_op = &sysfs_dentry_ops;
+- d_rehash(dentry);
+- } else
+- sysfs_put(sd);
++ sysfs_instantiate(dentry, inode);
++ sysfs_attach_dentry(sd, dentry);
+
+- return err;
++ mutex_unlock(&sysfs_mutex);
++
++ return NULL;
+ }
+
+-static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
+- struct nameidata *nd)
++static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd)
+ {
+- struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
+- struct sysfs_dirent * sd;
+- int err = 0;
++ struct sysfs_dirent *sd;
++ struct dentry *dest;
+
+- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+- if (sd->s_type & SYSFS_NOT_PINNED) {
+- const unsigned char * name = sysfs_get_name(sd);
++ sd = dentry->d_fsdata;
++ dest = NULL;
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const struct shadow_dir_operations *shadow_ops;
++ const void *tag;
+
+- if (strcmp(name, dentry->d_name.name))
+- continue;
++ mutex_lock(&sysfs_mutex);
+
+- if (sd->s_type & SYSFS_KOBJ_LINK)
+- err = sysfs_attach_link(sd, dentry);
+- else
+- err = sysfs_attach_attr(sd, dentry);
+- break;
+- }
++ shadow_ops = dentry->d_inode->i_private;
++ tag = shadow_ops->current_tag();
++
++ sd = find_shadow_sd(sd, tag);
++ if (sd)
++ dest = sd->s_dentry;
++ dget(dest);
++
++ mutex_unlock(&sysfs_mutex);
+ }
++ if (!dest)
++ dest = dget(dentry);
++ dput(nd->dentry);
++ nd->dentry = dest;
+
+- return ERR_PTR(err);
++ return NULL;
+ }
+
++
+ const struct inode_operations sysfs_dir_inode_operations = {
+ .lookup = sysfs_lookup,
+ .setattr = sysfs_setattr,
++ .follow_link = sysfs_shadow_follow_link,
+ };
+
+-static void remove_dir(struct dentry * d)
++static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+ {
+- struct dentry * parent = dget(d->d_parent);
+- struct sysfs_dirent * sd;
++ sysfs_unlink_sibling(sd);
++ sysfs_remove_one(acxt, sd);
++}
+
+- mutex_lock(&parent->d_inode->i_mutex);
+- d_delete(d);
+- sd = d->d_fsdata;
+- list_del_init(&sd->s_sibling);
+- sysfs_put(sd);
+- if (d->d_inode)
+- simple_rmdir(parent->d_inode,d);
++static void remove_dir(struct sysfs_dirent *sd)
++{
++ struct sysfs_addrm_cxt acxt;
+
+- pr_debug(" o %s removing done (%d)\n",d->d_name.name,
+- atomic_read(&d->d_count));
++ sysfs_addrm_start(&acxt, sd->s_parent);
++ __remove_dir(&acxt, sd);
++ sysfs_addrm_finish(&acxt);
++}
+
+- mutex_unlock(&parent->d_inode->i_mutex);
+- dput(parent);
++void sysfs_remove_subdir(struct sysfs_dirent *sd)
++{
++ remove_dir(sd);
+ }
+
+-void sysfs_remove_subdir(struct dentry * d)
++static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *dir_sd)
+ {
+- remove_dir(d);
++ struct sysfs_dirent **pos;
++
++ pos = &dir_sd->s_children;
++ while (*pos) {
++ struct sysfs_dirent *sd = *pos;
++
++ if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
++ *pos = sd->s_sibling;
++ sd->s_sibling = NULL;
++ sysfs_remove_one(acxt, sd);
++ } else
++ pos = &(*pos)->s_sibling;
++ }
+ }
+
++static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt,
++ struct sysfs_dirent *dir_sd)
++{
++ struct sysfs_dirent **pos;
++
++ pos = &dir_sd->s_children;
++ while (*pos) {
++ struct sysfs_dirent *sd = *pos;
++
++ sysfs_empty_dir(acxt, sd);
++ __remove_dir(acxt, sd);
++ }
++}
+
+-static void __sysfs_remove_dir(struct dentry *dentry)
++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
+ {
+- struct sysfs_dirent * parent_sd;
+- struct sysfs_dirent * sd, * tmp;
++ struct sysfs_addrm_cxt acxt;
+
+- dget(dentry);
+- if (!dentry)
++ if (!dir_sd)
+ return;
+
+- pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
+- mutex_lock(&dentry->d_inode->i_mutex);
+- parent_sd = dentry->d_fsdata;
+- list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+- if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
+- continue;
+- list_del_init(&sd->s_sibling);
+- sysfs_drop_dentry(sd, dentry);
+- sysfs_put(sd);
+- }
+- mutex_unlock(&dentry->d_inode->i_mutex);
++ pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
++ sysfs_addrm_start(&acxt, dir_sd);
++ if (sysfs_type(dir_sd) == SYSFS_DIR)
++ sysfs_empty_dir(&acxt, dir_sd);
++ else
++ sysfs_remove_shadows(&acxt, dir_sd);
++ sysfs_addrm_finish(&acxt);
+
+- remove_dir(dentry);
+- /**
+- * Drop reference from dget() on entrance.
+- */
+- dput(dentry);
++ remove_dir(dir_sd);
+ }
+
+ /**
+@@ -393,102 +1064,154 @@
+
+ void sysfs_remove_dir(struct kobject * kobj)
+ {
+- __sysfs_remove_dir(kobj->dentry);
+- kobj->dentry = NULL;
++ struct sysfs_dirent *sd = kobj->sd;
++
++ spin_lock(&sysfs_assoc_lock);
++ kobj->sd = NULL;
++ spin_unlock(&sysfs_assoc_lock);
++
++ __sysfs_remove_dir(sd);
+ }
+
+-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
+- const char *new_name)
++int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+ {
+- int error = 0;
+- struct dentry * new_dentry;
++ struct dentry *old_dentry, *new_dentry, *parent;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent *sd;
++ const char *dup_name;
++ int error;
+
+- if (!new_parent)
+- return -EFAULT;
++ dup_name = NULL;
++ new_dentry = NULL;
+
+- down_write(&sysfs_rename_sem);
+- mutex_lock(&new_parent->d_inode->i_mutex);
++ sd = kobj->sd;
++ sysfs_addrm_start(&acxt, sd->s_parent);
++ error = -ENOENT;
++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++ goto addrm_finish;
++
++ error = -EEXIST;
++ if (sysfs_find_dirent(acxt.parent_sd, new_name))
++ goto addrm_finish;
+
+- new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
+- if (!IS_ERR(new_dentry)) {
+- /* By allowing two different directories with the
+- * same d_parent we allow this routine to move
+- * between different shadows of the same directory
+- */
+- if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
+- return -EINVAL;
+- else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
+ error = -EINVAL;
+- else if (new_dentry == kobj->dentry)
++ if ((sd->s_parent == acxt.parent_sd) &&
++ (strcmp(new_name, sd->s_name) == 0))
++ goto addrm_finish;
++
++ old_dentry = sd->s_dentry;
++ parent = acxt.parent_sd->s_dentry;
++ if (old_dentry) {
++ new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
++ if (IS_ERR(new_dentry)) {
++ error = PTR_ERR(new_dentry);
++ goto addrm_finish;
++ }
++
+ error = -EINVAL;
+- else if (!new_dentry->d_inode) {
++ if (old_dentry == new_dentry)
++ goto addrm_finish;
++ }
++
++ /* rename kobject and sysfs_dirent */
++ error = -ENOMEM;
++ new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
++ if (!new_name)
++ goto addrm_finish;
++
+ error = kobject_set_name(kobj, "%s", new_name);
+- if (!error) {
+- struct sysfs_dirent *sd, *parent_sd;
++ if (error)
++ goto addrm_finish;
+
+- d_add(new_dentry, NULL);
+- d_move(kobj->dentry, new_dentry);
++ dup_name = sd->s_name;
++ sd->s_name = new_name;
+
+- sd = kobj->dentry->d_fsdata;
+- parent_sd = new_parent->d_fsdata;
++ /* move under the new parent */
++ sysfs_unlink_sibling(sd);
++ sysfs_get(acxt.parent_sd);
++ sysfs_put(sd->s_parent);
++ sd->s_parent = acxt.parent_sd;
++ sysfs_link_sibling(sd);
+
+- list_del_init(&sd->s_sibling);
+- list_add(&sd->s_sibling, &parent_sd->s_children);
+- }
+- else
+- d_drop(new_dentry);
+- } else
+- error = -EEXIST;
+- dput(new_dentry);
++ if (new_dentry) {
++ d_add(new_dentry, NULL);
++ d_move(old_dentry, new_dentry);
+ }
+- mutex_unlock(&new_parent->d_inode->i_mutex);
+- up_write(&sysfs_rename_sem);
++ error = 0;
++addrm_finish:
++ sysfs_addrm_finish(&acxt);
+
++ kfree(dup_name);
++ dput(new_dentry);
+ return error;
+ }
+
+-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
++int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
+ {
+- struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
+- struct sysfs_dirent *new_parent_sd, *sd;
++ struct sysfs_dirent *sd = kobj->sd;
++ struct sysfs_dirent *new_parent_sd;
++ struct dentry *old_parent, *new_parent = NULL;
++ struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ int error;
+
+- old_parent_dentry = kobj->parent ?
+- kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
+- new_parent_dentry = new_parent ?
+- new_parent->dentry : sysfs_mount->mnt_sb->s_root;
++ BUG_ON(!sd->s_parent);
++ new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
++
++ /* get dentries */
++ old_dentry = sysfs_get_dentry(sd);
++ if (IS_ERR(old_dentry)) {
++ error = PTR_ERR(old_dentry);
++ goto out_dput;
++ }
++ old_parent = sd->s_parent->s_dentry;
++
++ new_parent = sysfs_get_dentry(new_parent_sd);
++ if (IS_ERR(new_parent)) {
++ error = PTR_ERR(new_parent);
++ goto out_dput;
++ }
+
+- if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
+- return 0; /* nothing to move */
++ if (old_parent->d_inode == new_parent->d_inode) {
++ error = 0;
++ goto out_dput; /* nothing to move */
++ }
+ again:
+- mutex_lock(&old_parent_dentry->d_inode->i_mutex);
+- if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
+- mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
++ mutex_lock(&old_parent->d_inode->i_mutex);
++ if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
++ mutex_unlock(&old_parent->d_inode->i_mutex);
+ goto again;
+ }
+
+- new_parent_sd = new_parent_dentry->d_fsdata;
+- sd = kobj->dentry->d_fsdata;
+-
+- new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
+- strlen(kobj->name));
++ new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
+ if (IS_ERR(new_dentry)) {
+ error = PTR_ERR(new_dentry);
+- goto out;
++ goto out_unlock;
+ } else
+ error = 0;
+ d_add(new_dentry, NULL);
+- d_move(kobj->dentry, new_dentry);
++ d_move(sd->s_dentry, new_dentry);
+ dput(new_dentry);
+
+ /* Remove from old parent's list and insert into new parent's list. */
+- list_del_init(&sd->s_sibling);
+- list_add(&sd->s_sibling, &new_parent_sd->s_children);
++ mutex_lock(&sysfs_mutex);
+
+-out:
+- mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
+- mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
++ sysfs_unlink_sibling(sd);
++ sysfs_get(new_parent_sd);
++ sysfs_put(sd->s_parent);
++ sd->s_parent = new_parent_sd;
++ sysfs_link_sibling(sd);
++
++ mutex_unlock(&sysfs_mutex);
+
++ out_unlock:
++ mutex_unlock(&new_parent->d_inode->i_mutex);
++ mutex_unlock(&old_parent->d_inode->i_mutex);
++ out_dput:
++ dput(new_parent);
++ dput(old_dentry);
++ dput(new_dentry);
+ return error;
+ }
+
+@@ -496,23 +1219,27 @@
+ {
+ struct dentry * dentry = file->f_path.dentry;
+ struct sysfs_dirent * parent_sd = dentry->d_fsdata;
++ struct sysfs_dirent * sd;
+
+- mutex_lock(&dentry->d_inode->i_mutex);
+- file->private_data = sysfs_new_dirent(parent_sd, NULL);
+- mutex_unlock(&dentry->d_inode->i_mutex);
+-
+- return file->private_data ? 0 : -ENOMEM;
++ sd = sysfs_new_dirent("_DIR_", 0, 0);
++ if (sd) {
++ mutex_lock(&sysfs_mutex);
++ sd->s_parent = sysfs_get(parent_sd);
++ sysfs_link_sibling(sd);
++ mutex_unlock(&sysfs_mutex);
++ }
+
++ file->private_data = sd;
++ return sd ? 0 : -ENOMEM;
+ }
+
+ static int sysfs_dir_close(struct inode *inode, struct file *file)
+ {
+- struct dentry * dentry = file->f_path.dentry;
+ struct sysfs_dirent * cursor = file->private_data;
+
+- mutex_lock(&dentry->d_inode->i_mutex);
+- list_del_init(&cursor->s_sibling);
+- mutex_unlock(&dentry->d_inode->i_mutex);
++ mutex_lock(&sysfs_mutex);
++ sysfs_unlink_sibling(cursor);
++ mutex_unlock(&sysfs_mutex);
+
+ release_sysfs_dirent(cursor);
+
+@@ -530,7 +1257,7 @@
+ struct dentry *dentry = filp->f_path.dentry;
+ struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ struct sysfs_dirent *cursor = filp->private_data;
+- struct list_head *p, *q = &cursor->s_sibling;
++ struct sysfs_dirent **pos;
+ ino_t ino;
+ int i = filp->f_pos;
+
+@@ -543,38 +1270,55 @@
+ i++;
+ /* fallthrough */
+ case 1:
+- ino = parent_ino(dentry);
++ if (parent_sd->s_parent)
++ ino = parent_sd->s_parent->s_ino;
++ else
++ ino = parent_sd->s_ino;
+ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+ break;
+ filp->f_pos++;
+ i++;
+ /* fallthrough */
+ default:
++ /* If I am the shadow master return nothing. */
++ if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED)
++ return 0;
++
++ mutex_lock(&sysfs_mutex);
++ pos = &parent_sd->s_children;
++ while (*pos != cursor)
++ pos = &(*pos)->s_sibling;
++
++ /* unlink cursor */
++ *pos = cursor->s_sibling;
++
+ if (filp->f_pos == 2)
+- list_move(q, &parent_sd->s_children);
++ pos = &parent_sd->s_children;
+
+- for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
+- struct sysfs_dirent *next;
++ for ( ; *pos; pos = &(*pos)->s_sibling) {
++ struct sysfs_dirent *next = *pos;
+ const char * name;
+ int len;
+
+- next = list_entry(p, struct sysfs_dirent,
+- s_sibling);
+- if (!next->s_element)
++ if (!sysfs_type(next))
+ continue;
+
+- name = sysfs_get_name(next);
++ name = next->s_name;
+ len = strlen(name);
+ ino = next->s_ino;
+
+ if (filldir(dirent, name, len, filp->f_pos, ino,
+ dt_type(next)) < 0)
+- return 0;
++ break;
+
+- list_move(q, p);
+- p = q;
+ filp->f_pos++;
+ }
++
++ /* put cursor back in */
++ cursor->s_sibling = *pos;
++ *pos = cursor;
++
++ mutex_unlock(&sysfs_mutex);
+ }
+ return 0;
+ }
+@@ -583,7 +1327,6 @@
+ {
+ struct dentry * dentry = file->f_path.dentry;
+
+- mutex_lock(&dentry->d_inode->i_mutex);
+ switch (origin) {
+ case 1:
+ offset += file->f_pos;
+@@ -591,127 +1334,224 @@
+ if (offset >= 0)
+ break;
+ default:
+- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ return -EINVAL;
+ }
+ if (offset != file->f_pos) {
++ mutex_lock(&sysfs_mutex);
++
+ file->f_pos = offset;
+ if (file->f_pos >= 2) {
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ struct sysfs_dirent *cursor = file->private_data;
+- struct list_head *p;
++ struct sysfs_dirent **pos;
+ loff_t n = file->f_pos - 2;
+
+- list_del(&cursor->s_sibling);
+- p = sd->s_children.next;
+- while (n && p != &sd->s_children) {
+- struct sysfs_dirent *next;
+- next = list_entry(p, struct sysfs_dirent,
+- s_sibling);
+- if (next->s_element)
++ sysfs_unlink_sibling(cursor);
++
++ pos = &sd->s_children;
++ while (n && *pos) {
++ struct sysfs_dirent *next = *pos;
++ if (sysfs_type(next))
+ n--;
+- p = p->next;
++ pos = &(*pos)->s_sibling;
+ }
+- list_add_tail(&cursor->s_sibling, p);
++
++ cursor->s_sibling = *pos;
++ *pos = cursor;
+ }
++
++ mutex_unlock(&sysfs_mutex);
+ }
+- mutex_unlock(&dentry->d_inode->i_mutex);
++
+ return offset;
+ }
+
++const struct file_operations sysfs_dir_operations = {
++ .open = sysfs_dir_open,
++ .release = sysfs_dir_close,
++ .llseek = sysfs_dir_lseek,
++ .read = generic_read_dir,
++ .readdir = sysfs_readdir,
++};
+
+-/**
+- * sysfs_make_shadowed_dir - Setup so a directory can be shadowed
+- * @kobj: object we're creating shadow of.
+- */
+
+-int sysfs_make_shadowed_dir(struct kobject *kobj,
+- void * (*follow_link)(struct dentry *, struct nameidata *))
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd)
+ {
+- struct inode *inode;
+- struct inode_operations *i_op;
++ struct sysfs_addrm_cxt acxt;
+
+- inode = kobj->dentry->d_inode;
+- if (inode->i_op != &sysfs_dir_inode_operations)
+- return -EINVAL;
++ /* If a shadow directory goes empty remove it. */
++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
++ return;
+
+- i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
+- if (!i_op)
+- return -ENOMEM;
++ if (sd->s_children)
++ return;
+
+- memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
+- i_op->follow_link = follow_link;
++ sysfs_addrm_start(&acxt, sd->s_parent);
+
+- /* Locking of inode->i_op?
+- * Since setting i_op is a single word write and they
+- * are atomic we should be ok here.
+- */
+- inode->i_op = i_op;
+- return 0;
+-}
++ if (sd->s_flags & SYSFS_FLAG_REMOVED)
++ goto addrm_finish;
+
+-/**
+- * sysfs_create_shadow_dir - create a shadow directory for an object.
+- * @kobj: object we're creating directory for.
+- *
+- * sysfs_make_shadowed_dir must already have been called on this
+- * directory.
+- */
++ if (sd->s_children)
++ goto addrm_finish;
+
+-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
++ __remove_dir(&acxt, sd);
++addrm_finish:
++ sysfs_addrm_finish(&acxt);
++}
++
++static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag)
+ {
+- struct sysfs_dirent *sd;
+- struct dentry *parent, *dir, *shadow;
++ struct sysfs_dirent *sd = NULL;
++ struct dentry *dir, *shadow;
+ struct inode *inode;
+
+- dir = kobj->dentry;
++ dir = parent_sd->s_dentry;
+ inode = dir->d_inode;
+- parent = dir->d_parent;
+- shadow = ERR_PTR(-EINVAL);
+- if (!sysfs_is_shadowed_inode(inode))
+- goto out;
+
+- shadow = d_alloc(parent, &dir->d_name);
++ shadow = d_alloc(dir->d_parent, &dir->d_name);
+ if (!shadow)
+- goto nomem;
++ goto out;
++
++ /* Since the shadow directory is reachable, make it look
++ * like it is actually hashed.
++ */
++ shadow->d_hash.pprev = &shadow->d_hash.next;
++ shadow->d_hash.next = NULL;
++ shadow->d_flags &= ~DCACHE_UNHASHED;
+
+- sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
++ sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR);
+ if (!sd)
+- goto nomem;
++ goto error;
+
+- d_instantiate(shadow, igrab(inode));
+- inc_nlink(inode);
+- inc_nlink(parent->d_inode);
+- shadow->d_op = &sysfs_dentry_ops;
++ sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj;
++ sd->s_parent = sysfs_get(parent_sd);
+
+- dget(shadow); /* Extra count - pin the dentry in core */
++ /* Use the inode number of the parent we are shadowing */
++ sysfs_free_ino(sd->s_ino);
++ sd->s_ino = parent_sd->s_ino;
+
++ inc_nlink(inode);
++ inc_nlink(dir->d_parent->d_inode);
++
++ sysfs_link_sibling(sd);
++ __iget(inode);
++ sysfs_instantiate(shadow, inode);
++ sysfs_attach_dentry(sd, shadow);
+ out:
+- return shadow;
+-nomem:
++ return sd;
++error:
+ dput(shadow);
+- shadow = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
++int sysfs_resolve_for_create(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd)
++{
++ const struct shadow_dir_operations *shadow_ops;
++ struct sysfs_dirent *sd, *shadow_sd;
++
++ sd = *parent_sd;
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ sd = sd->s_parent;
++
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const void *tag;
++
++ shadow_ops = sd->s_dentry->d_inode->i_private;
++ tag = shadow_ops->kobject_tag(kobj);
++
++ shadow_sd = find_shadow_sd(sd, tag);
++ if (!shadow_sd)
++ shadow_sd = add_shadow_sd(sd, tag);
++ sd = shadow_sd;
++ }
++ if (sd) {
++ *parent_sd = sd;
++ return 1;
++ }
++ return 0;
++}
++
++int sysfs_resolve_for_remove(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd)
++{
++ struct sysfs_dirent *sd;
++ /* If the parent is a shadow directory, find the shadow that is
++ * stored under the same tag as kobj. This allows removal of
++ * dirents to function properly even if the value of
++ * kobject_tag() has changed since we initially created the
++ * dirents associated with kobj.
++ */
++
++ sd = *parent_sd;
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ sd = sd->s_parent;
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const void *tag;
++
++ tag = find_shadow_tag(kobj);
++ sd = find_shadow_sd(sd, tag);
++ }
++ if (sd) {
++ *parent_sd = sd;
++ return 1;
++ }
++ return 0;
++}
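++
++/* Note: sysfs_resolve_for_create() keys on kobject_tag(), i.e. where
++ * the kobject should live now, while this function keys on the tag
++ * the dirents were actually filed under, so a kobject whose tag has
++ * changed since creation is still removed from the shadow it was
++ * created in.
++ */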
++
+ /**
+- * sysfs_remove_shadow_dir - remove an object's directory.
+- * @shadow: dentry of shadow directory
++ * sysfs_enable_shadowing - Automatically create shadows of a directory
++ * @kobj: object to automatically shadow
+ *
+- * The only thing special about this is that we remove any files in
+- * the directory before we remove the directory, and we've inlined
+- * what used to be sysfs_rmdir() below, instead of calling separately.
++ * Once shadowing has been enabled on a directory, the contents
++ * of the directory become dependent upon context.
++ *
++ * shadow_ops->current_tag() returns the context for the current
++ * process.
++ *
++ * shadow_ops->kobject_tag() returns the context that a given kobj
++ * resides in.
++ *
++ * Using those methods the sysfs code on shadowed directories
++ * carefully stores the files so that when we look up files
++ * we get the proper answer for our context.
++ *
++ * If the context of a kobject is changed, it is expected that
++ * the kobject will be renamed so the appropriate sysfs data structures
++ * can be updated.
+ */
+-
+-void sysfs_remove_shadow_dir(struct dentry *shadow)
++int sysfs_enable_shadowing(struct kobject *kobj,
++ const struct shadow_dir_operations *shadow_ops)
+ {
+- __sysfs_remove_dir(shadow);
++ struct sysfs_dirent *sd;
++ struct dentry *dentry;
++ int err;
++
++ /* Find the dentry for the shadowed directory and
++ * increase its count.
++ */
++ sd = kobj->sd;
++ dentry = sysfs_get_dentry(sd);
++ if (IS_ERR(dentry))
++ return PTR_ERR(dentry);
++
++ mutex_lock(&sysfs_mutex);
++ err = -EINVAL;
++ /* We can only enable shadowing on empty directories
++ * where shadowing is not already enabled.
++ */
++ if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) &&
++ !(sd->s_flags & SYSFS_FLAG_REMOVED) &&
++ !(sd->s_flags & SYSFS_FLAG_SHADOWED)) {
++ sd->s_flags |= SYSFS_FLAG_SHADOWED;
++ dentry->d_inode->i_private = (void *)shadow_ops;
++ err = 0;
++ }
++ mutex_unlock(&sysfs_mutex);
++ if (err)
++ dput(dentry);
++ return err;
+ }
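++
++/* Usage sketch (illustrative, all names made up): a caller wanting
++ * per-context views keyed by, say, a namespace pointer would supply
++ *
++ *        static const void *example_current_tag(void)
++ *        {
++ *                return current_example_ns();
++ *        }
++ *
++ *        static const void *example_kobject_tag(struct kobject *kobj)
++ *        {
++ *                return ns_of_kobj(kobj);
++ *        }
++ *
++ *        static const struct shadow_dir_operations example_shadow_ops = {
++ *                .current_tag = example_current_tag,
++ *                .kobject_tag = example_kobject_tag,
++ *        };
++ *
++ * and call sysfs_enable_shadowing(kobj, &example_shadow_ops) on an
++ * empty directory before populating it.
++ */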
+
+-const struct file_operations sysfs_dir_operations = {
+- .open = sysfs_dir_open,
+- .release = sysfs_dir_close,
+- .llseek = sysfs_dir_lseek,
+- .read = generic_read_dir,
+- .readdir = sysfs_readdir,
+-};
+diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c.orig linux-2.6.22-591/fs/sysfs/dir.c.orig
+--- linux-2.6.22-570/fs/sysfs/dir.c.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/sysfs/dir.c.orig 2007-12-22 20:43:14.000000000 -0500
+@@ -0,0 +1,1558 @@
++/*
++ * dir.c - Operations for sysfs directories.
++ */
++
++#undef DEBUG
++
++#include <linux/fs.h>
++#include <linux/mount.h>
++#include <linux/module.h>
++#include <linux/kobject.h>
++#include <linux/namei.h>
++#include <linux/idr.h>
++#include <linux/completion.h>
++#include <asm/semaphore.h>
++#include "sysfs.h"
++
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd);
++
++DEFINE_MUTEX(sysfs_mutex);
++spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
++
++static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
++static DEFINE_IDA(sysfs_ino_ida);
++
++static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target)
++{
++ /* Find the shadow directory for the specified tag */
++ struct sysfs_dirent *sd;
++
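++ /* Tags are pointer identities stored in ->s_name (see
++ * add_shadow_sd()), so compare pointers rather than string
++ * contents.
++ */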
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++ if (sd->s_name == target)
++ break;
++ }
++ return sd;
++}
++
++static const void *find_shadow_tag(struct kobject *kobj)
++{
++ /* Find the tag the current kobj is cached with */
++ return kobj->sd->s_parent->s_name;
++}
++
++/**
++ * sysfs_get_dentry - get dentry for the given sysfs_dirent
++ * @sd: sysfs_dirent of interest
++ *
++ * Get dentry for @sd. Dentry is looked up if currently not
++ * present. This function climbs sysfs_dirent tree till it
++ * reaches a sysfs_dirent with valid dentry attached and descends
++ * down from there looking up dentry for each step.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep)
++ *
++ * RETURNS:
++ * Pointer to found dentry on success, ERR_PTR() value on error.
++ */
++struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent *cur;
++ struct dentry *parent_dentry, *dentry;
++ int i, depth;
++
++ /* Find the first parent which has valid s_dentry and get the
++ * dentry.
++ */
++ mutex_lock(&sysfs_mutex);
++ restart0:
++ spin_lock(&sysfs_assoc_lock);
++ restart1:
++ spin_lock(&dcache_lock);
++
++ dentry = NULL;
++ depth = 0;
++ cur = sd;
++ while (!cur->s_dentry || !cur->s_dentry->d_inode) {
++ if (cur->s_flags & SYSFS_FLAG_REMOVED) {
++ dentry = ERR_PTR(-ENOENT);
++ depth = 0;
++ break;
++ }
++ cur = cur->s_parent;
++ depth++;
++ }
++ if (!IS_ERR(dentry))
++ dentry = dget_locked(cur->s_dentry);
++
++ spin_unlock(&dcache_lock);
++ spin_unlock(&sysfs_assoc_lock);
++
++ /* from the found dentry, look up depth times */
++ while (depth--) {
++ /* find and get depth'th ancestor */
++ for (cur = sd, i = 0; cur && i < depth; i++)
++ cur = cur->s_parent;
++
++ /* This can happen if tree structure was modified due
++ * to move/rename. Restart.
++ */
++ if (i != depth) {
++ dput(dentry);
++ goto restart0;
++ }
++
++ sysfs_get(cur);
++
++ mutex_unlock(&sysfs_mutex);
++
++ /* look it up */
++ parent_dentry = dentry;
++ dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
++ strlen(cur->s_name));
++ dput(parent_dentry);
++
++ if (IS_ERR(dentry)) {
++ sysfs_put(cur);
++ return dentry;
++ }
++
++ mutex_lock(&sysfs_mutex);
++ spin_lock(&sysfs_assoc_lock);
++
++ /* This, again, can happen if tree structure has
++ * changed and we looked up the wrong thing. Restart.
++ */
++ if (cur->s_dentry != dentry) {
++ dput(dentry);
++ sysfs_put(cur);
++ goto restart1;
++ }
++
++ spin_unlock(&sysfs_assoc_lock);
++
++ sysfs_put(cur);
++ }
++
++ mutex_unlock(&sysfs_mutex);
++ return dentry;
++}
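++
++/* Note: the restart labels exist because sysfs_mutex and
++ * sysfs_assoc_lock are dropped around lookup_one_len_kern(); a
++ * concurrent rename or move can change the ancestor chain in that
++ * window, so the climb is redone from scratch when that is detected.
++ */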
++
++/**
++ * sysfs_link_sibling - link sysfs_dirent into sibling list
++ * @sd: sysfs_dirent of interest
++ *
++ * Link @sd into its sibling list which starts from
++ * sd->s_parent->s_children.
++ *
++ * Locking:
++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex)
++ */
++void sysfs_link_sibling(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent *parent_sd = sd->s_parent;
++
++ BUG_ON(sd->s_sibling);
++ sd->s_sibling = parent_sd->s_children;
++ parent_sd->s_children = sd;
++}
++
++/**
++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
++ * @sd: sysfs_dirent of interest
++ *
++ * Unlink @sd from its sibling list which starts from
++ * sd->s_parent->s_children.
++ *
++ * Locking:
++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex)
++ */
++void sysfs_unlink_sibling(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent **pos;
++
++ for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
++ if (*pos == sd) {
++ *pos = sd->s_sibling;
++ sd->s_sibling = NULL;
++ break;
++ }
++ }
++}
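++
++/* Note: walking with a struct sysfs_dirent **pos lets the loop above
++ * unlink an arbitrary element without special-casing the list head:
++ * *pos is either parent_sd->s_children or some sibling's ->s_sibling
++ * field, and assigning through it splices @sd out either way.
++ */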
++
++/**
++ * sysfs_put_active - put an active reference to sysfs_dirent
++ * @sd: sysfs_dirent to put an active reference to
++ *
++ * Put an active reference to @sd. This function is a noop if @sd
++ * is NULL.
++ */
++void sysfs_put_active(struct sysfs_dirent *sd)
++{
++ struct completion *cmpl;
++ int v;
++
++ if (unlikely(!sd))
++ return;
++
++ v = atomic_dec_return(&sd->s_active);
++ if (likely(v != SD_DEACTIVATED_BIAS))
++ return;
++
++ /* atomic_dec_return() is a mb(), we'll always see the updated
++ * sd->s_sibling.
++ */
++ cmpl = (void *)sd->s_sibling;
++ complete(cmpl);
++}
++
++/**
++ * sysfs_get_active_two - get active references to sysfs_dirent and parent
++ * @sd: sysfs_dirent of interest
++ *
++ * Get active references to @sd and its parent. The parent's active
++ * reference is grabbed first. This function is a noop if @sd is
++ * NULL.
++ *
++ * RETURNS:
++ * Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
++{
++ if (sd) {
++ if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
++ return NULL;
++ if (unlikely(!sysfs_get_active(sd))) {
++ sysfs_put_active(sd->s_parent);
++ return NULL;
++ }
++ }
++ return sd;
++}
++
++/**
++ * sysfs_put_active_two - put active references to sysfs_dirent and parent
++ * @sd: sysfs_dirent of interest
++ *
++ * Put active references to @sd and its parent. This function is a
++ * noop if @sd is NULL.
++ */
++void sysfs_put_active_two(struct sysfs_dirent *sd)
++{
++ if (sd) {
++ sysfs_put_active(sd);
++ sysfs_put_active(sd->s_parent);
++ }
++}
++
++/**
++ * sysfs_deactivate - deactivate sysfs_dirent
++ * @sd: sysfs_dirent to deactivate
++ *
++ * Deny new active references and drain existing ones.
++ */
++static void sysfs_deactivate(struct sysfs_dirent *sd)
++{
++ DECLARE_COMPLETION_ONSTACK(wait);
++ int v;
++
++ BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
++ sd->s_sibling = (void *)&wait;
++
++ /* atomic_add_return() is a mb(), put_active() will always see
++ * the updated sd->s_sibling.
++ */
++ v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
++
++ if (v != SD_DEACTIVATED_BIAS)
++ wait_for_completion(&wait);
++
++ sd->s_sibling = NULL;
++}
++
++/**
++ * sysfs_get_active - get an active reference to sysfs_dirent
++ * @sd: sysfs_dirent to get an active reference to
++ *
++ * Get an active reference to @sd. This function is a noop if @sd
++ * is NULL.
++ *
++ * RETURNS:
++ * Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
++{
++ if (unlikely(!sd))
++ return NULL;
++
++ while (1) {
++ int v, t;
++
++ v = atomic_read(&sd->s_active);
++ if (unlikely(v < 0))
++ return NULL;
++
++ t = atomic_cmpxchg(&sd->s_active, v, v + 1);
++ if (likely(t == v))
++ return sd;
++ if (t < 0)
++ return NULL;
++
++ cpu_relax();
++ }
++}
++
++static int sysfs_alloc_ino(ino_t *pino)
++{
++ int ino, rc;
++
++ retry:
++ spin_lock(&sysfs_ino_lock);
++ rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
++ spin_unlock(&sysfs_ino_lock);
++
++ if (rc == -EAGAIN) {
++ if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
++ goto retry;
++ rc = -ENOMEM;
++ }
++
++ *pino = ino;
++ return rc;
++}
++
++static void sysfs_free_ino(ino_t ino)
++{
++ spin_lock(&sysfs_ino_lock);
++ ida_remove(&sysfs_ino_ida, ino);
++ spin_unlock(&sysfs_ino_lock);
++}
++
++void release_sysfs_dirent(struct sysfs_dirent * sd)
++{
++ struct sysfs_dirent *parent_sd;
++
++ repeat:
++ /* Moving/renaming is always done while holding reference.
++ * sd->s_parent won't change beneath us.
++ */
++ parent_sd = sd->s_parent;
++
++ if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
++ sysfs_put(sd->s_elem.symlink.target_sd);
++ if (sysfs_type(sd) & SYSFS_COPY_NAME)
++ kfree(sd->s_name);
++ kfree(sd->s_iattr);
++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
++ sysfs_free_ino(sd->s_ino);
++ kmem_cache_free(sysfs_dir_cachep, sd);
++
++ sd = parent_sd;
++ if (sd && atomic_dec_and_test(&sd->s_count))
++ goto repeat;
++}
++
++static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
++{
++ struct sysfs_dirent * sd = dentry->d_fsdata;
++
++ if (sd) {
++ /* sd->s_dentry is protected with sysfs_assoc_lock.
++ * This allows sysfs_drop_dentry() to dereference it.
++ */
++ spin_lock(&sysfs_assoc_lock);
++
++ /* The dentry might have been deleted or another
++ * lookup could have happened updating sd->s_dentry to
++ * point the new dentry. Ignore if it isn't pointing
++ * to this dentry.
++ */
++ if (sd->s_dentry == dentry)
++ sd->s_dentry = NULL;
++ spin_unlock(&sysfs_assoc_lock);
++ sysfs_put(sd);
++ }
++ iput(inode);
++}
++
++static struct dentry_operations sysfs_dentry_ops = {
++ .d_iput = sysfs_d_iput,
++};
++
++struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
++{
++ char *dup_name = NULL;
++ struct sysfs_dirent *sd = NULL;
++
++ if (type & SYSFS_COPY_NAME) {
++ name = dup_name = kstrdup(name, GFP_KERNEL);
++ if (!name)
++ goto err_out;
++ }
++
++ sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
++ if (!sd)
++ goto err_out;
++
++ if (sysfs_alloc_ino(&sd->s_ino))
++ goto err_out;
++
++ atomic_set(&sd->s_count, 1);
++ atomic_set(&sd->s_active, 0);
++ atomic_set(&sd->s_event, 1);
++
++ sd->s_name = name;
++ sd->s_mode = mode;
++ sd->s_flags = type;
++
++ return sd;
++
++ err_out:
++ kfree(dup_name);
++ if (sd)
++ kmem_cache_free(sysfs_dir_cachep, sd);
++ return NULL;
++}
++
++/**
++ * sysfs_attach_dentry - associate sysfs_dirent with dentry
++ * @sd: target sysfs_dirent
++ * @dentry: dentry to associate
++ *
++ * Associate @sd with @dentry. This is protected by
++ * sysfs_assoc_lock to avoid race with sysfs_d_iput().
++ *
++ * LOCKING:
++ * mutex_lock(sysfs_mutex)
++ */
++static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
++{
++ dentry->d_op = &sysfs_dentry_ops;
++ dentry->d_fsdata = sysfs_get(sd);
++
++ /* protect sd->s_dentry against sysfs_d_iput */
++ spin_lock(&sysfs_assoc_lock);
++ sd->s_dentry = dentry;
++ spin_unlock(&sysfs_assoc_lock);
++
++ if (dentry->d_flags & DCACHE_UNHASHED)
++ d_rehash(dentry);
++}
++
++static int sysfs_ilookup_test(struct inode *inode, void *arg)
++{
++ struct sysfs_dirent *sd = arg;
++ return inode->i_ino == sd->s_ino;
++}
++
++/**
++ * sysfs_addrm_start - prepare for sysfs_dirent add/remove
++ * @acxt: pointer to sysfs_addrm_cxt to be used
++ * @parent_sd: parent sysfs_dirent
++ *
++ * This function is called when the caller is about to add or
++ * remove a sysfs_dirent under @parent_sd. This function acquires
++ * sysfs_mutex, grabs the inode for @parent_sd if available and
++ * locks its i_mutex. @acxt is used to keep and pass context to the
++ * other addrm functions.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep). sysfs_mutex is locked on
++ * return. i_mutex of parent inode is locked on return if
++ * available.
++ */
++void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *parent_sd)
++{
++ struct inode *inode;
++
++ memset(acxt, 0, sizeof(*acxt));
++ acxt->parent_sd = parent_sd;
++
++ /* Lookup parent inode. inode initialization and I_NEW
++ * clearing are protected by sysfs_mutex. By grabbing it and
++ * looking up with _nowait variant, inode state can be
++ * determined reliably.
++ */
++ mutex_lock(&sysfs_mutex);
++
++ inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
++ parent_sd);
++
++ if (inode && !(inode->i_state & I_NEW)) {
++ /* parent inode available */
++ acxt->parent_inode = inode;
++
++ /* sysfs_mutex is below i_mutex in lock hierarchy.
++ * First, trylock i_mutex. If that fails, unlock
++ * sysfs_mutex and take both locks in order.
++ */
++ if (!mutex_trylock(&inode->i_mutex)) {
++ mutex_unlock(&sysfs_mutex);
++ mutex_lock(&inode->i_mutex);
++ mutex_lock(&sysfs_mutex);
++ }
++ } else
++ iput(inode);
++}
++
++/**
++ * sysfs_add_one - add sysfs_dirent to parent
++ * @acxt: addrm context to use
++ * @sd: sysfs_dirent to be added
++ *
++ * Grab a reference on @acxt->parent_sd, set sd->s_parent to it, and
++ * increment the nlink count of the parent inode if @sd is a
++ * directory. @sd is NOT
++ * linked into the children list of the parent. The caller
++ * should invoke sysfs_link_sibling() after this function
++ * completes if @sd needs to be on the children list.
++ *
++ * This function should be called between calls to
++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ * passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ * LOCKING:
++ * Determined by sysfs_addrm_start().
++ */
++void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++ sd->s_parent = sysfs_get(acxt->parent_sd);
++
++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++ inc_nlink(acxt->parent_inode);
++
++ acxt->cnt++;
++}
++
++/**
++ * sysfs_remove_one - remove sysfs_dirent from parent
++ * @acxt: addrm context to use
++ * @sd: sysfs_dirent to be removed
++ *
++ * Mark @sd removed and drop the nlink of the parent inode if @sd is
++ * a directory. @sd is NOT unlinked from the children list of the
++ * parent. The caller is responsible for removing @sd from the
++ * children list before calling this function.
++ *
++ * This function should be called between calls to
++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ * passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ * LOCKING:
++ * Determined by sysfs_addrm_start().
++ */
++void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++ BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
++
++ sd->s_flags |= SYSFS_FLAG_REMOVED;
++ sd->s_sibling = acxt->removed;
++ acxt->removed = sd;
++
++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++ drop_nlink(acxt->parent_inode);
++
++ acxt->cnt++;
++}
++
++/**
++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
++ * @sd: target sysfs_dirent
++ *
++ * Drop dentry for @sd. @sd must have been unlinked from its
++ * parent on entry to this function such that it can't be looked
++ * up anymore.
++ *
++ * @sd->s_dentry, which is protected by sysfs_assoc_lock, points
++ * to the currently associated dentry, but we don't hold a
++ * reference to it and may be racing with dput(). Grab dcache_lock
++ * and verify the dentry before dropping it. If @sd->s_dentry is
++ * NULL or dput() beats us, there's no need to bother.
++ */
++static void sysfs_drop_dentry(struct sysfs_dirent *sd)
++{
++ struct dentry *dentry = NULL;
++ struct inode *inode;
++
++ /* We're not holding a reference to the ->s_dentry dentry, but the
++ * field will stay valid as long as sysfs_assoc_lock is held.
++ */
++ spin_lock(&sysfs_assoc_lock);
++ spin_lock(&dcache_lock);
++
++ /* drop dentry if it's there and dput() didn't kill it yet */
++ if (sd->s_dentry && sd->s_dentry->d_inode) {
++ dentry = dget_locked(sd->s_dentry);
++ spin_lock(&dentry->d_lock);
++ __d_drop(dentry);
++ spin_unlock(&dentry->d_lock);
++ }
++
++ spin_unlock(&dcache_lock);
++ spin_unlock(&sysfs_assoc_lock);
++
++ /* dentries for shadowed directories are pinned, unpin */
++ if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) ||
++ (sd->s_flags & SYSFS_FLAG_SHADOWED))
++ dput(dentry);
++ dput(dentry);
++
++ /* adjust nlink and update timestamp */
++ inode = ilookup(sysfs_sb, sd->s_ino);
++ if (inode) {
++ mutex_lock(&inode->i_mutex);
++
++ inode->i_ctime = CURRENT_TIME;
++ drop_nlink(inode);
++ if (sysfs_type(sd) == SYSFS_DIR)
++ drop_nlink(inode);
++
++ mutex_unlock(&inode->i_mutex);
++ iput(inode);
++ }
++}
++
++/**
++ * sysfs_addrm_finish - finish up sysfs_dirent add/remove
++ * @acxt: addrm context to finish up
++ *
++ * Finish up sysfs_dirent add/remove. Resources acquired by
++ * sysfs_addrm_start() are released and removed sysfs_dirents are
++ * cleaned up. Timestamps on the parent inode are updated.
++ *
++ * LOCKING:
++ * All mutexes acquired by sysfs_addrm_start() are released.
++ *
++ * RETURNS:
++ * Number of added/removed sysfs_dirents since sysfs_addrm_start().
++ */
++int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
++{
++ /* release resources acquired by sysfs_addrm_start() */
++ mutex_unlock(&sysfs_mutex);
++ if (acxt->parent_inode) {
++ struct inode *inode = acxt->parent_inode;
++
++ /* if added/removed, update timestamps on the parent */
++ if (acxt->cnt)
++ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
++
++ mutex_unlock(&inode->i_mutex);
++ iput(inode);
++ }
++
++ /* kill removed sysfs_dirents */
++ while (acxt->removed) {
++ struct sysfs_dirent *sd = acxt->removed;
++
++ acxt->removed = sd->s_sibling;
++ sd->s_sibling = NULL;
++
++ sysfs_prune_shadow_sd(sd->s_parent);
++ sysfs_drop_dentry(sd);
++ sysfs_deactivate(sd);
++ sysfs_put(sd);
++ }
++
++ return acxt->cnt;
++}
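++
++/*
++ * Typical caller pattern (an illustrative sketch, not a caller added
++ * by this patch; create_dir() below follows the same sequence):
++ *
++ *   struct sysfs_addrm_cxt acxt;
++ *
++ *   sysfs_addrm_start(&acxt, parent_sd);
++ *   if (!sysfs_find_dirent(acxt.parent_sd, name)) {
++ *           sysfs_add_one(&acxt, sd);
++ *           sysfs_link_sibling(sd);
++ *   }
++ *   if (sysfs_addrm_finish(&acxt))
++ *           return 0;
++ */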
++
++/**
++ * sysfs_find_dirent - find sysfs_dirent with the given name
++ * @parent_sd: sysfs_dirent to search under
++ * @name: name to look for
++ *
++ * Look for sysfs_dirent with name @name under @parent_sd.
++ *
++ * LOCKING:
++ * mutex_lock(sysfs_mutex)
++ *
++ * RETURNS:
++ * Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name)
++{
++ struct sysfs_dirent *sd;
++
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
++ if (sysfs_type(sd) && !strcmp(sd->s_name, name))
++ return sd;
++ return NULL;
++}
++
++/**
++ * sysfs_get_dirent - find and get sysfs_dirent with the given name
++ * @parent_sd: sysfs_dirent to search under
++ * @name: name to look for
++ *
++ * Look for sysfs_dirent with name @name under @parent_sd and get
++ * it if found.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep). Grabs sysfs_mutex.
++ *
++ * RETURNS:
++ * Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name)
++{
++ struct sysfs_dirent *sd;
++
++ mutex_lock(&sysfs_mutex);
++ sd = sysfs_find_dirent(parent_sd, name);
++ sysfs_get(sd);
++ mutex_unlock(&sysfs_mutex);
++
++ return sd;
++}
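++
++/*
++ * Illustrative use (a sketch, not a caller added by this patch; the
++ * "power" name is hypothetical). The returned dirent carries an
++ * extra reference which the caller must drop with sysfs_put():
++ *
++ *   struct sysfs_dirent *sd;
++ *
++ *   sd = sysfs_get_dirent(kobj->sd, "power");
++ *   if (sd) {
++ *           ... use sd under its own locking rules ...
++ *           sysfs_put(sd);
++ *   }
++ */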
++
++static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
++ const char *name, struct sysfs_dirent **p_sd)
++{
++ umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent *sd;
++ int err;
++
++ /* allocate */
++ sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
++ if (!sd)
++ return -ENOMEM;
++ sd->s_elem.dir.kobj = kobj;
++
++ /* link in */
++ sysfs_addrm_start(&acxt, parent_sd);
++ err = -ENOENT;
++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++ goto addrm_finish;
++
++ err = -EEXIST;
++ if (!sysfs_find_dirent(acxt.parent_sd, name)) {
++ sysfs_add_one(&acxt, sd);
++ sysfs_link_sibling(sd);
++ err = 0;
++ }
++addrm_finish:
++ if (sysfs_addrm_finish(&acxt)) {
++ *p_sd = sd;
++ return 0;
++ }
++
++ sysfs_put(sd);
++ return err;
++}
++
++int sysfs_create_subdir(struct kobject *kobj, const char *name,
++ struct sysfs_dirent **p_sd)
++{
++ return create_dir(kobj, kobj->sd, name, p_sd);
++}
++
++/**
++ * sysfs_create_dir - create a directory for an object.
++ * @kobj: object we're creating directory for.
++ */
++int sysfs_create_dir(struct kobject * kobj)
++{
++ struct sysfs_dirent *parent_sd, *sd;
++ int error = 0;
++
++ BUG_ON(!kobj);
++
++ if (kobj->parent)
++ parent_sd = kobj->parent->sd;
++ else if (sysfs_mount && sysfs_mount->mnt_sb)
++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
++ else
++ return -EFAULT;
++
++ error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
++ if (!error)
++ kobj->sd = sd;
++ return error;
++}
++
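++/* Directory inodes start at nlink == 2: one link for "." and one for
++ * the entry in the parent; each child directory's ".." adds one more.
++ */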
++static int sysfs_count_nlink(struct sysfs_dirent *sd)
++{
++ struct sysfs_dirent *child;
++ int nr = 0;
++
++ for (child = sd->s_children; child; child = child->s_sibling)
++ if (sysfs_type(child) == SYSFS_DIR)
++ nr++;
++ return nr + 2;
++}
++
++static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
++ struct sysfs_dirent * sd;
++ struct bin_attribute *bin_attr;
++ struct inode *inode;
++ int found = 0;
++
++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++ if (sysfs_type(sd) &&
++ !strcmp(sd->s_name, dentry->d_name.name)) {
++ found = 1;
++ break;
++ }
++ }
++
++ /* no such entry */
++ if (!found)
++ return NULL;
++
++ /* attach dentry and inode */
++ inode = sysfs_get_inode(sd);
++ if (!inode)
++ return ERR_PTR(-ENOMEM);
++
++ mutex_lock(&sysfs_mutex);
++
++ if (inode->i_state & I_NEW) {
++ /* initialize inode according to type */
++ switch (sysfs_type(sd)) {
++ case SYSFS_DIR:
++ inode->i_op = &sysfs_dir_inode_operations;
++ inode->i_fop = &sysfs_dir_operations;
++ inode->i_nlink = sysfs_count_nlink(sd);
++ break;
++ case SYSFS_KOBJ_ATTR:
++ inode->i_size = PAGE_SIZE;
++ inode->i_fop = &sysfs_file_operations;
++ break;
++ case SYSFS_KOBJ_BIN_ATTR:
++ bin_attr = sd->s_elem.bin_attr.bin_attr;
++ inode->i_size = bin_attr->size;
++ inode->i_fop = &bin_fops;
++ break;
++ case SYSFS_KOBJ_LINK:
++ inode->i_op = &sysfs_symlink_inode_operations;
++ break;
++ default:
++ BUG();
++ }
++ }
++
++ sysfs_instantiate(dentry, inode);
++ sysfs_attach_dentry(sd, dentry);
++
++ mutex_unlock(&sysfs_mutex);
++
++ return NULL;
++}
++
++static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++ struct sysfs_dirent *sd;
++ struct dentry *dest;
++
++ sd = dentry->d_fsdata;
++ dest = NULL;
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const struct shadow_dir_operations *shadow_ops;
++ const void *tag;
++
++ mutex_lock(&sysfs_mutex);
++
++ shadow_ops = dentry->d_inode->i_private;
++ tag = shadow_ops->current_tag();
++
++ sd = find_shadow_sd(sd, tag);
++ if (sd)
++ dest = sd->s_dentry;
++ dget(dest);
++
++ mutex_unlock(&sysfs_mutex);
++ }
++ if (!dest)
++ dest = dget(dentry);
++ dput(nd->dentry);
++ nd->dentry = dest;
++
++ return NULL;
++}
++
++
++const struct inode_operations sysfs_dir_inode_operations = {
++ .lookup = sysfs_lookup,
++ .setattr = sysfs_setattr,
++ .follow_link = sysfs_shadow_follow_link,
++};
++
++static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++ sysfs_unlink_sibling(sd);
++ sysfs_remove_one(acxt, sd);
++}
++
++static void remove_dir(struct sysfs_dirent *sd)
++{
++ struct sysfs_addrm_cxt acxt;
++
++ sysfs_addrm_start(&acxt, sd->s_parent);
++ __remove_dir(&acxt, sd);
++ sysfs_addrm_finish(&acxt);
++}
++
++void sysfs_remove_subdir(struct sysfs_dirent *sd)
++{
++ remove_dir(sd);
++}
++
++static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *dir_sd)
++{
++ struct sysfs_dirent **pos;
++
++ pos = &dir_sd->s_children;
++ while (*pos) {
++ struct sysfs_dirent *sd = *pos;
++
++ if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
++ *pos = sd->s_sibling;
++ sd->s_sibling = NULL;
++ sysfs_remove_one(acxt, sd);
++ } else
++ pos = &(*pos)->s_sibling;
++ }
++}
++
++static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt,
++ struct sysfs_dirent *dir_sd)
++{
++ struct sysfs_dirent **pos;
++
++ pos = &dir_sd->s_children;
++ while (*pos) {
++ struct sysfs_dirent *sd = *pos;
++
++ sysfs_empty_dir(acxt, sd);
++ __remove_dir(acxt, sd);
++ }
++}
++
++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
++{
++ struct sysfs_addrm_cxt acxt;
++
++ if (!dir_sd)
++ return;
++
++ pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
++ sysfs_addrm_start(&acxt, dir_sd);
++ if (sysfs_type(dir_sd) == SYSFS_DIR)
++ sysfs_empty_dir(&acxt, dir_sd);
++ else
++ sysfs_remove_shadows(&acxt, dir_sd);
++ sysfs_addrm_finish(&acxt);
++
++ remove_dir(dir_sd);
++}
++
++/**
++ * sysfs_remove_dir - remove an object's directory.
++ * @kobj: object.
++ *
++ * The only thing special about this is that we remove any files in
++ * the directory before we remove the directory, and we've inlined
++ * what used to be sysfs_rmdir() below, instead of calling it separately.
++ */
++
++void sysfs_remove_dir(struct kobject * kobj)
++{
++ struct sysfs_dirent *sd = kobj->sd;
++
++ spin_lock(&sysfs_assoc_lock);
++ kobj->sd = NULL;
++ spin_unlock(&sysfs_assoc_lock);
++
++ __sysfs_remove_dir(sd);
++}
++
++int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
++{
++ struct dentry *old_dentry, *new_dentry, *parent;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent *sd;
++ const char *dup_name;
++ int error;
++
++ dup_name = NULL;
++ new_dentry = NULL;
++
++ sd = kobj->sd;
++ sysfs_addrm_start(&acxt, sd->s_parent);
++ error = -ENOENT;
++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++ goto addrm_finish;
++
++ error = -EEXIST;
++ if (sysfs_find_dirent(acxt.parent_sd, new_name))
++ goto addrm_finish;
++
++ error = -EINVAL;
++ if ((sd->s_parent == acxt.parent_sd) &&
++ (strcmp(new_name, sd->s_name) == 0))
++ goto addrm_finish;
++
++ old_dentry = sd->s_dentry;
++ parent = acxt.parent_sd->s_dentry;
++ if (old_dentry) {
++ new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
++ if (IS_ERR(new_dentry)) {
++ error = PTR_ERR(new_dentry);
++ goto addrm_finish;
++ }
++
++ error = -EINVAL;
++ if (old_dentry == new_dentry)
++ goto addrm_finish;
++ }
++
++ /* rename kobject and sysfs_dirent */
++ error = -ENOMEM;
++ new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
++ if (!new_name)
++ goto addrm_finish;
++
++ error = kobject_set_name(kobj, "%s", new_name);
++ if (error)
++ goto addrm_finish;
++
++ dup_name = sd->s_name;
++ sd->s_name = new_name;
++
++ /* move under the new parent */
++ sysfs_unlink_sibling(sd);
++ sysfs_get(acxt.parent_sd);
++ sysfs_put(sd->s_parent);
++ sd->s_parent = acxt.parent_sd;
++ sysfs_link_sibling(sd);
++
++ if (new_dentry) {
++ d_add(new_dentry, NULL);
++ d_move(old_dentry, new_dentry);
++ }
++ error = 0;
++addrm_finish:
++ sysfs_addrm_finish(&acxt);
++
++ kfree(dup_name);
++ dput(new_dentry);
++ return error;
++}
++
++int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
++{
++ struct sysfs_dirent *sd = kobj->sd;
++ struct sysfs_dirent *new_parent_sd;
++ struct dentry *old_parent, *new_parent = NULL;
++ struct dentry *old_dentry = NULL, *new_dentry = NULL;
++ int error;
++
++ BUG_ON(!sd->s_parent);
++ new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
++
++ /* get dentries */
++ old_dentry = sysfs_get_dentry(sd);
++ if (IS_ERR(old_dentry)) {
++ error = PTR_ERR(old_dentry);
++ goto out_dput;
++ }
++ old_parent = sd->s_parent->s_dentry;
++
++ new_parent = sysfs_get_dentry(new_parent_sd);
++ if (IS_ERR(new_parent)) {
++ error = PTR_ERR(new_parent);
++ goto out_dput;
++ }
++
++ if (old_parent->d_inode == new_parent->d_inode) {
++ error = 0;
++ goto out_dput; /* nothing to move */
++ }
++again:
++ mutex_lock(&old_parent->d_inode->i_mutex);
++ if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
++ mutex_unlock(&old_parent->d_inode->i_mutex);
++ goto again;
++ }
++
++ new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
++ if (IS_ERR(new_dentry)) {
++ error = PTR_ERR(new_dentry);
++ goto out_unlock;
++ } else
++ error = 0;
++ d_add(new_dentry, NULL);
++ d_move(sd->s_dentry, new_dentry);
++ dput(new_dentry);
++
++ /* Remove from old parent's list and insert into new parent's list. */
++ mutex_lock(&sysfs_mutex);
++
++ sysfs_unlink_sibling(sd);
++ sysfs_get(new_parent_sd);
++ sysfs_put(sd->s_parent);
++ sd->s_parent = new_parent_sd;
++ sysfs_link_sibling(sd);
++
++ mutex_unlock(&sysfs_mutex);
++
++ out_unlock:
++ mutex_unlock(&new_parent->d_inode->i_mutex);
++ mutex_unlock(&old_parent->d_inode->i_mutex);
++ out_dput:
++ dput(new_parent);
++ dput(old_dentry);
++ dput(new_dentry);
++ return error;
++}
++
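++/* Each open directory file owns a "_DIR_" cursor dirent with type 0.
++ * It is linked among the children so readdir and lseek can remember
++ * the position across calls; sysfs_type() == 0 keeps it invisible to
++ * lookups and to readdir output.
++ */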
++static int sysfs_dir_open(struct inode *inode, struct file *file)
++{
++ struct dentry * dentry = file->f_path.dentry;
++ struct sysfs_dirent * parent_sd = dentry->d_fsdata;
++ struct sysfs_dirent * sd;
++
++ sd = sysfs_new_dirent("_DIR_", 0, 0);
++ if (sd) {
++ mutex_lock(&sysfs_mutex);
++ sd->s_parent = sysfs_get(parent_sd);
++ sysfs_link_sibling(sd);
++ mutex_unlock(&sysfs_mutex);
++ }
++
++ file->private_data = sd;
++ return sd ? 0 : -ENOMEM;
++}
++
++static int sysfs_dir_close(struct inode *inode, struct file *file)
++{
++ struct sysfs_dirent * cursor = file->private_data;
++
++ mutex_lock(&sysfs_mutex);
++ sysfs_unlink_sibling(cursor);
++ mutex_unlock(&sysfs_mutex);
++
++ release_sysfs_dirent(cursor);
++
++ return 0;
++}
++
++/* Relationship between s_mode and the DT_xxx types */
++static inline unsigned char dt_type(struct sysfs_dirent *sd)
++{
++ return (sd->s_mode >> 12) & 15;
++}
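++
++/*
++ * For example (mode bits are octal): S_IFDIR is 0040000, and
++ * 0040000 >> 12 == 4 == DT_DIR; S_IFREG is 0100000, giving
++ * 8 == DT_REG; S_IFLNK is 0120000, giving 10 == DT_LNK.
++ */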
++
++static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
++{
++ struct dentry *dentry = filp->f_path.dentry;
++ struct sysfs_dirent * parent_sd = dentry->d_fsdata;
++ struct sysfs_dirent *cursor = filp->private_data;
++ struct sysfs_dirent **pos;
++ ino_t ino;
++ int i = filp->f_pos;
++
++ switch (i) {
++ case 0:
++ ino = parent_sd->s_ino;
++ if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
++ break;
++ filp->f_pos++;
++ i++;
++ /* fallthrough */
++ case 1:
++ if (parent_sd->s_parent)
++ ino = parent_sd->s_parent->s_ino;
++ else
++ ino = parent_sd->s_ino;
++ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
++ break;
++ filp->f_pos++;
++ i++;
++ /* fallthrough */
++ default:
++ /* If I am the shadow master, return nothing. */
++ if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED)
++ return 0;
++
++ mutex_lock(&sysfs_mutex);
++ pos = &parent_sd->s_children;
++ while (*pos != cursor)
++ pos = &(*pos)->s_sibling;
++
++ /* unlink cursor */
++ *pos = cursor->s_sibling;
++
++ if (filp->f_pos == 2)
++ pos = &parent_sd->s_children;
++
++ for ( ; *pos; pos = &(*pos)->s_sibling) {
++ struct sysfs_dirent *next = *pos;
++ const char * name;
++ int len;
++
++ if (!sysfs_type(next))
++ continue;
++
++ name = next->s_name;
++ len = strlen(name);
++ ino = next->s_ino;
++
++ if (filldir(dirent, name, len, filp->f_pos, ino,
++ dt_type(next)) < 0)
++ break;
++
++ filp->f_pos++;
++ }
++
++ /* put cursor back in */
++ cursor->s_sibling = *pos;
++ *pos = cursor;
++
++ mutex_unlock(&sysfs_mutex);
++ }
++ return 0;
++}
++
++static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
++{
++ struct dentry * dentry = file->f_path.dentry;
++
++ switch (origin) {
++ case 1:
++ offset += file->f_pos;
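++ /* fallthrough */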
++ case 0:
++ if (offset >= 0)
++ break;
++ default:
++ return -EINVAL;
++ }
++ if (offset != file->f_pos) {
++ mutex_lock(&sysfs_mutex);
++
++ file->f_pos = offset;
++ if (file->f_pos >= 2) {
++ struct sysfs_dirent *sd = dentry->d_fsdata;
++ struct sysfs_dirent *cursor = file->private_data;
++ struct sysfs_dirent **pos;
++ loff_t n = file->f_pos - 2;
++
++ sysfs_unlink_sibling(cursor);
++
++ pos = &sd->s_children;
++ while (n && *pos) {
++ struct sysfs_dirent *next = *pos;
++ if (sysfs_type(next))
++ n--;
++ pos = &(*pos)->s_sibling;
++ }
++
++ cursor->s_sibling = *pos;
++ *pos = cursor;
++ }
++
++ mutex_unlock(&sysfs_mutex);
++ }
++
++ return offset;
++}
++
++const struct file_operations sysfs_dir_operations = {
++ .open = sysfs_dir_open,
++ .release = sysfs_dir_close,
++ .llseek = sysfs_dir_lseek,
++ .read = generic_read_dir,
++ .readdir = sysfs_readdir,
++};
++
++
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd)
++{
++ struct sysfs_addrm_cxt acxt;
++
++ /* If a shadow directory goes empty, remove it. */
++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
++ return;
++
++ if (sd->s_children)
++ return;
++
++ sysfs_addrm_start(&acxt, sd->s_parent);
++
++ if (sd->s_flags & SYSFS_FLAG_REMOVED)
++ goto addrm_finish;
++
++ if (sd->s_children)
++ goto addrm_finish;
++
++ __remove_dir(&acxt, sd);
++addrm_finish:
++ sysfs_addrm_finish(&acxt);
++}
++
++static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag)
++{
++ struct sysfs_dirent *sd = NULL;
++ struct dentry *dir, *shadow;
++ struct inode *inode;
++
++ dir = parent_sd->s_dentry;
++ inode = dir->d_inode;
++
++ shadow = d_alloc(dir->d_parent, &dir->d_name);
++ if (!shadow)
++ goto out;
++
++ /* Since the shadow directory is reachable, make it look
++ * like it is actually hashed.
++ */
++ shadow->d_hash.pprev = &shadow->d_hash.next;
++ shadow->d_hash.next = NULL;
++ shadow->d_flags &= ~DCACHE_UNHASHED;
++
++ sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR);
++ if (!sd)
++ goto error;
++
++ sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj;
++ sd->s_parent = sysfs_get(parent_sd);
++
++ /* Use the inode number of the parent we are shadowing */
++ sysfs_free_ino(sd->s_ino);
++ sd->s_ino = parent_sd->s_ino;
++
++ inc_nlink(inode);
++ inc_nlink(dir->d_parent->d_inode);
++
++ sysfs_link_sibling(sd);
++ __iget(inode);
++ sysfs_instantiate(shadow, inode);
++ sysfs_attach_dentry(sd, shadow);
++out:
++ return sd;
++error:
++ dput(shadow);
++ goto out;
++}
++
++int sysfs_resolve_for_create(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd)
++{
++ const struct shadow_dir_operations *shadow_ops;
++ struct sysfs_dirent *sd, *shadow_sd;
++
++ sd = *parent_sd;
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ sd = sd->s_parent;
++
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const void *tag;
++
++ shadow_ops = sd->s_dentry->d_inode->i_private;
++ tag = shadow_ops->kobject_tag(kobj);
++
++ shadow_sd = find_shadow_sd(sd, tag);
++ if (!shadow_sd)
++ shadow_sd = add_shadow_sd(sd, tag);
++ sd = shadow_sd;
++ }
++ if (sd) {
++ *parent_sd = sd;
++ return 1;
++ }
++ return 0;
++}
++
++int sysfs_resolve_for_remove(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd)
++{
++ struct sysfs_dirent *sd;
++ /* If the dentry is a shadow directory, find the shadow that is
++ * stored under the same tag as kobj. This allows removal
++ * of dirents to function properly even if the value of
++ * kobject_tag() has changed since we initially created
++ * the dirents associated with kobj.
++ */
++
++ sd = *parent_sd;
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ sd = sd->s_parent;
++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++ const void *tag;
++
++ tag = find_shadow_tag(kobj);
++ sd = find_shadow_sd(sd, tag);
++ }
++ if (sd) {
++ *parent_sd = sd;
++ return 1;
++ }
++ return 0;
++}
++
++/**
++ * sysfs_enable_shadowing - Automatically create shadows of a directory
++ * @kobj: object to automatically shadow
++ *
++ * Once shadowing has been enabled on a directory, the contents
++ * of the directory become dependent upon context.
++ *
++ * shadow_ops->current_tag() returns the context for the current
++ * process.
++ *
++ * shadow_ops->kobject_tag() returns the context that a given kobj
++ * resides in.
++ *
++ * Using those methods, the sysfs code on shadowed directories
++ * carefully stores the files so that when we look up files
++ * we get the proper answer for our context.
++ *
++ * If the context of a kobject is changed, it is expected that
++ * the kobject will be renamed so the appropriate sysfs data structures
++ * can be updated.
++ */
++int sysfs_enable_shadowing(struct kobject *kobj,
++ const struct shadow_dir_operations *shadow_ops)
++{
++ struct sysfs_dirent *sd;
++ struct dentry *dentry;
++ int err;
++
++ /* Find the dentry for the shadowed directory and
++ * increase its count.
++ */
++ err = -ENOENT;
++ sd = kobj->sd;
++ dentry = sysfs_get_dentry(sd);
++ if (!dentry)
++ goto out;
++
++ mutex_lock(&sysfs_mutex);
++ err = -EINVAL;
++ /* We can only enable shadowing on empty directories
++ * where shadowing is not already enabled.
++ */
++ if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) &&
++ !(sd->s_flags & SYSFS_FLAG_REMOVED) &&
++ !(sd->s_flags & SYSFS_FLAG_SHADOWED)) {
++ sd->s_flags |= SYSFS_FLAG_SHADOWED;
++ dentry->d_inode->i_private = (void *)shadow_ops;
++ err = 0;
++ }
++ mutex_unlock(&sysfs_mutex);
++out:
++ if (err)
++ dput(dentry);
++ return err;
++}
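++
++/*
++ * Illustrative shadow_dir_operations (a sketch only; the helpers
++ * named here are hypothetical and not part of this patch). Tags are
++ * opaque cookies identifying a context:
++ *
++ *   static const void *example_current_tag(void)
++ *   {
++ *           return example_context_of(current);
++ *   }
++ *
++ *   static const void *example_kobject_tag(struct kobject *kobj)
++ *   {
++ *           return example_context_of_kobj(kobj);
++ *   }
++ *
++ *   static const struct shadow_dir_operations example_shadow_ops = {
++ *           .current_tag = example_current_tag,
++ *           .kobject_tag = example_kobject_tag,
++ *   };
++ *
++ *   err = sysfs_enable_shadowing(kobj, &example_shadow_ops);
++ */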
++
+diff -Nurb linux-2.6.22-570/fs/sysfs/file.c linux-2.6.22-591/fs/sysfs/file.c
+--- linux-2.6.22-570/fs/sysfs/file.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/fs/sysfs/file.c 2007-12-21 15:36:14.000000000 -0500
+@@ -50,29 +50,15 @@
+ .store = subsys_attr_store,
+ };
+
+-/**
+- * add_to_collection - add buffer to a collection
+- * @buffer: buffer to be added
+- * @node: inode of set to add to
+- */
+-
+-static inline void
+-add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
+-{
+- struct sysfs_buffer_collection *set = node->i_private;
+-
+- mutex_lock(&node->i_mutex);
+- list_add(&buffer->associates, &set->associates);
+- mutex_unlock(&node->i_mutex);
+-}
+-
+-static inline void
+-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
+-{
+- mutex_lock(&node->i_mutex);
+- list_del(&buffer->associates);
+- mutex_unlock(&node->i_mutex);
+-}
++struct sysfs_buffer {
++ size_t count;
++ loff_t pos;
++ char * page;
++ struct sysfs_ops * ops;
++ struct semaphore sem;
++ int needs_read_fill;
++ int event;
++};
+
+ /**
+ * fill_read_buffer - allocate and fill buffer from object.
+@@ -87,9 +73,8 @@
+ */
+ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
+ {
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- struct attribute * attr = to_attr(dentry);
+- struct kobject * kobj = to_kobj(dentry->d_parent);
++ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ struct sysfs_ops * ops = buffer->ops;
+ int ret = 0;
+ ssize_t count;
+@@ -99,8 +84,15 @@
+ if (!buffer->page)
+ return -ENOMEM;
+
+- buffer->event = atomic_read(&sd->s_event);
+- count = ops->show(kobj,attr,buffer->page);
++ /* need attr_sd for attr and ops, its parent for kobj */
++ if (!sysfs_get_active_two(attr_sd))
++ return -ENODEV;
++
++ buffer->event = atomic_read(&attr_sd->s_event);
++ count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
++
++ sysfs_put_active_two(attr_sd);
++
+ BUG_ON(count > (ssize_t)PAGE_SIZE);
+ if (count >= 0) {
+ buffer->needs_read_fill = 0;
+@@ -138,9 +130,6 @@
+
+ down(&buffer->sem);
+ if (buffer->needs_read_fill) {
+- if (buffer->orphaned)
+- retval = -ENODEV;
+- else
+ retval = fill_read_buffer(file->f_path.dentry,buffer);
+ if (retval)
+ goto out;
+@@ -199,11 +188,20 @@
+ static int
+ flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
+ {
+- struct attribute * attr = to_attr(dentry);
+- struct kobject * kobj = to_kobj(dentry->d_parent);
++ struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ struct sysfs_ops * ops = buffer->ops;
++ int rc;
++
++ /* need attr_sd for attr and ops, its parent for kobj */
++ if (!sysfs_get_active_two(attr_sd))
++ return -ENODEV;
++
++ rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
++
++ sysfs_put_active_two(attr_sd);
+
+- return ops->store(kobj,attr,buffer->page,count);
++ return rc;
+ }
+
+
+@@ -231,37 +229,29 @@
+ ssize_t len;
+
+ down(&buffer->sem);
+- if (buffer->orphaned) {
+- len = -ENODEV;
+- goto out;
+- }
+ len = fill_write_buffer(buffer, buf, count);
+ if (len > 0)
+ len = flush_write_buffer(file->f_path.dentry, buffer, len);
+ if (len > 0)
+ *ppos += len;
+-out:
+ up(&buffer->sem);
+ return len;
+ }
+
+ static int sysfs_open_file(struct inode *inode, struct file *file)
+ {
+- struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+- struct attribute * attr = to_attr(file->f_path.dentry);
+- struct sysfs_buffer_collection *set;
++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ struct sysfs_buffer * buffer;
++
+ struct sysfs_ops * ops = NULL;
+- int error = 0;
++ int error;
+
+- if (!kobj || !attr)
+- goto Einval;
+
+- /* Grab the module reference for this attribute if we have one */
+- if (!try_module_get(attr->owner)) {
+- error = -ENODEV;
+- goto Done;
+- }
++ /* need attr_sd for attr and ops, its parent for kobj */
++
++ if (!sysfs_get_active_two(attr_sd))
++ return -ENODEV;
+
+ /* if the kobject has no ktype, then we assume that it is a subsystem
+ * itself, and use ops for it.
+@@ -277,20 +267,7 @@
+ * or the subsystem have no operations.
+ */
+ if (!ops)
+- goto Eaccess;
+-
+- /* make sure we have a collection to add our buffers to */
+- mutex_lock(&inode->i_mutex);
+- if (!(set = inode->i_private)) {
+- if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
+- mutex_unlock(&inode->i_mutex);
+- error = -ENOMEM;
+- goto Done;
+- } else {
+- INIT_LIST_HEAD(&set->associates);
+- }
+- }
+- mutex_unlock(&inode->i_mutex);
++ goto err_out;
+
+ /* File needs write support.
+ * The inode's perms must say it's ok,
+@@ -299,7 +276,7 @@
+ if (file->f_mode & FMODE_WRITE) {
+
+ if (!(inode->i_mode & S_IWUGO) || !ops->store)
+- goto Eaccess;
++ goto err_out;
+
+ }
+
+@@ -309,48 +286,38 @@
+ */
+ if (file->f_mode & FMODE_READ) {
+ if (!(inode->i_mode & S_IRUGO) || !ops->show)
+- goto Eaccess;
++ goto err_out;
+ }
+
+ /* No error? Great, allocate a buffer for the file, and store it
+ * it in file->private_data for easy access.
+ */
++ error = -ENOMEM;
+ buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
+- if (buffer) {
+- INIT_LIST_HEAD(&buffer->associates);
++ if (!buffer)
++ goto err_out;
++
+ init_MUTEX(&buffer->sem);
+ buffer->needs_read_fill = 1;
+ buffer->ops = ops;
+- add_to_collection(buffer, inode);
+ file->private_data = buffer;
+- } else
+- error = -ENOMEM;
+- goto Done;
+
+- Einval:
+- error = -EINVAL;
+- goto Done;
+- Eaccess:
+- error = -EACCES;
+- module_put(attr->owner);
+- Done:
+- if (error)
+- kobject_put(kobj);
++ /* open succeeded, put active references and pin attr_sd */
++ sysfs_put_active_two(attr_sd);
++ sysfs_get(attr_sd);
++ return 0;
++
++ err_out:
++ sysfs_put_active_two(attr_sd);
+ return error;
+ }
+
+ static int sysfs_release(struct inode * inode, struct file * filp)
+ {
+- struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+- struct attribute * attr = to_attr(filp->f_path.dentry);
+- struct module * owner = attr->owner;
+- struct sysfs_buffer * buffer = filp->private_data;
++ struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
++ struct sysfs_buffer *buffer = filp->private_data;
+
+- if (buffer)
+- remove_from_collection(buffer, inode);
+- kobject_put(kobj);
+- /* After this point, attr should not be accessed. */
+- module_put(owner);
++ sysfs_put(attr_sd);
+
+ if (buffer) {
+ if (buffer->page)
+@@ -377,57 +344,43 @@
+ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
+ {
+ struct sysfs_buffer * buffer = filp->private_data;
+- struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+- struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
+- int res = 0;
+-
+- poll_wait(filp, &kobj->poll, wait);
++ struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+
+- if (buffer->event != atomic_read(&sd->s_event)) {
+- res = POLLERR|POLLPRI;
+- buffer->needs_read_fill = 1;
+- }
++ /* need parent for the kobj, grab both */
++ if (!sysfs_get_active_two(attr_sd))
++ goto trigger;
+
+- return res;
+-}
++ poll_wait(filp, &kobj->poll, wait);
+
++ sysfs_put_active_two(attr_sd);
+
+-static struct dentry *step_down(struct dentry *dir, const char * name)
+-{
+- struct dentry * de;
++ if (buffer->event != atomic_read(&attr_sd->s_event))
++ goto trigger;
+
+- if (dir == NULL || dir->d_inode == NULL)
+- return NULL;
++ return 0;
+
+- mutex_lock(&dir->d_inode->i_mutex);
+- de = lookup_one_len(name, dir, strlen(name));
+- mutex_unlock(&dir->d_inode->i_mutex);
+- dput(dir);
+- if (IS_ERR(de))
+- return NULL;
+- if (de->d_inode == NULL) {
+- dput(de);
+- return NULL;
+- }
+- return de;
++ trigger:
++ buffer->needs_read_fill = 1;
++ return POLLERR|POLLPRI;
+ }
+
+-void sysfs_notify(struct kobject * k, char *dir, char *attr)
++void sysfs_notify(struct kobject *k, char *dir, char *attr)
+ {
+- struct dentry *de = k->dentry;
+- if (de)
+- dget(de);
+- if (de && dir)
+- de = step_down(de, dir);
+- if (de && attr)
+- de = step_down(de, attr);
+- if (de) {
+- struct sysfs_dirent * sd = de->d_fsdata;
+- if (sd)
++ struct sysfs_dirent *sd = k->sd;
++
++ mutex_lock(&sysfs_mutex);
++
++ if (sd && dir)
++ sd = sysfs_find_dirent(sd, dir);
++ if (sd && attr)
++ sd = sysfs_find_dirent(sd, attr);
++ if (sd) {
+ atomic_inc(&sd->s_event);
+ wake_up_interruptible(&k->poll);
+- dput(de);
+ }
++
++ mutex_unlock(&sysfs_mutex);
+ }
+ EXPORT_SYMBOL_GPL(sysfs_notify);
+
+@@ -441,19 +394,30 @@
+ };
+
+
+-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
++int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
++ int type)
+ {
+- struct sysfs_dirent * parent_sd = dir->d_fsdata;
+ umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
+- int error = -EEXIST;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent *sd;
+
+- mutex_lock(&dir->d_inode->i_mutex);
+- if (!sysfs_dirent_exist(parent_sd, attr->name))
+- error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
+- mode, type);
+- mutex_unlock(&dir->d_inode->i_mutex);
++ sd = sysfs_new_dirent(attr->name, mode, type);
++ if (!sd)
++ return -ENOMEM;
++ sd->s_elem.attr.attr = (void *)attr;
+
+- return error;
++ sysfs_addrm_start(&acxt, dir_sd);
++
++ if (!sysfs_find_dirent(dir_sd, attr->name)) {
++ sysfs_add_one(&acxt, sd);
++ sysfs_link_sibling(sd);
++ }
++
++ if (sysfs_addrm_finish(&acxt))
++ return 0;
++
++ sysfs_put(sd);
++ return -EEXIST;
+ }
+
+
+@@ -465,9 +429,9 @@
+
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
+ {
+- BUG_ON(!kobj || !kobj->dentry || !attr);
++ BUG_ON(!kobj || !kobj->sd || !attr);
+
+- return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
++ return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
+
+ }
+
+@@ -481,16 +445,16 @@
+ int sysfs_add_file_to_group(struct kobject *kobj,
+ const struct attribute *attr, const char *group)
+ {
+- struct dentry *dir;
++ struct sysfs_dirent *dir_sd;
+ int error;
+
+- dir = lookup_one_len(group, kobj->dentry, strlen(group));
+- if (IS_ERR(dir))
+- error = PTR_ERR(dir);
+- else {
+- error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR);
+- dput(dir);
+- }
++ dir_sd = sysfs_get_dirent(kobj->sd, group);
++ if (!dir_sd)
++ return -ENOENT;
++
++ error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
++ sysfs_put(dir_sd);
++
+ return error;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
+@@ -503,30 +467,31 @@
+ */
+ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
+ {
+- struct dentry * dir = kobj->dentry;
+- struct dentry * victim;
+- int res = -ENOENT;
+-
+- mutex_lock(&dir->d_inode->i_mutex);
+- victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+- if (!IS_ERR(victim)) {
+- /* make sure dentry is really there */
+- if (victim->d_inode &&
+- (victim->d_parent->d_inode == dir->d_inode)) {
+- victim->d_inode->i_mtime = CURRENT_TIME;
+- fsnotify_modify(victim);
+- res = 0;
+- } else
+- d_drop(victim);
++ struct sysfs_dirent *victim_sd = NULL;
++ struct dentry *victim = NULL;
++ int rc;
++
++ rc = -ENOENT;
++ victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
++ if (!victim_sd)
++ goto out;
+
+- /**
+- * Drop the reference acquired from lookup_one_len() above.
+- */
+- dput(victim);
++ victim = sysfs_get_dentry(victim_sd);
++ if (IS_ERR(victim)) {
++ rc = PTR_ERR(victim);
++ victim = NULL;
++ goto out;
+ }
+- mutex_unlock(&dir->d_inode->i_mutex);
+
+- return res;
++ mutex_lock(&victim->d_inode->i_mutex);
++ victim->d_inode->i_mtime = CURRENT_TIME;
++ fsnotify_modify(victim);
++ mutex_unlock(&victim->d_inode->i_mutex);
++ rc = 0;
++ out:
++ dput(victim);
++ sysfs_put(victim_sd);
++ return rc;
+ }
+
+
+@@ -539,30 +504,34 @@
+ */
+ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
+ {
+- struct dentry *dir = kobj->dentry;
+- struct dentry *victim;
++ struct sysfs_dirent *victim_sd = NULL;
++ struct dentry *victim = NULL;
+ struct inode * inode;
+ struct iattr newattrs;
+- int res = -ENOENT;
++ int rc;
++
++ rc = -ENOENT;
++ victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
++ if (!victim_sd)
++ goto out;
++
++ victim = sysfs_get_dentry(victim_sd);
++ if (IS_ERR(victim)) {
++ rc = PTR_ERR(victim);
++ victim = NULL;
++ goto out;
++ }
+
+- mutex_lock(&dir->d_inode->i_mutex);
+- victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+- if (!IS_ERR(victim)) {
+- if (victim->d_inode &&
+- (victim->d_parent->d_inode == dir->d_inode)) {
+ inode = victim->d_inode;
+ mutex_lock(&inode->i_mutex);
+- newattrs.ia_mode = (mode & S_IALLUGO) |
+- (inode->i_mode & ~S_IALLUGO);
++ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+- res = notify_change(victim, &newattrs);
++ rc = notify_change(victim, &newattrs);
+ mutex_unlock(&inode->i_mutex);
+- }
++ out:
+ dput(victim);
+- }
+- mutex_unlock(&dir->d_inode->i_mutex);
+-
+- return res;
++ sysfs_put(victim_sd);
++ return rc;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
+
+@@ -577,7 +546,7 @@
+
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
+ {
+- sysfs_hash_and_remove(kobj->dentry, attr->name);
++ sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
+ }
+
+
+@@ -590,12 +559,12 @@
+ void sysfs_remove_file_from_group(struct kobject *kobj,
+ const struct attribute *attr, const char *group)
+ {
+- struct dentry *dir;
++ struct sysfs_dirent *dir_sd;
+
+- dir = lookup_one_len(group, kobj->dentry, strlen(group));
+- if (!IS_ERR(dir)) {
+- sysfs_hash_and_remove(dir, attr->name);
+- dput(dir);
++ dir_sd = sysfs_get_dirent(kobj->sd, group);
++ if (dir_sd) {
++ sysfs_hash_and_remove(kobj, dir_sd, attr->name);
++ sysfs_put(dir_sd);
+ }
+ }
+ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
+diff -Nurb linux-2.6.22-570/fs/sysfs/group.c linux-2.6.22-591/fs/sysfs/group.c
+--- linux-2.6.22-570/fs/sysfs/group.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/group.c 2007-12-21 15:36:14.000000000 -0500
+@@ -13,31 +13,29 @@
+ #include <linux/dcache.h>
+ #include <linux/namei.h>
+ #include <linux/err.h>
+-#include <linux/fs.h>
+ #include <asm/semaphore.h>
+ #include "sysfs.h"
+
+
+-static void remove_files(struct dentry * dir,
+- const struct attribute_group * grp)
++static void remove_files(struct kobject *kobj, struct sysfs_dirent *dir_sd,
++ const struct attribute_group *grp)
+ {
+ struct attribute *const* attr;
+
+ for (attr = grp->attrs; *attr; attr++)
+- sysfs_hash_and_remove(dir,(*attr)->name);
++ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
+ }
+
+-static int create_files(struct dentry * dir,
+- const struct attribute_group * grp)
++static int create_files(struct kobject *kobj, struct sysfs_dirent *dir_sd,
++ const struct attribute_group *grp)
+ {
+ struct attribute *const* attr;
+ int error = 0;
+
+- for (attr = grp->attrs; *attr && !error; attr++) {
+- error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR);
+- }
++ for (attr = grp->attrs; *attr && !error; attr++)
++ error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
+ if (error)
+- remove_files(dir,grp);
++ remove_files(kobj, dir_sd, grp);
+ return error;
+ }
+
+@@ -45,44 +43,44 @@
+ int sysfs_create_group(struct kobject * kobj,
+ const struct attribute_group * grp)
+ {
+- struct dentry * dir;
++ struct sysfs_dirent *sd;
+ int error;
+
+- BUG_ON(!kobj || !kobj->dentry);
++ BUG_ON(!kobj || !kobj->sd);
+
+ if (grp->name) {
+- error = sysfs_create_subdir(kobj,grp->name,&dir);
++ error = sysfs_create_subdir(kobj, grp->name, &sd);
+ if (error)
+ return error;
+ } else
+- dir = kobj->dentry;
+- dir = dget(dir);
+- if ((error = create_files(dir,grp))) {
++ sd = kobj->sd;
++ sysfs_get(sd);
++ error = create_files(kobj, sd, grp);
++ if (error) {
+ if (grp->name)
+- sysfs_remove_subdir(dir);
++ sysfs_remove_subdir(sd);
+ }
+- dput(dir);
++ sysfs_put(sd);
+ return error;
+ }
+
+ void sysfs_remove_group(struct kobject * kobj,
+ const struct attribute_group * grp)
+ {
+- struct dentry * dir;
++ struct sysfs_dirent *dir_sd = kobj->sd;
++ struct sysfs_dirent *sd;
+
+ if (grp->name) {
+- dir = lookup_one_len_kern(grp->name, kobj->dentry,
+- strlen(grp->name));
+- BUG_ON(IS_ERR(dir));
+- }
+- else
+- dir = dget(kobj->dentry);
++ sd = sysfs_get_dirent(dir_sd, grp->name);
++ BUG_ON(!sd);
++ } else
++ sd = sysfs_get(dir_sd);
+
+- remove_files(dir,grp);
++ remove_files(kobj, sd, grp);
+ if (grp->name)
+- sysfs_remove_subdir(dir);
+- /* release the ref. taken in this routine */
+- dput(dir);
++ sysfs_remove_subdir(sd);
++
++ sysfs_put(sd);
+ }
+
+
+diff -Nurb linux-2.6.22-570/fs/sysfs/inode.c linux-2.6.22-591/fs/sysfs/inode.c
+--- linux-2.6.22-570/fs/sysfs/inode.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/inode.c 2007-12-23 01:18:26.000000000 -0500
+@@ -34,16 +34,6 @@
+ .setattr = sysfs_setattr,
+ };
+
+-void sysfs_delete_inode(struct inode *inode)
+-{
+- /* Free the shadowed directory inode operations */
+- if (sysfs_is_shadowed_inode(inode)) {
+- kfree(inode->i_op);
+- inode->i_op = NULL;
+- }
+- return generic_delete_inode(inode);
+-}
+-
+ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
+ {
+ struct inode * inode = dentry->d_inode;
+@@ -133,10 +123,8 @@
+ */
+ static struct lock_class_key sysfs_inode_imutex_key;
+
+-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
++void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
+ {
+- struct inode * inode = new_inode(sysfs_sb);
+- if (inode) {
+ inode->i_blocks = 0;
+ inode->i_mapping->a_ops = &sysfs_aops;
+ inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+@@ -151,169 +139,81 @@
+ */
+ set_inode_attr(inode, sd->s_iattr);
+ } else
+- set_default_inode_attr(inode, mode);
+- }
+- return inode;
+-}
+-
+-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
+-{
+- int error = 0;
+- struct inode * inode = NULL;
+- if (dentry) {
+- if (!dentry->d_inode) {
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- if ((inode = sysfs_new_inode(mode, sd))) {
+- if (dentry->d_parent && dentry->d_parent->d_inode) {
+- struct inode *p_inode = dentry->d_parent->d_inode;
+- p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+- }
+- goto Proceed;
+- }
+- else
+- error = -ENOMEM;
+- } else
+- error = -EEXIST;
+- } else
+- error = -ENOENT;
+- goto Done;
+-
+- Proceed:
+- if (init)
+- error = init(inode);
+- if (!error) {
+- d_instantiate(dentry, inode);
+- if (S_ISDIR(mode))
+- dget(dentry); /* pin only directory dentry in core */
+- } else
+- iput(inode);
+- Done:
+- return error;
++ set_default_inode_attr(inode, sd->s_mode);
+ }
+
+-/*
+- * Get the name for corresponding element represented by the given sysfs_dirent
++/**
++ * sysfs_get_inode - get inode for sysfs_dirent
++ * @sd: sysfs_dirent to allocate inode for
++ *
++ * Get the inode for @sd. If no such inode exists, a new inode
++ * is allocated and the basics are initialized. The new inode is
++ * returned locked.
++ *
++ * LOCKING:
++ * Kernel thread context (may sleep).
++ *
++ * RETURNS:
++ * Pointer to allocated inode on success, NULL on failure.
+ */
+-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
++struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
+ {
+- struct attribute * attr;
+- struct bin_attribute * bin_attr;
+- struct sysfs_symlink * sl;
+-
+- BUG_ON(!sd || !sd->s_element);
+-
+- switch (sd->s_type) {
+- case SYSFS_DIR:
+- /* Always have a dentry so use that */
+- return sd->s_dentry->d_name.name;
+-
+- case SYSFS_KOBJ_ATTR:
+- attr = sd->s_element;
+- return attr->name;
+-
+- case SYSFS_KOBJ_BIN_ATTR:
+- bin_attr = sd->s_element;
+- return bin_attr->attr.name;
+-
+- case SYSFS_KOBJ_LINK:
+- sl = sd->s_element;
+- return sl->link_name;
+- }
+- return NULL;
+-}
++ struct inode *inode;
+
+-static inline void orphan_all_buffers(struct inode *node)
+-{
+- struct sysfs_buffer_collection *set;
+- struct sysfs_buffer *buf;
++ inode = iget_locked(sysfs_sb, sd->s_ino);
++ if (inode && (inode->i_state & I_NEW))
++ sysfs_init_inode(sd, inode);
+
+- mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
+- set = node->i_private;
+- if (set) {
+- list_for_each_entry(buf, &set->associates, associates) {
+- down(&buf->sem);
+- buf->orphaned = 1;
+- up(&buf->sem);
+- }
+- }
+- mutex_unlock(&node->i_mutex);
++ return inode;
+ }
+
+-
+-/*
+- * Unhashes the dentry corresponding to given sysfs_dirent
+- * Called with parent inode's i_mutex held.
++/**
++ * sysfs_instantiate - instantiate dentry
++ * @dentry: dentry to be instantiated
++ * @inode: inode to associate with @dentry
++ *
++ * Unlock @inode if locked and instantiate @dentry with @inode.
++ *
++ * LOCKING:
++ * None.
+ */
+-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
++void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
+ {
+- struct dentry *dentry = NULL;
+- struct inode *inode;
++ BUG_ON(!dentry || dentry->d_inode);
+
+- /* We're not holding a reference to ->s_dentry dentry but the
+- * field will stay valid as long as sysfs_lock is held.
+- */
+- spin_lock(&sysfs_lock);
+- spin_lock(&dcache_lock);
++ if (inode->i_state & I_NEW)
++ unlock_new_inode(inode);
+
+- /* dget dentry if it's still alive */
+- if (sd->s_dentry && sd->s_dentry->d_inode)
+- dentry = dget_locked(sd->s_dentry);
+-
+- spin_unlock(&dcache_lock);
+- spin_unlock(&sysfs_lock);
+-
+- /* drop dentry */
+- if (dentry) {
+- spin_lock(&dcache_lock);
+- spin_lock(&dentry->d_lock);
+- if (!d_unhashed(dentry) && dentry->d_inode) {
+- inode = dentry->d_inode;
+- spin_lock(&inode->i_lock);
+- __iget(inode);
+- spin_unlock(&inode->i_lock);
+- dget_locked(dentry);
+- __d_drop(dentry);
+- spin_unlock(&dentry->d_lock);
+- spin_unlock(&dcache_lock);
+- simple_unlink(parent->d_inode, dentry);
+- orphan_all_buffers(inode);
+- iput(inode);
+- } else {
+- spin_unlock(&dentry->d_lock);
+- spin_unlock(&dcache_lock);
+- }
+-
+- dput(dentry);
+- }
++ d_instantiate(dentry, inode);
+ }
+
+-int sysfs_hash_and_remove(struct dentry * dir, const char * name)
++int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name)
+ {
+- struct sysfs_dirent * sd;
+- struct sysfs_dirent * parent_sd;
+- int found = 0;
++ struct sysfs_addrm_cxt acxt;
++ struct sysfs_dirent **pos, *sd;
+
+- if (!dir)
++ if (!dir_sd)
+ return -ENOENT;
+
+- if (dir->d_inode == NULL)
+- /* no inode means this hasn't been made visible yet */
+- return -ENOENT;
+
+- parent_sd = dir->d_fsdata;
+- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+- if (!sd->s_element)
++ sysfs_addrm_start(&acxt, dir_sd);
++ if (!sysfs_resolve_for_remove(kobj, &acxt.parent_sd))
++ goto addrm_finish;
++
++ for (pos = &acxt.parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
++ sd = *pos;
++
++ if (!sysfs_type(sd))
+ continue;
+- if (!strcmp(sysfs_get_name(sd), name)) {
+- list_del_init(&sd->s_sibling);
+- sysfs_drop_dentry(sd, dir);
+- sysfs_put(sd);
+- found = 1;
++ if (!strcmp(sd->s_name, name)) {
++ *pos = sd->s_sibling;
++ sd->s_sibling = NULL;
++ sysfs_remove_one(&acxt, sd);
+ break;
+ }
+ }
+- mutex_unlock(&dir->d_inode->i_mutex);
+-
+- return found ? 0 : -ENOENT;
++addrm_finish:
++ if (sysfs_addrm_finish(&acxt))
++ return 0;
++ return -ENOENT;
+ }
+diff -Nurb linux-2.6.22-570/fs/sysfs/mount.c linux-2.6.22-591/fs/sysfs/mount.c
+--- linux-2.6.22-570/fs/sysfs/mount.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/sysfs/mount.c 2007-12-21 15:36:14.000000000 -0500
+@@ -17,28 +17,18 @@
+ struct super_block * sysfs_sb = NULL;
+ struct kmem_cache *sysfs_dir_cachep;
+
+-static void sysfs_clear_inode(struct inode *inode);
+-
+ static const struct super_operations sysfs_ops = {
+ .statfs = simple_statfs,
+- .drop_inode = sysfs_delete_inode,
+- .clear_inode = sysfs_clear_inode,
++ .drop_inode = generic_delete_inode,
+ };
+
+-static struct sysfs_dirent sysfs_root = {
+- .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling),
+- .s_children = LIST_HEAD_INIT(sysfs_root.s_children),
+- .s_element = NULL,
+- .s_type = SYSFS_ROOT,
+- .s_iattr = NULL,
++struct sysfs_dirent sysfs_root = {
++ .s_count = ATOMIC_INIT(1),
++ .s_flags = SYSFS_ROOT,
++ .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+ .s_ino = 1,
+ };
+
+-static void sysfs_clear_inode(struct inode *inode)
+-{
+- kfree(inode->i_private);
+-}
+-
+ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
+ {
+ struct inode *inode;
+@@ -51,17 +41,18 @@
+ sb->s_time_gran = 1;
+ sysfs_sb = sb;
+
+- inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+- &sysfs_root);
+- if (inode) {
++ inode = new_inode(sysfs_sb);
++ if (!inode) {
++ pr_debug("sysfs: could not get root inode\n");
++ return -ENOMEM;
++ }
++
++ sysfs_init_inode(&sysfs_root, inode);
++
+ inode->i_op = &sysfs_dir_inode_operations;
+ inode->i_fop = &sysfs_dir_operations;
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+- } else {
+- pr_debug("sysfs: could not get root inode\n");
+- return -ENOMEM;
+- }
+
+ root = d_alloc_root(inode);
+ if (!root) {
+@@ -69,6 +60,7 @@
+ iput(inode);
+ return -ENOMEM;
+ }
++ sysfs_root.s_dentry = root;
+ root->d_fsdata = &sysfs_root;
+ sb->s_root = root;
+ return 0;
+diff -Nurb linux-2.6.22-570/fs/sysfs/symlink.c linux-2.6.22-591/fs/sysfs/symlink.c
+--- linux-2.6.22-570/fs/sysfs/symlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/symlink.c 2007-12-21 15:36:14.000000000 -0500
+@@ -11,71 +11,49 @@
+
+ #include "sysfs.h"
+
+-static int object_depth(struct kobject * kobj)
++static int object_depth(struct sysfs_dirent *sd)
+ {
+- struct kobject * p = kobj;
+ int depth = 0;
+- do { depth++; } while ((p = p->parent));
++
++ for (; sd->s_parent; sd = sd->s_parent) {
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ continue;
++ depth++;
++ }
++
+ return depth;
+ }
+
+-static int object_path_length(struct kobject * kobj)
++static int object_path_length(struct sysfs_dirent * sd)
+ {
+- struct kobject * p = kobj;
+ int length = 1;
+- do {
+- length += strlen(kobject_name(p)) + 1;
+- p = p->parent;
+- } while (p);
++
++ for (; sd->s_parent; sd = sd->s_parent) {
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ continue;
++ length += strlen(sd->s_name) + 1;
++ }
++
+ return length;
+ }
+
+-static void fill_object_path(struct kobject * kobj, char * buffer, int length)
++static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
+ {
+- struct kobject * p;
+-
++ int cur;
+ --length;
+- for (p = kobj; p; p = p->parent) {
+- int cur = strlen(kobject_name(p));
++ for (; sd->s_parent; sd = sd->s_parent) {
++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++ continue;
++
++ cur = strlen(sd->s_name);
+
+ /* back up enough to print this bus id with '/' */
+ length -= cur;
+- strncpy(buffer + length,kobject_name(p),cur);
++ strncpy(buffer + length, sd->s_name, cur);
+ *(buffer + --length) = '/';
+ }
+ }
+
+-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
+-{
+- struct sysfs_dirent * parent_sd = parent->d_fsdata;
+- struct sysfs_symlink * sl;
+- int error = 0;
+-
+- error = -ENOMEM;
+- sl = kmalloc(sizeof(*sl), GFP_KERNEL);
+- if (!sl)
+- goto exit1;
+-
+- sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+- if (!sl->link_name)
+- goto exit2;
+-
+- strcpy(sl->link_name, name);
+- sl->target_kobj = kobject_get(target);
+-
+- error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
+- SYSFS_KOBJ_LINK);
+- if (!error)
+- return 0;
+-
+- kobject_put(target);
+- kfree(sl->link_name);
+-exit2:
+- kfree(sl);
+-exit1:
+- return error;
+-}
+-
+ /**
+ * sysfs_create_link - create symlink between two objects.
+ * @kobj: object whose directory we're creating the link in.
+@@ -84,29 +62,80 @@
+ */
+ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
+ {
+- struct dentry *dentry = NULL;
+- int error = -EEXIST;
++ struct sysfs_dirent *parent_sd = NULL;
++ struct sysfs_dirent *target_sd = NULL;
++ struct sysfs_dirent *sd = NULL;
++ struct sysfs_addrm_cxt acxt;
++ int error;
+
+ BUG_ON(!name);
+
+ if (!kobj) {
+ if (sysfs_mount && sysfs_mount->mnt_sb)
+- dentry = sysfs_mount->mnt_sb->s_root;
++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
+ } else
+- dentry = kobj->dentry;
++ parent_sd = kobj->sd;
++
++ error = -EFAULT;
++ if (!parent_sd)
++ goto out_put;
++
++ /* target->sd can go away beneath us but is protected with
++ * sysfs_assoc_lock. Fetch target_sd from it.
++ */
++ spin_lock(&sysfs_assoc_lock);
++ if (target->sd)
++ target_sd = sysfs_get(target->sd);
++ spin_unlock(&sysfs_assoc_lock);
++
++ error = -ENOENT;
++ if (!target_sd)
++ goto out_put;
++
++ error = -ENOMEM;
++ sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
++ if (!sd)
++ goto out_put;
++ sd->s_elem.symlink.target_sd = target_sd;
++
++ sysfs_addrm_start(&acxt, parent_sd);
++ if (!sysfs_resolve_for_create(target, &acxt.parent_sd))
++ goto addrm_finish;
++
++ if (!sysfs_find_dirent(acxt.parent_sd, name)) {
++ sysfs_add_one(&acxt, sd);
++ sysfs_link_sibling(sd);
++ }
+
+- if (!dentry)
+- return -EFAULT;
++addrm_finish:
++ if (sysfs_addrm_finish(&acxt))
++ return 0;
+
+- mutex_lock(&dentry->d_inode->i_mutex);
+- if (!sysfs_dirent_exist(dentry->d_fsdata, name))
+- error = sysfs_add_link(dentry, name, target);
+- mutex_unlock(&dentry->d_inode->i_mutex);
++ error = -EEXIST;
++ /* fall through */
++ out_put:
++ sysfs_put(target_sd);
++ sysfs_put(sd);
+ return error;
+ }
+
+
+ /**
++ * sysfs_delete_link - remove symlink in object's directory.
++ * @kobj: object we're acting for.
++ * @targ: object we're pointing to.
++ * @name: name of the symlink to remove.
++ *
++ * Unlike sysfs_remove_link, sysfs_delete_link has enough information
++ * to successfully delete symlinks in shadow directories.
++ */
++void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
++ const char *name)
++{
++ sysfs_hash_and_remove(targ, kobj->sd, name);
++}
++
++/**
+ * sysfs_remove_link - remove symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @name: name of the symlink to remove.
+@@ -114,17 +143,33 @@
+
+ void sysfs_remove_link(struct kobject * kobj, const char * name)
+ {
+- sysfs_hash_and_remove(kobj->dentry,name);
++ sysfs_hash_and_remove(kobj, kobj->sd, name);
+ }
+
+-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
+- char *path)
++/**
++ * sysfs_rename_link - rename symlink in object's directory.
++ * @kobj: object we're acting for.
++ * @targ: object we're pointing to.
++ * @old: previous name of the symlink.
++ * @new: new name of the symlink.
++ *
++ * A helper function for the common rename symlink idiom.
++ */
++int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
++ const char *old, const char *new)
++{
++ sysfs_delete_link(kobj, targ, old);
++ return sysfs_create_link(kobj, targ, new);
++}
++
++static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
++ struct sysfs_dirent * target_sd, char *path)
+ {
+ char * s;
+ int depth, size;
+
+- depth = object_depth(kobj);
+- size = object_path_length(target) + depth * 3 - 1;
++ depth = object_depth(parent_sd);
++ size = object_path_length(target_sd) + depth * 3 - 1;
+ if (size > PATH_MAX)
+ return -ENAMETOOLONG;
+
+@@ -133,7 +178,7 @@
+ for (s = path; depth--; s += 3)
+ strcpy(s,"../");
+
+- fill_object_path(target, path, size);
++ fill_object_path(target_sd, path, size);
+ pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
+
+ return 0;
+@@ -141,27 +186,16 @@
+
+ static int sysfs_getlink(struct dentry *dentry, char * path)
+ {
+- struct kobject *kobj, *target_kobj;
+- int error = 0;
+-
+- kobj = sysfs_get_kobject(dentry->d_parent);
+- if (!kobj)
+- return -EINVAL;
+-
+- target_kobj = sysfs_get_kobject(dentry);
+- if (!target_kobj) {
+- kobject_put(kobj);
+- return -EINVAL;
+- }
++ struct sysfs_dirent *sd = dentry->d_fsdata;
++ struct sysfs_dirent *parent_sd = sd->s_parent;
++ struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
++ int error;
++
++ mutex_lock(&sysfs_mutex);
++ error = sysfs_get_target_path(parent_sd, target_sd, path);
++ mutex_unlock(&sysfs_mutex);
+
+- down_read(&sysfs_rename_sem);
+- error = sysfs_get_target_path(kobj, target_kobj, path);
+- up_read(&sysfs_rename_sem);
+-
+- kobject_put(kobj);
+- kobject_put(target_kobj);
+ return error;
+-
+ }
+
+ static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+diff -Nurb linux-2.6.22-570/fs/sysfs/sysfs.h linux-2.6.22-591/fs/sysfs/sysfs.h
+--- linux-2.6.22-570/fs/sysfs/sysfs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/sysfs/sysfs.h 2007-12-21 15:36:14.000000000 -0500
+@@ -1,9 +1,40 @@
++struct sysfs_elem_dir {
++ struct kobject * kobj;
++};
++
++struct sysfs_elem_symlink {
++ struct sysfs_dirent * target_sd;
++};
++
++struct sysfs_elem_attr {
++ struct attribute * attr;
++};
++
++struct sysfs_elem_bin_attr {
++ struct bin_attribute * bin_attr;
++};
++
++/*
++ * As long as an s_count reference is held, the sysfs_dirent itself is
++ * accessible. Dereferencing s_elem or any other outer entity
++ * requires an s_active reference.
++ */
+ struct sysfs_dirent {
+ atomic_t s_count;
+- struct list_head s_sibling;
+- struct list_head s_children;
+- void * s_element;
+- int s_type;
++ atomic_t s_active;
++ struct sysfs_dirent * s_parent;
++ struct sysfs_dirent * s_sibling;
++ struct sysfs_dirent * s_children;
++ const char * s_name;
++
++ union {
++ struct sysfs_elem_dir dir;
++ struct sysfs_elem_symlink symlink;
++ struct sysfs_elem_attr attr;
++ struct sysfs_elem_bin_attr bin_attr;
++ } s_elem;
++
++ unsigned int s_flags;
+ umode_t s_mode;
+ ino_t s_ino;
+ struct dentry * s_dentry;
+@@ -11,30 +42,77 @@
+ atomic_t s_event;
+ };
+
++#define SD_DEACTIVATED_BIAS INT_MIN
++
++struct sysfs_addrm_cxt {
++ struct sysfs_dirent *parent_sd;
++ struct inode *parent_inode;
++ struct sysfs_dirent *removed;
++ int cnt;
++};
++
++/*
++ * A sysfs file which deletes another file when written to needs to
++ * write-lock the s_active of the victim while its own s_active is
++ * read-locked for the write operation. Tell lockdep that this is okay.
++ */
++enum sysfs_s_active_class
++{
++ SYSFS_S_ACTIVE_NORMAL, /* file r/w access, etc - default */
++ SYSFS_S_ACTIVE_DEACTIVATE, /* file deactivation */
++};
++
+ extern struct vfsmount * sysfs_mount;
++extern struct sysfs_dirent sysfs_root;
+ extern struct kmem_cache *sysfs_dir_cachep;
+
+-extern void sysfs_delete_inode(struct inode *inode);
+-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
+-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+-
+-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
+-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
+- umode_t, int);
+-
+-extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
+-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
++extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
++extern void sysfs_link_sibling(struct sysfs_dirent *sd);
++extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
++
++extern int sysfs_resolve_for_create(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd);
++extern int sysfs_resolve_for_remove(struct kobject *kobj,
++ struct sysfs_dirent **parent_sd);
++
++extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
++extern void sysfs_put_active(struct sysfs_dirent *sd);
++extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
++extern void sysfs_put_active_two(struct sysfs_dirent *sd);
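++
++/*
++ * Pairing sketch (illustrative): any dereference of sd->s_elem must be
++ * bracketed by an active reference, e.g.
++ *
++ *	if (!sysfs_get_active(sd))
++ *		return -ENODEV;	(the node is being deleted)
++ *	... safely access sd->s_elem ...
++ *	sysfs_put_active(sd);
++ *
++ * The *_two variants additionally pin sd->s_parent.
++ */
++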
++extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *parent_sd);
++extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *sd);
++extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
++ struct sysfs_dirent *sd);
++extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
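++
++/*
++ * Add/remove idiom sketch (illustrative): additions and removals are
++ * batched under one context so locking and housekeeping happen once
++ * per batch, e.g.
++ *
++ *	struct sysfs_addrm_cxt acxt;
++ *
++ *	sysfs_addrm_start(&acxt, parent_sd);
++ *	sysfs_add_one(&acxt, sd);
++ *	sysfs_addrm_finish(&acxt);
++ */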
++
++extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
++extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
++extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
++
++extern void release_sysfs_dirent(struct sysfs_dirent * sd);
++extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name);
++extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
++ const unsigned char *name);
++extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
++ int type);
++
++extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
++ const struct attribute *attr, int type);
++extern int sysfs_hash_and_remove(struct kobject *kobj,
++ struct sysfs_dirent *dir_sd, const char *name);
+ extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
+
+-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
+-extern void sysfs_remove_subdir(struct dentry *);
++extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
++ struct sysfs_dirent **p_sd);
++extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
+
+-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
+-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
+ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+
+-extern spinlock_t sysfs_lock;
+-extern struct rw_semaphore sysfs_rename_sem;
++extern spinlock_t sysfs_assoc_lock;
++extern struct mutex sysfs_mutex;
+ extern struct super_block * sysfs_sb;
+ extern const struct file_operations sysfs_dir_operations;
+ extern const struct file_operations sysfs_file_operations;
+@@ -42,73 +120,9 @@
+ extern const struct inode_operations sysfs_dir_inode_operations;
+ extern const struct inode_operations sysfs_symlink_inode_operations;
+
+-struct sysfs_symlink {
+- char * link_name;
+- struct kobject * target_kobj;
+-};
+-
+-struct sysfs_buffer {
+- struct list_head associates;
+- size_t count;
+- loff_t pos;
+- char * page;
+- struct sysfs_ops * ops;
+- struct semaphore sem;
+- int orphaned;
+- int needs_read_fill;
+- int event;
+-};
+-
+-struct sysfs_buffer_collection {
+- struct list_head associates;
+-};
+-
+-static inline struct kobject * to_kobj(struct dentry * dentry)
+-{
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- return ((struct kobject *) sd->s_element);
+-}
+-
+-static inline struct attribute * to_attr(struct dentry * dentry)
+-{
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- return ((struct attribute *) sd->s_element);
+-}
+-
+-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
+-{
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- return ((struct bin_attribute *) sd->s_element);
+-}
+-
+-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
++static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
+ {
+- struct kobject * kobj = NULL;
+-
+- spin_lock(&dcache_lock);
+- if (!d_unhashed(dentry)) {
+- struct sysfs_dirent * sd = dentry->d_fsdata;
+- if (sd->s_type & SYSFS_KOBJ_LINK) {
+- struct sysfs_symlink * sl = sd->s_element;
+- kobj = kobject_get(sl->target_kobj);
+- } else
+- kobj = kobject_get(sd->s_element);
+- }
+- spin_unlock(&dcache_lock);
+-
+- return kobj;
+-}
+-
+-static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
+-{
+- if (sd->s_type & SYSFS_KOBJ_LINK) {
+- struct sysfs_symlink * sl = sd->s_element;
+- kfree(sl->link_name);
+- kobject_put(sl->target_kobj);
+- kfree(sl);
+- }
+- kfree(sd->s_iattr);
+- kmem_cache_free(sysfs_dir_cachep, sd);
++ return sd->s_flags & SYSFS_TYPE_MASK;
+ }
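++
++/*
++ * Usage sketch (illustrative; SYSFS_TYPE_MASK and the SYSFS_* type
++ * flags are defined elsewhere):
++ *
++ *	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
++ *		target_sd = sd->s_elem.symlink.target_sd;
++ */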
+
+ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
+@@ -122,11 +136,6 @@
+
+ static inline void sysfs_put(struct sysfs_dirent * sd)
+ {
+- if (atomic_dec_and_test(&sd->s_count))
++ if (sd && atomic_dec_and_test(&sd->s_count))
+ release_sysfs_dirent(sd);
+ }
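++
++/*
++ * Since sysfs_put() tolerates a NULL sd, error paths may release
++ * unconditionally, e.g. "sysfs_put(sd); sysfs_put(parent_sd);"
++ * without guarding either pointer.
++ */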
+-
+-static inline int sysfs_is_shadowed_inode(struct inode *inode)
+-{
+- return S_ISDIR(inode->i_mode) && inode->i_op->follow_link;
+-}
+diff -Nurb linux-2.6.22-570/fs/unionfs/Makefile linux-2.6.22-591/fs/unionfs/Makefile
+--- linux-2.6.22-570/fs/unionfs/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,7 @@
++obj-$(CONFIG_UNION_FS) += unionfs.o
++
++unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
++ rdstate.o copyup.o dirhelper.o rename.o unlink.o \
++ lookup.o commonfops.o dirfops.o sioq.o mmap.o
++
++unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
+diff -Nurb linux-2.6.22-570/fs/unionfs/commonfops.c linux-2.6.22-591/fs/unionfs/commonfops.c
+--- linux-2.6.22-570/fs/unionfs/commonfops.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/commonfops.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,748 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * 1) Copyup the file
++ * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously
++ * stolen from NFS's silly rename
++ */
++static int copyup_deleted_file(struct file *file, struct dentry *dentry,
++ int bstart, int bindex)
++{
++ static unsigned int counter;
++ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
++ const int countersize = sizeof(counter) * 2;
++ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
++ char name[nlen + 1];
++
++ int err;
++ struct dentry *tmp_dentry = NULL;
++ struct dentry *hidden_dentry;
++ struct dentry *hidden_dir_dentry = NULL;
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bstart);
++
++ sprintf(name, ".unionfs%*.*lx",
++ i_inosize, i_inosize, hidden_dentry->d_inode->i_ino);
++
++retry:
++ /*
++ * Loop, looking for an unused temp name to copyup to.
++ *
++ * It's somewhat silly that we look for a free temp name in the
++ * source branch (bstart) instead of the dest branch (bindex), where
++ * the final name will be created. We _will_ catch it if somehow
++ * the name exists in the dest branch, but it'd be nice to catch it
++ * sooner than later.
++ */
++ tmp_dentry = NULL;
++ do {
++ char *suffix = name + nlen - countersize;
++
++ dput(tmp_dentry);
++ counter++;
++ sprintf(suffix, "%*.*x", countersize, countersize, counter);
++
++ printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n",
++ dentry->d_name.name, name);
++
++ tmp_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++ nlen);
++ if (IS_ERR(tmp_dentry)) {
++ err = PTR_ERR(tmp_dentry);
++ goto out;
++ }
++ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
++ dput(tmp_dentry);
++
++ err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
++ bindex, file->f_dentry->d_inode->i_size);
++ if (err == -EEXIST)
++ goto retry;
++ else if (err)
++ goto out;
++
++ /* bring it to the same state as an unlinked file */
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
++ hidden_dir_dentry = lock_parent(hidden_dentry);
++ err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++ unlock_dir(hidden_dir_dentry);
++
++out:
++ return err;
++}
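++
++/*
++ * Example of a generated name (hypothetical values; on a 32-bit build
++ * i_inosize == countersize == 8): inode 0x1a2b with counter 3 yields
++ * ".unionfs00001a2b00000003".
++ */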
++
++/*
++ * put all references held by upper struct file and free lower file pointer
++ * array
++ */
++static void cleanup_file(struct file *file)
++{
++ int bindex, bstart, bend;
++ struct file **lf;
++ struct super_block *sb = file->f_dentry->d_sb;
++
++ lf = UNIONFS_F(file)->lower_files;
++ bstart = fbstart(file);
++ bend = fbend(file);
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ if (unionfs_lower_file_idx(file, bindex)) {
++ /*
++ * Find new index of matching branch with an open
++ * file, since branches could have been added or
++ * deleted causing the one with open files to shift.
++ */
++ int i; /* holds (possibly) updated branch index */
++ int old_bid;
++
++ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
++ i = branch_id_to_idx(sb, old_bid);
++ if (i < 0)
++ printk(KERN_ERR "unionfs: no superblock for "
++ "file %p\n", file);
++ else {
++ /* decrement count of open files */
++ branchput(sb, i);
++ /*
++ * fput will perform an mntput for us on the
++ * correct branch. Although we're using the
++ * file's old branch configuration, bindex,
++ * which is the old index, correctly points
++ * to the right branch in the file's branch
++ * list. In other words, we're going to
++ * mntput the correct branch even if
++ * branches have been added/removed.
++ */
++ fput(unionfs_lower_file_idx(file, bindex));
++ }
++ }
++ }
++
++ UNIONFS_F(file)->lower_files = NULL;
++ kfree(lf);
++ kfree(UNIONFS_F(file)->saved_branch_ids);
++ /* set to NULL because caller needs to know whether to kfree on error */
++ UNIONFS_F(file)->saved_branch_ids = NULL;
++}
++
++/* open all lower files for a given file */
++static int open_all_files(struct file *file)
++{
++ int bindex, bstart, bend, err = 0;
++ struct file *hidden_file;
++ struct dentry *hidden_dentry;
++ struct dentry *dentry = file->f_dentry;
++ struct super_block *sb = dentry->d_sb;
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++
++ dget(hidden_dentry);
++ unionfs_mntget(dentry, bindex);
++ branchget(sb, bindex);
++
++ hidden_file =
++ dentry_open(hidden_dentry,
++ unionfs_lower_mnt_idx(dentry, bindex),
++ file->f_flags);
++ if (IS_ERR(hidden_file)) {
++ err = PTR_ERR(hidden_file);
++ goto out;
++ } else
++ unionfs_set_lower_file_idx(file, bindex, hidden_file);
++ }
++out:
++ return err;
++}
++
++/* open the highest priority file for a given upper file */
++static int open_highest_file(struct file *file, int willwrite)
++{
++ int bindex, bstart, bend, err = 0;
++ struct file *hidden_file;
++ struct dentry *hidden_dentry;
++
++ struct dentry *dentry = file->f_dentry;
++ struct inode *parent_inode = dentry->d_parent->d_inode;
++ struct super_block *sb = dentry->d_sb;
++ size_t inode_size = dentry->d_inode->i_size;
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
++ for (bindex = bstart - 1; bindex >= 0; bindex--) {
++ err = copyup_file(parent_inode, file, bstart, bindex,
++ inode_size);
++ if (!err)
++ break;
++ }
++ atomic_set(&UNIONFS_F(file)->generation,
++ atomic_read(&UNIONFS_I(dentry->d_inode)->
++ generation));
++ goto out;
++ }
++
++ dget(hidden_dentry);
++ unionfs_mntget(dentry, bstart);
++ branchget(sb, bstart);
++ hidden_file = dentry_open(hidden_dentry,
++ unionfs_lower_mnt_idx(dentry, bstart),
++ file->f_flags);
++ if (IS_ERR(hidden_file)) {
++ err = PTR_ERR(hidden_file);
++ goto out;
++ }
++ unionfs_set_lower_file(file, hidden_file);
++ /* Fix up the position. */
++ hidden_file->f_pos = file->f_pos;
++
++ memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
++out:
++ return err;
++}
++
++/* perform a delayed copyup of a read-write file on a read-only branch */
++static int do_delayed_copyup(struct file *file, struct dentry *dentry)
++{
++ int bindex, bstart, bend, err = 0;
++ struct inode *parent_inode = dentry->d_parent->d_inode;
++ loff_t inode_size = file->f_dentry->d_inode->i_size;
++
++ bstart = fbstart(file);
++ bend = fbend(file);
++
++ BUG_ON(!S_ISREG(file->f_dentry->d_inode->i_mode));
++
++ for (bindex = bstart - 1; bindex >= 0; bindex--) {
++ if (!d_deleted(file->f_dentry))
++ err = copyup_file(parent_inode, file, bstart,
++ bindex, inode_size);
++ else
++ err = copyup_deleted_file(file, dentry, bstart,
++ bindex);
++
++ if (!err)
++ break;
++ }
++ if (!err && (bstart > fbstart(file))) {
++ bend = fbend(file);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ if (unionfs_lower_file_idx(file, bindex)) {
++ branchput(dentry->d_sb, bindex);
++ fput(unionfs_lower_file_idx(file, bindex));
++ unionfs_set_lower_file_idx(file, bindex, NULL);
++ }
++ }
++ fbend(file) = bend;
++ }
++ return err;
++}
++
++/*
++ * Revalidate the struct file
++ * @file: file to revalidate
++ * @willwrite: 1 if caller may cause changes to the file; 0 otherwise.
++ */
++int unionfs_file_revalidate(struct file *file, int willwrite)
++{
++ struct super_block *sb;
++ struct dentry *dentry;
++ int sbgen, fgen, dgen;
++ int bstart, bend;
++ int size;
++
++ int err = 0;
++
++ dentry = file->f_dentry;
++ unionfs_lock_dentry(dentry);
++ sb = dentry->d_sb;
++
++ /*
++ * First revalidate the dentry inside struct file,
++ * but not unhashed dentries.
++ */
++ if (!d_deleted(dentry) &&
++ !__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out_nofree;
++ }
++
++ sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
++ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++ fgen = atomic_read(&UNIONFS_F(file)->generation);
++
++ BUG_ON(sbgen > dgen);
++
++ /*
++ * There are two cases we are interested in. The first is when the
++ * file's generation is lower than the super-block's. The second is
++ * when someone has copied this file up from underneath us; in that
++ * case we also need to refresh things.
++ */
++ if (!d_deleted(dentry) &&
++ (sbgen > fgen || dbstart(dentry) != fbstart(file))) {
++ /* First we throw out the existing files. */
++ cleanup_file(file);
++
++ /* Now we reopen the file(s) as in unionfs_open. */
++ bstart = fbstart(file) = dbstart(dentry);
++ bend = fbend(file) = dbend(dentry);
++
++ size = sizeof(struct file *) * sbmax(sb);
++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
++ if (!UNIONFS_F(file)->lower_files) {
++ err = -ENOMEM;
++ goto out;
++ }
++ size = sizeof(int) * sbmax(sb);
++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
++ if (!UNIONFS_F(file)->saved_branch_ids) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ if (S_ISDIR(dentry->d_inode->i_mode)) {
++ /* We need to open all the files. */
++ err = open_all_files(file);
++ if (err)
++ goto out;
++ } else {
++ /* We only open the highest priority branch. */
++ err = open_highest_file(file, willwrite);
++ if (err)
++ goto out;
++ }
++ atomic_set(&UNIONFS_F(file)->generation,
++ atomic_read(&UNIONFS_I(dentry->d_inode)->
++ generation));
++ }
++
++ /* Copyup on the first write to a file on a readonly branch. */
++ if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
++ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
++ is_robranch(dentry)) {
++ printk(KERN_DEBUG "unionfs: Doing delayed copyup of a "
++ "read-write file on a read-only branch.\n");
++ err = do_delayed_copyup(file, dentry);
++ }
++
++out:
++ if (err) {
++ kfree(UNIONFS_F(file)->lower_files);
++ kfree(UNIONFS_F(file)->saved_branch_ids);
++ }
++out_nofree:
++ unionfs_unlock_dentry(dentry);
++ return err;
++}
++
++/* unionfs_open helper function: open a directory */
++static int __open_dir(struct inode *inode, struct file *file)
++{
++ struct dentry *hidden_dentry;
++ struct file *hidden_file;
++ int bindex, bstart, bend;
++
++ bstart = fbstart(file) = dbstart(file->f_dentry);
++ bend = fbend(file) = dbend(file->f_dentry);
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry =
++ unionfs_lower_dentry_idx(file->f_dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++
++ dget(hidden_dentry);
++ unionfs_mntget(file->f_dentry, bindex);
++ hidden_file = dentry_open(hidden_dentry,
++ unionfs_lower_mnt_idx(file->f_dentry,
++ bindex),
++ file->f_flags);
++ if (IS_ERR(hidden_file))
++ return PTR_ERR(hidden_file);
++
++ unionfs_set_lower_file_idx(file, bindex, hidden_file);
++
++ /*
++ * The branchget goes after the open, because otherwise
++ * we would miss the reference on release.
++ */
++ branchget(inode->i_sb, bindex);
++ }
++
++ return 0;
++}
++
++/* unionfs_open helper function: open a file */
++static int __open_file(struct inode *inode, struct file *file)
++{
++ struct dentry *hidden_dentry;
++ struct file *hidden_file;
++ int hidden_flags;
++ int bindex, bstart, bend;
++
++ hidden_dentry = unionfs_lower_dentry(file->f_dentry);
++ hidden_flags = file->f_flags;
++
++ bstart = fbstart(file) = dbstart(file->f_dentry);
++ bend = fbend(file) = dbend(file->f_dentry);
++
++ /*
++ * check permission on the hidden file: if it lives on a read-only
++ * branch, an open for writing must trigger a copyup.
++ */
++ if (hidden_dentry->d_inode && is_robranch(file->f_dentry)) {
++ /*
++ * if the open will change the file, copy it up now; otherwise
++ * defer the copyup.
++ */
++ if (hidden_flags & O_TRUNC) {
++ int size = 0;
++ int err = -EROFS;
++
++ /* copyup the file */
++ for (bindex = bstart - 1; bindex >= 0; bindex--) {
++ err = copyup_file(
++ file->f_dentry->d_parent->d_inode,
++ file, bstart, bindex, size);
++ if (!err)
++ break;
++ }
++ return err;
++ } else
++ hidden_flags &= ~(OPEN_WRITE_FLAGS);
++ }
++
++ dget(hidden_dentry);
++
++ /*
++ * dentry_open() will decrement the mnt refcount on error;
++ * otherwise fput() will do an mntput() for us upon file close.
++ */
++ unionfs_mntget(file->f_dentry, bstart);
++ hidden_file =
++ dentry_open(hidden_dentry,
++ unionfs_lower_mnt_idx(file->f_dentry, bstart),
++ hidden_flags);
++ if (IS_ERR(hidden_file))
++ return PTR_ERR(hidden_file);
++
++ unionfs_set_lower_file(file, hidden_file);
++ branchget(inode->i_sb, bstart);
++
++ return 0;
++}
++
++int unionfs_open(struct inode *inode, struct file *file)
++{
++ int err = 0;
++ struct file *hidden_file = NULL;
++ struct dentry *dentry = NULL;
++ int bindex = 0, bstart = 0, bend = 0;
++ int size;
++
++ unionfs_read_lock(inode->i_sb);
++
++ file->private_data =
++ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
++ if (!UNIONFS_F(file)) {
++ err = -ENOMEM;
++ goto out_nofree;
++ }
++ fbstart(file) = -1;
++ fbend(file) = -1;
++ atomic_set(&UNIONFS_F(file)->generation,
++ atomic_read(&UNIONFS_I(inode)->generation));
++
++ size = sizeof(struct file *) * sbmax(inode->i_sb);
++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
++ if (!UNIONFS_F(file)->lower_files) {
++ err = -ENOMEM;
++ goto out;
++ }
++ size = sizeof(int) * sbmax(inode->i_sb);
++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
++ if (!UNIONFS_F(file)->saved_branch_ids) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ dentry = file->f_dentry;
++ unionfs_lock_dentry(dentry);
++
++ bstart = fbstart(file) = dbstart(dentry);
++ bend = fbend(file) = dbend(dentry);
++
++ /* increment, so that we can flush appropriately */
++ atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
++
++ /*
++ * open the lower directories or file and make the unionfs file
++ * struct point to the resulting hidden file structs
++ */
++ if (S_ISDIR(inode->i_mode))
++ err = __open_dir(inode, file); /* open a dir */
++ else
++ err = __open_file(inode, file); /* open a file */
++
++ /* on error, free the allocated resources and fput the opened files */
++ if (err) {
++ atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_file = unionfs_lower_file_idx(file, bindex);
++ if (!hidden_file)
++ continue;
++
++ branchput(file->f_dentry->d_sb, bindex);
++ /* fput calls dput for hidden_dentry */
++ fput(hidden_file);
++ }
++ }
++
++ unionfs_unlock_dentry(dentry);
++
++out:
++ if (err) {
++ kfree(UNIONFS_F(file)->lower_files);
++ kfree(UNIONFS_F(file)->saved_branch_ids);
++ kfree(UNIONFS_F(file));
++ }
++out_nofree:
++ unionfs_read_unlock(inode->i_sb);
++ return err;
++}
++
++/*
++ * release all lower object references & free the file info structure
++ *
++ * No need to grab sb info's rwsem.
++ */
++int unionfs_file_release(struct inode *inode, struct file *file)
++{
++ struct file *hidden_file = NULL;
++ struct unionfs_file_info *fileinfo;
++ struct unionfs_inode_info *inodeinfo;
++ struct super_block *sb = inode->i_sb;
++ int bindex, bstart, bend;
++ int fgen;
++ int err;
++
++ unionfs_read_lock(sb);
++ /*
++ * Yes, we have to revalidate this file even if it's being released.
++ * This is important for open-but-unlinked files, as well as mmap
++ * support.
++ */
++ if ((err = unionfs_file_revalidate(file, 1)))
++ return err;
++ fileinfo = UNIONFS_F(file);
++ BUG_ON(file->f_dentry->d_inode != inode);
++ inodeinfo = UNIONFS_I(inode);
++
++ /* fput all the hidden files */
++ fgen = atomic_read(&fileinfo->generation);
++ bstart = fbstart(file);
++ bend = fbend(file);
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_file = unionfs_lower_file_idx(file, bindex);
++
++ if (hidden_file) {
++ fput(hidden_file);
++ branchput(inode->i_sb, bindex);
++ }
++ }
++ kfree(fileinfo->lower_files);
++ kfree(fileinfo->saved_branch_ids);
++
++ if (fileinfo->rdstate) {
++ fileinfo->rdstate->access = jiffies;
++ printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
++ "%u [%d.%lld]\n",
++ fileinfo->rdstate->cookie,
++ fileinfo->rdstate->bindex,
++ (long long)fileinfo->rdstate->dirpos);
++ spin_lock(&inodeinfo->rdlock);
++ inodeinfo->rdcount++;
++ list_add_tail(&fileinfo->rdstate->cache,
++ &inodeinfo->readdircache);
++ mark_inode_dirty(inode);
++ spin_unlock(&inodeinfo->rdlock);
++ fileinfo->rdstate = NULL;
++ }
++ kfree(fileinfo);
++ return 0;
++}
++
++/* pass the ioctl to the lower fs */
++static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ struct file *hidden_file;
++ int err;
++
++ hidden_file = unionfs_lower_file(file);
++
++ err = security_file_ioctl(hidden_file, cmd, arg);
++ if (err)
++ goto out;
++
++ err = -ENOTTY;
++ if (!hidden_file || !hidden_file->f_op)
++ goto out;
++ if (hidden_file->f_op->unlocked_ioctl) {
++ err = hidden_file->f_op->unlocked_ioctl(hidden_file, cmd, arg);
++ } else if (hidden_file->f_op->ioctl) {
++ lock_kernel();
++ err = hidden_file->f_op->ioctl(hidden_file->f_dentry->d_inode,
++ hidden_file, cmd, arg);
++ unlock_kernel();
++ }
++
++out:
++ return err;
++}
++
++/*
++ * return to user-space the branch indices containing the file in question
++ *
++ * We use an fd_set, so the number of branches is limited to
++ * FD_SETSIZE (currently 1024), which is plenty for most people.
++ */
++static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ int err = 0;
++ fd_set branchlist;
++
++ int bstart = 0, bend = 0, bindex = 0;
++ struct dentry *dentry, *hidden_dentry;
++
++ dentry = file->f_dentry;
++ unionfs_lock_dentry(dentry);
++ if ((err = unionfs_partial_lookup(dentry)))
++ goto out;
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++
++ FD_ZERO(&branchlist);
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++ if (hidden_dentry->d_inode)
++ FD_SET(bindex, &branchlist);
++ }
++
++ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
++ if (err)
++ err = -EFAULT;
++
++out:
++ unionfs_unlock_dentry(dentry);
++ return err < 0 ? err : bend;
++}
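++
++/*
++ * Userspace usage sketch (hypothetical program; assumes the
++ * UNIONFS_IOCTL_QUERYFILE definition from the unionfs headers):
++ *
++ *	fd_set branches;
++ *	int b, fd = open("/mnt/union/somefile", O_RDONLY);
++ *	int bend = ioctl(fd, UNIONFS_IOCTL_QUERYFILE, &branches);
++ *
++ *	for (b = 0; bend >= 0 && b <= bend; b++)
++ *		if (FD_ISSET(b, &branches))
++ *			printf("present in branch %d\n", b);
++ */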
++
++long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ long err;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 1)))
++ goto out;
++
++ /* check if asked for local commands */
++ switch (cmd) {
++ case UNIONFS_IOCTL_INCGEN:
++ /* Increment the superblock generation count */
++ printk("unionfs: incgen ioctl deprecated; "
++ "use \"-o remount,incgen\"\n");
++ err = -ENOSYS;
++ break;
++
++ case UNIONFS_IOCTL_QUERYFILE:
++ /* Return list of branches containing the given file */
++ err = unionfs_ioctl_queryfile(file, cmd, arg);
++ break;
++
++ default:
++ /* pass the ioctl down */
++ err = do_ioctl(file, cmd, arg);
++ break;
++ }
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++int unionfs_flush(struct file *file, fl_owner_t id)
++{
++ int err = 0;
++ struct file *hidden_file = NULL;
++ struct dentry *dentry = file->f_dentry;
++ int bindex, bstart, bend;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 1)))
++ goto out;
++
++ if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens))
++ goto out;
++
++ unionfs_lock_dentry(dentry);
++
++ bstart = fbstart(file);
++ bend = fbend(file);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_file = unionfs_lower_file_idx(file, bindex);
++
++ if (hidden_file && hidden_file->f_op &&
++ hidden_file->f_op->flush) {
++ err = hidden_file->f_op->flush(hidden_file, id);
++ if (err)
++ goto out_lock;
++
++ /* if there are no more refs to the dentry, dput it */
++ if (d_deleted(dentry)) {
++ dput(unionfs_lower_dentry_idx(dentry, bindex));
++ unionfs_set_lower_dentry_idx(dentry, bindex,
++ NULL);
++ }
++ }
++
++ }
++
++out_lock:
++ unionfs_unlock_dentry(dentry);
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/copyup.c linux-2.6.22-591/fs/unionfs/copyup.c
+--- linux-2.6.22-570/fs/unionfs/copyup.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/copyup.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,806 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * For a detailed explanation of copyup, see:
++ * Documentation/filesystems/unionfs/concepts.txt
++ */
++
++/* forward declarations */
++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
++ int bstart, int new_bindex, const char *name,
++ int namelen, struct file **copyup_file,
++ loff_t len);
++static struct dentry *create_parents_named(struct inode *dir,
++ struct dentry *dentry,
++ const char *name, int bindex);
++
++#ifdef CONFIG_UNION_FS_XATTR
++/* copyup all extended attrs for a given dentry */
++static int copyup_xattrs(struct dentry *old_hidden_dentry,
++ struct dentry *new_hidden_dentry)
++{
++ int err = 0;
++ ssize_t list_size = -1;
++ char *name_list = NULL;
++ char *attr_value = NULL;
++ char *name_list_orig = NULL;
++
++ list_size = vfs_listxattr(old_hidden_dentry, NULL, 0);
++
++ if (list_size <= 0) {
++ err = list_size;
++ goto out;
++ }
++
++ name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
++ if (!name_list || IS_ERR(name_list)) {
++ err = name_list ? PTR_ERR(name_list) : -ENOMEM;
++ goto out;
++ }
++ list_size = vfs_listxattr(old_hidden_dentry, name_list, list_size);
++ attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
++ if (!attr_value || IS_ERR(attr_value)) {
++ err = attr_value ? PTR_ERR(attr_value) : -ENOMEM;
++ goto out;
++ }
++ name_list_orig = name_list;
++ while (*name_list) {
++ ssize_t size;
++
++ /* Lock here since vfs_getxattr doesn't lock for us */
++ mutex_lock(&old_hidden_dentry->d_inode->i_mutex);
++ size = vfs_getxattr(old_hidden_dentry, name_list,
++ attr_value, XATTR_SIZE_MAX);
++ mutex_unlock(&old_hidden_dentry->d_inode->i_mutex);
++ if (size < 0) {
++ err = size;
++ goto out;
++ }
++
++ if (size > XATTR_SIZE_MAX) {
++ err = -E2BIG;
++ goto out;
++ }
++ /* Don't lock here since vfs_setxattr does it for us. */
++ err = vfs_setxattr(new_hidden_dentry, name_list, attr_value,
++ size, 0);
++
++ if (err < 0)
++ goto out;
++ name_list += strlen(name_list) + 1;
++ }
++out:
++ name_list = name_list_orig;
++
++ if (name_list)
++ unionfs_xattr_free(name_list, list_size + 1);
++ if (attr_value)
++ unionfs_xattr_free(attr_value, XATTR_SIZE_MAX);
++ /* It is no big deal if this fails; we just roll with the punches. */
++ if (err == -ENOTSUPP || err == -EOPNOTSUPP)
++ err = 0;
++ return err;
++}
++#endif /* CONFIG_UNION_FS_XATTR */
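++
++/*
++ * name_list layout (standard listxattr semantics): the names come back
++ * as consecutive NUL-terminated strings, e.g.
++ * "user.foo\0security.selinux\0", which is why the copy loop above
++ * advances by strlen(name_list) + 1.
++ */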
++
++/* Copy up the times, ownership, and mode from the existing (lower) dentry. */
++static int copyup_permissions(struct super_block *sb,
++ struct dentry *old_hidden_dentry,
++ struct dentry *new_hidden_dentry)
++{
++ struct inode *i = old_hidden_dentry->d_inode;
++ struct iattr newattrs;
++ int err;
++
++ newattrs.ia_atime = i->i_atime;
++ newattrs.ia_mtime = i->i_mtime;
++ newattrs.ia_ctime = i->i_ctime;
++
++ newattrs.ia_gid = i->i_gid;
++ newattrs.ia_uid = i->i_uid;
++
++ newattrs.ia_mode = i->i_mode;
++
++ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
++ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
++ ATTR_GID | ATTR_UID | ATTR_MODE;
++
++ err = notify_change(new_hidden_dentry, &newattrs);
++
++ return err;
++}
++
++int copyup_dentry(struct inode *dir, struct dentry *dentry,
++ int bstart, int new_bindex,
++ struct file **copyup_file, loff_t len)
++{
++ return copyup_named_dentry(dir, dentry, bstart, new_bindex,
++ dentry->d_name.name,
++ dentry->d_name.len, copyup_file, len);
++}
++
++/*
++ * create the new device/file/directory; the caller then uses
++ * copyup_permissions to copy up the times, ownership, and mode
++ *
++ * if the object being copied up is a regular file, the file is only
++ * created here; its contents have to be copied up separately
++ */
++static int __copyup_ndentry(struct dentry *old_hidden_dentry,
++ struct dentry *new_hidden_dentry,
++ struct dentry *new_hidden_parent_dentry,
++ char *symbuf)
++{
++ int err = 0;
++ umode_t old_mode = old_hidden_dentry->d_inode->i_mode;
++ struct sioq_args args;
++
++ if (S_ISDIR(old_mode)) {
++ args.mkdir.parent = new_hidden_parent_dentry->d_inode;
++ args.mkdir.dentry = new_hidden_dentry;
++ args.mkdir.mode = old_mode;
++
++ run_sioq(__unionfs_mkdir, &args);
++ err = args.err;
++ } else if (S_ISLNK(old_mode)) {
++ args.symlink.parent = new_hidden_parent_dentry->d_inode;
++ args.symlink.dentry = new_hidden_dentry;
++ args.symlink.symbuf = symbuf;
++ args.symlink.mode = old_mode;
++
++ run_sioq(__unionfs_symlink, &args);
++ err = args.err;
++ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
++ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
++ args.mknod.parent = new_hidden_parent_dentry->d_inode;
++ args.mknod.dentry = new_hidden_dentry;
++ args.mknod.mode = old_mode;
++ args.mknod.dev = old_hidden_dentry->d_inode->i_rdev;
++
++ run_sioq(__unionfs_mknod, &args);
++ err = args.err;
++ } else if (S_ISREG(old_mode)) {
++ args.create.parent = new_hidden_parent_dentry->d_inode;
++ args.create.dentry = new_hidden_dentry;
++ args.create.mode = old_mode;
++ args.create.nd = NULL;
++
++ run_sioq(__unionfs_create, &args);
++ err = args.err;
++ } else {
++ printk(KERN_ERR "unionfs: unknown inode type %d\n",
++ old_mode);
++ BUG();
++ }
++
++ return err;
++}
++
++static int __copyup_reg_data(struct dentry *dentry,
++ struct dentry *new_hidden_dentry, int new_bindex,
++ struct dentry *old_hidden_dentry, int old_bindex,
++ struct file **copyup_file, loff_t len)
++{
++ struct super_block *sb = dentry->d_sb;
++ struct file *input_file;
++ struct file *output_file;
++ mm_segment_t old_fs;
++ char *buf = NULL;
++ ssize_t read_bytes, write_bytes;
++ loff_t size;
++ int err = 0;
++
++ /* open old file */
++ unionfs_mntget(dentry, old_bindex);
++ branchget(sb, old_bindex);
++ input_file = dentry_open(old_hidden_dentry,
++ unionfs_lower_mnt_idx(dentry, old_bindex),
++ O_RDONLY | O_LARGEFILE);
++ if (IS_ERR(input_file)) {
++ dput(old_hidden_dentry);
++ err = PTR_ERR(input_file);
++ goto out;
++ }
++ if (!input_file->f_op || !input_file->f_op->read) {
++ err = -EINVAL;
++ goto out_close_in;
++ }
++
++ /* open new file */
++ dget(new_hidden_dentry);
++ unionfs_mntget(dentry, new_bindex);
++ branchget(sb, new_bindex);
++ output_file = dentry_open(new_hidden_dentry,
++ unionfs_lower_mnt_idx(dentry, new_bindex),
++ O_WRONLY | O_LARGEFILE);
++ if (IS_ERR(output_file)) {
++ err = PTR_ERR(output_file);
++ goto out_close_in2;
++ }
++ if (!output_file->f_op || !output_file->f_op->write) {
++ err = -EINVAL;
++ goto out_close_out;
++ }
++
++ /* allocating a buffer */
++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!buf) {
++ err = -ENOMEM;
++ goto out_close_out;
++ }
++
++ input_file->f_pos = 0;
++ output_file->f_pos = 0;
++
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++
++ size = len;
++ err = 0;
++ do {
++ if (len >= PAGE_SIZE)
++ size = PAGE_SIZE;
++ else if ((len < PAGE_SIZE) && (len > 0))
++ size = len;
++
++ len -= PAGE_SIZE;
++
++ read_bytes =
++ input_file->f_op->read(input_file,
++ (char __user *)buf, size,
++ &input_file->f_pos);
++ if (read_bytes <= 0) {
++ err = read_bytes;
++ break;
++ }
++
++ write_bytes =
++ output_file->f_op->write(output_file,
++ (char __user *)buf,
++ read_bytes,
++ &output_file->f_pos);
++ if ((write_bytes < 0) || (write_bytes < read_bytes)) {
++ err = write_bytes;
++ break;
++ }
++ } while ((read_bytes > 0) && (len > 0));
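++
++ /*
++  * Chunking note (illustrative): for len == 2.5 * PAGE_SIZE the loop
++  * above copies PAGE_SIZE, PAGE_SIZE, then PAGE_SIZE / 2 bytes; it
++  * stops early on EOF (read_bytes == 0) or on a short write.
++  */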
++
++ set_fs(old_fs);
++
++ kfree(buf);
++
++ if (!err)
++ err = output_file->f_op->fsync(output_file,
++ new_hidden_dentry, 0);
++
++ if (err)
++ goto out_close_out;
++
++ if (copyup_file) {
++ *copyup_file = output_file;
++ goto out_close_in;
++ }
++
++out_close_out:
++ fput(output_file);
++
++out_close_in2:
++ branchput(sb, new_bindex);
++
++out_close_in:
++ fput(input_file);
++
++out:
++ branchput(sb, old_bindex);
++
++ return err;
++}
++
++/*
++ * dput the lower references for old and new dentry & clear a lower dentry
++ * pointer
++ */
++static void __clear(struct dentry *dentry, struct dentry *old_hidden_dentry,
++ int old_bstart, int old_bend,
++ struct dentry *new_hidden_dentry, int new_bindex)
++{
++ /* get rid of the hidden dentry and all its traces */
++ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
++ set_dbstart(dentry, old_bstart);
++ set_dbend(dentry, old_bend);
++
++ dput(new_hidden_dentry);
++ dput(old_hidden_dentry);
++}
++
++/* copy up a dentry to a file of specified name */
++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
++ int bstart, int new_bindex, const char *name,
++ int namelen, struct file **copyup_file,
++ loff_t len)
++{
++ struct dentry *new_hidden_dentry;
++ struct dentry *old_hidden_dentry = NULL;
++ struct super_block *sb;
++ int err = 0;
++ int old_bindex;
++ int old_bstart;
++ int old_bend;
++ struct dentry *new_hidden_parent_dentry = NULL;
++ mm_segment_t oldfs;
++ char *symbuf = NULL;
++
++ verify_locked(dentry);
++
++ old_bindex = bstart;
++ old_bstart = dbstart(dentry);
++ old_bend = dbend(dentry);
++
++ BUG_ON(new_bindex < 0);
++ BUG_ON(new_bindex >= old_bindex);
++
++ sb = dir->i_sb;
++
++ if ((err = is_robranch_super(sb, new_bindex)))
++ goto out;
++
++ /* Create the directory structure above this dentry. */
++ new_hidden_dentry =
++ create_parents_named(dir, dentry, name, new_bindex);
++ if (IS_ERR(new_hidden_dentry)) {
++ err = PTR_ERR(new_hidden_dentry);
++ goto out;
++ }
++
++ old_hidden_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
++ /* we conditionally dput this old_hidden_dentry at end of function */
++ dget(old_hidden_dentry);
++
++ /* For symlinks, we must read the link before we lock the directory. */
++ if (S_ISLNK(old_hidden_dentry->d_inode->i_mode)) {
++
++ symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
++ if (!symbuf) {
++ __clear(dentry, old_hidden_dentry,
++ old_bstart, old_bend,
++ new_hidden_dentry, new_bindex);
++ err = -ENOMEM;
++ goto out_free;
++ }
++
++ oldfs = get_fs();
++ set_fs(KERNEL_DS);
++ err = old_hidden_dentry->d_inode->i_op->readlink(
++ old_hidden_dentry,
++ (char __user *)symbuf,
++ PATH_MAX);
++ set_fs(oldfs);
++ if (err < 0) {
++ __clear(dentry, old_hidden_dentry,
++ old_bstart, old_bend,
++ new_hidden_dentry, new_bindex);
++ goto out_free;
++ }
++ symbuf[err] = '\0';
++ }
++
++ /* Now we lock the parent, and create the object in the new branch. */
++ new_hidden_parent_dentry = lock_parent(new_hidden_dentry);
++
++ /* create the new inode */
++ err = __copyup_ndentry(old_hidden_dentry, new_hidden_dentry,
++ new_hidden_parent_dentry, symbuf);
++
++ if (err) {
++ __clear(dentry, old_hidden_dentry,
++ old_bstart, old_bend,
++ new_hidden_dentry, new_bindex);
++ goto out_unlock;
++ }
++
++ /* We actually copyup the file here. */
++ if (S_ISREG(old_hidden_dentry->d_inode->i_mode))
++ err = __copyup_reg_data(dentry, new_hidden_dentry, new_bindex,
++ old_hidden_dentry, old_bindex,
++ copyup_file, len);
++ if (err)
++ goto out_unlink;
++
++ /* Set permissions. */
++ if ((err = copyup_permissions(sb, old_hidden_dentry,
++ new_hidden_dentry)))
++ goto out_unlink;
++
++#ifdef CONFIG_UNION_FS_XATTR
++ /* Selinux uses extended attributes for permissions. */
++ if ((err = copyup_xattrs(old_hidden_dentry, new_hidden_dentry)))
++ goto out_unlink;
++#endif
++
++ /* do not allow files getting deleted to be re-interposed */
++ if (!d_deleted(dentry))
++ unionfs_reinterpose(dentry);
++
++ goto out_unlock;
++
++out_unlink:
++ /*
++ * copyup failed, possibly because we ran out of space or quota,
++ * or something else happened; unlink the new file (we don't
++ * really care about the return value of vfs_unlink)
++ */
++ vfs_unlink(new_hidden_parent_dentry->d_inode, new_hidden_dentry);
++
++ if (copyup_file) {
++ /* need to close the file */
++
++ fput(*copyup_file);
++ branchput(sb, new_bindex);
++ }
++
++ /*
++ * TODO: should we reset the error to something like -EIO?
++ *
++ * If we don't reset, the user may get some nonsensical errors, but
++ * on the other hand, if we reset to EIO, we guarantee that the user
++ * will get a "confusing" error message.
++ */
++
++out_unlock:
++ unlock_dir(new_hidden_parent_dentry);
++
++out_free:
++ /*
++ * If old_hidden_dentry was a directory, we need to dput it. If it
++ * was a file, then it was already dput indirectly by other
++ * functions we call above which operate on regular files.
++ */
++ if (old_hidden_dentry && old_hidden_dentry->d_inode &&
++ S_ISDIR(old_hidden_dentry->d_inode->i_mode))
++ dput(old_hidden_dentry);
++ kfree(symbuf);
++
++out:
++ return err;
++}
++
++/*
++ * This function copies the file represented by 'file', which currently
++ * resides in branch 'bstart', to branch 'new_bindex'. The copy will be
++ * named "name".
++ */
++int copyup_named_file(struct inode *dir, struct file *file, char *name,
++ int bstart, int new_bindex, loff_t len)
++{
++ int err = 0;
++ struct file *output_file = NULL;
++
++ err = copyup_named_dentry(dir, file->f_dentry, bstart,
++ new_bindex, name, strlen(name), &output_file,
++ len);
++ if (!err) {
++ fbstart(file) = new_bindex;
++ unionfs_set_lower_file_idx(file, new_bindex, output_file);
++ }
++
++ return err;
++}
++
++/*
++ * This function copies the file represented by 'file', which currently
++ * resides in branch 'bstart', to branch 'new_bindex'.
++ */
++int copyup_file(struct inode *dir, struct file *file, int bstart,
++ int new_bindex, loff_t len)
++{
++ int err = 0;
++ struct file *output_file = NULL;
++
++ err = copyup_dentry(dir, file->f_dentry, bstart, new_bindex,
++ &output_file, len);
++ if (!err) {
++ fbstart(file) = new_bindex;
++ unionfs_set_lower_file_idx(file, new_bindex, output_file);
++ }
++
++ return err;
++}
++
++/*
++ * This function replicates the directory structure up to the given
++ * dentry in the bindex branch. It can also create the directory
++ * structure recursively to the right.
++ */
++struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
++ int bindex)
++{
++ return create_parents_named(dir, dentry, dentry->d_name.name, bindex);
++}
++
++/* purge a dentry's lower-branch states (dput/mntput, etc.) */
++static void __cleanup_dentry(struct dentry *dentry, int bindex,
++ int old_bstart, int old_bend)
++{
++ int loop_start;
++ int loop_end;
++ int new_bstart = -1;
++ int new_bend = -1;
++ int i;
++
++ loop_start = min(old_bstart, bindex);
++ loop_end = max(old_bend, bindex);
++
++ /*
++ * This loop sets the bstart and bend for the new dentry by
++ * traversing from left to right. It also dputs all negative
++ * dentries except the one at bindex.
++ */
++ for (i = loop_start; i <= loop_end; i++) {
++ if (!unionfs_lower_dentry_idx(dentry, i))
++ continue;
++
++ if (i == bindex) {
++ new_bend = i;
++ if (new_bstart < 0)
++ new_bstart = i;
++ continue;
++ }
++
++ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
++ dput(unionfs_lower_dentry_idx(dentry, i));
++ unionfs_set_lower_dentry_idx(dentry, i, NULL);
++
++ unionfs_mntput(dentry, i);
++ unionfs_set_lower_mnt_idx(dentry, i, NULL);
++ } else {
++ if (new_bstart < 0)
++ new_bstart = i;
++ new_bend = i;
++ }
++ }
++
++ if (new_bstart < 0)
++ new_bstart = bindex;
++ if (new_bend < 0)
++ new_bend = bindex;
++ set_dbstart(dentry, new_bstart);
++ set_dbend(dentry, new_bend);
++
++}
++
++/* set lower inode ptr and update bstart & bend if necessary */
++static void __set_inode(struct dentry *upper, struct dentry *lower,
++ int bindex)
++{
++ unionfs_set_lower_inode_idx(upper->d_inode, bindex,
++ igrab(lower->d_inode));
++ if (likely(ibstart(upper->d_inode) > bindex))
++ ibstart(upper->d_inode) = bindex;
++ if (likely(ibend(upper->d_inode) < bindex))
++ ibend(upper->d_inode) = bindex;
++
++}
++
++/* set lower dentry ptr and update bstart & bend if necessary */
++static void __set_dentry(struct dentry *upper, struct dentry *lower,
++ int bindex)
++{
++ unionfs_set_lower_dentry_idx(upper, bindex, lower);
++ if (likely(dbstart(upper) > bindex))
++ set_dbstart(upper, bindex);
++ if (likely(dbend(upper) < bindex))
++ set_dbend(upper, bindex);
++}
++
++/*
++ * This function replicates the directory structure up to the given
++ * dentry in the bindex branch.
++ */
++static struct dentry *create_parents_named(struct inode *dir,
++ struct dentry *dentry,
++ const char *name, int bindex)
++{
++ int err;
++ struct dentry *child_dentry;
++ struct dentry *parent_dentry;
++ struct dentry *hidden_parent_dentry = NULL;
++ struct dentry *hidden_dentry = NULL;
++ const char *childname;
++ unsigned int childnamelen;
++
++ int nr_dentry;
++ int count = 0;
++
++ int old_bstart;
++ int old_bend;
++ struct dentry **path = NULL;
++ struct super_block *sb;
++
++ verify_locked(dentry);
++
++ if ((err = is_robranch_super(dir->i_sb, bindex))) {
++ hidden_dentry = ERR_PTR(err);
++ goto out;
++ }
++
++ old_bstart = dbstart(dentry);
++ old_bend = dbend(dentry);
++
++ hidden_dentry = ERR_PTR(-ENOMEM);
++
++ /* There is no sense allocating any less than the minimum. */
++ nr_dentry = 1;
++ path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
++ if (!path)
++ goto out;
++
++ /* assume the negative dentry of unionfs as the parent dentry */
++ parent_dentry = dentry;
++
++ /*
++ * This loop finds the first parent that exists in the given branch.
++ * We start building the directory structure from there. At the end
++ * of the loop, the following should hold:
++ * - child_dentry is the first nonexistent child
++ * - parent_dentry is the first existent parent
++ * - path[0] is the deepest child
++ * - path[count] is the first child to create
++ */
++ do {
++ child_dentry = parent_dentry;
++
++ /* find the parent directory dentry in unionfs */
++ parent_dentry = child_dentry->d_parent;
++ unionfs_lock_dentry(parent_dentry);
++
++ /* find out the hidden_parent_dentry in the given branch */
++ hidden_parent_dentry =
++ unionfs_lower_dentry_idx(parent_dentry, bindex);
++
++ /* grow path table */
++ if (count == nr_dentry) {
++ void *p;
++
++ nr_dentry *= 2;
++ p = krealloc(path, nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
++ if (!p) {
++ hidden_dentry = ERR_PTR(-ENOMEM);
++ goto out;
++ }
++ path = p;
++ }
++
++ /* store the child dentry */
++ path[count++] = child_dentry;
++ } while (!hidden_parent_dentry);
++ count--;
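++ /*
++  * Worked example (hypothetical): copying up /a/b/c when only "a"
++  * already exists in the target branch leaves path[0] == c,
++  * path[1] == b, count == 1 and parent_dentry == a; the loop below
++  * then creates "b" and finally looks up (or creates) "c" itself
++  * under the new name.
++  */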
++
++ sb = dentry->d_sb;
++
++ /*
++ * This is basically while(child_dentry != dentry). This loop is
++ * horrible to follow and should be replaced with cleaner code.
++ */
++ while (1) {
++ /* get hidden parent dir in the current branch */
++ hidden_parent_dentry =
++ unionfs_lower_dentry_idx(parent_dentry, bindex);
++ unionfs_unlock_dentry(parent_dentry);
++
++ /* init the values to lookup */
++ childname = child_dentry->d_name.name;
++ childnamelen = child_dentry->d_name.len;
++
++ if (child_dentry != dentry) {
++ /* lookup child in the underlying file system */
++ hidden_dentry =
++ lookup_one_len(childname, hidden_parent_dentry,
++ childnamelen);
++ if (IS_ERR(hidden_dentry))
++ goto out;
++ } else {
++
++ /*
++ * is the name a whiteout of the child name? Look up
++ * the whiteout child in the underlying file system.
++ */
++ hidden_dentry =
++ lookup_one_len(name, hidden_parent_dentry,
++ strlen(name));
++ if (IS_ERR(hidden_dentry))
++ goto out;
++
++ /*
++ * Replace the current dentry (if any) with the new
++ * one.
++ */
++ dput(unionfs_lower_dentry_idx(dentry, bindex));
++ unionfs_set_lower_dentry_idx(dentry, bindex,
++ hidden_dentry);
++
++ __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
++ break;
++ }
++
++ if (hidden_dentry->d_inode) {
++ /*
++ * since this already exists we dput to avoid
++ * multiple references on the same dentry
++ */
++ dput(hidden_dentry);
++ } else {
++ struct sioq_args args;
++
++ /* it's a negative dentry, so create a new dir */
++ hidden_parent_dentry = lock_parent(hidden_dentry);
++
++ args.mkdir.parent = hidden_parent_dentry->d_inode;
++ args.mkdir.dentry = hidden_dentry;
++ args.mkdir.mode = child_dentry->d_inode->i_mode;
++
++ run_sioq(__unionfs_mkdir, &args);
++ err = args.err;
++
++ if (!err)
++ err = copyup_permissions(dir->i_sb,
++ child_dentry,
++ hidden_dentry);
++ unlock_dir(hidden_parent_dentry);
++ if (err) {
++ struct inode *inode = hidden_dentry->d_inode;
++ /*
++ * If we get here, it means that we created a new
++ * dentry+inode, but copying permissions failed.
++ * Therefore, we should delete this inode and dput
++ * the dentry so as not to leave cruft behind.
++ *
++ * XXX: call dentry_iput() instead, but then we have
++ * to export that symbol.
++ */
++ if (hidden_dentry->d_op && hidden_dentry->d_op->d_iput)
++ hidden_dentry->d_op->d_iput(hidden_dentry,
++ inode);
++ else
++ iput(inode);
++ hidden_dentry->d_inode = NULL;
++
++ dput(hidden_dentry);
++ hidden_dentry = ERR_PTR(err);
++ goto out;
++ }
++
++ }
++
++ __set_inode(child_dentry, hidden_dentry, bindex);
++ __set_dentry(child_dentry, hidden_dentry, bindex);
++
++ parent_dentry = child_dentry;
++ child_dentry = path[--count];
++ }
++out:
++ /* cleanup any leftover locks from the do/while loop above */
++ if (IS_ERR(hidden_dentry))
++ while (count)
++ unionfs_unlock_dentry(path[count--]);
++ kfree(path);
++ return hidden_dentry;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/dentry.c linux-2.6.22-591/fs/unionfs/dentry.c
+--- linux-2.6.22-570/fs/unionfs/dentry.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/dentry.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,353 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Revalidate a single dentry.
++ * Assume that dentry's info node is locked.
++ * Assume that parent(s) are all valid already, but
++ * the child may not yet be valid.
++ * Returns 1 if valid, 0 otherwise.
++ */
++static int __unionfs_d_revalidate_one(struct dentry *dentry,
++ struct nameidata *nd)
++{
++ int valid = 1; /* default is valid (1); invalid is 0. */
++ struct dentry *hidden_dentry;
++ int bindex, bstart, bend;
++ int sbgen, dgen;
++ int positive = 0;
++ int locked = 0;
++ int interpose_flag;
++
++ struct nameidata lowernd; /* TODO: be gentler to the stack */
++
++ if (nd)
++ memcpy(&lowernd, nd, sizeof(struct nameidata));
++ else
++ memset(&lowernd, 0, sizeof(struct nameidata));
++
++ verify_locked(dentry);
++
++ /* if the dentry is unhashed, do NOT revalidate */
++ if (d_deleted(dentry)) {
++ printk(KERN_DEBUG "unionfs: unhashed dentry being "
++ "revalidated: %*s\n",
++ dentry->d_name.len, dentry->d_name.name);
++ goto out;
++ }
++
++ BUG_ON(dbstart(dentry) == -1);
++ if (dentry->d_inode)
++ positive = 1;
++ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++ /*
++ * If we are working on an unconnected dentry, then there is no
++ * revalidation to be done, because this file does not exist within
++ * the namespace, and Unionfs operates on the namespace, not data.
++ */
++ if (sbgen != dgen) {
++ struct dentry *result;
++ int pdgen;
++
++ /* The root entry should always be valid */
++ BUG_ON(IS_ROOT(dentry));
++
++ /* We can't work correctly if our parent isn't valid. */
++ pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
++ BUG_ON(pdgen != sbgen); /* should never happen here */
++
++ /* Free the pointers for our inodes and this dentry. */
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ if (bstart >= 0) {
++ struct dentry *hidden_dentry;
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry =
++ unionfs_lower_dentry_idx(dentry,
++ bindex);
++ dput(hidden_dentry);
++ }
++ }
++ set_dbstart(dentry, -1);
++ set_dbend(dentry, -1);
++
++ interpose_flag = INTERPOSE_REVAL_NEG;
++ if (positive) {
++ interpose_flag = INTERPOSE_REVAL;
++ /*
++ * During BRM, the VFS could already hold a lock on
++ * a file being read, so don't lock it again (that would
++ * deadlock); but if we do take the lock in this function,
++ * we release it here too.
++ */
++ if (!mutex_is_locked(&dentry->d_inode->i_mutex)) {
++ mutex_lock(&dentry->d_inode->i_mutex);
++ locked = 1;
++ }
++
++ bstart = ibstart(dentry->d_inode);
++ bend = ibend(dentry->d_inode);
++ if (bstart >= 0) {
++ struct inode *hidden_inode;
++ for (bindex = bstart; bindex <= bend;
++ bindex++) {
++ hidden_inode =
++ unionfs_lower_inode_idx(
++ dentry->d_inode,
++ bindex);
++ iput(hidden_inode);
++ }
++ }
++ kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
++ UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
++ ibstart(dentry->d_inode) = -1;
++ ibend(dentry->d_inode) = -1;
++ if (locked)
++ mutex_unlock(&dentry->d_inode->i_mutex);
++ }
++
++ result = unionfs_lookup_backend(dentry, &lowernd,
++ interpose_flag);
++ if (result) {
++ if (IS_ERR(result)) {
++ valid = 0;
++ goto out;
++ }
++ /*
++ * current unionfs_lookup_backend() doesn't return
++ * a valid dentry
++ */
++ dput(dentry);
++ dentry = result;
++ }
++
++ if (positive && UNIONFS_I(dentry->d_inode)->stale) {
++ make_bad_inode(dentry->d_inode);
++ d_drop(dentry);
++ valid = 0;
++ goto out;
++ }
++ goto out;
++ }
++
++ /* The revalidation must occur across all branches */
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ BUG_ON(bstart == -1);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry || !hidden_dentry->d_op
++ || !hidden_dentry->d_op->d_revalidate)
++ continue;
++ if (!hidden_dentry->d_op->d_revalidate(hidden_dentry,
++ &lowernd))
++ valid = 0;
++ }
++
++ if (!dentry->d_inode)
++ valid = 0;
++
++ if (valid) {
++ fsstack_copy_attr_all(dentry->d_inode,
++ unionfs_lower_inode(dentry->d_inode),
++ unionfs_get_nlinks);
++ fsstack_copy_inode_size(dentry->d_inode,
++ unionfs_lower_inode(dentry->d_inode));
++ }
++
++out:
++ return valid;
++}
++
++/*
++ * Revalidate a parent chain of dentries, then the actual node.
++ * Assumes that dentry is locked, but will lock all parents if/when needed.
++ */
++int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd)
++{
++ int valid = 0; /* default is invalid (0); valid is 1. */
++ struct dentry **chain = NULL; /* chain of dentries to reval */
++ int chain_len = 0;
++ struct dentry *dtmp;
++ int sbgen, dgen, i;
++ int saved_bstart, saved_bend, bindex;
++
++ /* find length of chain needed to revalidate */
++ /* XXX: should I grab some global (dcache?) lock? */
++ chain_len = 0;
++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++ dtmp = dentry->d_parent;
++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
++ while (sbgen != dgen) {
++ /* The root entry should always be valid */
++ BUG_ON(IS_ROOT(dtmp));
++ chain_len++;
++ dtmp = dtmp->d_parent;
++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
++ }
++ if (chain_len == 0)
++ goto out_this; /* shortcut if parents are OK */
++
++ /*
++ * Allocate array of dentries to reval. We could use linked lists,
++ * but the number of entries to allocate here is usually small
++ * and short-lived, so an array gives better locality.
++ */
++ chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
++ if (!chain) {
++ printk(KERN_ERR "unionfs: no more memory in %s\n", __FUNCTION__);
++ goto out;
++ }
++
++ /*
++ * take a reference on all dentries in the chain, in child to
++ * parent order; each one is then locked in turn in the loop below.
++ */
++ dtmp = dentry->d_parent;
++ for (i = chain_len - 1; i >= 0; i--) {
++ chain[i] = dget(dtmp);
++ dtmp = dtmp->d_parent;
++ }
++
++ /*
++ * call __unionfs_d_revalidate() on each dentry, but in parent to
++ * child order.
++ */
++ for (i = 0; i < chain_len; i++) {
++ unionfs_lock_dentry(chain[i]);
++ saved_bstart = dbstart(chain[i]);
++ saved_bend = dbend(chain[i]);
++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++ dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
++
++ valid = __unionfs_d_revalidate_one(chain[i], nd);
++ /* XXX: is this the correct mntput condition?! */
++ if (valid && chain_len > 0 &&
++ sbgen != dgen && chain[i]->d_inode &&
++ S_ISDIR(chain[i]->d_inode->i_mode)) {
++ for (bindex = saved_bstart; bindex <= saved_bend;
++ bindex++)
++ unionfs_mntput(chain[i], bindex);
++ }
++ unionfs_unlock_dentry(chain[i]);
++
++ if (!valid)
++ goto out_free;
++ }
++
++out_this:
++ /* finally, lock this dentry and revalidate it */
++ verify_locked(dentry);
++ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++ valid = __unionfs_d_revalidate_one(dentry, nd);
++
++ /*
++ * If __unionfs_d_revalidate_one() succeeded above, then it will
++ * have incremented the refcnt of the mnt's, but also the branch
++ * indices of the dentry will have been updated (to take into
++ * account any branch insertions/deletions). So the current
++ * dbstart/dbend match the current, and new, indices of the mnts
++ * which __unionfs_d_revalidate_one has incremented. Note: the "if"
++ * test below does not depend on whether chain_len was 0 or greater.
++ */
++ if (valid && sbgen != dgen)
++ for (bindex = dbstart(dentry);
++ bindex <= dbend(dentry);
++ bindex++)
++ unionfs_mntput(dentry, bindex);
++
++out_free:
++ /* unlock/dput all dentries in chain and return status */
++ if (chain_len > 0) {
++ for (i=0; i<chain_len; i++)
++ dput(chain[i]);
++ kfree(chain);
++ }
++out:
++ return valid;
++}
++
++static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
++{
++ int err;
++
++ unionfs_read_lock(dentry->d_sb);
++
++ unionfs_lock_dentry(dentry);
++ err = __unionfs_d_revalidate_chain(dentry, nd);
++ unionfs_unlock_dentry(dentry);
++
++ unionfs_read_unlock(dentry->d_sb);
++
++ return err;
++}
++
++/*
++ * At this point no one can reference this dentry, so we don't have to be
++ * careful about concurrent access.
++ */
++static void unionfs_d_release(struct dentry *dentry)
++{
++ int bindex, bstart, bend;
++
++ unionfs_read_lock(dentry->d_sb);
++
++ /* this could be a negative dentry, so check first */
++ if (!UNIONFS_D(dentry)) {
++ printk(KERN_DEBUG "unionfs: dentry without private data: %.*s",
++ dentry->d_name.len, dentry->d_name.name);
++ goto out;
++ } else if (dbstart(dentry) < 0) {
++ /* this is due to a failed lookup */
++ printk(KERN_DEBUG "unionfs: dentry without hidden "
++ "dentries: %.*s",
++ dentry->d_name.len, dentry->d_name.name);
++ goto out_free;
++ }
++
++ /* Release all the hidden dentries */
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ dput(unionfs_lower_dentry_idx(dentry, bindex));
++ unionfs_mntput(dentry, bindex);
++
++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
++ }
++ /* free private data (unionfs_dentry_info) here */
++ kfree(UNIONFS_D(dentry)->lower_paths);
++ UNIONFS_D(dentry)->lower_paths = NULL;
++
++out_free:
++ /* No need to unlock it, because the dentry is going away. */
++ free_dentry_private_data(dentry);
++
++out:
++ unionfs_read_unlock(dentry->d_sb);
++ return;
++}
++
++struct dentry_operations unionfs_dops = {
++ .d_revalidate = unionfs_d_revalidate,
++ .d_release = unionfs_d_release,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/dirfops.c linux-2.6.22-591/fs/unionfs/dirfops.c
+--- linux-2.6.22-570/fs/unionfs/dirfops.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/dirfops.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,276 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* Make sure our rdstate is playing by the rules. */
++static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
++{
++ BUG_ON(rdstate->offset >= DIREOF);
++ BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
++}
++
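++/*
++ * unionfs_readdir() hands one of these to each lower readdir call;
++ * the rdstate hash remembers every name already seen, so an entry
++ * duplicated in a branch to the right is returned to userspace at
++ * most once, and whited-out names are suppressed entirely.
++ */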
++struct unionfs_getdents_callback {
++ struct unionfs_dir_state *rdstate;
++ void *dirent;
++ int entries_written;
++ int filldir_called;
++ int filldir_error;
++ filldir_t filldir;
++ struct super_block *sb;
++};
++
++/* based on generic filldir in fs/readdir.c */
++static int unionfs_filldir(void *dirent, const char *name, int namelen,
++ loff_t offset, u64 ino, unsigned int d_type)
++{
++ struct unionfs_getdents_callback *buf = dirent;
++ struct filldir_node *found = NULL;
++ int err = 0;
++ int is_wh_entry = 0;
++
++ buf->filldir_called++;
++
++ if ((namelen > UNIONFS_WHLEN) &&
++ !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
++ name += UNIONFS_WHLEN;
++ namelen -= UNIONFS_WHLEN;
++ is_wh_entry = 1;
++ }
++
++ found = find_filldir_node(buf->rdstate, name, namelen);
++
++ if (found)
++ goto out;
++
++ /* if 'name' isn't a whiteout, filldir it. */
++ if (!is_wh_entry) {
++ off_t pos = rdstate2offset(buf->rdstate);
++ u64 unionfs_ino = ino;
++
++ err = buf->filldir(buf->dirent, name, namelen, pos,
++ unionfs_ino, d_type);
++ buf->rdstate->offset++;
++ verify_rdstate_offset(buf->rdstate);
++ }
++ /*
++ * If we did fill it, stuff it in our hash, otherwise return an
++ * error.
++ */
++ if (err) {
++ buf->filldir_error = err;
++ goto out;
++ }
++ buf->entries_written++;
++ if ((err = add_filldir_node(buf->rdstate, name, namelen,
++ buf->rdstate->bindex, is_wh_entry)))
++ buf->filldir_error = err;
++
++out:
++ return err;
++}
++
++static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
++{
++ int err = 0;
++ struct file *hidden_file = NULL;
++ struct inode *inode = NULL;
++ struct unionfs_getdents_callback buf;
++ struct unionfs_dir_state *uds;
++ int bend;
++ loff_t offset;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 0)))
++ goto out;
++
++ inode = file->f_dentry->d_inode;
++
++ uds = UNIONFS_F(file)->rdstate;
++ if (!uds) {
++ if (file->f_pos == DIREOF) {
++ goto out;
++ } else if (file->f_pos > 0) {
++ uds = find_rdstate(inode, file->f_pos);
++ if (!uds) {
++ err = -ESTALE;
++ goto out;
++ }
++ UNIONFS_F(file)->rdstate = uds;
++ } else {
++ init_rdstate(file);
++ uds = UNIONFS_F(file)->rdstate;
++ }
++ }
++ bend = fbend(file);
++
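++ /*
++ * Walk the branches left to right, reading each lower
++ * directory in turn; unionfs_filldir() suppresses duplicate
++ * and whiteout entries across branches.
++ */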
++ while (uds->bindex <= bend) {
++ hidden_file = unionfs_lower_file_idx(file, uds->bindex);
++ if (!hidden_file) {
++ uds->bindex++;
++ uds->dirpos = 0;
++ continue;
++ }
++
++ /* prepare callback buffer */
++ buf.filldir_called = 0;
++ buf.filldir_error = 0;
++ buf.entries_written = 0;
++ buf.dirent = dirent;
++ buf.filldir = filldir;
++ buf.rdstate = uds;
++ buf.sb = inode->i_sb;
++
++ /* Read starting from where we last left off. */
++ offset = vfs_llseek(hidden_file, uds->dirpos, SEEK_SET);
++ if (offset < 0) {
++ err = offset;
++ goto out;
++ }
++ err = vfs_readdir(hidden_file, unionfs_filldir, &buf);
++
++ /* Save the position for when we continue. */
++ offset = vfs_llseek(hidden_file, 0, SEEK_CUR);
++ if (offset < 0) {
++ err = offset;
++ goto out;
++ }
++ uds->dirpos = offset;
++
++ /* Copy the atime. */
++ fsstack_copy_attr_atime(inode, hidden_file->f_dentry->d_inode);
++
++ if (err < 0)
++ goto out;
++
++ if (buf.filldir_error)
++ break;
++
++ if (!buf.entries_written) {
++ uds->bindex++;
++ uds->dirpos = 0;
++ }
++ }
++
++ if (!buf.filldir_error && uds->bindex >= bend) {
++ /* Save the number of hash entries for next time. */
++ UNIONFS_I(inode)->hashsize = uds->hashentries;
++ free_rdstate(uds);
++ UNIONFS_F(file)->rdstate = NULL;
++ file->f_pos = DIREOF;
++ } else
++ file->f_pos = rdstate2offset(uds);
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++/*
++ * This is not meant to be a generic repositioning function. If you do
++ * things that aren't supported, then we return EINVAL.
++ *
++ * What is allowed:
++ * (1) seeking to the same position that you are currently at
++ * This really has no effect, but returns where you are.
++ * (2) seeking to the beginning of the file
++ * This throws out all state, and lets you begin again.
++ */
++static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
++{
++ struct unionfs_dir_state *rdstate;
++ loff_t err;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 0)))
++ goto out;
++
++ rdstate = UNIONFS_F(file)->rdstate;
++
++ /*
++ * we let users seek to their current position, but not anywhere
++ * else.
++ */
++ if (!offset) {
++ switch (origin) {
++ case SEEK_SET:
++ if (rdstate) {
++ free_rdstate(rdstate);
++ UNIONFS_F(file)->rdstate = NULL;
++ }
++ init_rdstate(file);
++ err = 0;
++ break;
++ case SEEK_CUR:
++ err = file->f_pos;
++ break;
++ case SEEK_END:
++ /* Unsupported, because we would break everything. */
++ err = -EINVAL;
++ break;
++ }
++ } else {
++ switch (origin) {
++ case SEEK_SET:
++ if (rdstate) {
++ if (offset == rdstate2offset(rdstate))
++ err = offset;
++ else if (file->f_pos == DIREOF)
++ err = DIREOF;
++ else
++ err = -EINVAL;
++ } else {
++ rdstate = find_rdstate(file->f_dentry->d_inode,
++ offset);
++ if (rdstate) {
++ UNIONFS_F(file)->rdstate = rdstate;
++ err = rdstate->offset;
++ } else
++ err = -EINVAL;
++ }
++ break;
++ case SEEK_CUR:
++ case SEEK_END:
++ /* Unsupported, because we would break everything. */
++ err = -EINVAL;
++ break;
++ }
++ }
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++/*
++ * Trimmed directory operations; we shouldn't pass everything down since
++ * we don't want to operate on partial directories.
++ */
++struct file_operations unionfs_dir_fops = {
++ .llseek = unionfs_dir_llseek,
++ .read = generic_read_dir,
++ .readdir = unionfs_readdir,
++ .unlocked_ioctl = unionfs_ioctl,
++ .open = unionfs_open,
++ .release = unionfs_file_release,
++ .flush = unionfs_flush,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/dirhelper.c linux-2.6.22-591/fs/unionfs/dirhelper.c
+--- linux-2.6.22-570/fs/unionfs/dirhelper.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/dirhelper.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,273 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Delete all of the whiteouts in a given directory for rmdir.
++ *
++ * The hidden directory inode must be locked by the caller.
++ */
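++/*
++ * For illustration: the whiteout for "foo" is the entry ".wh.foo"
++ * (UNIONFS_WHPFX + name) in the same directory, e.g.:
++ *
++ *   branch 0 (rw): dir/.wh.foo  - masks "foo" in branches to the right
++ *   branch 1 (ro): dir/foo      - hidden from the union
++ */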
++int do_delete_whiteouts(struct dentry *dentry, int bindex,
++ struct unionfs_dir_state *namelist)
++{
++ int err = 0;
++ struct dentry *hidden_dir_dentry = NULL;
++ struct dentry *hidden_dentry;
++ char *name = NULL, *p;
++ struct inode *hidden_dir;
++
++ int i;
++ struct list_head *pos;
++ struct filldir_node *cursor;
++
++ /* Find out hidden parent dentry */
++ hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
++ hidden_dir = hidden_dir_dentry->d_inode;
++ BUG_ON(!S_ISDIR(hidden_dir->i_mode));
++
++ err = -ENOMEM;
++ name = __getname();
++ if (!name)
++ goto out;
++ strcpy(name, UNIONFS_WHPFX);
++ p = name + UNIONFS_WHLEN;
++
++ err = 0;
++ for (i = 0; !err && i < namelist->size; i++) {
++ list_for_each(pos, &namelist->list[i]) {
++ cursor =
++ list_entry(pos, struct filldir_node,
++ file_list);
++ /* Only operate on whiteouts in this branch. */
++ if (cursor->bindex != bindex)
++ continue;
++ if (!cursor->whiteout)
++ continue;
++
++ strcpy(p, cursor->name);
++ hidden_dentry =
++ lookup_one_len(name, hidden_dir_dentry,
++ cursor->namelen +
++ UNIONFS_WHLEN);
++ if (IS_ERR(hidden_dentry)) {
++ err = PTR_ERR(hidden_dentry);
++ break;
++ }
++ if (hidden_dentry->d_inode)
++ err = vfs_unlink(hidden_dir, hidden_dentry);
++ dput(hidden_dentry);
++ if (err)
++ break;
++ }
++ }
++
++ __putname(name);
++
++ /* After all of the removals, we should copy the attributes once. */
++ fsstack_copy_attr_times(dentry->d_inode, hidden_dir_dentry->d_inode);
++
++out:
++ return err;
++}
++
++/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
++int delete_whiteouts(struct dentry *dentry, int bindex,
++ struct unionfs_dir_state *namelist)
++{
++ int err;
++ struct super_block *sb;
++ struct dentry *hidden_dir_dentry;
++ struct inode *hidden_dir;
++
++ struct sioq_args args;
++
++ sb = dentry->d_sb;
++
++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
++ BUG_ON(bindex < dbstart(dentry));
++ BUG_ON(bindex > dbend(dentry));
++ err = is_robranch_super(sb, bindex);
++ if (err)
++ goto out;
++
++ hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
++ hidden_dir = hidden_dir_dentry->d_inode;
++ BUG_ON(!S_ISDIR(hidden_dir->i_mode));
++
++ mutex_lock(&hidden_dir->i_mutex);
++ if (!permission(hidden_dir, MAY_WRITE | MAY_EXEC, NULL))
++ err = do_delete_whiteouts(dentry, bindex, namelist);
++ else {
++ args.deletewh.namelist = namelist;
++ args.deletewh.dentry = dentry;
++ args.deletewh.bindex = bindex;
++ run_sioq(__delete_whiteouts, &args);
++ err = args.err;
++ }
++ mutex_unlock(&hidden_dir->i_mutex);
++
++out:
++ return err;
++}
++
++#define RD_NONE 0
++#define RD_CHECK_EMPTY 1
++/* The callback structure for check_empty. */
++struct unionfs_rdutil_callback {
++ int err;
++ int filldir_called;
++ struct unionfs_dir_state *rdstate;
++ int mode;
++};
++
++/* This filldir function makes sure only whiteouts exist within a directory. */
++static int readdir_util_callback(void *dirent, const char *name, int namelen,
++ loff_t offset, u64 ino, unsigned int d_type)
++{
++ int err = 0;
++ struct unionfs_rdutil_callback *buf = dirent;
++ int whiteout = 0;
++ struct filldir_node *found;
++
++ buf->filldir_called = 1;
++
++ if (name[0] == '.' && (namelen == 1 ||
++ (name[1] == '.' && namelen == 2)))
++ goto out;
++
++ if (namelen > UNIONFS_WHLEN &&
++ !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
++ namelen -= UNIONFS_WHLEN;
++ name += UNIONFS_WHLEN;
++ whiteout = 1;
++ }
++
++ found = find_filldir_node(buf->rdstate, name, namelen);
++ /* If it was found in the table there was a previous whiteout. */
++ if (found)
++ goto out;
++
++ /*
++ * if it wasn't found and isn't a whiteout, the directory isn't
++ * empty.
++ */
++ err = -ENOTEMPTY;
++ if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
++ goto out;
++
++ err = add_filldir_node(buf->rdstate, name, namelen,
++ buf->rdstate->bindex, whiteout);
++
++out:
++ buf->err = err;
++ return err;
++}
++
++/* Is a directory logically empty? */
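++/*
++ * "Logically empty" means that, whiteouts aside, nothing but "." and
++ * ".." remains across the visible branches; readdir_util_callback()
++ * above returns -ENOTEMPTY on anything else.
++ */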
++int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL;
++ struct super_block *sb;
++ struct file *hidden_file;
++ struct unionfs_rdutil_callback *buf = NULL;
++ int bindex, bstart, bend, bopaque;
++
++ sb = dentry->d_sb;
++
++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
++
++ if ((err = unionfs_partial_lookup(dentry)))
++ goto out;
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ bopaque = dbopaque(dentry);
++ if (0 <= bopaque && bopaque < bend)
++ bend = bopaque;
++
++ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
++ if (!buf) {
++ err = -ENOMEM;
++ goto out;
++ }
++ buf->err = 0;
++ buf->mode = RD_CHECK_EMPTY;
++ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
++ if (!buf->rdstate) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ /* Process the hidden directories with rdutil_callback as a filldir. */
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++ if (!hidden_dentry->d_inode)
++ continue;
++ if (!S_ISDIR(hidden_dentry->d_inode->i_mode))
++ continue;
++
++ dget(hidden_dentry);
++ unionfs_mntget(dentry, bindex);
++ branchget(sb, bindex);
++ hidden_file =
++ dentry_open(hidden_dentry,
++ unionfs_lower_mnt_idx(dentry, bindex),
++ O_RDONLY);
++ if (IS_ERR(hidden_file)) {
++ err = PTR_ERR(hidden_file);
++ dput(hidden_dentry);
++ branchput(sb, bindex);
++ goto out;
++ }
++
++ do {
++ buf->filldir_called = 0;
++ buf->rdstate->bindex = bindex;
++ err = vfs_readdir(hidden_file,
++ readdir_util_callback, buf);
++ if (buf->err)
++ err = buf->err;
++ } while ((err >= 0) && buf->filldir_called);
++
++ /* fput calls dput for hidden_dentry */
++ fput(hidden_file);
++ branchput(sb, bindex);
++
++ if (err < 0)
++ goto out;
++ }
++
++out:
++ if (buf) {
++ if (namelist && !err)
++ *namelist = buf->rdstate;
++ else if (buf->rdstate)
++ free_rdstate(buf->rdstate);
++ kfree(buf);
++ }
++
++ return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/fanout.h linux-2.6.22-591/fs/unionfs/fanout.h
+--- linux-2.6.22-570/fs/unionfs/fanout.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/fanout.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,308 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _FANOUT_H_
++#define _FANOUT_H_
++
++/*
++ * Inode to private data
++ *
++ * Since we use containers and the struct inode is _inside_ the
++ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
++ * inode pointer) return a valid non-NULL pointer.
++ */
++static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
++{
++ return container_of(inode, struct unionfs_inode_info, vfs_inode);
++}
++
++#define ibstart(ino) (UNIONFS_I(ino)->bstart)
++#define ibend(ino) (UNIONFS_I(ino)->bend)
++
++/* Superblock to private data */
++#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
++#define sbstart(sb) 0
++#define sbend(sb) (UNIONFS_SB(sb)->bend)
++#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
++#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
++
++/* File to private Data */
++#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
++#define fbstart(file) (UNIONFS_F(file)->bstart)
++#define fbend(file) (UNIONFS_F(file)->bend)
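++
++/*
++ * In all of these, bstart/bend give the leftmost and rightmost branch
++ * indices at which the object (inode, file, or dentry) currently has
++ * a lower counterpart; -1 means none.
++ */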
++
++/* macros to manipulate branch IDs stored in our superblock */
++static inline int branch_id(struct super_block *sb, int index)
++{
++ BUG_ON(!sb || index < 0);
++ return UNIONFS_SB(sb)->data[index].branch_id;
++}
++
++static inline void set_branch_id(struct super_block *sb, int index, int val)
++{
++ BUG_ON(!sb || index < 0);
++ UNIONFS_SB(sb)->data[index].branch_id = val;
++}
++
++static inline void new_branch_id(struct super_block *sb, int index)
++{
++ BUG_ON(!sb || index < 0);
++ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
++}
++
++/*
++ * Find the new index of a matching branch in an existing superblock,
++ * given a (possibly old) branch id. This is needed because branches
++ * could have been added/deleted, causing the branches of any open
++ * files to shift.
++ *
++ * @sb: the new superblock which may have new/different branch IDs
++ * @id: the old/existing id we're looking for
++ * Returns index of newly found branch (0 or greater), -1 otherwise.
++ */
++static inline int branch_id_to_idx(struct super_block *sb, int id)
++{
++ int i;
++ for (i = 0; i < sbmax(sb); i++) {
++ if (branch_id(sb, i) == id)
++ return i;
++ }
++ /*
++ * XXX: maybe we should BUG_ON if the new branch index is not
++ * found? (really, that should never happen).
++ */
++ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
++ return -1;
++}
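++
++/*
++ * A typical use (sketch, not verbatim from this patch): a file opened
++ * before a branch-management operation saved the IDs of its lower
++ * branches, and each saved ID can later be mapped back to a possibly
++ * shifted index:
++ *
++ *   int new_idx = branch_id_to_idx(sb,
++ *                 UNIONFS_F(file)->saved_branch_ids[old_idx]);
++ */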
++
++/* File to lower file. */
++static inline struct file *unionfs_lower_file(const struct file *f)
++{
++ BUG_ON(!f);
++ return UNIONFS_F(f)->lower_files[fbstart(f)];
++}
++
++static inline struct file *unionfs_lower_file_idx(const struct file *f,
++ int index)
++{
++ BUG_ON(!f || index < 0);
++ return UNIONFS_F(f)->lower_files[index];
++}
++
++static inline void unionfs_set_lower_file_idx(struct file *f, int index,
++ struct file *val)
++{
++ BUG_ON(!f || index < 0);
++ UNIONFS_F(f)->lower_files[index] = val;
++ /* save branch ID (may be redundant?) */
++ UNIONFS_F(f)->saved_branch_ids[index] =
++ branch_id((f)->f_dentry->d_sb, index);
++}
++
++static inline void unionfs_set_lower_file(struct file *f, struct file *val)
++{
++ BUG_ON(!f);
++ unionfs_set_lower_file_idx((f), fbstart(f), (val));
++}
++
++/* Inode to lower inode. */
++static inline struct inode *unionfs_lower_inode(const struct inode *i)
++{
++ BUG_ON(!i);
++ return UNIONFS_I(i)->lower_inodes[ibstart(i)];
++}
++
++static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
++ int index)
++{
++ BUG_ON(!i || index < 0);
++ return UNIONFS_I(i)->lower_inodes[index];
++}
++
++static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
++ struct inode *val)
++{
++ BUG_ON(!i || index < 0);
++ UNIONFS_I(i)->lower_inodes[index] = val;
++}
++
++static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
++{
++ BUG_ON(!i);
++ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
++}
++
++/* Superblock to lower superblock. */
++static inline struct super_block *unionfs_lower_super(
++ const struct super_block *sb)
++{
++ BUG_ON(!sb);
++ return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
++}
++
++static inline struct super_block *unionfs_lower_super_idx(
++ const struct super_block *sb,
++ int index)
++{
++ BUG_ON(!sb || index < 0);
++ return UNIONFS_SB(sb)->data[index].sb;
++}
++
++static inline void unionfs_set_lower_super_idx(struct super_block *sb,
++ int index,
++ struct super_block *val)
++{
++ BUG_ON(!sb || index < 0);
++ UNIONFS_SB(sb)->data[index].sb = val;
++}
++
++static inline void unionfs_set_lower_super(struct super_block *sb,
++ struct super_block *val)
++{
++ BUG_ON(!sb);
++ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
++}
++
++/* Branch count macros. */
++static inline int branch_count(const struct super_block *sb, int index)
++{
++ BUG_ON(!sb || index < 0);
++ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
++}
++
++static inline void set_branch_count(struct super_block *sb, int index, int val)
++{
++ BUG_ON(!sb || index < 0);
++ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
++}
++
++static inline void branchget(struct super_block *sb, int index)
++{
++ BUG_ON(!sb || index < 0);
++ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
++}
++
++static inline void branchput(struct super_block *sb, int index)
++{
++ BUG_ON(!sb || index < 0);
++ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
++}
++
++/* Dentry macros */
++static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return dent->d_fsdata;
++}
++
++static inline int dbstart(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return UNIONFS_D(dent)->bstart;
++}
++
++static inline void set_dbstart(struct dentry *dent, int val)
++{
++ BUG_ON(!dent);
++ UNIONFS_D(dent)->bstart = val;
++}
++
++static inline int dbend(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return UNIONFS_D(dent)->bend;
++}
++
++static inline void set_dbend(struct dentry *dent, int val)
++{
++ BUG_ON(!dent);
++ UNIONFS_D(dent)->bend = val;
++}
++
++static inline int dbopaque(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return UNIONFS_D(dent)->bopaque;
++}
++
++static inline void set_dbopaque(struct dentry *dent, int val)
++{
++ BUG_ON(!dent);
++ UNIONFS_D(dent)->bopaque = val;
++}
++
++static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
++ struct dentry *val)
++{
++ BUG_ON(!dent || index < 0);
++ UNIONFS_D(dent)->lower_paths[index].dentry = val;
++}
++
++static inline struct dentry *unionfs_lower_dentry_idx(
++ const struct dentry *dent,
++ int index)
++{
++ BUG_ON(!dent || index < 0);
++ return UNIONFS_D(dent)->lower_paths[index].dentry;
++}
++
++static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return unionfs_lower_dentry_idx(dent, dbstart(dent));
++}
++
++static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
++ struct vfsmount *mnt)
++{
++ BUG_ON(!dent || index < 0);
++ UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
++}
++
++static inline struct vfsmount *unionfs_lower_mnt_idx(
++ const struct dentry *dent,
++ int index)
++{
++ BUG_ON(!dent || index < 0);
++ return UNIONFS_D(dent)->lower_paths[index].mnt;
++}
++
++static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
++{
++ BUG_ON(!dent);
++ return unionfs_lower_mnt_idx(dent, dbstart(dent));
++}
++
++/* Macros for locking a dentry. */
++static inline void unionfs_lock_dentry(struct dentry *d)
++{
++ BUG_ON(!d);
++ mutex_lock(&UNIONFS_D(d)->lock);
++}
++
++static inline void unionfs_unlock_dentry(struct dentry *d)
++{
++ BUG_ON(!d);
++ mutex_unlock(&UNIONFS_D(d)->lock);
++}
++
++static inline void verify_locked(struct dentry *d)
++{
++ BUG_ON(!d);
++ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
++}
++
++#endif /* _FANOUT_H_ */
+diff -Nurb linux-2.6.22-570/fs/unionfs/file.c linux-2.6.22-591/fs/unionfs/file.c
+--- linux-2.6.22-570/fs/unionfs/file.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/file.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,149 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*******************
++ * File Operations *
++ *******************/
++
++static ssize_t unionfs_read(struct file *file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ int err;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 0)))
++ goto out;
++
++ err = do_sync_read(file, buf, count, ppos);
++
++ if (err >= 0)
++ touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++ unionfs_lower_dentry(file->f_path.dentry));
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
++ unsigned long nr_segs, loff_t pos)
++{
++ int err = 0;
++ struct file *file = iocb->ki_filp;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 0)))
++ goto out;
++
++ err = generic_file_aio_read(iocb, iov, nr_segs, pos);
++
++ if (err == -EIOCBQUEUED)
++ err = wait_on_sync_kiocb(iocb);
++
++ if (err >= 0)
++ touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++ unionfs_lower_dentry(file->f_path.dentry));
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++static ssize_t unionfs_write(struct file *file, const char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ int err = 0;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 1)))
++ goto out;
++
++ err = do_sync_write(file, buf, count, ppos);
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++static int unionfs_file_readdir(struct file *file, void *dirent,
++ filldir_t filldir)
++{
++ return -ENOTDIR;
++}
++
++static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ int err = 0;
++ int willwrite;
++ struct file *lower_file;
++
++ unionfs_read_lock(file->f_path.dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 1)))
++ goto out;
++
++ /* This might be deferred to mmap's writepage */
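++ /*
++ * (x | bits) == x holds iff all of the bits are already set
++ * in x, so willwrite is 1 exactly for shared writable
++ * mappings.
++ */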
++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
++ if ((err = unionfs_file_revalidate(file, willwrite)))
++ goto out;
++
++ /*
++ * File systems which do not implement ->writepage may use
++ * generic_file_readonly_mmap as their ->mmap op. If you call
++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
++ * But we cannot call the lower ->mmap op, so we can't tell that
++ * writeable mappings won't work. Therefore, our only choice is to
++ * check if the lower file system supports the ->writepage, and if
++ * not, return EINVAL (the same error that
++ * generic_file_readonly_mmap returns in that case).
++ */
++ lower_file = unionfs_lower_file(file);
++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
++ err = -EINVAL;
++ printk("unionfs: branch %d file system does not support "
++ "writeable mmap\n", fbstart(file));
++ } else {
++ err = generic_file_mmap(file, vma);
++ if (err)
++ printk("unionfs: generic_file_mmap failed %d\n", err);
++ }
++
++out:
++ unionfs_read_unlock(file->f_path.dentry->d_sb);
++ return err;
++}
++
++struct file_operations unionfs_main_fops = {
++ .llseek = generic_file_llseek,
++ .read = unionfs_read,
++ .aio_read = unionfs_aio_read,
++ .write = unionfs_write,
++ .aio_write = generic_file_aio_write,
++ .readdir = unionfs_file_readdir,
++ .unlocked_ioctl = unionfs_ioctl,
++ .mmap = unionfs_mmap,
++ .open = unionfs_open,
++ .flush = unionfs_flush,
++ .release = unionfs_file_release,
++ .fsync = file_fsync,
++ .sendfile = generic_file_sendfile,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/inode.c linux-2.6.22-591/fs/unionfs/inode.c
+--- linux-2.6.22-570/fs/unionfs/inode.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/inode.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,1138 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++static int unionfs_create(struct inode *parent, struct dentry *dentry,
++ int mode, struct nameidata *nd)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL;
++ struct dentry *wh_dentry = NULL;
++ struct dentry *new_hidden_dentry;
++ struct dentry *hidden_parent_dentry = NULL;
++ int bindex = 0, bstart;
++ char *name = NULL;
++ int valid = 0;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ unionfs_lock_dentry(dentry->d_parent);
++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd);
++ unionfs_unlock_dentry(dentry->d_parent);
++ if (!valid) {
++ err = -ESTALE; /* same as what real_lookup does */
++ goto out;
++ }
++ valid = __unionfs_d_revalidate_chain(dentry, nd);
++ /*
++ * It's only a bug if this dentry was not negative and couldn't be
++ * revalidated (shouldn't happen).
++ */
++ BUG_ON(!valid && dentry->d_inode);
++
++ /* We start out in the leftmost branch. */
++ bstart = dbstart(dentry);
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ /*
++ * check if whiteout exists in this branch, i.e. lookup .wh.foo
++ * first.
++ */
++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ wh_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(wh_dentry)) {
++ err = PTR_ERR(wh_dentry);
++ wh_dentry = NULL;
++ goto out;
++ }
++
++ if (wh_dentry->d_inode) {
++ /*
++ * .wh.foo has been found.
++ * First truncate it and then rename it to foo (hence having
++ * the same overall effect as a normal create).
++ */
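++ /*
++ * Renaming the whiteout in place (rather than unlinking
++ * it and creating a fresh file) presumably avoids any
++ * window in which the name exists in neither form on
++ * this branch.
++ */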
++ struct dentry *hidden_dir_dentry;
++ struct iattr newattrs;
++
++ mutex_lock(&wh_dentry->d_inode->i_mutex);
++ newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME
++ | ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE
++ | ATTR_KILL_SUID | ATTR_KILL_SGID;
++
++ newattrs.ia_mode = mode & ~current->fs->umask;
++ newattrs.ia_uid = current->fsuid;
++ newattrs.ia_gid = current->fsgid;
++
++ if (wh_dentry->d_inode->i_size != 0) {
++ newattrs.ia_valid |= ATTR_SIZE;
++ newattrs.ia_size = 0;
++ }
++
++ err = notify_change(wh_dentry, &newattrs);
++
++ mutex_unlock(&wh_dentry->d_inode->i_mutex);
++
++ if (err)
++ printk(KERN_WARNING "unionfs: %s:%d: notify_change "
++ "failed: %d, ignoring..\n",
++ __FILE__, __LINE__, err);
++
++ new_hidden_dentry = unionfs_lower_dentry(dentry);
++ dget(new_hidden_dentry);
++
++ hidden_dir_dentry = dget_parent(wh_dentry);
++ lock_rename(hidden_dir_dentry, hidden_dir_dentry);
++
++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
++ err = vfs_rename(hidden_dir_dentry->d_inode,
++ wh_dentry,
++ hidden_dir_dentry->d_inode,
++ new_hidden_dentry);
++ }
++ if (!err) {
++ fsstack_copy_attr_times(parent,
++ new_hidden_dentry->d_parent->
++ d_inode);
++ fsstack_copy_inode_size(parent,
++ new_hidden_dentry->d_parent->
++ d_inode);
++ parent->i_nlink = unionfs_get_nlinks(parent);
++ }
++
++ unlock_rename(hidden_dir_dentry, hidden_dir_dentry);
++ dput(hidden_dir_dentry);
++
++ dput(new_hidden_dentry);
++
++ if (err) {
++ /* exit if the error returned was NOT -EROFS */
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++ /*
++ * We were not able to create the file in this
++ * branch, so we try to create it in the next
++ * branch to the left.
++ */
++ bstart--;
++ } else {
++ /*
++ * reset the unionfs dentry to point to the .wh.foo
++ * entry.
++ */
++
++ /* Discard any old reference. */
++ dput(unionfs_lower_dentry(dentry));
++
++ /* Trade one reference to another. */
++ unionfs_set_lower_dentry_idx(dentry, bstart,
++ wh_dentry);
++ wh_dentry = NULL;
++
++ err = unionfs_interpose(dentry, parent->i_sb, 0);
++ goto out;
++ }
++ }
++
++ for (bindex = bstart; bindex >= 0; bindex--) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry) {
++ /*
++ * if hidden_dentry is NULL, create the entire
++ * dentry directory structure in branch 'bindex'.
++ * hidden_dentry will NOT be null when bindex == bstart
++ * because lookup passed us a negative unionfs dentry
++ * pointing to a lone negative underlying dentry.
++ */
++ hidden_dentry = create_parents(parent, dentry, bindex);
++ if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++ if (IS_ERR(hidden_dentry))
++ err = PTR_ERR(hidden_dentry);
++ continue;
++ }
++ }
++
++ hidden_parent_dentry = lock_parent(hidden_dentry);
++ if (IS_ERR(hidden_parent_dentry)) {
++ err = PTR_ERR(hidden_parent_dentry);
++ goto out;
++ }
++ /* We shouldn't create things in a read-only branch. */
++ if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++ err = vfs_create(hidden_parent_dentry->d_inode,
++ hidden_dentry, mode, nd);
++
++ if (err || !hidden_dentry->d_inode) {
++ unlock_dir(hidden_parent_dentry);
++
++ /* break out of for loop if the error wasn't -EROFS */
++ if (!IS_COPYUP_ERR(err))
++ break;
++ } else {
++ err = unionfs_interpose(dentry, parent->i_sb, 0);
++ if (!err) {
++ fsstack_copy_attr_times(parent,
++ hidden_parent_dentry->
++ d_inode);
++ fsstack_copy_inode_size(parent,
++ hidden_parent_dentry->
++ d_inode);
++ /* update no. of links on parent directory */
++ parent->i_nlink = unionfs_get_nlinks(parent);
++ }
++ unlock_dir(hidden_parent_dentry);
++ break;
++ }
++ }
++
++out:
++ dput(wh_dentry);
++ kfree(name);
++
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++static struct dentry *unionfs_lookup(struct inode *parent,
++ struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct path path_save;
++ struct dentry *ret;
++
++ unionfs_read_lock(dentry->d_sb);
++
++ /* save the dentry & vfsmnt from namei */
++ if (nd) {
++ path_save.dentry = nd->dentry;
++ path_save.mnt = nd->mnt;
++ }
++
++ /* The locking is done by unionfs_lookup_backend. */
++ ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
++
++ /* restore the dentry & vfsmnt in namei */
++ if (nd) {
++ nd->dentry = path_save.dentry;
++ nd->mnt = path_save.mnt;
++ }
++
++ unionfs_read_unlock(dentry->d_sb);
++
++ return ret;
++}
++
++static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
++ struct dentry *new_dentry)
++{
++ int err = 0;
++ struct dentry *hidden_old_dentry = NULL;
++ struct dentry *hidden_new_dentry = NULL;
++ struct dentry *hidden_dir_dentry = NULL;
++ struct dentry *whiteout_dentry;
++ char *name = NULL;
++
++ unionfs_read_lock(old_dentry->d_sb);
++ unionfs_double_lock_dentry(new_dentry, old_dentry);
++
++ if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++ if (new_dentry->d_inode &&
++ !__unionfs_d_revalidate_chain(new_dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_new_dentry = unionfs_lower_dentry(new_dentry);
++
++ /*
++ * check if whiteout exists in the branch of new dentry, i.e. lookup
++ * .wh.foo first. If present, delete it
++ */
++ name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ whiteout_dentry = lookup_one_len(name, hidden_new_dentry->d_parent,
++ new_dentry->d_name.len +
++ UNIONFS_WHLEN);
++ if (IS_ERR(whiteout_dentry)) {
++ err = PTR_ERR(whiteout_dentry);
++ goto out;
++ }
++
++ if (!whiteout_dentry->d_inode) {
++ dput(whiteout_dentry);
++ whiteout_dentry = NULL;
++ } else {
++ /* found a .wh.foo entry, unlink it and then call vfs_link() */
++ hidden_dir_dentry = lock_parent(whiteout_dentry);
++ err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
++ if (!err)
++ err = vfs_unlink(hidden_dir_dentry->d_inode,
++ whiteout_dentry);
++
++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++ dir->i_nlink = unionfs_get_nlinks(dir);
++ unlock_dir(hidden_dir_dentry);
++ hidden_dir_dentry = NULL;
++ dput(whiteout_dentry);
++ if (err)
++ goto out;
++ }
++
++ if (dbstart(old_dentry) != dbstart(new_dentry)) {
++ hidden_new_dentry =
++ create_parents(dir, new_dentry, dbstart(old_dentry));
++ err = PTR_ERR(hidden_new_dentry);
++ if (IS_COPYUP_ERR(err))
++ goto docopyup;
++ if (!hidden_new_dentry || IS_ERR(hidden_new_dentry))
++ goto out;
++ }
++ hidden_new_dentry = unionfs_lower_dentry(new_dentry);
++ hidden_old_dentry = unionfs_lower_dentry(old_dentry);
++
++ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
++ hidden_dir_dentry = lock_parent(hidden_new_dentry);
++ if (!(err = is_robranch(old_dentry)))
++ err = vfs_link(hidden_old_dentry, hidden_dir_dentry->d_inode,
++ hidden_new_dentry);
++ unlock_dir(hidden_dir_dentry);
++
++docopyup:
++ if (IS_COPYUP_ERR(err)) {
++ int old_bstart = dbstart(old_dentry);
++ int bindex;
++
++ for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
++ err = copyup_dentry(old_dentry->d_parent->d_inode,
++ old_dentry, old_bstart,
++ bindex, NULL,
++ old_dentry->d_inode->i_size);
++ if (!err) {
++ hidden_new_dentry =
++ create_parents(dir, new_dentry,
++ bindex);
++ hidden_old_dentry =
++ unionfs_lower_dentry(old_dentry);
++ hidden_dir_dentry =
++ lock_parent(hidden_new_dentry);
++ /* do vfs_link */
++ err = vfs_link(hidden_old_dentry,
++ hidden_dir_dentry->d_inode,
++ hidden_new_dentry);
++ unlock_dir(hidden_dir_dentry);
++ goto check_link;
++ }
++ }
++ goto out;
++ }
++
++check_link:
++ if (err || !hidden_new_dentry->d_inode)
++ goto out;
++
++ /* It's a hard link, so use the same inode */
++ new_dentry->d_inode = igrab(old_dentry->d_inode);
++ d_instantiate(new_dentry, new_dentry->d_inode);
++ fsstack_copy_attr_all(dir, hidden_new_dentry->d_parent->d_inode,
++ unionfs_get_nlinks);
++ fsstack_copy_inode_size(dir, hidden_new_dentry->d_parent->d_inode);
++
++ /* propagate number of hard-links */
++ old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
++
++out:
++ if (!new_dentry->d_inode)
++ d_drop(new_dentry);
++
++ kfree(name);
++
++ unionfs_unlock_dentry(new_dentry);
++ unionfs_unlock_dentry(old_dentry);
++
++ unionfs_read_unlock(old_dentry->d_sb);
++
++ return err;
++}
++
++static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
++ const char *symname)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL;
++ struct dentry *whiteout_dentry = NULL;
++ struct dentry *hidden_dir_dentry = NULL;
++ umode_t mode;
++ int bindex = 0, bstart;
++ char *name = NULL;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (dentry->d_inode &&
++ !__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ /* We start out in the leftmost branch. */
++ bstart = dbstart(dentry);
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ /*
++ * check if whiteout exists in this branch, i.e. lookup .wh.foo
++ * first. If present, delete it
++ */
++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ whiteout_dentry =
++ lookup_one_len(name, hidden_dentry->d_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(whiteout_dentry)) {
++ err = PTR_ERR(whiteout_dentry);
++ goto out;
++ }
++
++ if (!whiteout_dentry->d_inode) {
++ dput(whiteout_dentry);
++ whiteout_dentry = NULL;
++ } else {
++ /*
++ * found a .wh.foo entry, unlink it and then call
++ * vfs_symlink().
++ */
++ hidden_dir_dentry = lock_parent(whiteout_dentry);
++
++ if (!(err = is_robranch_super(dentry->d_sb, bstart)))
++ err = vfs_unlink(hidden_dir_dentry->d_inode,
++ whiteout_dentry);
++ dput(whiteout_dentry);
++
++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++ /* propagate number of hard-links */
++ dir->i_nlink = unionfs_get_nlinks(dir);
++
++ unlock_dir(hidden_dir_dentry);
++
++ if (err) {
++ /* exit if the error returned was NOT -EROFS */
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++ /*
++ * we should now try to create the symlink in
++ * another branch.
++ */
++ bstart--;
++ }
++ }
++
++ /*
++ * deleted whiteout if it was present, now do a normal vfs_symlink()
++ * with possible recursive directory creation
++ */
++ for (bindex = bstart; bindex >= 0; bindex--) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry) {
++ /*
++ * if hidden_dentry is NULL, create the entire
++ * dentry directory structure in branch 'bindex'.
++ * hidden_dentry will NOT be null when bindex ==
++ * bstart because lookup passed as a negative
++ * unionfs dentry pointing to a lone negative
++ * underlying dentry
++ */
++ hidden_dentry = create_parents(dir, dentry, bindex);
++ if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++ if (IS_ERR(hidden_dentry))
++ err = PTR_ERR(hidden_dentry);
++
++ printk(KERN_DEBUG "unionfs: hidden dentry "
++ "NULL (or error) for bindex = %d\n",
++ bindex);
++ continue;
++ }
++ }
++
++ hidden_dir_dentry = lock_parent(hidden_dentry);
++
++ if (!(err = is_robranch_super(dentry->d_sb, bindex))) {
++ mode = S_IALLUGO;
++ err =
++ vfs_symlink(hidden_dir_dentry->d_inode,
++ hidden_dentry, symname, mode);
++ }
++ unlock_dir(hidden_dir_dentry);
++
++ if (err || !hidden_dentry->d_inode) {
++ /*
++ * break out of for loop if error returned was NOT
++ * -EROFS.
++ */
++ if (!IS_COPYUP_ERR(err))
++ break;
++ } else {
++ err = unionfs_interpose(dentry, dir->i_sb, 0);
++ if (!err) {
++ fsstack_copy_attr_times(dir,
++ hidden_dir_dentry->
++ d_inode);
++ fsstack_copy_inode_size(dir,
++ hidden_dir_dentry->
++ d_inode);
++ /*
++ * update number of links on parent
++ * directory.
++ */
++ dir->i_nlink = unionfs_get_nlinks(dir);
++ }
++ break;
++ }
++ }
++
++out:
++ if (!dentry->d_inode)
++ d_drop(dentry);
++
++ kfree(name);
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL;
++ struct dentry *hidden_parent_dentry = NULL;
++ int bindex = 0, bstart;
++ char *name = NULL;
++ int whiteout_unlinked = 0;
++ struct sioq_args args;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (dentry->d_inode &&
++ !__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ bstart = dbstart(dentry);
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ /*
++ * check if whiteout exists in this branch, i.e. lookup .wh.foo
++ * first.
++ */
++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(whiteout_dentry)) {
++ err = PTR_ERR(whiteout_dentry);
++ goto out;
++ }
++
++ if (!whiteout_dentry->d_inode) {
++ dput(whiteout_dentry);
++ whiteout_dentry = NULL;
++ } else {
++ hidden_parent_dentry = lock_parent(whiteout_dentry);
++
++ /* found a .wh.foo entry, remove it, then do vfs_mkdir */
++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
++ args.unlink.parent = hidden_parent_dentry->d_inode;
++ args.unlink.dentry = whiteout_dentry;
++ run_sioq(__unionfs_unlink, &args);
++ err = args.err;
++ }
++ dput(whiteout_dentry);
++
++ unlock_dir(hidden_parent_dentry);
++
++ if (err) {
++ /* exit if the error returned was NOT -EROFS */
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++ bstart--;
++ } else
++ whiteout_unlinked = 1;
++ }
++
++ for (bindex = bstart; bindex >= 0; bindex--) {
++ int i;
++ int bend = dbend(dentry);
++
++ if (is_robranch_super(dentry->d_sb, bindex))
++ continue;
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry) {
++ hidden_dentry = create_parents(parent, dentry, bindex);
++ if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++ printk(KERN_DEBUG "unionfs: hidden dentry "
++ " NULL for bindex = %d\n", bindex);
++ continue;
++ }
++ }
++
++ hidden_parent_dentry = lock_parent(hidden_dentry);
++
++ if (IS_ERR(hidden_parent_dentry)) {
++ err = PTR_ERR(hidden_parent_dentry);
++ goto out;
++ }
++
++ err = vfs_mkdir(hidden_parent_dentry->d_inode, hidden_dentry,
++ mode);
++
++ unlock_dir(hidden_parent_dentry);
++
++ /* did the mkdir succeed? */
++ if (err)
++ break;
++
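++ /*
++ * The new directory will be made opaque below, so drop
++ * any lower dentries to the right of this branch and
++ * shrink bend accordingly.
++ */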
++ for (i = bindex + 1; i < bend; i++) {
++ if (unionfs_lower_dentry_idx(dentry, i)) {
++ dput(unionfs_lower_dentry_idx(dentry, i));
++ unionfs_set_lower_dentry_idx(dentry, i, NULL);
++ }
++ }
++ set_dbend(dentry, bindex);
++
++ err = unionfs_interpose(dentry, parent->i_sb, 0);
++ if (!err) {
++ fsstack_copy_attr_times(parent,
++ hidden_parent_dentry->d_inode);
++ fsstack_copy_inode_size(parent,
++ hidden_parent_dentry->d_inode);
++
++ /* update number of links on parent directory */
++ parent->i_nlink = unionfs_get_nlinks(parent);
++ }
++
++ err = make_dir_opaque(dentry, dbstart(dentry));
++ if (err) {
++ printk(KERN_ERR "unionfs: mkdir: error creating "
++ ".wh.__dir_opaque: %d\n", err);
++ goto out;
++ }
++
++ /* we are done! */
++ break;
++ }
++
++out:
++ if (!dentry->d_inode)
++ d_drop(dentry);
++
++ kfree(name);
++
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
++ dev_t dev)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL;
++ struct dentry *hidden_parent_dentry = NULL;
++ int bindex = 0, bstart;
++ char *name = NULL;
++ int whiteout_unlinked = 0;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (dentry->d_inode &&
++ !__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ bstart = dbstart(dentry);
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ /*
++ * check if whiteout exists in this branch, i.e. lookup .wh.foo
++ * first.
++ */
++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(whiteout_dentry)) {
++ err = PTR_ERR(whiteout_dentry);
++ goto out;
++ }
++
++ if (!whiteout_dentry->d_inode) {
++ dput(whiteout_dentry);
++ whiteout_dentry = NULL;
++ } else {
++ /* found .wh.foo, unlink it */
++ hidden_parent_dentry = lock_parent(whiteout_dentry);
++
++ /* found a .wh.foo entry, remove it, then do vfs_mknod */
++ if (!(err = is_robranch_super(dentry->d_sb, bstart)))
++ err = vfs_unlink(hidden_parent_dentry->d_inode,
++ whiteout_dentry);
++ dput(whiteout_dentry);
++
++ unlock_dir(hidden_parent_dentry);
++
++ if (err) {
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++
++ bstart--;
++ } else
++ whiteout_unlinked = 1;
++ }
++
++ for (bindex = bstart; bindex >= 0; bindex--) {
++ if (is_robranch_super(dentry->d_sb, bindex))
++ continue;
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry) {
++ hidden_dentry = create_parents(dir, dentry, bindex);
++ if (IS_ERR(hidden_dentry)) {
++ printk(KERN_DEBUG "unionfs: failed to create "
++ "parents on %d, err = %ld\n",
++ bindex, PTR_ERR(hidden_dentry));
++ continue;
++ }
++ }
++
++ hidden_parent_dentry = lock_parent(hidden_dentry);
++ if (IS_ERR(hidden_parent_dentry)) {
++ err = PTR_ERR(hidden_parent_dentry);
++ goto out;
++ }
++
++ err = vfs_mknod(hidden_parent_dentry->d_inode,
++ hidden_dentry, mode, dev);
++
++ if (err) {
++ unlock_dir(hidden_parent_dentry);
++ break;
++ }
++
++ err = unionfs_interpose(dentry, dir->i_sb, 0);
++ if (!err) {
++ fsstack_copy_attr_times(dir,
++ hidden_parent_dentry->d_inode);
++ fsstack_copy_inode_size(dir,
++ hidden_parent_dentry->d_inode);
++ /* update number of links on parent directory */
++ dir->i_nlink = unionfs_get_nlinks(dir);
++ }
++ unlock_dir(hidden_parent_dentry);
++
++ break;
++ }
++
++out:
++ if (!dentry->d_inode)
++ d_drop(dentry);
++
++ kfree(name);
++
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++static int unionfs_readlink(struct dentry *dentry, char __user *buf,
++ int bufsiz)
++{
++ int err;
++ struct dentry *hidden_dentry;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ if (!hidden_dentry->d_inode->i_op ||
++ !hidden_dentry->d_inode->i_op->readlink) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ err = hidden_dentry->d_inode->i_op->readlink(hidden_dentry,
++ buf, bufsiz);
++ if (err > 0)
++ fsstack_copy_attr_atime(dentry->d_inode,
++ hidden_dentry->d_inode);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++/*
++ * Check if dentry is valid or not, as per our generation numbers.
++ * @dentry: dentry to check.
++ * Returns 1 (valid) or 0 (invalid/stale).
++ */
++static inline int is_valid_dentry(struct dentry *dentry)
++{
++ BUG_ON(!UNIONFS_D(dentry));
++ BUG_ON(!UNIONFS_SB(dentry->d_sb));
++ return (atomic_read(&UNIONFS_D(dentry)->generation) ==
++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
++}
++
++/* We don't lock the dentry here, because readlink does the heavy lifting. */
++static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++ char *buf;
++ int len = PAGE_SIZE, err;
++ mm_segment_t old_fs;
++
++ /*
++ * FIXME: Really nasty...we can get called from two distinct places:
++ * 1) read_link - locks the dentry
++ * 2) VFS lookup code - does NOT lock the dentry
++ *
++ * The proper thing would be to call dentry revalidate. It however
++ * expects a locked dentry, and we can't cleanly guarantee that.
++ */
++ BUG_ON(!is_valid_dentry(dentry));
++
++ unionfs_read_lock(dentry->d_sb);
++
++ /* This is freed by the put_link method assuming a successful call. */
++ buf = kmalloc(len, GFP_KERNEL);
++ if (!buf) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ /* read the symlink, and then we will follow it */
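++ /*
++ * ->readlink expects a __user pointer; set_fs(KERNEL_DS)
++ * temporarily widens the address limit so that our kernel
++ * buffer passes the user-copy checks.
++ */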
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
++ set_fs(old_fs);
++ if (err < 0) {
++ kfree(buf);
++ buf = NULL;
++ goto out;
++ }
++ buf[err] = 0;
++ nd_set_link(nd, buf);
++ err = 0;
++
++out:
++ unionfs_read_unlock(dentry->d_sb);
++ return ERR_PTR(err);
++}
++
++/* FIXME: We may not have to lock here */
++static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
++ void *cookie)
++{
++ unionfs_read_lock(dentry->d_sb);
++ kfree(nd_get_link(nd));
++ unionfs_read_unlock(dentry->d_sb);
++}
++
++/*
++ * Basically copied from the kernel vfs permission(), but we've changed
++ * the following:
++ * (1) the IS_RDONLY check is skipped, and
++ * (2) if you set the mount option `mode=nfsro', we assume that -EACCES
++ * means that the export is read-only and we should check standard Unix
++ * permissions. This means that NFS ACL checks (or other advanced
++ * permission features) are bypassed. Note, however, that we do call
++ * security_inode_permission, so security checks inside SELinux, etc.,
++ * are still performed.
++ */
++static int inode_permission(struct inode *inode, int mask,
++ struct nameidata *nd, int bindex)
++{
++ int retval, submask;
++
++ if (mask & MAY_WRITE) {
++ /* The first branch is allowed to be really readonly. */
++ if (bindex == 0) {
++ umode_t mode = inode->i_mode;
++ if (IS_RDONLY(inode) &&
++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
++ return -EROFS;
++ }
++ /*
++ * Nobody gets write access to an immutable file.
++ */
++ if (IS_IMMUTABLE(inode))
++ return -EACCES;
++ }
++
++ /* Ordinary permission routines do not understand MAY_APPEND. */
++ submask = mask & ~MAY_APPEND;
++ if (inode->i_op && inode->i_op->permission) {
++ retval = inode->i_op->permission(inode, submask, nd);
++ if ((retval == -EACCES) && (submask & MAY_WRITE) &&
++ (!strcmp("nfs", (inode)->i_sb->s_type->name)) &&
++ (nd) && (nd->mnt) && (nd->mnt->mnt_sb)) {
++ int perms;
++ perms = branchperms(nd->mnt->mnt_sb, bindex);
++ if (perms & MAY_NFSRO)
++ retval = generic_permission(inode, submask,
++ NULL);
++ }
++ } else
++ retval = generic_permission(inode, submask, NULL);
++
++ if (retval && retval != -EROFS) /* ignore EROFS */
++ return retval;
++
++ retval = security_inode_permission(inode, mask, nd);
++ return ((retval == -EROFS) ? 0 : retval); /* ignore EROFS */
++}
++
++static int unionfs_permission(struct inode *inode, int mask,
++ struct nameidata *nd)
++{
++ struct inode *hidden_inode = NULL;
++ int err = 0;
++ int bindex, bstart, bend;
++ const int is_file = !S_ISDIR(inode->i_mode);
++ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
++
++ unionfs_read_lock(inode->i_sb);
++
++ bstart = ibstart(inode);
++ bend = ibend(inode);
++ if (bstart < 0 || bend < 0) {
++ /*
++ * With branch-management, we can get a stale inode here.
++ * If so, we return ESTALE back to link_path_walk, which
++ * would discard the dcache entry and re-lookup the
++ * dentry+inode. This should be equivalent to issuing
++ * __unionfs_d_revalidate_chain on nd.dentry here.
++ */
++ err = -ESTALE; /* force revalidate */
++ goto out;
++ }
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++ if (!hidden_inode)
++ continue;
++
++ /*
++ * Check the condition for D-F-D underlying files/directories;
++ * we don't have to check for files if we are checking for
++ * directories.
++ */
++ if (!is_file && !S_ISDIR(hidden_inode->i_mode))
++ continue;
++
++ /*
++ * We use our own special version of permission, such that
++ * only the first branch returns -EROFS.
++ */
++ err = inode_permission(hidden_inode, mask, nd, bindex);
++
++ /*
++ * The permissions are an intersection of the overall directory
++ * permissions, so we fail if one fails.
++ */
++ if (err)
++ goto out;
++
++ /* only the leftmost file matters. */
++ if (is_file || write_mask) {
++ if (is_file && write_mask) {
++ err = get_write_access(hidden_inode);
++ if (!err)
++ put_write_access(hidden_inode);
++ }
++ break;
++ }
++ }
++
++out:
++ unionfs_read_unlock(inode->i_sb);
++ return err;
++}
++
++static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
++{
++ int err = 0;
++ struct dentry *hidden_dentry;
++ struct inode *inode = NULL;
++ struct inode *hidden_inode = NULL;
++ int bstart, bend, bindex;
++ int i;
++ int copyup = 0;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ inode = dentry->d_inode;
++
++ for (bindex = bstart; (bindex <= bend) || (bindex == bstart);
++ bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++ BUG_ON(hidden_dentry->d_inode == NULL);
++
++ /* If the file is on a read only branch */
++ if (is_robranch_super(dentry->d_sb, bindex)
++ || IS_RDONLY(hidden_dentry->d_inode)) {
++ if (copyup || (bindex != bstart))
++ continue;
++ /* Only if it's the leftmost file, copy up the file */
++ for (i = bstart - 1; i >= 0; i--) {
++ loff_t size = dentry->d_inode->i_size;
++ if (ia->ia_valid & ATTR_SIZE)
++ size = ia->ia_size;
++ err = copyup_dentry(dentry->d_parent->d_inode,
++ dentry, bstart, i, NULL,
++ size);
++
++ if (!err) {
++ copyup = 1;
++ hidden_dentry =
++ unionfs_lower_dentry(dentry);
++ break;
++ }
++ /*
++ * if the error is in the leftmost branch,
++ * pass it up.
++ */
++ if (i == 0)
++ goto out;
++ }
++
++ }
++ err = notify_change(hidden_dentry, ia);
++ if (err)
++ goto out;
++ break;
++ }
++
++ /* for mmap */
++ if (ia->ia_valid & ATTR_SIZE) {
++ if (ia->ia_size != i_size_read(inode)) {
++ err = vmtruncate(inode, ia->ia_size);
++ if (err)
++ printk("unionfs_setattr: vmtruncate failed\n");
++ }
++ }
++
++ /* get the size from the first hidden inode */
++ hidden_inode = unionfs_lower_inode(dentry->d_inode);
++ fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
++ fsstack_copy_inode_size(inode, hidden_inode);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++struct inode_operations unionfs_symlink_iops = {
++ .readlink = unionfs_readlink,
++ .permission = unionfs_permission,
++ .follow_link = unionfs_follow_link,
++ .setattr = unionfs_setattr,
++ .put_link = unionfs_put_link,
++};
++
++struct inode_operations unionfs_dir_iops = {
++ .create = unionfs_create,
++ .lookup = unionfs_lookup,
++ .link = unionfs_link,
++ .unlink = unionfs_unlink,
++ .symlink = unionfs_symlink,
++ .mkdir = unionfs_mkdir,
++ .rmdir = unionfs_rmdir,
++ .mknod = unionfs_mknod,
++ .rename = unionfs_rename,
++ .permission = unionfs_permission,
++ .setattr = unionfs_setattr,
++#ifdef CONFIG_UNION_FS_XATTR
++ .setxattr = unionfs_setxattr,
++ .getxattr = unionfs_getxattr,
++ .removexattr = unionfs_removexattr,
++ .listxattr = unionfs_listxattr,
++#endif
++};
++
++struct inode_operations unionfs_main_iops = {
++ .permission = unionfs_permission,
++ .setattr = unionfs_setattr,
++#ifdef CONFIG_UNION_FS_XATTR
++ .setxattr = unionfs_setxattr,
++ .getxattr = unionfs_getxattr,
++ .removexattr = unionfs_removexattr,
++ .listxattr = unionfs_listxattr,
++#endif
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/lookup.c linux-2.6.22-591/fs/unionfs/lookup.c
+--- linux-2.6.22-570/fs/unionfs/lookup.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/lookup.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,549 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* a filename is valid iff it is neither a whiteout name nor an opaque dir marker */
++static int is_validname(const char *name)
++{
++ if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN))
++ return 0;
++ if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
++ sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1))
++ return 0;
++ return 1;
++}
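++
++/*
++ * Example: a lookup of "foo" must never instantiate a unionfs dentry
++ * for its whiteout name ".wh.foo" (UNIONFS_WHPFX + name), nor for the
++ * per-directory opaque marker (UNIONFS_DIR_OPAQUE_NAME).
++ */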
++
++/* The rest of these are utility functions for lookup. */
++static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
++{
++ int err = 0;
++ struct dentry *hidden_dentry;
++ struct dentry *wh_hidden_dentry;
++ struct inode *hidden_inode;
++ struct sioq_args args;
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ hidden_inode = hidden_dentry->d_inode;
++
++ BUG_ON(!S_ISDIR(hidden_inode->i_mode));
++
++ mutex_lock(&hidden_inode->i_mutex);
++
++ if (!permission(hidden_inode, MAY_EXEC, NULL))
++ wh_hidden_dentry =
++ lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry,
++ sizeof(UNIONFS_DIR_OPAQUE) - 1);
++ else {
++ args.is_opaque.dentry = hidden_dentry;
++ run_sioq(__is_opaque_dir, &args);
++ wh_hidden_dentry = args.ret;
++ }
++
++ mutex_unlock(&hidden_inode->i_mutex);
++
++ if (IS_ERR(wh_hidden_dentry)) {
++ err = PTR_ERR(wh_hidden_dentry);
++ goto out;
++ }
++
++ /* This is an opaque dir iff wh_hidden_dentry is positive */
++ err = !!wh_hidden_dentry->d_inode;
++
++ dput(wh_hidden_dentry);
++out:
++ return err;
++}
++
++/* main (and complex) driver function for Unionfs's lookup */
++struct dentry *unionfs_lookup_backend(struct dentry *dentry,
++ struct nameidata *nd, int lookupmode)
++{
++ int err = 0;
++ struct dentry *hidden_dentry = NULL;
++ struct dentry *wh_hidden_dentry = NULL;
++ struct dentry *hidden_dir_dentry = NULL;
++ struct dentry *parent_dentry = NULL;
++ int bindex, bstart, bend, bopaque;
++ int dentry_count = 0; /* Number of positive dentries. */
++ int first_dentry_offset = -1; /* -1 is uninitialized */
++ struct dentry *first_dentry = NULL;
++ struct dentry *first_hidden_dentry = NULL;
++ struct vfsmount *first_hidden_mnt = NULL;
++ int locked_parent = 0;
++ int locked_child = 0;
++ int allocated_new_info = 0;
++
++ int opaque;
++ char *whname = NULL;
++ const char *name;
++ int namelen;
++
++ /*
++ * We should already have a lock on this dentry in the case of a
++ * partial lookup, or a revalidation. Otherwise it is returned from
++ * new_dentry_private_data already locked.
++ */
++ if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
++ lookupmode == INTERPOSE_REVAL_NEG)
++ verify_locked(dentry);
++ else {
++ BUG_ON(UNIONFS_D(dentry) != NULL);
++ locked_child = 1;
++ }
++
++ switch (lookupmode) {
++ case INTERPOSE_PARTIAL:
++ break;
++ case INTERPOSE_LOOKUP:
++ if ((err = new_dentry_private_data(dentry)))
++ goto out;
++ allocated_new_info = 1;
++ break;
++ default:
++ if ((err = realloc_dentry_private_data(dentry)))
++ goto out;
++ allocated_new_info = 1;
++ break;
++ }
++
++ /* must initialize dentry operations */
++ dentry->d_op = &unionfs_dops;
++
++ parent_dentry = dget_parent(dentry);
++ /* We never partially look up the root directory. */
++ if (parent_dentry != dentry) {
++ unionfs_lock_dentry(parent_dentry);
++ locked_parent = 1;
++ } else {
++ dput(parent_dentry);
++ parent_dentry = NULL;
++ goto out;
++ }
++
++ name = dentry->d_name.name;
++ namelen = dentry->d_name.len;
++
++ /* No dentries should get created for possible whiteout names. */
++ if (!is_validname(name)) {
++ err = -EPERM;
++ goto out_free;
++ }
++
++ /* Now start the actual lookup procedure. */
++ bstart = dbstart(parent_dentry);
++ bend = dbend(parent_dentry);
++ bopaque = dbopaque(parent_dentry);
++ BUG_ON(bstart < 0);
++
++ /*
++ * It would be ideal if we could convert partial lookups to only have
++ * to do this work when they really need to. It could probably improve
++ * performance quite a bit, and maybe simplify the rest of the code.
++ */
++ if (lookupmode == INTERPOSE_PARTIAL) {
++ bstart++;
++ if ((bopaque != -1) && (bopaque < bend))
++ bend = bopaque;
++ }
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (lookupmode == INTERPOSE_PARTIAL && hidden_dentry)
++ continue;
++ BUG_ON(hidden_dentry != NULL);
++
++ hidden_dir_dentry =
++ unionfs_lower_dentry_idx(parent_dentry, bindex);
++
++ /* if the parent hidden dentry does not exist skip this */
++ if (!(hidden_dir_dentry && hidden_dir_dentry->d_inode))
++ continue;
++
++ /* also skip it if the parent isn't a directory. */
++ if (!S_ISDIR(hidden_dir_dentry->d_inode->i_mode))
++ continue;
++
++ /* Reuse the whiteout name because its value doesn't change. */
++ if (!whname) {
++ whname = alloc_whname(name, namelen);
++ if (IS_ERR(whname)) {
++ err = PTR_ERR(whname);
++ goto out_free;
++ }
++ }
++
++ /* check if whiteout exists in this branch: lookup .wh.foo */
++ wh_hidden_dentry = lookup_one_len(whname, hidden_dir_dentry,
++ namelen + UNIONFS_WHLEN);
++ if (IS_ERR(wh_hidden_dentry)) {
++ dput(first_hidden_dentry);
++ unionfs_mntput(first_dentry, first_dentry_offset);
++ err = PTR_ERR(wh_hidden_dentry);
++ goto out_free;
++ }
++
++ if (wh_hidden_dentry->d_inode) {
++ /* We found a whiteout, so let's give up. */
++ if (S_ISREG(wh_hidden_dentry->d_inode->i_mode)) {
++ set_dbend(dentry, bindex);
++ set_dbopaque(dentry, bindex);
++ dput(wh_hidden_dentry);
++ break;
++ }
++ err = -EIO;
++ printk(KERN_NOTICE "unionfs: EIO: invalid whiteout "
++ "entry type %d.\n",
++ wh_hidden_dentry->d_inode->i_mode);
++ dput(wh_hidden_dentry);
++ dput(first_hidden_dentry);
++ unionfs_mntput(first_dentry, first_dentry_offset);
++ goto out_free;
++ }
++
++ dput(wh_hidden_dentry);
++ wh_hidden_dentry = NULL;
++
++ /* Now do regular lookup; lookup foo */
++ nd->dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ /* FIXME: fix following line for mount point crossing */
++ nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
++
++ hidden_dentry = lookup_one_len_nd(name, hidden_dir_dentry,
++ namelen, nd);
++ if (IS_ERR(hidden_dentry)) {
++ dput(first_hidden_dentry);
++ unionfs_mntput(first_dentry, first_dentry_offset);
++ err = PTR_ERR(hidden_dentry);
++ goto out_free;
++ }
++
++ /*
++ * Store the first negative dentry specially, because if they
++ * are all negative we need this for future creates.
++ */
++ if (!hidden_dentry->d_inode) {
++ if (!first_hidden_dentry && (dbstart(dentry) == -1)) {
++ first_hidden_dentry = hidden_dentry;
++ /*
++ * FIXME: following line needs to be changed
++ * to allow mount-point crossing
++ */
++ first_dentry = parent_dentry;
++ first_hidden_mnt =
++ unionfs_mntget(parent_dentry, bindex);
++ first_dentry_offset = bindex;
++ } else
++ dput(hidden_dentry);
++
++ continue;
++ }
++
++ /* number of positive dentries */
++ dentry_count++;
++
++ /* store underlying dentry */
++ if (dbstart(dentry) == -1)
++ set_dbstart(dentry, bindex);
++ unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry);
++ /*
++ * FIXME: the following line needs to get fixed to allow
++ * mount-point crossing
++ */
++ unionfs_set_lower_mnt_idx(dentry, bindex,
++ unionfs_mntget(parent_dentry,
++ bindex));
++ set_dbend(dentry, bindex);
++
++ /* update parent directory's atime with the bindex */
++ fsstack_copy_attr_atime(parent_dentry->d_inode,
++ hidden_dir_dentry->d_inode);
++
++ /* We terminate file lookups here. */
++ if (!S_ISDIR(hidden_dentry->d_inode->i_mode)) {
++ if (lookupmode == INTERPOSE_PARTIAL)
++ continue;
++ if (dentry_count == 1)
++ goto out_positive;
++ /* This can only happen with mixed D-*-F-* */
++ BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
++ d_inode->i_mode));
++ continue;
++ }
++
++ opaque = is_opaque_dir(dentry, bindex);
++ if (opaque < 0) {
++ dput(first_hidden_dentry);
++ unionfs_mntput(first_dentry, first_dentry_offset);
++ err = opaque;
++ goto out_free;
++ } else if (opaque) {
++ set_dbend(dentry, bindex);
++ set_dbopaque(dentry, bindex);
++ break;
++ }
++ }
++
++ if (dentry_count)
++ goto out_positive;
++ else
++ goto out_negative;
++
++out_negative:
++ if (lookupmode == INTERPOSE_PARTIAL)
++ goto out;
++
++ /* If we've only got negative dentries, then use the leftmost one. */
++ if (lookupmode == INTERPOSE_REVAL) {
++ if (dentry->d_inode)
++ UNIONFS_I(dentry->d_inode)->stale = 1;
++
++ goto out;
++ }
++ /* This should only happen if we found a whiteout. */
++ if (first_dentry_offset == -1) {
++ nd->dentry = dentry;
++ /* FIXME: fix following line for mount point crossing */
++ nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
++
++ first_hidden_dentry =
++ lookup_one_len_nd(name, hidden_dir_dentry,
++ namelen, nd);
++ first_dentry_offset = bindex;
++ if (IS_ERR(first_hidden_dentry)) {
++ err = PTR_ERR(first_hidden_dentry);
++ goto out;
++ }
++
++ /*
++ * FIXME: the following line needs to be changed to allow
++ * mount-point crossing
++ */
++ first_dentry = dentry;
++ first_hidden_mnt = unionfs_mntget(dentry, bindex);
++ }
++ unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
++ first_hidden_dentry);
++ unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
++ first_hidden_mnt);
++ set_dbstart(dentry, first_dentry_offset);
++ set_dbend(dentry, first_dentry_offset);
++
++ if (lookupmode == INTERPOSE_REVAL_NEG)
++ BUG_ON(dentry->d_inode != NULL);
++ else
++ d_add(dentry, NULL);
++ goto out;
++
++/* This part of the code is for positive dentries. */
++out_positive:
++ BUG_ON(dentry_count <= 0);
++
++ /*
++ * If we're holding onto the first negative dentry & corresponding
++ * vfsmount, throw it out.
++ */
++ dput(first_hidden_dentry);
++ unionfs_mntput(first_dentry, first_dentry_offset);
++
++ /* Partial lookups need to re-interpose, or throw away older negs. */
++ if (lookupmode == INTERPOSE_PARTIAL) {
++ if (dentry->d_inode) {
++ unionfs_reinterpose(dentry);
++ goto out;
++ }
++
++ /*
++ * This somehow turned positive, so it is as if we had a
++ * negative revalidation.
++ */
++ lookupmode = INTERPOSE_REVAL_NEG;
++
++ update_bstart(dentry);
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ }
++
++ err = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
++ if (err)
++ goto out_drop;
++
++ goto out;
++
++out_drop:
++ d_drop(dentry);
++
++out_free:
++ /* we should dput all the underlying dentries on an error condition */
++ bstart = dbstart(dentry);
++ if (bstart >= 0) {
++ bend = dbend(dentry);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ dput(unionfs_lower_dentry_idx(dentry, bindex));
++ unionfs_mntput(dentry, bindex);
++ }
++ }
++ kfree(UNIONFS_D(dentry)->lower_paths);
++ UNIONFS_D(dentry)->lower_paths = NULL;
++ set_dbstart(dentry, -1);
++ set_dbend(dentry, -1);
++
++out:
++ if (!err && UNIONFS_D(dentry)) {
++ BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
++ BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
++ BUG_ON(dbstart(dentry) < 0);
++ }
++ kfree(whname);
++ if (locked_parent)
++ unionfs_unlock_dentry(parent_dentry);
++ dput(parent_dentry);
++ if (locked_child || (err && allocated_new_info))
++ unionfs_unlock_dentry(dentry);
++ return ERR_PTR(err);
++}
++
++/* This is a utility function that fills in a unionfs dentry */
++int unionfs_partial_lookup(struct dentry *dentry)
++{
++ struct dentry *tmp;
++ struct nameidata nd = { .flags = 0 };
++
++ tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
++ if (!tmp)
++ return 0;
++ if (IS_ERR(tmp))
++ return PTR_ERR(tmp);
++ /* need to change the interface */
++ BUG_ON(tmp != dentry);
++ return -ENOSYS;
++}
++
++/* The dentry cache is just so we have properly sized dentries. */
++static struct kmem_cache *unionfs_dentry_cachep;
++int unionfs_init_dentry_cache(void)
++{
++ unionfs_dentry_cachep =
++ kmem_cache_create("unionfs_dentry",
++ sizeof(struct unionfs_dentry_info),
++ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
++
++ return (unionfs_dentry_cachep ? 0 : -ENOMEM);
++}
++
++void unionfs_destroy_dentry_cache(void)
++{
++ if (unionfs_dentry_cachep)
++ kmem_cache_destroy(unionfs_dentry_cachep);
++}
++
++void free_dentry_private_data(struct dentry *dentry)
++{
++ if (!dentry || !dentry->d_fsdata)
++ return;
++ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
++ dentry->d_fsdata = NULL;
++}
++
++static inline int __realloc_dentry_private_data(struct dentry *dentry)
++{
++ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
++ void *p;
++ int size;
++
++ BUG_ON(!info);
++
++ size = sizeof(struct path) * sbmax(dentry->d_sb);
++ p = krealloc(info->lower_paths, size, GFP_ATOMIC);
++ if (!p)
++ return -ENOMEM;
++
++ info->lower_paths = p;
++
++ info->bstart = -1;
++ info->bend = -1;
++ info->bopaque = -1;
++ info->bcount = sbmax(dentry->d_sb);
++ atomic_set(&info->generation,
++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
++
++ memset(info->lower_paths, 0, size);
++
++ return 0;
++}
++
++/* UNIONFS_D(dentry)->lock must be locked */
++int realloc_dentry_private_data(struct dentry *dentry)
++{
++ if (!__realloc_dentry_private_data(dentry))
++ return 0;
++
++ kfree(UNIONFS_D(dentry)->lower_paths);
++ free_dentry_private_data(dentry);
++ return -ENOMEM;
++}
++
++/* allocate new dentry private data */
++int new_dentry_private_data(struct dentry *dentry)
++{
++ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
++
++ BUG_ON(info);
++
++ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
++ if (!info)
++ return -ENOMEM;
++
++ mutex_init(&info->lock);
++ mutex_lock(&info->lock);
++
++ info->lower_paths = NULL;
++
++ dentry->d_fsdata = info;
++
++ if (!__realloc_dentry_private_data(dentry))
++ return 0;
++
++ mutex_unlock(&info->lock);
++ free_dentry_private_data(dentry);
++ return -ENOMEM;
++}
++
++/*
++ * scan through the lower dentry objects, and set bstart to reflect the
++ * starting branch
++ */
++void update_bstart(struct dentry *dentry)
++{
++ int bindex;
++ int bstart = dbstart(dentry);
++ int bend = dbend(dentry);
++ struct dentry *hidden_dentry;
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++ if (hidden_dentry->d_inode) {
++ set_dbstart(dentry, bindex);
++ break;
++ }
++ dput(hidden_dentry);
++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++ }
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/main.c linux-2.6.22-591/fs/unionfs/main.c
+--- linux-2.6.22-570/fs/unionfs/main.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/main.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,729 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++
++/*
++ * Connect a unionfs dentry/inode with several lower ones. This is
++ * the classic stackable file system "vnode interposition" action.
++ *
++ * @sb: unionfs's super_block
++ */
++int unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag)
++{
++ struct inode *hidden_inode;
++ struct dentry *hidden_dentry;
++ int err = 0;
++ struct inode *inode;
++ int is_negative_dentry = 1;
++ int bindex, bstart, bend;
++
++ verify_locked(dentry);
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++
++ /* Make sure that we didn't get a negative dentry. */
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ if (unionfs_lower_dentry_idx(dentry, bindex) &&
++ unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
++ is_negative_dentry = 0;
++ break;
++ }
++ }
++ BUG_ON(is_negative_dentry);
++
++ /*
++ * We allocate our new inode below, by calling iget.
++ * iget will call our read_inode which will initialize some
++ * of the new inode's fields
++ */
++
++ /*
++ * On revalidate we've already got our own inode and just need
++ * to fix it up.
++ */
++ if (flag == INTERPOSE_REVAL) {
++ inode = dentry->d_inode;
++ UNIONFS_I(inode)->bstart = -1;
++ UNIONFS_I(inode)->bend = -1;
++ atomic_set(&UNIONFS_I(inode)->generation,
++ atomic_read(&UNIONFS_SB(sb)->generation));
++
++ UNIONFS_I(inode)->lower_inodes =
++ kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
++ if (!UNIONFS_I(inode)->lower_inodes) {
++ err = -ENOMEM;
++ goto out;
++ }
++ } else {
++ /* get unique inode number for unionfs */
++ inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO));
++ if (!inode) {
++ err = -EACCES;
++ goto out;
++ }
++
++ if (atomic_read(&inode->i_count) > 1)
++ goto skip;
++ }
++
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry) {
++ unionfs_set_lower_inode_idx(inode, bindex, NULL);
++ continue;
++ }
++
++ /* Initialize the hidden inode to the new hidden inode. */
++ if (!hidden_dentry->d_inode)
++ continue;
++
++ unionfs_set_lower_inode_idx(inode, bindex,
++ igrab(hidden_dentry->d_inode));
++ }
++
++ ibstart(inode) = dbstart(dentry);
++ ibend(inode) = dbend(dentry);
++
++ /* Use attributes from the first branch. */
++ hidden_inode = unionfs_lower_inode(inode);
++
++ /* Use different set of inode ops for symlinks & directories */
++ if (S_ISLNK(hidden_inode->i_mode))
++ inode->i_op = &unionfs_symlink_iops;
++ else if (S_ISDIR(hidden_inode->i_mode))
++ inode->i_op = &unionfs_dir_iops;
++
++ /* Use different set of file ops for directories */
++ if (S_ISDIR(hidden_inode->i_mode))
++ inode->i_fop = &unionfs_dir_fops;
++
++ /* properly initialize special inodes */
++ if (S_ISBLK(hidden_inode->i_mode) || S_ISCHR(hidden_inode->i_mode) ||
++ S_ISFIFO(hidden_inode->i_mode) || S_ISSOCK(hidden_inode->i_mode))
++ init_special_inode(inode, hidden_inode->i_mode,
++ hidden_inode->i_rdev);
++
++ /* all well, copy inode attributes */
++ fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
++ fsstack_copy_inode_size(inode, hidden_inode);
++
++skip:
++ /* only (our) lookup wants to do a d_add */
++ switch (flag) {
++ case INTERPOSE_DEFAULT:
++ case INTERPOSE_REVAL_NEG:
++ d_instantiate(dentry, inode);
++ break;
++ case INTERPOSE_LOOKUP:
++ err = PTR_ERR(d_splice_alias(inode, dentry));
++ break;
++ case INTERPOSE_REVAL:
++ /* Do nothing. */
++ break;
++ default:
++ printk(KERN_ERR "unionfs: invalid interpose flag passed!");
++ BUG();
++ }
++
++out:
++ return err;
++}
++
++/* like interpose above, but for an already existing dentry */
++void unionfs_reinterpose(struct dentry *dentry)
++{
++ struct dentry *hidden_dentry;
++ struct inode *inode;
++ int bindex, bstart, bend;
++
++ verify_locked(dentry);
++
++ /* This is a pre-allocated inode */
++ inode = dentry->d_inode;
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ continue;
++
++ if (!hidden_dentry->d_inode)
++ continue;
++ if (unionfs_lower_inode_idx(inode, bindex))
++ continue;
++ unionfs_set_lower_inode_idx(inode, bindex,
++ igrab(hidden_dentry->d_inode));
++ }
++ ibstart(inode) = dbstart(dentry);
++ ibend(inode) = dbend(dentry);
++}
++
++/*
++ * make sure the branch we just looked up (nd) makes sense:
++ *
++ * 1) we're not trying to stack unionfs on top of unionfs
++ * 2) it exists
++ * 3) it is a directory
++ */
++int check_branch(struct nameidata *nd)
++{
++ if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs"))
++ return -EINVAL;
++ if (!nd->dentry->d_inode)
++ return -ENOENT;
++ if (!S_ISDIR(nd->dentry->d_inode->i_mode))
++ return -ENOTDIR;
++ return 0;
++}
++
++/* checks if two hidden_dentries have overlapping branches */
++static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
++{
++ struct dentry *dent = NULL;
++
++ dent = dent1;
++ while ((dent != dent2) && (dent->d_parent != dent))
++ dent = dent->d_parent;
++
++ if (dent == dent2)
++ return 1;
++
++ dent = dent2;
++ while ((dent != dent1) && (dent->d_parent != dent))
++ dent = dent->d_parent;
++
++ return (dent == dent1);
++}
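++
++/*
++ * Example (hypothetical paths): branches /mnt/a and /mnt/a/b overlap,
++ * because walking /mnt/a/b's d_parent chain eventually reaches /mnt/a.
++ */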
++
++/*
++ * Parse branch mode helper function
++ */
++int __parse_branch_mode(const char *name)
++{
++ if (!name)
++ return 0;
++ if (!strcmp(name, "ro"))
++ return MAY_READ;
++ if (!strcmp(name, "rw"))
++ return (MAY_READ | MAY_WRITE);
++ return 0;
++}
++
++/*
++ * Parse "ro" or "rw" options, but default to "rw" of no mode options
++ * was specified.
++ */
++int parse_branch_mode(const char *name)
++{
++ int perms = __parse_branch_mode(name);
++
++ if (perms == 0)
++ perms = MAY_READ | MAY_WRITE;
++ return perms;
++}
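++
++/*
++ * Thus "b1=ro" yields MAY_READ, while "b1=rw", plain "b1", or an
++ * unrecognized mode string all yield MAY_READ | MAY_WRITE.
++ */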
++
++/*
++ * parse the dirs= mount argument
++ *
++ * We don't need to lock the superblock private data's rwsem, as we get
++ * called only by unionfs_read_super - it is still a long time before anyone
++ * can even get a reference to us.
++ */
++static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
++ *hidden_root_info, char *options)
++{
++ struct nameidata nd;
++ char *name;
++ int err = 0;
++ int branches = 1;
++ int bindex = 0;
++ int i = 0;
++ int j = 0;
++
++ struct dentry *dent1;
++ struct dentry *dent2;
++
++ if (options[0] == '\0') {
++ printk(KERN_WARNING "unionfs: no branches specified\n");
++ err = -EINVAL;
++ goto out;
++ }
++
++ /*
++ * Each colon means we have a separator; this is really just a rough
++ * guess, since strsep will handle empty fields for us.
++ */
++ for (i = 0; options[i]; i++)
++ if (options[i] == ':')
++ branches++;
++
++ /* allocate space for underlying pointers to hidden dentry */
++ UNIONFS_SB(sb)->data =
++ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
++ if (!UNIONFS_SB(sb)->data) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ hidden_root_info->lower_paths =
++ kcalloc(branches, sizeof(struct path), GFP_KERNEL);
++ if (!hidden_root_info->lower_paths) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
++ branches = 0;
++ while ((name = strsep(&options, ":")) != NULL) {
++ int perms;
++ char *mode = strchr(name, '=');
++
++ if (!name || !*name)
++ continue;
++
++ branches++;
++
++ /* strip off '=' if any */
++ if (mode)
++ *mode++ = '\0';
++
++ perms = parse_branch_mode(mode);
++ if (!bindex && !(perms & MAY_WRITE)) {
++ err = -EINVAL;
++ goto out;
++ }
++
++ err = path_lookup(name, LOOKUP_FOLLOW, &nd);
++ if (err) {
++ printk(KERN_WARNING "unionfs: error accessing "
++ "hidden directory '%s' (error %d)\n",
++ name, err);
++ goto out;
++ }
++
++ if ((err = check_branch(&nd))) {
++ printk(KERN_WARNING "unionfs: hidden directory "
++ "'%s' is not a valid branch\n", name);
++ path_release(&nd);
++ goto out;
++ }
++
++ hidden_root_info->lower_paths[bindex].dentry = nd.dentry;
++ hidden_root_info->lower_paths[bindex].mnt = nd.mnt;
++
++ set_branchperms(sb, bindex, perms);
++ set_branch_count(sb, bindex, 0);
++ new_branch_id(sb, bindex);
++
++ if (hidden_root_info->bstart < 0)
++ hidden_root_info->bstart = bindex;
++ hidden_root_info->bend = bindex;
++ bindex++;
++ }
++
++ if (branches == 0) {
++ printk(KERN_WARNING "unionfs: no branches specified\n");
++ err = -EINVAL;
++ goto out;
++ }
++
++ BUG_ON(branches != (hidden_root_info->bend + 1));
++
++ /*
++ * Ensure that no overlaps exist in the branches.
++ *
++ * This test is required because the Linux kernel has no support
++ * currently for ensuring coherency between stackable layers and
++ * branches. If we were to allow overlapping branches, it would be
++ * possible, for example, to delete a file via one branch, which
++ * would not be reflected in another branch. Such incoherency could
++ * lead to inconsistencies and even kernel oopses. Rather than
++ * implement hacks to work around some of these cache-coherency
++ * problems, we prevent branch overlapping, for now. A complete
++ * solution will involve proper kernel/VFS support for cache
++ * coherency, at which time we could safely remove this
++ * branch-overlapping test.
++ */
++ for (i = 0; i < branches; i++) {
++ for (j = i + 1; j < branches; j++) {
++ dent1 = hidden_root_info->lower_paths[i].dentry;
++ dent2 = hidden_root_info->lower_paths[j].dentry;
++
++ if (is_branch_overlap(dent1, dent2)) {
++ printk(KERN_WARNING "unionfs: branches %d and "
++ "%d overlap\n", i, j);
++ err = -EINVAL;
++ goto out;
++ }
++ }
++ }
++
++out:
++ if (err) {
++ for (i = 0; i < branches; i++)
++ if (hidden_root_info->lower_paths[i].dentry) {
++ dput(hidden_root_info->lower_paths[i].dentry);
++ /* initialize: can't use unionfs_mntput here */
++ mntput(hidden_root_info->lower_paths[i].mnt);
++ }
++
++ kfree(hidden_root_info->lower_paths);
++ kfree(UNIONFS_SB(sb)->data);
++
++ /*
++ * MUST clear the pointers to prevent potential double free if
++ * the caller dies later on
++ */
++ hidden_root_info->lower_paths = NULL;
++ UNIONFS_SB(sb)->data = NULL;
++ }
++ return err;
++}
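++
++/*
++ * An illustrative mount invocation exercising the parsing above
++ * (paths are hypothetical):
++ *
++ * mount -t unionfs -o dirs=/writable=rw:/readonly=ro none /union
++ *
++ * Note that branch 0 (the leftmost, highest-priority branch) must be
++ * writable, or parse_dirs_option fails with -EINVAL.
++ */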
++
++/*
++ * Parse mount options. See the manual page for usage instructions.
++ *
++ * Returns the dentry info object describing the lower-level (hidden)
++ * branch directories; we want to mount our stackable file system on
++ * top of those hidden directories.
++ */
++static struct unionfs_dentry_info *unionfs_parse_options(
++ struct super_block *sb,
++ char *options)
++{
++ struct unionfs_dentry_info *hidden_root_info;
++ char *optname;
++ int err = 0;
++ int bindex;
++ int dirsfound = 0;
++
++ /* allocate private data area */
++ err = -ENOMEM;
++ hidden_root_info =
++ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
++ if (!hidden_root_info)
++ goto out_error;
++ hidden_root_info->bstart = -1;
++ hidden_root_info->bend = -1;
++ hidden_root_info->bopaque = -1;
++
++ while ((optname = strsep(&options, ",")) != NULL) {
++ char *optarg;
++ char *endptr;
++ int intval;
++
++ if (!optname || !*optname)
++ continue;
++
++ optarg = strchr(optname, '=');
++ if (optarg)
++ *optarg++ = '\0';
++
++ /*
++ * All of our options take an argument now. Insert ones that
++ * don't, above this check.
++ */
++ if (!optarg) {
++ printk("unionfs: %s requires an argument.\n", optname);
++ err = -EINVAL;
++ goto out_error;
++ }
++
++ if (!strcmp("dirs", optname)) {
++ if (++dirsfound > 1) {
++ printk(KERN_WARNING
++ "unionfs: multiple dirs specified\n");
++ err = -EINVAL;
++ goto out_error;
++ }
++ err = parse_dirs_option(sb, hidden_root_info, optarg);
++ if (err)
++ goto out_error;
++ continue;
++ }
++
++ /* All of these options require an integer argument. */
++ intval = simple_strtoul(optarg, &endptr, 0);
++ if (*endptr) {
++ printk(KERN_WARNING
++ "unionfs: invalid %s option '%s'\n",
++ optname, optarg);
++ err = -EINVAL;
++ goto out_error;
++ }
++
++ err = -EINVAL;
++ printk(KERN_WARNING
++ "unionfs: unrecognized option '%s'\n", optname);
++ goto out_error;
++ }
++ if (dirsfound != 1) {
++ printk(KERN_WARNING "unionfs: dirs option required\n");
++ err = -EINVAL;
++ goto out_error;
++ }
++ goto out;
++
++out_error:
++ if (hidden_root_info && hidden_root_info->lower_paths) {
++ for (bindex = hidden_root_info->bstart;
++ bindex >= 0 && bindex <= hidden_root_info->bend;
++ bindex++) {
++ struct dentry *d;
++ struct vfsmount *m;
++
++ d = hidden_root_info->lower_paths[bindex].dentry;
++ m = hidden_root_info->lower_paths[bindex].mnt;
++
++ dput(d);
++ /* initializing: can't use unionfs_mntput here */
++ mntput(m);
++ }
++ }
++
++ kfree(hidden_root_info->lower_paths);
++ kfree(hidden_root_info);
++
++ kfree(UNIONFS_SB(sb)->data);
++ UNIONFS_SB(sb)->data = NULL;
++
++ hidden_root_info = ERR_PTR(err);
++out:
++ return hidden_root_info;
++}
++
++/*
++ * our custom d_alloc_root work-alike
++ *
++ * we can't use d_alloc_root if we want to use our own interpose function
++ * unchanged, so we simply call our own "fake" d_alloc_root
++ */
++static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
++{
++ struct dentry *ret = NULL;
++
++ if (sb) {
++ static const struct qstr name = { .name = "/", .len = 1 };
++
++ ret = d_alloc(NULL, &name);
++ if (ret) {
++ ret->d_op = &unionfs_dops;
++ ret->d_sb = sb;
++ ret->d_parent = ret;
++ }
++ }
++ return ret;
++}
++
++/*
++ * There is no need to lock the unionfs_super_info's rwsem as there is no
++ * way anyone can have a reference to the superblock at this point in time.
++ */
++static int unionfs_read_super(struct super_block *sb, void *raw_data,
++ int silent)
++{
++ int err = 0;
++
++ struct unionfs_dentry_info *hidden_root_info = NULL;
++ int bindex, bstart, bend;
++
++ if (!raw_data) {
++ printk(KERN_WARNING
++ "unionfs: read_super: missing data argument\n");
++ err = -EINVAL;
++ goto out;
++ }
++
++ /* Allocate superblock private data */
++ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
++ if (!UNIONFS_SB(sb)) {
++ printk(KERN_WARNING "unionfs: read_super: out of memory\n");
++ err = -ENOMEM;
++ goto out;
++ }
++
++ UNIONFS_SB(sb)->bend = -1;
++ atomic_set(&UNIONFS_SB(sb)->generation, 1);
++ init_rwsem(&UNIONFS_SB(sb)->rwsem);
++ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
++
++ hidden_root_info = unionfs_parse_options(sb, raw_data);
++ if (IS_ERR(hidden_root_info)) {
++ printk(KERN_WARNING
++ "unionfs: read_super: error while parsing options "
++ "(err = %ld)\n", PTR_ERR(hidden_root_info));
++ err = PTR_ERR(hidden_root_info);
++ hidden_root_info = NULL;
++ goto out_free;
++ }
++ if (hidden_root_info->bstart == -1) {
++ err = -ENOENT;
++ goto out_free;
++ }
++
++ /* set the hidden superblock field of upper superblock */
++ bstart = hidden_root_info->bstart;
++ BUG_ON(bstart != 0);
++ sbend(sb) = bend = hidden_root_info->bend;
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ struct dentry *d = hidden_root_info->lower_paths[bindex].dentry;
++ unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
++ }
++
++ /* s_maxbytes is taken from the highest-priority (leftmost) branch */
++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
++
++ sb->s_op = &unionfs_sops;
++
++ /* See comment next to the definition of unionfs_d_alloc_root */
++ sb->s_root = unionfs_d_alloc_root(sb);
++ if (!sb->s_root) {
++ err = -ENOMEM;
++ goto out_dput;
++ }
++
++ /* link the upper and lower dentries */
++ sb->s_root->d_fsdata = NULL;
++ if ((err = new_dentry_private_data(sb->s_root)))
++ goto out_freedpd;
++
++ /* Set the hidden dentries for s_root */
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ struct dentry *d;
++ struct vfsmount *m;
++
++ d = hidden_root_info->lower_paths[bindex].dentry;
++ m = hidden_root_info->lower_paths[bindex].mnt;
++
++ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
++ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
++ }
++ set_dbstart(sb->s_root, bstart);
++ set_dbend(sb->s_root, bend);
++
++ /* Set the generation number to one, since this is for the mount. */
++ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
++
++ /* call interpose to create the upper level inode */
++ err = unionfs_interpose(sb->s_root, sb, 0);
++ unionfs_unlock_dentry(sb->s_root);
++ if (!err)
++ goto out;
++ /* else fall through */
++
++out_freedpd:
++ if (UNIONFS_D(sb->s_root)) {
++ kfree(UNIONFS_D(sb->s_root)->lower_paths);
++ free_dentry_private_data(sb->s_root);
++ }
++ dput(sb->s_root);
++
++out_dput:
++ if (hidden_root_info && !IS_ERR(hidden_root_info)) {
++ for (bindex = hidden_root_info->bstart;
++ bindex <= hidden_root_info->bend; bindex++) {
++ struct dentry *d;
++ struct vfsmount *m;
++
++ d = hidden_root_info->lower_paths[bindex].dentry;
++ m = hidden_root_info->lower_paths[bindex].mnt;
++
++ dput(d);
++ /* initializing: can't use unionfs_mntput here */
++ mntput(m);
++ }
++ kfree(hidden_root_info->lower_paths);
++ kfree(hidden_root_info);
++ hidden_root_info = NULL;
++ }
++
++out_free:
++ kfree(UNIONFS_SB(sb)->data);
++ kfree(UNIONFS_SB(sb));
++ sb->s_fs_info = NULL;
++
++out:
++ if (hidden_root_info && !IS_ERR(hidden_root_info)) {
++ kfree(hidden_root_info->lower_paths);
++ kfree(hidden_root_info);
++ }
++ return err;
++}
++
++static int unionfs_get_sb(struct file_system_type *fs_type,
++ int flags, const char *dev_name,
++ void *raw_data, struct vfsmount *mnt)
++{
++ return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
++}
++
++static struct file_system_type unionfs_fs_type = {
++ .owner = THIS_MODULE,
++ .name = "unionfs",
++ .get_sb = unionfs_get_sb,
++ .kill_sb = generic_shutdown_super,
++ .fs_flags = FS_REVAL_DOT,
++};
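++
++/*
++ * FS_REVAL_DOT asks the VFS to also recheck "." and ".." for
++ * staleness, presumably so that even those lookups pass through
++ * unionfs's d_revalidate after branch management bumps the
++ * superblock generation.
++ */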
++
++static int __init init_unionfs_fs(void)
++{
++ int err;
++
++ printk("Registering unionfs " UNIONFS_VERSION "\n");
++
++ if ((err = unionfs_init_filldir_cache()))
++ goto out;
++ if ((err = unionfs_init_inode_cache()))
++ goto out;
++ if ((err = unionfs_init_dentry_cache()))
++ goto out;
++ if ((err = init_sioq()))
++ goto out;
++ err = register_filesystem(&unionfs_fs_type);
++out:
++ if (err) {
++ stop_sioq();
++ unionfs_destroy_filldir_cache();
++ unionfs_destroy_inode_cache();
++ unionfs_destroy_dentry_cache();
++ }
++ return err;
++}
++
++static void __exit exit_unionfs_fs(void)
++{
++ stop_sioq();
++ unionfs_destroy_filldir_cache();
++ unionfs_destroy_inode_cache();
++ unionfs_destroy_dentry_cache();
++ unregister_filesystem(&unionfs_fs_type);
++ printk("Completed unionfs module unload.\n");
++}
++
++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
++ " (http://www.fsl.cs.sunysb.edu)");
++MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
++ " (http://unionfs.filesystems.org)");
++MODULE_LICENSE("GPL");
++
++module_init(init_unionfs_fs);
++module_exit(exit_unionfs_fs);
+diff -Nurb linux-2.6.22-570/fs/unionfs/mmap.c linux-2.6.22-591/fs/unionfs/mmap.c
+--- linux-2.6.22-570/fs/unionfs/mmap.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/mmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,348 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2006 Shaya Potter
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of State University of New York
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Unionfs doesn't implement ->writepages, which is OK with the VFS and
++ * keeps our code simpler and smaller. Nevertheless, somehow, our own
++ * ->writepage must be called so we can sync the upper pages with the lower
++ * pages: otherwise data changed at the upper layer won't get written to the
++ * lower layer.
++ *
++ * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
++ * only, which in turn will call generic_writepages and invoke each of the
++ * lower file system's ->writepage. NFS in particular uses the
++ * wbc->fs_private field in its nfs_writepage, which is set in its
++ * nfs_writepages. So if we don't call the lower nfs_writepages first, then
++ * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
++ * OOPS. If, however, we implement a unionfs_writepages and then we do call
++ * the lower nfs_writepages, then we "lose control" over the pages we're
++ * trying to write to the lower file system: we won't be writing our own
++ * new/modified data from the upper pages to the lower pages, and any
++ * mmap-based changes are lost.
++ *
++ * This is a fundamental cache-coherency problem in Linux. The kernel isn't
++ * able to support such stacking abstractions cleanly. One possible clean
++ * way would be that a lower file system's ->writepage method have some sort
++ * of a callback to validate if any upper pages for the same file+offset
++ * exist and have newer content in them.
++ *
++ * This whole NULL ptr dereference is triggered at the lower file system
++ * (NFS) because the wbc->for_writepages is set to 1. Therefore, to avoid
++ * this NULL pointer dereference, we set this flag to 0 and restore it upon
++ * exit. This probably means that we're slightly less efficient in writing
++ * pages out, doing them one at a time, but at least we avoid the oops until
++ * such day as Linux can better support address_space_ops in a stackable
++ * fashion.
++ */
++int unionfs_writepage(struct page *page, struct writeback_control *wbc)
++{
++ int err = -EIO;
++ struct inode *inode;
++ struct inode *lower_inode;
++ struct page *lower_page;
++ char *kaddr, *lower_kaddr;
++ int saved_for_writepages = wbc->for_writepages;
++
++ inode = page->mapping->host;
++ lower_inode = unionfs_lower_inode(inode);
++
++ /* find lower page (returns a locked page) */
++ lower_page = grab_cache_page(lower_inode->i_mapping, page->index);
++ if (!lower_page)
++ goto out;
++
++ /* map both pages and copy the upper page's data to the lower page */
++ kaddr = kmap(page);
++ lower_kaddr = kmap(lower_page);
++
++ memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE);
++
++ kunmap(page);
++ kunmap(lower_page);
++
++ BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
++
++ /* workaround for some lower file systems: see big comment on top */
++ if (wbc->for_writepages && !wbc->fs_private)
++ wbc->for_writepages = 0;
++
++ /* call lower writepage (expects locked page) */
++ err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
++ wbc->for_writepages = saved_for_writepages; /* restore value */
++
++ /*
++ * update mtime and ctime of lower level file system
++ * unionfs' mtime and ctime are updated by generic_file_write
++ */
++ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
++
++ page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */
++
++ if (err)
++ ClearPageUptodate(page);
++ else
++ SetPageUptodate(page);
++
++out:
++ unlock_page(page);
++ return err;
++}
++
++/*
++ * readpage is called from the generic file-read path and the fault
++ * handler. If your file system uses that generic path for the read
++ * op, it must implement readpage.
++ *
++ * Readpage expects a locked page, and must unlock it.
++ */
++static int unionfs_do_readpage(struct file *file, struct page *page)
++{
++ int err = -EIO;
++ struct dentry *dentry;
++ struct file *lower_file = NULL;
++ struct inode *inode, *lower_inode;
++ char *page_data;
++ struct page *lower_page;
++ char *lower_page_data;
++
++ dentry = file->f_dentry;
++ if (UNIONFS_F(file) == NULL) {
++ err = -ENOENT;
++ goto out_err;
++ }
++
++ lower_file = unionfs_lower_file(file);
++ inode = dentry->d_inode;
++ lower_inode = unionfs_lower_inode(inode);
++
++ lower_page = NULL;
++
++ /* find lower page (returns a locked page) */
++ lower_page = read_cache_page(lower_inode->i_mapping,
++ page->index,
++ (filler_t *) lower_inode->i_mapping->
++ a_ops->readpage, (void *)lower_file);
++
++ if (IS_ERR(lower_page)) {
++ err = PTR_ERR(lower_page);
++ lower_page = NULL;
++ goto out_release;
++ }
++
++ /*
++ * wait for the page data to show up
++ * (signaled by readpage unlocking the page)
++ */
++ wait_on_page_locked(lower_page);
++ if (!PageUptodate(lower_page)) {
++ /*
++ * call readpage() again if we returned from wait_on_page
++ * with a page that's not up-to-date; that can happen when a
++ * partial page has a few buffers which are ok, but not the
++ * whole page.
++ */
++ lock_page(lower_page);
++ err = lower_inode->i_mapping->a_ops->readpage(lower_file,
++ lower_page);
++ if (err) {
++ lower_page = NULL;
++ goto out_release;
++ }
++
++ wait_on_page_locked(lower_page);
++ if (!PageUptodate(lower_page)) {
++ err = -EIO;
++ goto out_release;
++ }
++ }
++
++ /* map pages, get their addresses */
++ page_data = (char *)kmap(page);
++ lower_page_data = (char *)kmap(lower_page);
++
++ memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
++
++ err = 0;
++
++ kunmap(lower_page);
++ kunmap(page);
++
++out_release:
++ if (lower_page)
++ page_cache_release(lower_page); /* undo read_cache_page */
++
++ if (err == 0)
++ SetPageUptodate(page);
++ else
++ ClearPageUptodate(page);
++
++out_err:
++ return err;
++}
++
++int unionfs_readpage(struct file *file, struct page *page)
++{
++ int err;
++
++ unionfs_read_lock(file->f_dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 0)))
++ goto out;
++
++ err = unionfs_do_readpage(file, page);
++
++ if (!err)
++ touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++ unionfs_lower_dentry(file->f_path.dentry));
++
++ /*
++ * we have to unlock our page, b/c we _might_ have gotten a locked
++ * page. But we no longer have to wake up on our page here, b/c
++ * UnlockPage does it
++ */
++out:
++ unlock_page(page);
++ unionfs_read_unlock(file->f_dentry->d_sb);
++
++ return err;
++}
++
++int unionfs_prepare_write(struct file *file, struct page *page, unsigned from,
++ unsigned to)
++{
++ int err;
++
++ unionfs_read_lock(file->f_dentry->d_sb);
++
++ err = unionfs_file_revalidate(file, 1);
++
++ unionfs_read_unlock(file->f_dentry->d_sb);
++
++ return err;
++}
++
++int unionfs_commit_write(struct file *file, struct page *page, unsigned from,
++ unsigned to)
++{
++ int err = -ENOMEM;
++ struct inode *inode, *lower_inode;
++ struct file *lower_file = NULL;
++ loff_t pos;
++ unsigned bytes = to - from;
++ char *page_data = NULL;
++ mm_segment_t old_fs;
++
++ BUG_ON(file == NULL);
++
++ unionfs_read_lock(file->f_dentry->d_sb);
++
++ if ((err = unionfs_file_revalidate(file, 1)))
++ goto out;
++
++ inode = page->mapping->host;
++ lower_inode = unionfs_lower_inode(inode);
++
++ if (UNIONFS_F(file) != NULL)
++ lower_file = unionfs_lower_file(file);
++
++ /* FIXME: is this assertion right here? */
++ BUG_ON(lower_file == NULL);
++
++ page_data = (char *)kmap(page);
++ lower_file->f_pos = (page->index << PAGE_CACHE_SHIFT) + from;
++
++ /* SP: I use vfs_write instead of copying page data and the
++ * prepare_write/commit_write combo because file systems like
++ * GFS/OCFS2 don't like things touching those directly;
++ * calling the underlying write op, while a little bit slower, will
++ * invoke all the FS-specific code as well.
++ */
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = vfs_write(lower_file, page_data + from, bytes,
++ &lower_file->f_pos);
++ set_fs(old_fs);
++
++ kunmap(page);
++
++ if (err < 0)
++ goto out;
++
++ inode->i_blocks = lower_inode->i_blocks;
++ /* we may have to update i_size */
++ pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
++ if (pos > i_size_read(inode))
++ i_size_write(inode, pos);
++
++ /*
++ * update mtime and ctime of lower level file system
++ * unionfs' mtime and ctime are updated by generic_file_write
++ */
++ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
++
++ mark_inode_dirty_sync(inode);
++
++out:
++ if (err < 0)
++ ClearPageUptodate(page);
++
++ unionfs_read_unlock(file->f_dentry->d_sb);
++ return err; /* assume all is ok */
++}
++
++void unionfs_sync_page(struct page *page)
++{
++ struct inode *inode;
++ struct inode *lower_inode;
++ struct page *lower_page;
++ struct address_space *mapping;
++
++ inode = page->mapping->host;
++ lower_inode = unionfs_lower_inode(inode);
++
++ /* find lower page (returns a locked page) */
++ lower_page = grab_cache_page(lower_inode->i_mapping, page->index);
++ if (!lower_page)
++ goto out;
++
++ /* do the actual sync */
++ mapping = lower_page->mapping;
++ /*
++ * XXX: can we optimize ala RAIF and set the lower page to be
++ * discarded after a successful sync_page?
++ */
++ if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
++ mapping->a_ops->sync_page(lower_page);
++
++ unlock_page(lower_page); /* b/c grab_cache_page locked it */
++ page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */
++
++out:
++ return;
++}
++
++struct address_space_operations unionfs_aops = {
++ .writepage = unionfs_writepage,
++ .readpage = unionfs_readpage,
++ .prepare_write = unionfs_prepare_write,
++ .commit_write = unionfs_commit_write,
++ .sync_page = unionfs_sync_page,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/rdstate.c linux-2.6.22-591/fs/unionfs/rdstate.c
+--- linux-2.6.22-570/fs/unionfs/rdstate.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/rdstate.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,282 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* This file contains the routines for maintaining readdir state. */
++
++/*
++ * There are two structures here, rdstate which is a hash table
++ * of the second structure which is a filldir_node.
++ */
++
++/*
++ * This is a struct kmem_cache for filldir nodes, because we allocate a lot
++ * of them and they shouldn't waste memory. If the node has a small name
++ * (as defined by the dentry structure), then we use an inline name to
++ * preserve kmalloc space.
++ */
++static struct kmem_cache *unionfs_filldir_cachep;
++
++int unionfs_init_filldir_cache(void)
++{
++ unionfs_filldir_cachep =
++ kmem_cache_create("unionfs_filldir",
++ sizeof(struct filldir_node), 0,
++ SLAB_RECLAIM_ACCOUNT, NULL, NULL);
++
++ return (unionfs_filldir_cachep ? 0 : -ENOMEM);
++}
++
++void unionfs_destroy_filldir_cache(void)
++{
++ if (unionfs_filldir_cachep)
++ kmem_cache_destroy(unionfs_filldir_cachep);
++}
++
++/*
++ * This is a tuning parameter that tells us roughly how big to make the
++ * hash table in directory entries per page. This isn't perfect, but
++ * at least we get a hash table size that shouldn't be too overloaded.
++ * The following averages are based on my home directory.
++ * 14.44693 Overall
++ * 12.29 Single Page Directories
++ * 117.93 Multi-page directories
++ */
++#define DENTPAGE 4096
++#define DENTPERONEPAGE 12
++#define DENTPERPAGE 118
++#define MINHASHSIZE 1
++static int guesstimate_hash_size(struct inode *inode)
++{
++ struct inode *hidden_inode;
++ int bindex;
++ int hashsize = MINHASHSIZE;
++
++ if (UNIONFS_I(inode)->hashsize > 0)
++ return UNIONFS_I(inode)->hashsize;
++
++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
++ if (!(hidden_inode = unionfs_lower_inode_idx(inode, bindex)))
++ continue;
++
++ if (hidden_inode->i_size == DENTPAGE)
++ hashsize += DENTPERONEPAGE;
++ else
++ hashsize += (hidden_inode->i_size / DENTPAGE) *
++ DENTPERPAGE;
++ }
++
++ return hashsize;
++}
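++
++/*
++ * For example, a lower directory occupying three pages (12288 bytes)
++ * contributes (12288 / 4096) * 118 = 354 entries to the estimate,
++ * while directories smaller than one page contribute nothing, since
++ * the integer division rounds down to zero.
++ */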
++
++int init_rdstate(struct file *file)
++{
++ BUG_ON(sizeof(loff_t) !=
++ (sizeof(unsigned int) + sizeof(unsigned int)));
++ BUG_ON(UNIONFS_F(file)->rdstate != NULL);
++
++ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_dentry->d_inode,
++ fbstart(file));
++
++ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
++}
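++
++/*
++ * The BUG_ON above encodes the assumption that a directory f_pos can
++ * carry the whole readdir state: rdstate2offset() presumably packs the
++ * rdstate's cookie and offset into the two 32-bit halves of a loff_t,
++ * which find_rdstate() below matches against f_pos.
++ */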
++
++struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
++{
++ struct unionfs_dir_state *rdstate = NULL;
++ struct list_head *pos;
++
++ spin_lock(&UNIONFS_I(inode)->rdlock);
++ list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
++ struct unionfs_dir_state *r =
++ list_entry(pos, struct unionfs_dir_state, cache);
++ if (fpos == rdstate2offset(r)) {
++ UNIONFS_I(inode)->rdcount--;
++ list_del(&r->cache);
++ rdstate = r;
++ break;
++ }
++ }
++ spin_unlock(&UNIONFS_I(inode)->rdlock);
++ return rdstate;
++}
++
++struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
++{
++ int i = 0;
++ int hashsize;
++ unsigned long mallocsize = sizeof(struct unionfs_dir_state);
++ struct unionfs_dir_state *rdstate;
++
++ hashsize = guesstimate_hash_size(inode);
++ mallocsize += hashsize * sizeof(struct list_head);
++ mallocsize = __roundup_pow_of_two(mallocsize);
++
++ /* This should give us about 500 entries anyway. */
++ if (mallocsize > PAGE_SIZE)
++ mallocsize = PAGE_SIZE;
++
++ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
++ sizeof(struct list_head);
++
++ rdstate = kmalloc(mallocsize, GFP_KERNEL);
++ if (!rdstate)
++ return NULL;
++
++ spin_lock(&UNIONFS_I(inode)->rdlock);
++ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
++ UNIONFS_I(inode)->cookie = 1;
++ else
++ UNIONFS_I(inode)->cookie++;
++
++ rdstate->cookie = UNIONFS_I(inode)->cookie;
++ spin_unlock(&UNIONFS_I(inode)->rdlock);
++ rdstate->offset = 1;
++ rdstate->access = jiffies;
++ rdstate->bindex = bindex;
++ rdstate->dirpos = 0;
++ rdstate->hashentries = 0;
++ rdstate->size = hashsize;
++ for (i = 0; i < rdstate->size; i++)
++ INIT_LIST_HEAD(&rdstate->list[i]);
++
++ return rdstate;
++}
++
++static void free_filldir_node(struct filldir_node *node)
++{
++ if (node->namelen >= DNAME_INLINE_LEN_MIN)
++ kfree(node->name);
++ kmem_cache_free(unionfs_filldir_cachep, node);
++}
++
++void free_rdstate(struct unionfs_dir_state *state)
++{
++ struct filldir_node *tmp;
++ int i;
++
++ for (i = 0; i < state->size; i++) {
++ struct list_head *head = &(state->list[i]);
++ struct list_head *pos, *n;
++
++ /* traverse the list and deallocate space */
++ list_for_each_safe(pos, n, head) {
++ tmp = list_entry(pos, struct filldir_node, file_list);
++ list_del(&tmp->file_list);
++ free_filldir_node(tmp);
++ }
++ }
++
++ kfree(state);
++}
++
++struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
++ const char *name, int namelen)
++{
++ int index;
++ unsigned int hash;
++ struct list_head *head;
++ struct list_head *pos;
++ struct filldir_node *cursor = NULL;
++ int found = 0;
++
++ BUG_ON(namelen <= 0);
++
++ hash = full_name_hash(name, namelen);
++ index = hash % rdstate->size;
++
++ head = &(rdstate->list[index]);
++ list_for_each(pos, head) {
++ cursor = list_entry(pos, struct filldir_node, file_list);
++
++ if (cursor->namelen == namelen && cursor->hash == hash &&
++ !strncmp(cursor->name, name, namelen)) {
++ /*
++ * a duplicate exists, and hence there is no need to
++ * add another entry to the list
++ */
++ found = 1;
++
++ /*
++ * if the duplicate is in this branch, then the file
++ * system is corrupted.
++ */
++ if (cursor->bindex == rdstate->bindex) {
++ printk(KERN_DEBUG "unionfs: filldir: possible "
++ "I/O error: a file is duplicated "
++ "in the same branch %d: %s\n",
++ rdstate->bindex, cursor->name);
++ }
++ break;
++ }
++ }
++
++ if (!found)
++ cursor = NULL;
++
++ return cursor;
++}
++
++int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
++ int namelen, int bindex, int whiteout)
++{
++ struct filldir_node *new;
++ unsigned int hash;
++ int index;
++ int err = 0;
++ struct list_head *head;
++
++ BUG_ON(namelen <= 0);
++
++ hash = full_name_hash(name, namelen);
++ index = hash % rdstate->size;
++ head = &(rdstate->list[index]);
++
++ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
++ if (!new) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ INIT_LIST_HEAD(&new->file_list);
++ new->namelen = namelen;
++ new->hash = hash;
++ new->bindex = bindex;
++ new->whiteout = whiteout;
++
++ if (namelen < DNAME_INLINE_LEN_MIN)
++ new->name = new->iname;
++ else {
++ new->name = kmalloc(namelen + 1, GFP_KERNEL);
++ if (!new->name) {
++ kmem_cache_free(unionfs_filldir_cachep, new);
++ new = NULL;
++ err = -ENOMEM; /* report the allocation failure, not success */
++ goto out;
++ }
++ }
++
++ memcpy(new->name, name, namelen);
++ new->name[namelen] = '\0';
++
++ rdstate->hashentries++;
++
++ list_add(&(new->file_list), head);
++out:
++ return err;
++}
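++
++/*
++ * Illustrative call pattern (a sketch, not part of this patch): a
++ * unionfs readdir filldir callback would typically combine the two
++ * helpers above to suppress names already produced by a branch further
++ * to the left; 'rdstate', 'bindex', and 'is_wh' stand in for the
++ * caller's state:
++ *
++ *	if (find_filldir_node(rdstate, name, namelen))
++ *		return 0;	(duplicate: seen in an earlier branch)
++ *	err = add_filldir_node(rdstate, name, namelen, bindex, is_wh);
++ */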
+diff -Nurb linux-2.6.22-570/fs/unionfs/rename.c linux-2.6.22-591/fs/unionfs/rename.c
+--- linux-2.6.22-570/fs/unionfs/rename.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/rename.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,477 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry,
++ int bindex, struct dentry **wh_old)
++{
++ int err = 0;
++ struct dentry *hidden_old_dentry;
++ struct dentry *hidden_new_dentry;
++ struct dentry *hidden_old_dir_dentry;
++ struct dentry *hidden_new_dir_dentry;
++ struct dentry *hidden_wh_dentry;
++ struct dentry *hidden_wh_dir_dentry;
++ char *wh_name = NULL;
++
++ hidden_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
++ hidden_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
++
++ if (!hidden_new_dentry) {
++ hidden_new_dentry =
++ create_parents(new_dentry->d_parent->d_inode,
++ new_dentry, bindex);
++ if (IS_ERR(hidden_new_dentry)) {
++ printk(KERN_DEBUG "unionfs: error creating directory "
++ "tree for rename, bindex = %d, err = %ld\n",
++ bindex, PTR_ERR(hidden_new_dentry));
++ err = PTR_ERR(hidden_new_dentry);
++ goto out;
++ }
++ }
++
++ wh_name = alloc_whname(new_dentry->d_name.name,
++ new_dentry->d_name.len);
++ if (IS_ERR(wh_name)) {
++ err = PTR_ERR(wh_name);
++ goto out;
++ }
++
++ hidden_wh_dentry = lookup_one_len(wh_name, hidden_new_dentry->d_parent,
++ new_dentry->d_name.len +
++ UNIONFS_WHLEN);
++ if (IS_ERR(hidden_wh_dentry)) {
++ err = PTR_ERR(hidden_wh_dentry);
++ goto out;
++ }
++
++ if (hidden_wh_dentry->d_inode) {
++ /* get rid of the existing whiteout */
++ if (hidden_new_dentry->d_inode) {
++ printk(KERN_WARNING "unionfs: both a whiteout and a "
++ "dentry exist when doing a rename!\n");
++ err = -EIO;
++
++ dput(hidden_wh_dentry);
++ goto out;
++ }
++
++ hidden_wh_dir_dentry = lock_parent(hidden_wh_dentry);
++ if (!(err = is_robranch_super(old_dentry->d_sb, bindex)))
++ err = vfs_unlink(hidden_wh_dir_dentry->d_inode,
++ hidden_wh_dentry);
++
++ dput(hidden_wh_dentry);
++ unlock_dir(hidden_wh_dir_dentry);
++ if (err)
++ goto out;
++ } else
++ dput(hidden_wh_dentry);
++
++ dget(hidden_old_dentry);
++ hidden_old_dir_dentry = dget_parent(hidden_old_dentry);
++ hidden_new_dir_dentry = dget_parent(hidden_new_dentry);
++
++ lock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++ err = is_robranch_super(old_dentry->d_sb, bindex);
++ if (err)
++ goto out_unlock;
++
++ /*
++ * ready to whiteout for old_dentry. caller will create the actual
++ * whiteout, and must dput(*wh_old)
++ */
++ if (wh_old) {
++ char *whname;
++ whname = alloc_whname(old_dentry->d_name.name,
++ old_dentry->d_name.len);
++ err = PTR_ERR(whname);
++ if (IS_ERR(whname))
++ goto out_unlock;
++ *wh_old = lookup_one_len(whname, hidden_old_dir_dentry,
++ old_dentry->d_name.len +
++ UNIONFS_WHLEN);
++ kfree(whname);
++ err = PTR_ERR(*wh_old);
++ if (IS_ERR(*wh_old)) {
++ *wh_old = NULL;
++ goto out_unlock;
++ }
++ }
++
++ err = vfs_rename(hidden_old_dir_dentry->d_inode, hidden_old_dentry,
++ hidden_new_dir_dentry->d_inode, hidden_new_dentry);
++
++out_unlock:
++ unlock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++ dput(hidden_old_dir_dentry);
++ dput(hidden_new_dir_dentry);
++ dput(hidden_old_dentry);
++
++out:
++ if (!err) {
++ /* Fixup the new_dentry. */
++ if (bindex < dbstart(new_dentry))
++ set_dbstart(new_dentry, bindex);
++ else if (bindex > dbend(new_dentry))
++ set_dbend(new_dentry, bindex);
++ }
++
++ kfree(wh_name);
++
++ return err;
++}
++
++/*
++ * Main rename code. This is sufficiently complex that it is documented in
++ * Documentation/filesystems/unionfs/rename.txt. This routine calls
++ * __unionfs_rename() above to perform some of the work.
++ */
++static int do_unionfs_rename(struct inode *old_dir,
++ struct dentry *old_dentry,
++ struct inode *new_dir,
++ struct dentry *new_dentry)
++{
++ int err = 0;
++ int bindex, bwh_old;
++ int old_bstart, old_bend;
++ int new_bstart, new_bend;
++ int do_copyup = -1;
++ struct dentry *parent_dentry;
++ int local_err = 0;
++ int eio = 0;
++ int revert = 0;
++ struct dentry *wh_old = NULL;
++
++ old_bstart = dbstart(old_dentry);
++ bwh_old = old_bstart;
++ old_bend = dbend(old_dentry);
++ parent_dentry = old_dentry->d_parent;
++
++ new_bstart = dbstart(new_dentry);
++ new_bend = dbend(new_dentry);
++
++ /* Rename source to destination. */
++ err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
++ old_bstart, &wh_old);
++ if (err) {
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++ do_copyup = old_bstart - 1;
++ } else
++ revert = 1;
++
++ /*
++ * Unlink all instances of destination that exist to the left of
++ * bstart of source. On error, revert and goto out.
++ */
++ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
++ struct dentry *unlink_dentry;
++ struct dentry *unlink_dir_dentry;
++
++ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
++ if (!unlink_dentry)
++ continue;
++
++ unlink_dir_dentry = lock_parent(unlink_dentry);
++ if (!(err = is_robranch_super(old_dir->i_sb, bindex)))
++ err = vfs_unlink(unlink_dir_dentry->d_inode,
++ unlink_dentry);
++
++ fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
++ unlink_dir_dentry->d_inode);
++ /* propagate number of hard-links */
++ new_dentry->d_parent->d_inode->i_nlink =
++ unionfs_get_nlinks(new_dentry->d_parent->d_inode);
++
++ unlock_dir(unlink_dir_dentry);
++ if (!err) {
++ if (bindex != new_bstart) {
++ dput(unlink_dentry);
++ unionfs_set_lower_dentry_idx(new_dentry,
++ bindex, NULL);
++ }
++ } else if (IS_COPYUP_ERR(err)) {
++ do_copyup = bindex - 1;
++ } else if (revert) {
++ dput(wh_old);
++ goto revert;
++ }
++ }
++
++ if (do_copyup != -1) {
++ for (bindex = do_copyup; bindex >= 0; bindex--) {
++ /*
++ * copy up the file into a branch to the left, so
++ * that it can be renamed
++ */
++ err = copyup_dentry(old_dentry->d_parent->d_inode,
++ old_dentry, old_bstart, bindex,
++ NULL, old_dentry->d_inode->i_size);
++ if (!err) {
++ dput(wh_old);
++ bwh_old = bindex;
++ err = __unionfs_rename(old_dir, old_dentry,
++ new_dir, new_dentry,
++ bindex, &wh_old);
++ break;
++ }
++ }
++ }
++
++ /* make it opaque */
++ if (S_ISDIR(old_dentry->d_inode->i_mode)) {
++ err = make_dir_opaque(old_dentry, dbstart(old_dentry));
++ if (err)
++ goto revert;
++ }
++
++ /*
++ * Create whiteout for source, only if:
++ * (1) There is more than one underlying instance of source.
++ * (2) We did a copy_up
++ */
++ if ((old_bstart != old_bend) || (do_copyup != -1)) {
++ struct dentry *hidden_parent;
++ BUG_ON(!wh_old || wh_old->d_inode || bwh_old < 0);
++ hidden_parent = lock_parent(wh_old);
++ local_err = vfs_create(hidden_parent->d_inode, wh_old, S_IRUGO,
++ NULL);
++ unlock_dir(hidden_parent);
++ if (!local_err)
++ set_dbopaque(old_dentry, bwh_old);
++ else {
++ /*
++ * we can't fix anything now, so we cop out and
++ * return -EIO.
++ */
++ printk(KERN_ERR "unionfs: can't create a whiteout for "
++ "the source in rename!\n");
++ err = -EIO;
++ }
++ }
++
++out:
++ dput(wh_old);
++ return err;
++
++revert:
++ /* Do revert here. */
++ local_err = unionfs_refresh_hidden_dentry(new_dentry, old_bstart);
++ if (local_err) {
++ printk(KERN_WARNING "unionfs: revert failed in rename: "
++ "the new refresh failed.\n");
++ eio = -EIO;
++ }
++
++ local_err = unionfs_refresh_hidden_dentry(old_dentry, old_bstart);
++ if (local_err) {
++ printk(KERN_WARNING "unionfs: revert failed in rename: "
++ "the old refresh failed.\n");
++ eio = -EIO;
++ goto revert_out;
++ }
++
++ if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
++ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
++ printk(KERN_WARNING "unionfs: revert failed in rename: "
++ "the object disappeared from under us!\n");
++ eio = -EIO;
++ goto revert_out;
++ }
++
++ if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
++ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
++ printk(KERN_WARNING "unionfs: revert failed in rename: "
++ "the object was created underneath us!\n");
++ eio = -EIO;
++ goto revert_out;
++ }
++
++ local_err = __unionfs_rename(new_dir, new_dentry,
++ old_dir, old_dentry, old_bstart, NULL);
++
++ /* If we can't fix it, then we cop out with -EIO. */
++ if (local_err) {
++ printk(KERN_WARNING "unionfs: revert failed in rename!\n");
++ eio = -EIO;
++ }
++
++ local_err = unionfs_refresh_hidden_dentry(new_dentry, bindex);
++ if (local_err)
++ eio = -EIO;
++ local_err = unionfs_refresh_hidden_dentry(old_dentry, bindex);
++ if (local_err)
++ eio = -EIO;
++
++revert_out:
++ if (eio)
++ err = eio;
++ return err;
++}
++
++static struct dentry *lookup_whiteout(struct dentry *dentry)
++{
++ char *whname;
++ int bindex = -1, bstart = -1, bend = -1;
++ struct dentry *parent, *hidden_parent, *wh_dentry;
++
++ whname = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(whname))
++ return (void *)whname;
++
++ parent = dget_parent(dentry);
++ unionfs_lock_dentry(parent);
++ bstart = dbstart(parent);
++ bend = dbend(parent);
++ wh_dentry = ERR_PTR(-ENOENT);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_parent = unionfs_lower_dentry_idx(parent, bindex);
++ if (!hidden_parent)
++ continue;
++ wh_dentry = lookup_one_len(whname, hidden_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(wh_dentry))
++ continue;
++ if (wh_dentry->d_inode)
++ break;
++ dput(wh_dentry);
++ wh_dentry = ERR_PTR(-ENOENT);
++ }
++ unionfs_unlock_dentry(parent);
++ dput(parent);
++ kfree(whname);
++ return wh_dentry;
++}
++
++/*
++ * We can't copy up a directory, because it may involve huge numbers of
++ * children, etc. Doing that in the kernel would be bad, so instead we
++ * return EXDEV to the user-space utility that caused this, and let
++ * user space recurse and ask us to copy up each file separately.
++ */
++static int may_rename_dir(struct dentry *dentry)
++{
++ int err, bstart;
++
++ err = check_empty(dentry, NULL);
++ if (err == -ENOTEMPTY) {
++ if (is_robranch(dentry))
++ return -EXDEV;
++ } else if (err)
++ return err;
++
++ bstart = dbstart(dentry);
++ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
++ return 0;
++
++ set_dbstart(dentry, bstart + 1);
++ err = check_empty(dentry, NULL);
++ set_dbstart(dentry, bstart);
++ if (err == -ENOTEMPTY)
++ err = -EXDEV;
++ return err;
++}
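++
++/*
++ * Aside on the -EXDEV above: rename(2) then fails with EXDEV
++ * ("cross-device link"), and standard tools such as mv(1) fall back to
++ * a recursive copy followed by unlink. That re-enters unionfs one
++ * object at a time, so each regular file can be copied up normally.
++ */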
++
++int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry)
++{
++ int err = 0;
++ struct dentry *wh_dentry;
++
++ unionfs_read_lock(old_dentry->d_sb);
++ unionfs_double_lock_dentry(old_dentry, new_dentry);
++
++ if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++ if (!d_deleted(new_dentry) && new_dentry->d_inode &&
++ !__unionfs_d_revalidate_chain(new_dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ if (!S_ISDIR(old_dentry->d_inode->i_mode))
++ err = unionfs_partial_lookup(old_dentry);
++ else
++ err = may_rename_dir(old_dentry);
++
++ if (err)
++ goto out;
++
++ err = unionfs_partial_lookup(new_dentry);
++ if (err)
++ goto out;
++
++ /*
++ * if new_dentry is already hidden because of a whiteout, simply
++ * override it, even if the whited-out directory is not empty.
++ */
++ wh_dentry = lookup_whiteout(new_dentry);
++ if (!IS_ERR(wh_dentry))
++ dput(wh_dentry);
++ else if (new_dentry->d_inode) {
++ if (S_ISDIR(old_dentry->d_inode->i_mode) !=
++ S_ISDIR(new_dentry->d_inode->i_mode)) {
++ err = S_ISDIR(old_dentry->d_inode->i_mode) ?
++ -ENOTDIR : -EISDIR;
++ goto out;
++ }
++
++ if (S_ISDIR(new_dentry->d_inode->i_mode)) {
++ struct unionfs_dir_state *namelist;
++ /* check if this unionfs directory is empty or not */
++ err = check_empty(new_dentry, &namelist);
++ if (err)
++ goto out;
++
++ if (!is_robranch(new_dentry))
++ err = delete_whiteouts(new_dentry,
++ dbstart(new_dentry),
++ namelist);
++
++ free_rdstate(namelist);
++
++ if (err)
++ goto out;
++ }
++ }
++ err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
++
++out:
++ if (err)
++ /* clear the new_dentry stuff created */
++ d_drop(new_dentry);
++ else
++ /*
++ * force a re-lookup, since the directory on the read-only branch
++ * is not renamed and the hidden dentries still point at the
++ * un-renamed ones.
++ */
++ if (S_ISDIR(old_dentry->d_inode->i_mode))
++ atomic_dec(&UNIONFS_D(old_dentry)->generation);
++
++ unionfs_unlock_dentry(new_dentry);
++ unionfs_unlock_dentry(old_dentry);
++ unionfs_read_unlock(old_dentry->d_sb);
++ return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.c linux-2.6.22-591/fs/unionfs/sioq.c
+--- linux-2.6.22-570/fs/unionfs/sioq.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/sioq.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,118 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Super-user I/O work queue: sometimes we need to perform actions that
++ * would fail due to the Unix permissions on the parent directory (e.g.,
++ * rmdir a directory which appears empty, but in reality contains
++ * whiteouts).
++ */
++
++static struct workqueue_struct *superio_workqueue;
++
++int __init init_sioq(void)
++{
++ int err;
++
++ superio_workqueue = create_workqueue("unionfs_siod");
++ if (!IS_ERR(superio_workqueue))
++ return 0;
++
++ err = PTR_ERR(superio_workqueue);
++ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
++ superio_workqueue = NULL;
++ return err;
++}
++
++void stop_sioq(void)
++{
++ if (superio_workqueue)
++ destroy_workqueue(superio_workqueue);
++}
++
++void run_sioq(work_func_t func, struct sioq_args *args)
++{
++ INIT_WORK(&args->work, func);
++
++ init_completion(&args->comp);
++ while (!queue_work(superio_workqueue, &args->work)) {
++ /* TODO: do accounting if needed */
++ schedule();
++ }
++ wait_for_completion(&args->comp);
++}
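++
++/*
++ * Typical call pattern (a sketch; 'parent' and 'dentry' are supplied by
++ * the caller). run_sioq() blocks until the unionfs_siod workqueue
++ * thread, which runs with kernel-thread (root) credentials, has
++ * executed the operation:
++ *
++ *	struct sioq_args args;
++ *
++ *	args.unlink.parent = parent;
++ *	args.unlink.dentry = dentry;
++ *	run_sioq(__unionfs_unlink, &args);
++ *	err = args.err;
++ */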
++
++void __unionfs_create(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct create_args *c = &args->create;
++
++ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
++ complete(&args->comp);
++}
++
++void __unionfs_mkdir(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct mkdir_args *m = &args->mkdir;
++
++ args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
++ complete(&args->comp);
++}
++
++void __unionfs_mknod(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct mknod_args *m = &args->mknod;
++
++ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
++ complete(&args->comp);
++}
++
++void __unionfs_symlink(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct symlink_args *s = &args->symlink;
++
++ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode);
++ complete(&args->comp);
++}
++
++void __unionfs_unlink(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct unlink_args *u = &args->unlink;
++
++ args->err = vfs_unlink(u->parent, u->dentry);
++ complete(&args->comp);
++}
++
++void __delete_whiteouts(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++ struct deletewh_args *d = &args->deletewh;
++
++ args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist);
++ complete(&args->comp);
++}
++
++void __is_opaque_dir(struct work_struct *work)
++{
++ struct sioq_args *args = container_of(work, struct sioq_args, work);
++
++ args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry,
++ sizeof(UNIONFS_DIR_OPAQUE) - 1);
++ complete(&args->comp);
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.h linux-2.6.22-591/fs/unionfs/sioq.h
+--- linux-2.6.22-570/fs/unionfs/sioq.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/sioq.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,91 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _SIOQ_H
++#define _SIOQ_H
++
++struct deletewh_args {
++ struct unionfs_dir_state *namelist;
++ struct dentry *dentry;
++ int bindex;
++};
++
++struct is_opaque_args {
++ struct dentry *dentry;
++};
++
++struct create_args {
++ struct inode *parent;
++ struct dentry *dentry;
++ umode_t mode;
++ struct nameidata *nd;
++};
++
++struct mkdir_args {
++ struct inode *parent;
++ struct dentry *dentry;
++ umode_t mode;
++};
++
++struct mknod_args {
++ struct inode *parent;
++ struct dentry *dentry;
++ umode_t mode;
++ dev_t dev;
++};
++
++struct symlink_args {
++ struct inode *parent;
++ struct dentry *dentry;
++ char *symbuf;
++ umode_t mode;
++};
++
++struct unlink_args {
++ struct inode *parent;
++ struct dentry *dentry;
++};
++
++
++struct sioq_args {
++ struct completion comp;
++ struct work_struct work;
++ int err;
++ void *ret;
++
++ union {
++ struct deletewh_args deletewh;
++ struct is_opaque_args is_opaque;
++ struct create_args create;
++ struct mkdir_args mkdir;
++ struct mknod_args mknod;
++ struct symlink_args symlink;
++ struct unlink_args unlink;
++ };
++};
++
++/* Extern definitions for SIOQ functions */
++extern int __init init_sioq(void);
++extern void stop_sioq(void);
++extern void run_sioq(work_func_t func, struct sioq_args *args);
++
++/* Extern definitions for our privilege escalation helpers */
++extern void __unionfs_create(struct work_struct *work);
++extern void __unionfs_mkdir(struct work_struct *work);
++extern void __unionfs_mknod(struct work_struct *work);
++extern void __unionfs_symlink(struct work_struct *work);
++extern void __unionfs_unlink(struct work_struct *work);
++extern void __delete_whiteouts(struct work_struct *work);
++extern void __is_opaque_dir(struct work_struct *work);
++
++#endif /* _SIOQ_H */
+diff -Nurb linux-2.6.22-570/fs/unionfs/subr.c linux-2.6.22-591/fs/unionfs/subr.c
+--- linux-2.6.22-570/fs/unionfs/subr.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/subr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,238 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Pass a unionfs dentry and an index. This tries to create a whiteout
++ * for the filename in dentry, first in branch 'index'; on error, it
++ * proceeds to the branches to the left.
++ */
++int create_whiteout(struct dentry *dentry, int start)
++{
++ int bstart, bend, bindex;
++ struct dentry *hidden_dir_dentry;
++ struct dentry *hidden_dentry;
++ struct dentry *hidden_wh_dentry;
++ char *name = NULL;
++ int err = -EINVAL;
++
++ verify_locked(dentry);
++
++ bstart = dbstart(dentry);
++ bend = dbend(dentry);
++
++ /* create dentry's whiteout equivalent */
++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++ if (IS_ERR(name)) {
++ err = PTR_ERR(name);
++ goto out;
++ }
++
++ for (bindex = start; bindex >= 0; bindex--) {
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++
++ if (!hidden_dentry) {
++ /*
++ * if the hidden dentry is not present, create the
++ * entire hidden directory structure and proceed.
++ * Since we only want to create the whiteout, we
++ * only need the parent dentry, and hence get rid
++ * of this dentry.
++ */
++ hidden_dentry = create_parents(dentry->d_inode,
++ dentry, bindex);
++ if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++ printk(KERN_DEBUG "unionfs: create_parents "
++ "failed for bindex = %d\n", bindex);
++ continue;
++ }
++ }
++
++ hidden_wh_dentry =
++ lookup_one_len(name, hidden_dentry->d_parent,
++ dentry->d_name.len + UNIONFS_WHLEN);
++ if (IS_ERR(hidden_wh_dentry))
++ continue;
++
++ /*
++ * The whiteout already exists. This used to be impossible,
++ * but now is possible because of opaqueness.
++ */
++ if (hidden_wh_dentry->d_inode) {
++ dput(hidden_wh_dentry);
++ err = 0;
++ goto out;
++ }
++
++ hidden_dir_dentry = lock_parent(hidden_wh_dentry);
++ if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++ err = vfs_create(hidden_dir_dentry->d_inode,
++ hidden_wh_dentry,
++ ~current->fs->umask & S_IRWXUGO,
++ NULL);
++ unlock_dir(hidden_dir_dentry);
++ dput(hidden_wh_dentry);
++
++ if (!err || !IS_COPYUP_ERR(err))
++ break;
++ }
++
++ /* set dbopaque so that lookup will not proceed after this branch */
++ if (!err)
++ set_dbopaque(dentry, bindex);
++
++out:
++ kfree(name);
++ return err;
++}
++
++/*
++ * This is a helper function for rename, which can end up with hosed
++ * dentries when it needs to revert.
++ */
++int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex)
++{
++ struct dentry *hidden_dentry;
++ struct dentry *hidden_parent;
++ int err = 0;
++
++ verify_locked(dentry);
++
++ unionfs_lock_dentry(dentry->d_parent);
++ hidden_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
++ unionfs_unlock_dentry(dentry->d_parent);
++
++ BUG_ON(!S_ISDIR(hidden_parent->d_inode->i_mode));
++
++ hidden_dentry = lookup_one_len(dentry->d_name.name, hidden_parent,
++ dentry->d_name.len);
++ if (IS_ERR(hidden_dentry)) {
++ err = PTR_ERR(hidden_dentry);
++ goto out;
++ }
++
++ dput(unionfs_lower_dentry_idx(dentry, bindex));
++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
++
++ if (!hidden_dentry->d_inode) {
++ dput(hidden_dentry);
++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++ } else {
++ unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry);
++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
++ igrab(hidden_dentry->d_inode));
++ }
++
++out:
++ return err;
++}
++
++int make_dir_opaque(struct dentry *dentry, int bindex)
++{
++ int err = 0;
++ struct dentry *hidden_dentry, *diropq;
++ struct inode *hidden_dir;
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ hidden_dir = hidden_dentry->d_inode;
++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
++ !S_ISDIR(hidden_dir->i_mode));
++
++ mutex_lock(&hidden_dir->i_mutex);
++ diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry,
++ sizeof(UNIONFS_DIR_OPAQUE) - 1);
++ if (IS_ERR(diropq)) {
++ err = PTR_ERR(diropq);
++ goto out;
++ }
++
++ if (!diropq->d_inode)
++ err = vfs_create(hidden_dir, diropq, S_IRUGO, NULL);
++ if (!err)
++ set_dbopaque(dentry, bindex);
++
++ dput(diropq);
++
++out:
++ mutex_unlock(&hidden_dir->i_mutex);
++ return err;
++}
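++
++/*
++ * Hedged note: UNIONFS_DIR_OPAQUE is the special marker file (named
++ * ".wh.__dir_opaque" in typical unionfs builds) whose mere presence in
++ * a lower directory stops lookup from descending into any branch to
++ * the right of it.
++ */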
++
++/*
++ * Returns the sum of the i_nlink values of all the underlying inodes
++ * of the passed inode.
++ */
++int unionfs_get_nlinks(struct inode *inode)
++{
++ int sum_nlinks = 0;
++ int dirs = 0;
++ int bindex;
++ struct inode *hidden_inode;
++
++ /* don't bother to do all the work since we're unlinked */
++ if (inode->i_nlink == 0)
++ return 0;
++
++ if (!S_ISDIR(inode->i_mode))
++ return unionfs_lower_inode(inode)->i_nlink;
++
++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
++ hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++
++ /* ignore files */
++ if (!hidden_inode || !S_ISDIR(hidden_inode->i_mode))
++ continue;
++
++ BUG_ON(hidden_inode->i_nlink < 0);
++
++ /* A deleted directory. */
++ if (hidden_inode->i_nlink == 0)
++ continue;
++ dirs++;
++
++ /*
++ * A broken directory...
++ *
++ * Some filesystems don't properly set the number of links
++ * on empty directories
++ */
++ if (hidden_inode->i_nlink == 1)
++ sum_nlinks += 2;
++ else
++ sum_nlinks += (hidden_inode->i_nlink - 2);
++ }
++
++ return (!dirs ? 0 : sum_nlinks + 2);
++}
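++
++/*
++ * Worked example for the logic above (a sketch): a union directory
++ * present in two branches whose lower copies report i_nlink == 3 and
++ * i_nlink == 2 (one subdirectory and none, respectively) yields
++ *
++ *	sum_nlinks = (3 - 2) + (2 - 2) = 1
++ *	returned   = sum_nlinks + 2   = 3
++ *
++ * i.e. the union's own "." and ".." plus one merged subdirectory.
++ */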
++
++/* construct whiteout filename */
++char *alloc_whname(const char *name, int len)
++{
++ char *buf;
++
++ buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
++ if (!buf)
++ return ERR_PTR(-ENOMEM);
++
++ strcpy(buf, UNIONFS_WHPFX);
++ strlcat(buf, name, len + UNIONFS_WHLEN + 1);
++
++ return buf;
++}
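++
++/*
++ * Example (assuming the conventional unionfs whiteout prefix, i.e.
++ * UNIONFS_WHPFX == ".wh." and UNIONFS_WHLEN == 4): alloc_whname("foo", 3)
++ * allocates 3 + 4 + 1 = 8 bytes and returns ".wh.foo", the name that
++ * the lookup, rename, and unlink paths probe for in each branch.
++ */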
+diff -Nurb linux-2.6.22-570/fs/unionfs/super.c linux-2.6.22-591/fs/unionfs/super.c
+--- linux-2.6.22-570/fs/unionfs/super.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/super.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,1002 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * The inode cache is used with alloc_inode for both our inode info and the
++ * vfs inode.
++ */
++static struct kmem_cache *unionfs_inode_cachep;
++
++static void unionfs_read_inode(struct inode *inode)
++{
++ extern struct address_space_operations unionfs_aops;
++ int size;
++ struct unionfs_inode_info *info = UNIONFS_I(inode);
++
++ unionfs_read_lock(inode->i_sb);
++
++ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
++ info->bstart = -1;
++ info->bend = -1;
++ atomic_set(&info->generation,
++ atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
++ spin_lock_init(&info->rdlock);
++ info->rdcount = 1;
++ info->hashsize = -1;
++ INIT_LIST_HEAD(&info->readdircache);
++
++ size = sbmax(inode->i_sb) * sizeof(struct inode *);
++ info->lower_inodes = kzalloc(size, GFP_KERNEL);
++ if (!info->lower_inodes) {
++ printk(KERN_ERR "unionfs: no kernel memory when allocating "
++ "lower-pointer array!\n");
++ BUG();
++ }
++
++ inode->i_version++;
++ inode->i_op = &unionfs_main_iops;
++ inode->i_fop = &unionfs_main_fops;
++
++ inode->i_mapping->a_ops = &unionfs_aops;
++
++ unionfs_read_unlock(inode->i_sb);
++}
++
++/*
++ * We define delete_inode because there are two VFS paths that may
++ * destroy an inode: one of them calls clear_inode before doing everything
++ * else that's needed, and the other is fine. This way we truncate the inode
++ * size (and its pages) and then clear our own inode, which will do an iput
++ * on ours and the lower inode.
++ *
++ * No need to lock sb info's rwsem.
++ */
++static void unionfs_delete_inode(struct inode *inode)
++{
++ inode->i_size = 0; /* every f/s seems to do that */
++
++ if (inode->i_data.nrpages)
++ truncate_inode_pages(&inode->i_data, 0);
++
++ clear_inode(inode);
++}
++
++/*
++ * final actions when unmounting a file system
++ *
++ * No need to lock rwsem.
++ */
++static void unionfs_put_super(struct super_block *sb)
++{
++ int bindex, bstart, bend;
++ struct unionfs_sb_info *spd;
++ int leaks = 0;
++
++ spd = UNIONFS_SB(sb);
++ if (!spd)
++ return;
++
++ bstart = sbstart(sb);
++ bend = sbend(sb);
++
++ /* Make sure we have no leaks of branchget/branchput. */
++ for (bindex = bstart; bindex <= bend; bindex++)
++ if (branch_count(sb, bindex) != 0) {
++ printk("unionfs: branch %d has %d references left!\n",
++ bindex, branch_count(sb, bindex));
++ leaks = 1;
++ }
++ BUG_ON(leaks != 0);
++
++ kfree(spd->data);
++ kfree(spd);
++ sb->s_fs_info = NULL;
++}
++
++/*
++ * Since people use this to answer the "How big of a file can I write?"
++ * question, we report the size of the highest-priority branch as the size of
++ * the union.
++ */
++static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
++{
++ int err = 0;
++ struct super_block *sb;
++ struct dentry *lower_dentry;
++
++ sb = dentry->d_sb;
++
++ unionfs_read_lock(sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ lower_dentry = unionfs_lower_dentry(sb->s_root);
++ err = vfs_statfs(lower_dentry, buf);
++
++ /* set return buf to our f/s to avoid confusing user-level utils */
++ buf->f_type = UNIONFS_SUPER_MAGIC;
++
++ /*
++ * Our maximum file-name length is shorter by a few bytes because
++ * every file name could potentially be whited out.
++ */
++ buf->f_namelen -= UNIONFS_WHLEN;
++
++ memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
++ memset(&buf->f_spare, 0, sizeof(buf->f_spare));
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(sb);
++ return err;
++}
++
++/* handle mode changing during remount */
++static noinline int do_remount_mode_option(char *optarg, int cur_branches,
++ struct unionfs_data *new_data,
++ struct path *new_lower_paths)
++{
++ int err = -EINVAL;
++ int perms, idx;
++ char *modename = strchr(optarg, '=');
++ struct nameidata nd;
++
++ /* by now, optarg contains the branch name */
++ if (!*optarg) {
++ printk("unionfs: no branch specified for mode change.\n");
++ goto out;
++ }
++ if (!modename) {
++ printk("unionfs: branch \"%s\" requires a mode.\n", optarg);
++ goto out;
++ }
++ *modename++ = '\0';
++ perms = __parse_branch_mode(modename);
++ if (perms == 0) {
++ printk("unionfs: invalid mode \"%s\" for \"%s\".\n",
++ modename, optarg);
++ goto out;
++ }
++
++ /*
++ * Find matching branch index. For now, this assumes that nothing
++ * has been mounted on top of this Unionfs stack. Once we have /odf
++ * and cache-coherency resolved, we'll address the branch-path
++ * uniqueness.
++ */
++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++ if (err) {
++ printk(KERN_WARNING "unionfs: error accessing "
++ "hidden directory \"%s\" (error %d)\n",
++ optarg, err);
++ goto out;
++ }
++ for (idx = 0; idx < cur_branches; idx++)
++ if (nd.mnt == new_lower_paths[idx].mnt &&
++ nd.dentry == new_lower_paths[idx].dentry)
++ break;
++ path_release(&nd); /* no longer needed */
++ if (idx == cur_branches) {
++ err = -ENOENT; /* err may have been reset above */
++ printk(KERN_WARNING "unionfs: branch \"%s\" "
++ "not found\n", optarg);
++ goto out;
++ }
++ /* check/change mode for existing branch */
++ /* we don't warn if perms==branchperms */
++ new_data[idx].branchperms = perms;
++ err = 0;
++out:
++ return err;
++}
++
++/* handle branch deletion during remount */
++static noinline int do_remount_del_option(char *optarg, int cur_branches,
++ struct unionfs_data *new_data,
++ struct path *new_lower_paths)
++{
++ int err = -EINVAL;
++ int idx;
++ struct nameidata nd;
++
++ /* optarg contains the branch name to delete */
++
++ /*
++ * Find matching branch index. For now, this assumes that nothing
++ * has been mounted on top of this Unionfs stack. Once we have /odf
++ * and cache-coherency resolved, we'll address the branch-path
++ * uniqueness.
++ */
++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++ if (err) {
++ printk(KERN_WARNING "unionfs: error accessing "
++ "hidden directory \"%s\" (error %d)\n",
++ optarg, err);
++ goto out;
++ }
++ for (idx = 0; idx < cur_branches; idx++)
++ if (nd.mnt == new_lower_paths[idx].mnt &&
++ nd.dentry == new_lower_paths[idx].dentry)
++ break;
++ path_release(&nd); /* no longer needed */
++ if (idx == cur_branches) {
++ printk(KERN_WARNING "unionfs: branch \"%s\" "
++ "not found\n", optarg);
++ err = -ENOENT;
++ goto out;
++ }
++ /* check if there are any open files on the branch to be deleted */
++ if (atomic_read(&new_data[idx].open_files) > 0) {
++ err = -EBUSY;
++ goto out;
++ }
++
++ /*
++ * Now we have to delete the branch. First, release any handles it
++ * has. Then, move the remaining array indexes past "idx" in
++ * new_data and new_lower_paths one to the left. Finally, adjust
++ * cur_branches.
++ */
++ pathput(&new_lower_paths[idx]);
++
++ if (idx < cur_branches - 1) {
++ /* if idx==cur_branches-1, we delete last branch: easy */
++ memmove(&new_data[idx], &new_data[idx+1],
++ (cur_branches - 1 - idx) *
++ sizeof(struct unionfs_data));
++ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
++ (cur_branches - 1 - idx) * sizeof(struct path));
++ }
++
++ err = 0;
++out:
++ return err;
++}
++
++/* handle branch insertion during remount */
++static noinline int do_remount_add_option(char *optarg, int cur_branches,
++ struct unionfs_data *new_data,
++ struct path *new_lower_paths,
++ int *high_branch_id)
++{
++ int err = -EINVAL;
++ int perms;
++ int idx = 0; /* default: insert at beginning */
++ char *new_branch, *modename = NULL;
++ struct nameidata nd;
++
++ /*
++ * optarg can be of several forms:
++ *
++ * /bar:/foo insert /foo before /bar
++ * /bar:/foo=ro insert /foo in ro mode before /bar
++ * /foo insert /foo in the beginning (prepend)
++ * :/foo insert /foo at the end (append)
++ */
++ if (*optarg == ':') { /* append? */
++ new_branch = optarg + 1; /* skip ':' */
++ idx = cur_branches;
++ goto found_insertion_point;
++ }
++ new_branch = strchr(optarg, ':');
++ if (!new_branch) { /* prepend? */
++ new_branch = optarg;
++ goto found_insertion_point;
++ }
++ *new_branch++ = '\0'; /* holds path+mode of new branch */
++
++ /*
++ * Find matching branch index. For now, this assumes that nothing
++ * has been mounted on top of this Unionfs stack. Once we have /odf
++ * and cache-coherency resolved, we'll address the branch-path
++ * uniqueness.
++ */
++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++ if (err) {
++ printk(KERN_WARNING "unionfs: error accessing "
++ "hidden directory \"%s\" (error %d)\n",
++ optarg, err);
++ goto out;
++ }
++ for (idx = 0; idx < cur_branches; idx++)
++ if (nd.mnt == new_lower_paths[idx].mnt &&
++ nd.dentry == new_lower_paths[idx].dentry)
++ break;
++ path_release(&nd); /* no longer needed */
++ if (idx == cur_branches) {
++ printk(KERN_WARNING "unionfs: branch \"%s\" "
++ "not found\n", optarg);
++ err = -ENOENT;
++ goto out;
++ }
++
++ /*
++ * At this point idx will hold the index where the new branch should
++ * be inserted before.
++ */
++found_insertion_point:
++ /* find the mode for the new branch */
++ if (new_branch)
++ modename = strchr(new_branch, '=');
++ if (modename)
++ *modename++ = '\0';
++ perms = parse_branch_mode(modename);
++
++ if (!new_branch || !*new_branch) {
++ printk(KERN_WARNING "unionfs: null new branch\n");
++ err = -EINVAL;
++ goto out;
++ }
++ err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
++ if (err) {
++ printk(KERN_WARNING "unionfs: error accessing "
++ "hidden directory \"%s\" (error %d)\n",
++ new_branch, err);
++ goto out;
++ }
++ /*
++ * It's probably safe now to check the new branch to insert. Note:
++ * we don't allow inserting branches which are themselves unionfs
++ * mounts (check_branch returns EINVAL in that case). This is
++ * because this code base doesn't support stacking unionfs: the ODF
++ * code base supports that correctly.
++ */
++ if ((err = check_branch(&nd))) {
++ printk(KERN_WARNING "unionfs: hidden directory "
++ "\"%s\" is not a valid branch\n", optarg);
++ path_release(&nd);
++ goto out;
++ }
++
++ /*
++ * Now we have to insert the new branch. But first, move the bits
++ * to make space for the new branch, if needed. Finally, adjust
++ * cur_branches.
++ * We don't release nd here; it's kept until umount/remount.
++ */
++ if (idx < cur_branches) {
++ /* if idx==cur_branches, we append: easy */
++ memmove(&new_data[idx+1], &new_data[idx],
++ (cur_branches - idx) * sizeof(struct unionfs_data));
++ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
++ (cur_branches - idx) * sizeof(struct path));
++ }
++ new_lower_paths[idx].dentry = nd.dentry;
++ new_lower_paths[idx].mnt = nd.mnt;
++
++ new_data[idx].sb = nd.dentry->d_sb;
++ atomic_set(&new_data[idx].open_files, 0);
++ new_data[idx].branchperms = perms;
++ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
++
++ err = 0;
++out:
++ return err;
++}
++
++
++/*
++ * Support branch management options on remount.
++ *
++ * See Documentation/filesystems/unionfs/ for details.
++ *
++ * @flags: numeric mount options
++ * @options: mount options string
++ *
++ * This function can rearrange a mounted union dynamically, adding and
++ * removing branches, including changing branch modes. Clearly this has to
++ * be done safely and atomically. Luckily, the VFS already calls this
++ * function with lock_super(sb) and lock_kernel() held, preventing
++ * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
++ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
++ * to purge dentries/inodes from our superblock, and also called
++ * fsync_super(sb) to purge any dirty pages. So we're good.
++ *
++ * XXX: however, our remount code may also need to invalidate mapped pages
++ * so as to force them to be re-gotten from the (newly reconfigured) lower
++ * branches. This has to wait for proper mmap and cache coherency support
++ * in the VFS.
++ *
++ */
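++/*
++ * For illustration (hedged; the authoritative syntax is the parsing
++ * below): typical remount invocations look like
++ *
++ *	mount -o remount,add=/foo none /mnt/union        (prepend branch)
++ *	mount -o remount,add=:/foo=ro none /mnt/union    (append, read-only)
++ *	mount -o remount,del=/foo none /mnt/union        (delete branch)
++ *	mount -o remount,mode=/foo=rw none /mnt/union    (change mode)
++ *	mount -o remount,incgen none /mnt/union          (bump generation)
++ */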
++static int unionfs_remount_fs(struct super_block *sb, int *flags,
++ char *options)
++{
++ int err = 0;
++ int i;
++ char *optionstmp, *tmp_to_free; /* kstrdup'ed copy of "options" */
++ char *optname;
++ int cur_branches = 0; /* no. of current branches */
++ int new_branches = 0; /* no. of branches actually left in the end */
++ int add_branches; /* est. no. of branches to add */
++ int del_branches; /* est. no. of branches to del */
++ int max_branches; /* max possible no. of branches */
++ struct unionfs_data *new_data = NULL, *tmp_data = NULL;
++ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
++ struct inode **new_lower_inodes = NULL;
++ int new_high_branch_id; /* new high branch ID */
++ int size; /* memory allocation size, temp var */
++ int old_ibstart, old_ibend;
++
++ unionfs_write_lock(sb);
++
++ /*
++ * The VFS will take care of "ro" and "rw" flags, and we can safely
++ * ignore MS_SILENT, but anything else left over is an error. So we
++ * need to check if any other flags may have been passed (none are
++ * allowed/supported as of now).
++ */
++ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
++ printk(KERN_WARNING
++ "unionfs: remount flags 0x%x unsupported\n", *flags);
++ err = -EINVAL;
++ goto out_error;
++ }
++
++ /*
++ * If 'options' is NULL, it's probably because the user just changed
++ * the union to a "ro" or "rw" and the VFS took care of it. So
++ * nothing to do and we're done.
++ */
++ if (!options || options[0] == '\0')
++ goto out_error;
++
++ /*
++ * Find out how many branches we will have in the end, counting
++ * "add" and "del" commands. Copy the "options" string because
++ * strsep modifies the string and we need it later.
++ */
++ optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL);
++ if (!optionstmp) {
++ err = -ENOMEM;
++ goto out_free;
++ }
++ new_branches = cur_branches = sbmax(sb); /* current no. branches */
++ add_branches = del_branches = 0;
++ new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
++ while ((optname = strsep(&optionstmp, ",")) != NULL) {
++ char *optarg;
++
++ if (!optname || !*optname)
++ continue;
++
++ optarg = strchr(optname, '=');
++ if (optarg)
++ *optarg++ = '\0';
++
++ if (!strcmp("add", optname))
++ add_branches++;
++ else if (!strcmp("del", optname))
++ del_branches++;
++ }
++ kfree(tmp_to_free);
++ /* after all changes, will we have at least one branch left? */
++ if ((new_branches + add_branches - del_branches) < 1) {
++ printk(KERN_WARNING
++ "unionfs: no branches left after remount\n");
++ err = -EINVAL;
++ goto out_free;
++ }
++
++ /*
++ * Since we haven't actually parsed all the add/del options, nor
++ * have we checked them for errors, we don't know for sure how many
++ * branches we will have after all changes have taken place. In
++ * fact, the total number of branches left could be less than what
++ * we have now. So we need to allocate space for a temporary
++ * placeholder that is at least as large as the maximum number of
++ * branches we *could* have, which is the current number plus all
++ * the additions. Once we're done with these temp placeholders, we
++ * may have to re-allocate the final size, copy over from the temp,
++ * and then free the temps (done near the end of this function).
++ */
++ max_branches = cur_branches + add_branches;
++ /* allocate space for new pointers to hidden dentry */
++ tmp_data = kcalloc(max_branches,
++ sizeof(struct unionfs_data), GFP_KERNEL);
++ if (!tmp_data) {
++ err = -ENOMEM;
++ goto out_free;
++ }
++ /* allocate space for new pointers to lower paths */
++ tmp_lower_paths = kcalloc(max_branches,
++ sizeof(struct path), GFP_KERNEL);
++ if (!tmp_lower_paths) {
++ err = -ENOMEM;
++ goto out_free;
++ }
++ /* copy current info into new placeholders, incrementing refcnts */
++ memcpy(tmp_data, UNIONFS_SB(sb)->data,
++ cur_branches * sizeof(struct unionfs_data));
++ memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
++ cur_branches * sizeof(struct path));
++ for (i = 0; i < cur_branches; i++)
++ pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
++
++ /*******************************************************************
++ * For each branch command, do path_lookup on the requested branch,
++ * and apply the change to a temp branch list. To handle errors, we
++ * already dup'ed the old arrays (above), and increased the refcnts
++ * on various f/s objects. So now we can do all the path_lookups
++ * and branch-management commands on the new arrays. If this fails
++ * midway, we free the tmp arrays and *put all objects. If we succeed,
++ * then we free the old arrays and *put their objects, and then replace
++ * the arrays with the new tmp list (we may have to re-allocate the
++ * memory because the temp lists could have been larger than what we
++ * actually needed).
++ *******************************************************************/
++
++ while ((optname = strsep(&options, ",")) != NULL) {
++ char *optarg;
++
++ if (!optname || !*optname)
++ continue;
++ /*
++ * At this stage optname holds a comma-delimited option, but
++ * without the commas. Next, we need to break the string on
++ * the '=' symbol to separate CMD=ARG, where ARG itself can
++ * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
++ * KEY is "/foo", and VAL is "rw".
++ */
++ optarg = strchr(optname, '=');
++ if (optarg)
++ *optarg++ = '\0';
++ /* incgen remount option (instead of old ioctl) */
++ if (!strcmp("incgen", optname)) {
++ err = 0;
++ goto out_no_change;
++ }
++
++ /*
++ * All of our options take an argument now. (Insert ones
++ * that don't above this check.) So at this stage optname
++ * contains the CMD part and optarg contains the ARG part.
++ */
++ if (!optarg || !*optarg) {
++ printk("unionfs: all remount options require "
++ "an argument (%s).\n", optname);
++ err = -EINVAL;
++ goto out_release;
++ }
++
++ if (!strcmp("add", optname)) {
++ err = do_remount_add_option(optarg, new_branches,
++ tmp_data,
++ tmp_lower_paths,
++ &new_high_branch_id);
++ if (err)
++ goto out_release;
++ new_branches++;
++ if (new_branches > UNIONFS_MAX_BRANCHES) {
++ printk("unionfs: command exceeds "
++ "%d branches\n", UNIONFS_MAX_BRANCHES);
++ err = -E2BIG;
++ goto out_release;
++ }
++ continue;
++ }
++ if (!strcmp("del", optname)) {
++ err = do_remount_del_option(optarg, new_branches,
++ tmp_data,
++ tmp_lower_paths);
++ if (err)
++ goto out_release;
++ new_branches--;
++ continue;
++ }
++ if (!strcmp("mode", optname)) {
++ err = do_remount_mode_option(optarg, new_branches,
++ tmp_data,
++ tmp_lower_paths);
++ if (err)
++ goto out_release;
++ continue;
++ }
++
++ /*
++ * When you use "mount -o remount,ro", mount(8) will
++ * reportedly pass the original dirs= string from
++ * /proc/mounts. So for now, we have to ignore dirs= and
++ * not consider it an error, unless we want to allow users
++ * to pass dirs= in remount. Note that to allow the VFS to
++ * actually process the ro/rw remount options, we have to
++ * return 0 from this function.
++ */
++ if (!strcmp("dirs", optname)) {
++ printk(KERN_WARNING
++ "unionfs: remount ignoring option \"%s\".\n",
++ optname);
++ continue;
++ }
++
++ err = -EINVAL;
++ printk(KERN_WARNING
++ "unionfs: unrecognized option \"%s\"\n", optname);
++ goto out_release;
++ }
++
++out_no_change:
++
++ /******************************************************************
++ * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
++ * see if we need to allocate a small-sized new vector, copy the
++ * vectors to their correct place, release the refcnt of the older
++ * ones, and return. Also handle invalidating any pages that will
++ * have to be re-read.
++ *******************************************************************/
++
++ if (!(tmp_data[0].branchperms & MAY_WRITE)) {
++ printk("unionfs: leftmost branch cannot be read-only "
++ "(use \"remount,ro\" to create a read-only union)\n");
++ err = -EINVAL;
++ goto out_release;
++ }
++
++ /* (re)allocate space for new pointers to hidden dentry */
++ size = new_branches * sizeof(struct unionfs_data);
++ new_data = krealloc(tmp_data, size, GFP_KERNEL);
++ if (!new_data) {
++ err = -ENOMEM;
++ goto out_release;
++ }
++
++ /* allocate space for new pointers to lower paths */
++ size = new_branches * sizeof(struct path);
++ new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
++ if (!new_lower_paths) {
++ err = -ENOMEM;
++ goto out_release;
++ }
++
++ /* allocate space for new pointers to lower inodes */
++ new_lower_inodes = kcalloc(new_branches,
++ sizeof(struct inode *), GFP_KERNEL);
++ if (!new_lower_inodes) {
++ err = -ENOMEM;
++ goto out_release;
++ }
++
++ /*
++ * OK, just before we actually put the new set of branches in place,
++ * we need to ensure that our own f/s has no dirty objects left.
++ * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
++ * fsync_super(sb), taking care of dentries, inodes, and dirty
++ * pages. So all that's left is for us to invalidate any leftover
++ * (non-dirty) pages to ensure that they will be re-read from the
++ * new lower branches (and to support mmap).
++ */
++
++ /*
++ * Now we call drop_pagecache_sb() to invalidate all pages in this
++ * super. This function calls invalidate_inode_pages(mapping),
++ * which calls invalidate_mapping_pages(): the latter, however, will
++ * not invalidate pages which are dirty, locked, under writeback, or
++ * mapped into page tables. We shouldn't have to worry about dirty
++ * or under-writeback pages, because do_remount_sb() called
++ * fsync_super() which would not have returned until all dirty pages
++ * were flushed.
++ *
++ * But do we have to worry about locked pages? Is there any chance
++ * that in here we'll get locked pages?
++ *
++ * XXX: what about pages mapped into pagetables? Are these pages
++ * which user processes may have mmap(2)'ed? If so, then we need to
++ * invalidate those too, no? Maybe we'll have to write our own
++ * version of invalidate_mapping_pages() which also handles mapped
++ * pages.
++ *
++ * XXX: Alternatively, maybe we should call truncate_inode_pages(),
++ * which uses two passes over the pages list, and will truncate all
++ * pages.
++ */
++ drop_pagecache_sb(sb);
++
++ /* copy new vectors into their correct place */
++ tmp_data = UNIONFS_SB(sb)->data;
++ UNIONFS_SB(sb)->data = new_data;
++ new_data = NULL; /* so don't free good pointers below */
++ tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
++ UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
++ new_lower_paths = NULL; /* so don't free good pointers below */
++
++ /* update our unionfs_sb_info and root dentry index of last branch */
++ i = sbmax(sb); /* save no. of branches to release at end */
++ sbend(sb) = new_branches - 1;
++ set_dbend(sb->s_root, new_branches - 1);
++ old_ibstart = ibstart(sb->s_root->d_inode);
++ old_ibend = ibend(sb->s_root->d_inode);
++ ibend(sb->s_root->d_inode) = new_branches - 1;
++ UNIONFS_D(sb->s_root)->bcount = new_branches;
++ new_branches = i; /* no. of branches to release below */
++
++ /*
++ * Update lower inodes: 3 steps
++ * 1. grab ref on all new lower inodes
++ */
++ for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
++ struct dentry *lower_dentry =
++ unionfs_lower_dentry_idx(sb->s_root, i);
++ atomic_inc(&lower_dentry->d_inode->i_count);
++ new_lower_inodes[i] = lower_dentry->d_inode;
++ }
++ /* 2. release reference on all older lower inodes */
++ for (i = old_ibstart; i <= old_ibend; i++) {
++ iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
++ unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
++ }
++ kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
++ /* 3. update root dentry's inode to new lower_inodes array */
++ UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
++ new_lower_inodes = NULL;
++
++ /* maxbytes may have changed */
++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
++ /* update high branch ID */
++ sbhbid(sb) = new_high_branch_id;
++
++ /* update our sb->generation for revalidating objects */
++ i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
++ atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
++ atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
++
++ err = 0; /* reset to success */
++
++ if (!(*flags & MS_SILENT))
++ printk("unionfs: new generation number %d\n", i);
++
++ /*
++ * The code above falls through to the next label, and releases the
++ * refcnts of the older ones (stored in tmp_*): if we fell through
++ * here, it means success. However, if we jump directly to this
++ * label from any error above, then an error occurred after we
++ * grabbed various refcnts, and so we have to release the
++ * temporarily constructed structures.
++ */
++out_release:
++ /* no need to cleanup/release anything in tmp_data */
++ if (tmp_lower_paths)
++ for (i = 0; i < new_branches; i++)
++ pathput(&tmp_lower_paths[i]);
++out_free:
++ kfree(tmp_lower_paths);
++ kfree(tmp_data);
++ kfree(new_lower_paths);
++ kfree(new_data);
++ kfree(new_lower_inodes);
++out_error:
++ unionfs_write_unlock(sb);
++ return err;
++}
++
++/*
++ * Called by iput() when the inode reference count reaches zero
++ * and the inode is not hashed anywhere. Used to clear anything
++ * that needs to be, before the inode is completely destroyed and put
++ * on the inode free list.
++ *
++ * No need to lock sb info's rwsem.
++ */
++static void unionfs_clear_inode(struct inode *inode)
++{
++ int bindex, bstart, bend;
++ struct inode *hidden_inode;
++ struct list_head *pos, *n;
++ struct unionfs_dir_state *rdstate;
++
++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
++ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
++ list_del(&rdstate->cache);
++ free_rdstate(rdstate);
++ }
++
++ /*
++ * Decrement a reference to a hidden_inode, which was incremented
++ * by our read_inode when it was created initially.
++ */
++ bstart = ibstart(inode);
++ bend = ibend(inode);
++ if (bstart >= 0) {
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++ if (!hidden_inode)
++ continue;
++ iput(hidden_inode);
++ }
++ }
++
++ kfree(UNIONFS_I(inode)->lower_inodes);
++ UNIONFS_I(inode)->lower_inodes = NULL;
++}
++
++static struct inode *unionfs_alloc_inode(struct super_block *sb)
++{
++ struct unionfs_inode_info *i;
++
++ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
++ if (!i)
++ return NULL;
++
++ /* memset everything up to the inode to 0 */
++ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
++
++ i->vfs_inode.i_version = 1;
++ return &i->vfs_inode;
++}
++
++static void unionfs_destroy_inode(struct inode *inode)
++{
++ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
++}
++
++/* unionfs inode cache constructor */
++static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
++{
++ struct unionfs_inode_info *i = v;
++
++ inode_init_once(&i->vfs_inode);
++}
++
++int unionfs_init_inode_cache(void)
++{
++ int err = 0;
++
++ unionfs_inode_cachep =
++ kmem_cache_create("unionfs_inode_cache",
++ sizeof(struct unionfs_inode_info), 0,
++ SLAB_RECLAIM_ACCOUNT, init_once, NULL);
++ if (!unionfs_inode_cachep)
++ err = -ENOMEM;
++ return err;
++}
++
++/* unionfs inode cache destructor */
++void unionfs_destroy_inode_cache(void)
++{
++ if (unionfs_inode_cachep)
++ kmem_cache_destroy(unionfs_inode_cachep);
++}
++
++/*
++ * Called when we have a dirty inode; here we only throw out the
++ * parts of our readdir cache that are too old.
++ *
++ * No need to grab sb info's rwsem.
++ */
++static int unionfs_write_inode(struct inode *inode, int sync)
++{
++ struct list_head *pos, *n;
++ struct unionfs_dir_state *rdstate;
++
++ spin_lock(&UNIONFS_I(inode)->rdlock);
++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
++ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
++ /* We keep this list in LRU order. */
++ if ((rdstate->access + RDCACHE_JIFFIES) > jiffies)
++ break;
++ UNIONFS_I(inode)->rdcount--;
++ list_del(&rdstate->cache);
++ free_rdstate(rdstate);
++ }
++ spin_unlock(&UNIONFS_I(inode)->rdlock);
++
++ return 0;
++}
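++
++/*
++ * In other words: with RDCACHE_JIFFIES at 5*HZ (see union.h), readdir
++ * state untouched for roughly five seconds is freed here, oldest
++ * first, which is safe because the list is kept in LRU order.
++ */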
++
++/*
++ * Used only with NFS, to kill any pending RPC tasks, so that subsequent
++ * code can actually succeed and won't leave tasks that need handling.
++ */
++static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
++{
++ struct super_block *sb, *hidden_sb;
++ struct vfsmount *hidden_mnt;
++ int bindex, bstart, bend;
++
++ if (!(flags & MNT_FORCE))
++ /*
++ * we are not being MNT_FORCE'd, therefore we should emulate
++ * old behavior
++ */
++ return;
++
++ sb = mnt->mnt_sb;
++
++ unionfs_read_lock(sb);
++
++ bstart = sbstart(sb);
++ bend = sbend(sb);
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ hidden_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
++ hidden_sb = unionfs_lower_super_idx(sb, bindex);
++
++ if (hidden_mnt && hidden_sb && hidden_sb->s_op &&
++ hidden_sb->s_op->umount_begin)
++ hidden_sb->s_op->umount_begin(hidden_mnt, flags);
++ }
++
++ unionfs_read_unlock(sb);
++}
++
++static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
++{
++ struct super_block *sb = mnt->mnt_sb;
++ int ret = 0;
++ char *tmp_page;
++ char *path;
++ int bindex, bstart, bend;
++ int perms;
++
++ unionfs_read_lock(sb);
++
++ unionfs_lock_dentry(sb->s_root);
++
++ tmp_page = (char *)__get_free_page(GFP_KERNEL);
++ if (!tmp_page) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ bstart = sbstart(sb);
++ bend = sbend(sb);
++
++ seq_printf(m, ",dirs=");
++ for (bindex = bstart; bindex <= bend; bindex++) {
++ path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex),
++ unionfs_lower_mnt_idx(sb->s_root, bindex),
++ tmp_page, PAGE_SIZE);
++ if (IS_ERR(path)) {
++ ret = PTR_ERR(path);
++ goto out;
++ }
++
++ perms = branchperms(sb, bindex);
++
++ seq_printf(m, "%s=%s", path,
++ perms & MAY_WRITE ? "rw" : "ro");
++ if (bindex != bend)
++ seq_printf(m, ":");
++ }
++
++out:
++ free_page((unsigned long) tmp_page);
++
++ unionfs_unlock_dentry(sb->s_root);
++
++ unionfs_read_unlock(sb);
++
++ return ret;
++}
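++
++/*
++ * Example of what unionfs_show_options() emits in /proc/mounts (with
++ * hypothetical branch paths):
++ *
++ *	,dirs=/left=rw:/right=ro
++ *
++ * Branches appear left to right, each tagged rw or ro according to its
++ * MAY_WRITE permission bit.
++ */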
++
++struct super_operations unionfs_sops = {
++ .read_inode = unionfs_read_inode,
++ .delete_inode = unionfs_delete_inode,
++ .put_super = unionfs_put_super,
++ .statfs = unionfs_statfs,
++ .remount_fs = unionfs_remount_fs,
++ .clear_inode = unionfs_clear_inode,
++ .umount_begin = unionfs_umount_begin,
++ .show_options = unionfs_show_options,
++ .write_inode = unionfs_write_inode,
++ .alloc_inode = unionfs_alloc_inode,
++ .destroy_inode = unionfs_destroy_inode,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/union.h linux-2.6.22-591/fs/unionfs/union.h
+--- linux-2.6.22-570/fs/unionfs/union.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/union.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,467 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _UNION_H_
++#define _UNION_H_
++
++#include <linux/dcache.h>
++#include <linux/file.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/page-flags.h>
++#include <linux/pagemap.h>
++#include <linux/poll.h>
++#include <linux/security.h>
++#include <linux/seq_file.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/smp_lock.h>
++#include <linux/statfs.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/writeback.h>
++#include <linux/buffer_head.h>
++#include <linux/xattr.h>
++#include <linux/fs_stack.h>
++#include <linux/magic.h>
++#include <linux/log2.h>
++
++#include <asm/mman.h>
++#include <asm/system.h>
++
++#include <linux/union_fs.h>
++
++/* the file system name */
++#define UNIONFS_NAME "unionfs"
++
++/* unionfs root inode number */
++#define UNIONFS_ROOT_INO 1
++
++/* number of times we try to get a unique temporary file name */
++#define GET_TMPNAM_MAX_RETRY 5
++
++/* maximum number of branches we support, to avoid memory blowup */
++#define UNIONFS_MAX_BRANCHES 128
++
++/* Operations vectors defined in specific files. */
++extern struct file_operations unionfs_main_fops;
++extern struct file_operations unionfs_dir_fops;
++extern struct inode_operations unionfs_main_iops;
++extern struct inode_operations unionfs_dir_iops;
++extern struct inode_operations unionfs_symlink_iops;
++extern struct super_operations unionfs_sops;
++extern struct dentry_operations unionfs_dops;
++
++/* How long an entry is allowed to persist */
++#define RDCACHE_JIFFIES (5*HZ)
++
++/* file private data. */
++struct unionfs_file_info {
++ int bstart;
++ int bend;
++ atomic_t generation;
++
++ struct unionfs_dir_state *rdstate;
++ struct file **lower_files;
++ int *saved_branch_ids; /* IDs of branches when file was opened */
++};
++
++/* unionfs inode data in memory */
++struct unionfs_inode_info {
++ int bstart;
++ int bend;
++ atomic_t generation;
++ int stale;
++ /* State for readdir over NFS. */
++ spinlock_t rdlock;
++ struct list_head readdircache;
++ int rdcount;
++ int hashsize;
++ int cookie;
++
++ /* The hidden inodes */
++ struct inode **lower_inodes;
++ /* tracks the number of opens, for handling unlinks before closes */
++ atomic_t totalopens;
++
++ struct inode vfs_inode;
++};
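++
++/*
++ * The vfs_inode member must stay last: unionfs_alloc_inode() returns
++ * &i->vfs_inode to the VFS, and UNIONFS_I() (from fanout.h) presumably
++ * recovers the containing unionfs_inode_info with the usual
++ * container_of() idiom.
++ */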
++
++/* unionfs dentry data in memory */
++struct unionfs_dentry_info {
++ /*
++ * This mutex is used to lock the dentry as soon as we get into a
++ * unionfs function from the VFS. Our lock ordering is that children
++ * go before their parents.
++ */
++ struct mutex lock;
++ int bstart;
++ int bend;
++ int bopaque;
++ int bcount;
++ atomic_t generation;
++ struct path *lower_paths;
++};
++
++/* These are the pointers to our various objects. */
++struct unionfs_data {
++ struct super_block *sb;
++ atomic_t open_files; /* number of open files on branch */
++ int branchperms;
++ int branch_id; /* unique branch ID at re/mount time */
++};
++
++/* unionfs super-block data in memory */
++struct unionfs_sb_info {
++ int bend;
++
++ atomic_t generation;
++
++ /*
++ * This rwsem ensures that a branch management operation:
++ * 1) does not begin until all currently in-flight operations
++ * complete, and
++ * 2) blocks any new operations from starting until it has
++ * completed.
++ */
++ struct rw_semaphore rwsem;
++ int high_branch_id; /* last unique branch ID given */
++ struct unionfs_data *data;
++};
++
++/*
++ * structure for building the linked list of entries seen by readdir on a
++ * left branch, for comparison with entries on the branches to its right
++ */
++struct filldir_node {
++ struct list_head file_list; /* list for directory entries */
++ char *name; /* name entry */
++ int hash; /* name hash */
++ int namelen; /* name length, since the name is not NUL-terminated */
++
++ /*
++ * lets us detect duplicate whiteouts and files in the same branch,
++ * in which case we return -EIO.
++ */
++ int bindex;
++
++ /* is this a whiteout entry? */
++ int whiteout;
++
++ /* Inline name, so we don't need to separately kmalloc small ones */
++ char iname[DNAME_INLINE_LEN_MIN];
++};
++
++/* Directory hash table. */
++struct unionfs_dir_state {
++ unsigned int cookie; /* the cookie, based on rdversion */
++ unsigned int offset; /* The entry we have returned. */
++ int bindex;
++ loff_t dirpos; /* offset within the lower level directory */
++ int size; /* How big is the hash table? */
++ int hashentries; /* How many entries have been inserted? */
++ unsigned long access;
++
++ /* This cache list is used when the inode keeps us around. */
++ struct list_head cache;
++ struct list_head list[0];
++};
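++
++/*
++ * list[0] is the C flexible-array-member idiom: alloc_rdstate()
++ * presumably allocates sizeof(struct unionfs_dir_state) plus 'size'
++ * trailing struct list_head buckets in a single chunk, so the hash
++ * table lives directly behind the fixed fields.
++ */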
++
++/* include miscellaneous macros */
++#include "fanout.h"
++#include "sioq.h"
++
++/* externs for cache creation/deletion routines */
++extern void unionfs_destroy_filldir_cache(void);
++extern int unionfs_init_filldir_cache(void);
++extern int unionfs_init_inode_cache(void);
++extern void unionfs_destroy_inode_cache(void);
++extern int unionfs_init_dentry_cache(void);
++extern void unionfs_destroy_dentry_cache(void);
++
++/* Initialize and free readdir-specific state. */
++extern int init_rdstate(struct file *file);
++extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex);
++extern struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos);
++extern void free_rdstate(struct unionfs_dir_state *state);
++extern int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
++ int namelen, int bindex, int whiteout);
++extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
++ const char *name, int namelen);
++
++extern struct dentry **alloc_new_dentries(int objs);
++extern struct unionfs_data *alloc_new_data(int objs);
++
++/* We can only use 32 bits of offset for rdstate --- blech! */
++#define DIREOF (0xfffff)
++#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
++#define MAXRDCOOKIE (0xfff)
++/* Turn an rdstate into an offset. */
++static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
++{
++ off_t tmp;
++
++ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
++ | (buf->offset & DIREOF);
++ return tmp;
++}
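++
++/*
++ * Worked example for rdstate2offset(): with cookie == 2 and offset == 5
++ * (illustrative values only), the result is (2 << 20) | 5 == 0x200005,
++ * i.e. 12 bits of cookie stacked above 20 bits of directory offset.
++ */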
++
++#define unionfs_read_lock(sb) down_read(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_read_unlock(sb) up_read(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_write_lock(sb) down_write(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem)
++
++static inline void unionfs_double_lock_dentry(struct dentry *d1,
++ struct dentry *d2)
++{
++ if (d2 < d1) {
++ struct dentry *tmp = d1;
++ d1 = d2;
++ d2 = tmp;
++ }
++ unionfs_lock_dentry(d1);
++ unionfs_lock_dentry(d2);
++}
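++
++/*
++ * Locking the two dentries in ascending address order gives all callers
++ * one global lock ordering, which prevents an ABBA deadlock when two
++ * tasks lock the same pair concurrently.
++ */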
++
++extern int realloc_dentry_private_data(struct dentry *dentry);
++extern int new_dentry_private_data(struct dentry *dentry);
++extern void free_dentry_private_data(struct dentry *dentry);
++extern void update_bstart(struct dentry *dentry);
++
++/*
++ * EXTERNALS:
++ */
++
++/* replicates the directory structure up to given dentry in given branch */
++extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
++ int bindex);
++extern int make_dir_opaque(struct dentry *dir, int bindex);
++
++/* partial lookup */
++extern int unionfs_partial_lookup(struct dentry *dentry);
++
++/*
++ * Pass a unionfs dentry and an index, and it will try to create a whiteout
++ * in branch 'index'.
++ *
++ * On error, it will proceed to a branch to the left.
++ */
++extern int create_whiteout(struct dentry *dentry, int start);
++/* copies a file from dbstart to newbindex branch */
++extern int copyup_file(struct inode *dir, struct file *file, int bstart,
++ int newbindex, loff_t size);
++extern int copyup_named_file(struct inode *dir, struct file *file,
++ char *name, int bstart, int new_bindex,
++ loff_t len);
++/* copies a dentry from dbstart to newbindex branch */
++extern int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
++ int new_bindex, struct file **copyup_file,
++ loff_t len);
++
++extern int remove_whiteouts(struct dentry *dentry,
++ struct dentry *hidden_dentry, int bindex);
++
++extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
++ struct unionfs_dir_state *namelist);
++
++extern int unionfs_get_nlinks(struct inode *inode);
++
++/* Is this directory empty? Returns 0 if empty, -ENOTEMPTY if not. */
++extern int check_empty(struct dentry *dentry,
++ struct unionfs_dir_state **namelist);
++/* Delete whiteouts from this directory in branch bindex. */
++extern int delete_whiteouts(struct dentry *dentry, int bindex,
++ struct unionfs_dir_state *namelist);
++
++/* Re-lookup a hidden dentry. */
++extern int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex);
++
++extern void unionfs_reinterpose(struct dentry *this_dentry);
++extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
++
++/* Locking functions. */
++extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
++extern int unionfs_getlk(struct file *file, struct file_lock *fl);
++
++/* Common file operations. */
++extern int unionfs_file_revalidate(struct file *file, int willwrite);
++extern int unionfs_open(struct inode *inode, struct file *file);
++extern int unionfs_file_release(struct inode *inode, struct file *file);
++extern int unionfs_flush(struct file *file, fl_owner_t id);
++extern long unionfs_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg);
++
++/* Inode operations */
++extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry);
++extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
++extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
++
++extern int __unionfs_d_revalidate_chain(struct dentry *dentry,
++ struct nameidata *nd);
++
++/* The values for unionfs_interpose's flag. */
++#define INTERPOSE_DEFAULT 0
++#define INTERPOSE_LOOKUP 1
++#define INTERPOSE_REVAL 2
++#define INTERPOSE_REVAL_NEG 3
++#define INTERPOSE_PARTIAL 4
++
++extern int unionfs_interpose(struct dentry *this_dentry,
++ struct super_block *sb, int flag);
++
++#ifdef CONFIG_UNION_FS_XATTR
++/* Extended attribute functions. */
++extern void *unionfs_xattr_alloc(size_t size, size_t limit);
++extern void unionfs_xattr_free(void *ptr, size_t size);
++
++extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
++ void *value, size_t size);
++extern int unionfs_removexattr(struct dentry *dentry, const char *name);
++extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
++ size_t size);
++extern int unionfs_setxattr(struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags);
++#endif /* CONFIG_UNION_FS_XATTR */
++
++/* The root directory is unhashed, but isn't deleted. */
++static inline int d_deleted(struct dentry *d)
++{
++ return d_unhashed(d) && (d != d->d_sb->s_root);
++}
++
++struct dentry *unionfs_lookup_backend(struct dentry *dentry,
++ struct nameidata *nd, int lookupmode);
++
++/* for unionfs_permission: check if we should bypass an error to facilitate copyup */
++#define IS_COPYUP_ERR(err) ((err) == -EROFS)
++
++/* for unionfs_open: check if we need to copy up the file */
++#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
++#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
++
++static inline int branchperms(const struct super_block *sb, int index)
++{
++ BUG_ON(index < 0);
++
++ return UNIONFS_SB(sb)->data[index].branchperms;
++}
++
++static inline int set_branchperms(struct super_block *sb, int index, int perms)
++{
++ BUG_ON(index < 0);
++
++ UNIONFS_SB(sb)->data[index].branchperms = perms;
++
++ return perms;
++}
++
++/* Is this file on a read-only branch? */
++static inline int is_robranch_super(const struct super_block *sb, int index)
++{
++ int ret;
++
++ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
++ return ret;
++}
++
++/* Is this file on a read-only branch? */
++static inline int is_robranch_idx(const struct dentry *dentry, int index)
++{
++ int err = 0;
++
++ BUG_ON(index < 0);
++
++ if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ||
++ IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode))
++ err = -EROFS;
++ return err;
++}
++
++static inline int is_robranch(const struct dentry *dentry)
++{
++ int index;
++
++ index = UNIONFS_D(dentry)->bstart;
++ BUG_ON(index < 0);
++
++ return is_robranch_idx(dentry, index);
++}
++
++/* What we use for whiteouts. */
++#define UNIONFS_WHPFX ".wh."
++#define UNIONFS_WHLEN 4
++/*
++ * If a directory contains this file, then it is opaque.  We start the name
++ * with the .wh. prefix so that lookup masks it out like any other whiteout.
++ */
++#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
++#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
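++
++/*
++ * Example: the whiteout that masks a file named "foo" is ".wh.foo", and
++ * a directory is treated as opaque if it contains ".wh.__dir_opaque".
++ */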
++
++#ifndef DEFAULT_POLLMASK
++#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
++#endif
++
++/*
++ * EXTERNALS:
++ */
++extern char *alloc_whname(const char *name, int len);
++extern int check_branch(struct nameidata *nd);
++extern int __parse_branch_mode(const char *name);
++extern int parse_branch_mode(const char *name);
++
++/*
++ * These two functions are here because it is kind of daft to copy and paste
++ * the contents of the two functions to 32+ places in unionfs.
++ */
++static inline struct dentry *lock_parent(struct dentry *dentry)
++{
++ struct dentry *dir = dget(dentry->d_parent);
++
++ mutex_lock(&dir->d_inode->i_mutex);
++ return dir;
++}
++
++static inline void unlock_dir(struct dentry *dir)
++{
++ mutex_unlock(&dir->d_inode->i_mutex);
++ dput(dir);
++}
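++
++/*
++ * Typical use of the pair above (sketch mirroring unlink.c):
++ *
++ *	hidden_dir_dentry = lock_parent(hidden_dentry);
++ *	err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++ *	unlock_dir(hidden_dir_dentry);
++ *
++ * lock_parent() takes its own reference on the parent, which
++ * unlock_dir() drops again with dput().
++ */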
++
++static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
++ int bindex)
++{
++ BUG_ON(!dentry || bindex < 0);
++
++ return mntget(unionfs_lower_mnt_idx(dentry, bindex));
++}
++
++static inline void unionfs_mntput(struct dentry *dentry, int bindex)
++{
++ if (!dentry)
++ return;
++
++ BUG_ON(bindex < 0);
++
++ mntput(unionfs_lower_mnt_idx(dentry, bindex));
++}
++#endif /* not _UNION_H_ */
+diff -Nurb linux-2.6.22-570/fs/unionfs/unlink.c linux-2.6.22-591/fs/unionfs/unlink.c
+--- linux-2.6.22-570/fs/unionfs/unlink.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/unlink.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,176 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* unlink a file by creating a whiteout */
++static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
++{
++ struct dentry *hidden_dentry;
++ struct dentry *hidden_dir_dentry;
++ int bindex;
++ int err = 0;
++
++ if ((err = unionfs_partial_lookup(dentry)))
++ goto out;
++
++ bindex = dbstart(dentry);
++
++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++ if (!hidden_dentry)
++ goto out;
++
++ hidden_dir_dentry = lock_parent(hidden_dentry);
++
++ /* avoid destroying the hidden inode if the file is in use */
++ dget(hidden_dentry);
++ if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++ err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++ dput(hidden_dentry);
++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++ unlock_dir(hidden_dir_dentry);
++
++ if (err && !IS_COPYUP_ERR(err))
++ goto out;
++
++ if (err) {
++ if (dbstart(dentry) == 0)
++ goto out;
++
++ err = create_whiteout(dentry, dbstart(dentry) - 1);
++ } else if (dbopaque(dentry) != -1)
++ /* There is a hidden lower-priority file with the same name. */
++ err = create_whiteout(dentry, dbopaque(dentry));
++ else
++ err = create_whiteout(dentry, dbstart(dentry));
++
++out:
++ if (!err)
++ dentry->d_inode->i_nlink--;
++
++ /* We don't want to leave negative leftover dentries for revalidate. */
++ if (!err && (dbopaque(dentry) != -1))
++ update_bstart(dentry);
++
++ return err;
++}
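++
++/*
++ * Summary of the logic above: on a successful lower unlink we still
++ * create a whiteout, either at dbopaque (a lower-priority file of the
++ * same name is being masked) or at dbstart.  A copyup-class error
++ * (-EROFS) instead retries the whiteout one branch to the left of the
++ * read-only branch.
++ */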
++
++int unionfs_unlink(struct inode *dir, struct dentry *dentry)
++{
++ int err = 0;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ err = unionfs_unlink_whiteout(dir, dentry);
++ /* call d_drop so the system "forgets" about us */
++ if (!err)
++ d_drop(dentry);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
++ struct unionfs_dir_state *namelist)
++{
++ int err;
++ struct dentry *hidden_dentry;
++ struct dentry *hidden_dir_dentry = NULL;
++
++ /* Here we need to remove whiteout entries. */
++ err = delete_whiteouts(dentry, dbstart(dentry), namelist);
++ if (err)
++ goto out;
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ hidden_dir_dentry = lock_parent(hidden_dentry);
++
++ /* avoid destroying the hidden inode if the file is in use */
++ dget(hidden_dentry);
++ if (!(err = is_robranch(dentry)))
++ err = vfs_rmdir(hidden_dir_dentry->d_inode, hidden_dentry);
++ dput(hidden_dentry);
++
++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++ /* propagate number of hard-links */
++ dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
++
++out:
++ if (hidden_dir_dentry)
++ unlock_dir(hidden_dir_dentry);
++ return err;
++}
++
++int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
++{
++ int err = 0;
++ struct unionfs_dir_state *namelist = NULL;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ /* check if this unionfs directory is empty or not */
++ err = check_empty(dentry, &namelist);
++ if (err)
++ goto out;
++
++ err = unionfs_rmdir_first(dir, dentry, namelist);
++ /* create whiteout */
++ if (!err)
++ err = create_whiteout(dentry, dbstart(dentry));
++ else {
++ int new_err;
++
++ if (dbstart(dentry) == 0)
++ goto out;
++
++ /* exit if the error returned was NOT -EROFS */
++ if (!IS_COPYUP_ERR(err))
++ goto out;
++
++ new_err = create_whiteout(dentry, dbstart(dentry) - 1);
++ if (new_err != -EEXIST)
++ err = new_err;
++ }
++
++out:
++ /* call d_drop so the system "forgets" about us */
++ if (!err)
++ d_drop(dentry);
++
++ if (namelist)
++ free_rdstate(namelist);
++
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/xattr.c linux-2.6.22-591/fs/unionfs/xattr.c
+--- linux-2.6.22-570/fs/unionfs/xattr.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/xattr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,161 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* This is lifted from fs/xattr.c */
++void *unionfs_xattr_alloc(size_t size, size_t limit)
++{
++ void *ptr;
++
++ if (size > limit)
++ return ERR_PTR(-E2BIG);
++
++ if (!size) /* size request, no buffer is needed */
++ return NULL;
++ else if (size <= PAGE_SIZE)
++ ptr = kmalloc(size, GFP_KERNEL);
++ else
++ ptr = vmalloc(size);
++ if (!ptr)
++ return ERR_PTR(-ENOMEM);
++ return ptr;
++}
++
++void unionfs_xattr_free(void *ptr, size_t size)
++{
++ if (!size) /* size request, no buffer was needed */
++ return;
++ else if (size <= PAGE_SIZE)
++ kfree(ptr);
++ else
++ vfree(ptr);
++}
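++
++/*
++ * Rationale for the size split above: requests up to PAGE_SIZE use
++ * kmalloc(), which hands back physically contiguous memory; larger
++ * requests fall back to vmalloc(), which is slower but only needs
++ * virtually contiguous pages.  The free path mirrors the same test.
++ */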
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
++ size_t size)
++{
++ struct dentry *hidden_dentry = NULL;
++ int err = -EOPNOTSUPP;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ err = vfs_getxattr(hidden_dentry, (char *) name, value, size);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++int unionfs_setxattr(struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags)
++{
++ struct dentry *hidden_dentry = NULL;
++ int err = -EOPNOTSUPP;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ err = vfs_setxattr(hidden_dentry, (char *) name, (void *) value,
++ size, flags);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++int unionfs_removexattr(struct dentry *dentry, const char *name)
++{
++ struct dentry *hidden_dentry = NULL;
++ int err = -EOPNOTSUPP;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ err = vfs_removexattr(hidden_dentry, (char *) name);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++ struct dentry *hidden_dentry = NULL;
++ int err = -EOPNOTSUPP;
++ char *encoded_list = NULL;
++
++ unionfs_read_lock(dentry->d_sb);
++ unionfs_lock_dentry(dentry);
++
++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++ err = -ESTALE;
++ goto out;
++ }
++
++ hidden_dentry = unionfs_lower_dentry(dentry);
++
++ encoded_list = list;
++ err = vfs_listxattr(hidden_dentry, encoded_list, size);
++
++out:
++ unionfs_unlock_dentry(dentry);
++ unionfs_read_unlock(dentry->d_sb);
++ return err;
++}
+diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c
+--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c 2007-12-21 15:36:12.000000000 -0500
+@@ -246,18 +246,19 @@
+
+ #ifdef CONFIG_XFS_DMAPI
+ STATIC struct page *
+-xfs_vm_nopage(
+- struct vm_area_struct *area,
+- unsigned long address,
+- int *type)
++xfs_vm_fault(
++ struct vm_area_struct *vma,
++ struct fault_data *fdata)
+ {
+- struct inode *inode = area->vm_file->f_path.dentry->d_inode;
++ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ bhv_vnode_t *vp = vn_from_inode(inode);
+
+ ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
+- if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0))
++ if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) {
++ fdata->type = VM_FAULT_SIGBUS;
+ return NULL;
+- return filemap_nopage(area, address, type);
++ }
++ return filemap_fault(vma, fdata);
+ }
+ #endif /* CONFIG_XFS_DMAPI */
+
+@@ -343,6 +344,7 @@
+ struct vm_area_struct *vma)
+ {
+ vma->vm_ops = &xfs_file_vm_ops;
++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+
+ #ifdef CONFIG_XFS_DMAPI
+ if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
+@@ -501,14 +503,12 @@
+ };
+
+ static struct vm_operations_struct xfs_file_vm_ops = {
+- .nopage = filemap_nopage,
+- .populate = filemap_populate,
++ .fault = filemap_fault,
+ };
+
+ #ifdef CONFIG_XFS_DMAPI
+ static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
+- .nopage = xfs_vm_nopage,
+- .populate = filemap_populate,
++ .fault = xfs_vm_fault,
+ #ifdef HAVE_VMOP_MPROTECT
+ .mprotect = xfs_vm_mprotect,
+ #endif
+diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c
+--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:36:12.000000000 -0500
+@@ -570,6 +570,7 @@
+ bhv_vfs_sync_work_t *work, *n;
+ LIST_HEAD (tmp);
+
++ set_freezable();
+ timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
+ for (;;) {
+ timeleft = schedule_timeout_interruptible(timeleft);
+diff -Nurb linux-2.6.22-570/include/acpi/acmacros.h linux-2.6.22-591/include/acpi/acmacros.h
+--- linux-2.6.22-570/include/acpi/acmacros.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/acpi/acmacros.h 2007-12-21 15:36:12.000000000 -0500
+@@ -486,6 +486,8 @@
+ #define ACPI_FUNCTION_NAME(name)
+ #endif
+
++#ifdef DEBUG_FUNC_TRACE
++
+ #define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \
+ acpi_ut_trace(ACPI_DEBUG_PARAMETERS)
+ #define ACPI_FUNCTION_TRACE_PTR(a,b) ACPI_FUNCTION_NAME(a) \
+@@ -563,6 +565,27 @@
+
+ #endif /* ACPI_SIMPLE_RETURN_MACROS */
+
++#else /* !DEBUG_FUNC_TRACE */
++
++#define ACPI_FUNCTION_TRACE(a)
++#define ACPI_FUNCTION_TRACE_PTR(a,b)
++#define ACPI_FUNCTION_TRACE_U32(a,b)
++#define ACPI_FUNCTION_TRACE_STR(a,b)
++#define ACPI_FUNCTION_EXIT
++#define ACPI_FUNCTION_STATUS_EXIT(s)
++#define ACPI_FUNCTION_VALUE_EXIT(s)
++#define ACPI_FUNCTION_ENTRY()
++
++#define return_VOID return
++#define return_ACPI_STATUS(s) return(s)
++#define return_VALUE(s) return(s)
++#define return_UINT8(s) return(s)
++#define return_UINT32(s) return(s)
++#define return_PTR(s) return(s)
++
++#endif /* DEBUG_FUNC_TRACE */
++
+ /* Conditional execution */
+
+ #define ACPI_DEBUG_EXEC(a) a
+diff -Nurb linux-2.6.22-570/include/acpi/acoutput.h linux-2.6.22-591/include/acpi/acoutput.h
+--- linux-2.6.22-570/include/acpi/acoutput.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/acpi/acoutput.h 2007-12-21 15:36:12.000000000 -0500
+@@ -178,8 +178,8 @@
+
+ /* Defaults for debug_level, debug and normal */
+
+-#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT)
+-#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT)
++#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR)
++#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR)
+ #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
+
+ #endif /* __ACOUTPUT_H__ */
+diff -Nurb linux-2.6.22-570/include/acpi/platform/acenv.h linux-2.6.22-591/include/acpi/platform/acenv.h
+--- linux-2.6.22-570/include/acpi/platform/acenv.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/acpi/platform/acenv.h 2007-12-21 15:36:12.000000000 -0500
+@@ -136,7 +136,7 @@
+
+ /*! [Begin] no source code translation */
+
+-#if defined(__linux__)
++#if defined(_LINUX) || defined(__linux__)
+ #include "aclinux.h"
+
+ #elif defined(_AED_EFI)
+diff -Nurb linux-2.6.22-570/include/acpi/platform/aclinux.h linux-2.6.22-591/include/acpi/platform/aclinux.h
+--- linux-2.6.22-570/include/acpi/platform/aclinux.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/acpi/platform/aclinux.h 2007-12-21 15:36:12.000000000 -0500
+@@ -91,7 +91,10 @@
+ #define ACPI_USE_NATIVE_DIVIDE
+ #endif
+
++#ifndef __cdecl
+ #define __cdecl
++#endif
++
+ #define ACPI_FLUSH_CPU_CACHE()
+ #endif /* __KERNEL__ */
+
+diff -Nurb linux-2.6.22-570/include/acpi/processor.h linux-2.6.22-591/include/acpi/processor.h
+--- linux-2.6.22-570/include/acpi/processor.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/acpi/processor.h 2007-12-21 15:36:12.000000000 -0500
+@@ -21,6 +21,8 @@
+ #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */
+ #define ACPI_PSD_REV0_ENTRIES 5
+
++#define ACPI_TSD_REV0_REVISION 0 /* Support for _TSD as in ACPI 3.0 */
++#define ACPI_TSD_REV0_ENTRIES 5
+ /*
+ * Types of coordination defined in ACPI 3.0. Same macros can be used across
+ * P, C and T states
+@@ -125,17 +127,53 @@
+
+ /* Throttling Control */
+
++struct acpi_tsd_package {
++ acpi_integer num_entries;
++ acpi_integer revision;
++ acpi_integer domain;
++ acpi_integer coord_type;
++ acpi_integer num_processors;
++} __attribute__ ((packed));
++
++struct acpi_ptc_register {
++ u8 descriptor;
++ u16 length;
++ u8 space_id;
++ u8 bit_width;
++ u8 bit_offset;
++ u8 reserved;
++ u64 address;
++} __attribute__ ((packed));
++
++struct acpi_processor_tx_tss {
++ acpi_integer freqpercentage; /* percent of core CPU operating frequency */
++ acpi_integer power; /* milliWatts */
++ acpi_integer transition_latency; /* microseconds */
++ acpi_integer control; /* control value */
++ acpi_integer status; /* success indicator */
++};
+ struct acpi_processor_tx {
+ u16 power;
+ u16 performance;
+ };
+
++struct acpi_processor;
+ struct acpi_processor_throttling {
+- int state;
++ unsigned int state;
++ unsigned int platform_limit;
++ struct acpi_pct_register control_register;
++ struct acpi_pct_register status_register;
++ unsigned int state_count;
++ struct acpi_processor_tx_tss *states_tss;
++ struct acpi_tsd_package domain_info;
++ cpumask_t shared_cpu_map;
++ int (*acpi_processor_get_throttling) (struct acpi_processor * pr);
++ int (*acpi_processor_set_throttling) (struct acpi_processor * pr,
++ int state);
++
+ u32 address;
+ u8 duty_offset;
+ u8 duty_width;
+- int state_count;
+ struct acpi_processor_tx states[ACPI_PROCESSOR_MAX_THROTTLING];
+ };
+
+@@ -161,6 +199,7 @@
+ u8 bm_check:1;
+ u8 has_cst:1;
+ u8 power_setup_done:1;
++ u8 bm_rld_set:1;
+ };
+
+ struct acpi_processor {
+@@ -169,6 +208,9 @@
+ u32 id;
+ u32 pblk;
+ int performance_platform_limit;
++ int throttling_platform_limit;
++ /* 0 means all throttling states 0..n are available */
++
+ struct acpi_processor_flags flags;
+ struct acpi_processor_power power;
+ struct acpi_processor_performance *performance;
+@@ -270,7 +312,7 @@
+
+ /* in processor_throttling.c */
+ int acpi_processor_get_throttling_info(struct acpi_processor *pr);
+-int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
++extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
+ extern struct file_operations acpi_processor_throttling_fops;
+
+ /* in processor_idle.c */
+@@ -279,6 +321,9 @@
+ int acpi_processor_cst_has_changed(struct acpi_processor *pr);
+ int acpi_processor_power_exit(struct acpi_processor *pr,
+ struct acpi_device *device);
++
++extern struct cpuidle_driver acpi_idle_driver;
++void acpi_max_cstate_changed(void);
+ int acpi_processor_suspend(struct acpi_device * device, pm_message_t state);
+ int acpi_processor_resume(struct acpi_device * device);
+
+diff -Nurb linux-2.6.22-570/include/asm-alpha/page.h linux-2.6.22-591/include/asm-alpha/page.h
+--- linux-2.6.22-570/include/asm-alpha/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-alpha/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -17,7 +17,8 @@
+ extern void clear_page(void *page);
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vmaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ extern void copy_page(void * _to, void * _from);
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,544 @@
++/*
++ * Copyright(c) 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ADMA_H
++#define _ADMA_H
++#include <linux/types.h>
++#include <linux/io.h>
++#include <asm/hardware.h>
++#include <asm/hardware/iop_adma.h>
++
++#define ADMA_ACCR(chan) (chan->mmr_base + 0x0)
++#define ADMA_ACSR(chan) (chan->mmr_base + 0x4)
++#define ADMA_ADAR(chan) (chan->mmr_base + 0x8)
++#define ADMA_IIPCR(chan) (chan->mmr_base + 0x18)
++#define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c)
++#define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20)
++#define ADMA_ANDAR(chan) (chan->mmr_base + 0x24)
++#define ADMA_ADCR(chan) (chan->mmr_base + 0x28)
++#define ADMA_CARMD(chan) (chan->mmr_base + 0x2c)
++#define ADMA_ABCR(chan) (chan->mmr_base + 0x30)
++#define ADMA_DLADR(chan) (chan->mmr_base + 0x34)
++#define ADMA_DUADR(chan) (chan->mmr_base + 0x38)
++#define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3)))
++#define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3)))
++
++struct iop13xx_adma_src {
++ u32 src_addr;
++ union {
++ u32 upper_src_addr;
++ struct {
++ unsigned int pq_upper_src_addr:24;
++ unsigned int pq_dmlt:8;
++ };
++ };
++};
++
++struct iop13xx_adma_desc_ctrl {
++ unsigned int int_en:1;
++ unsigned int xfer_dir:2;
++ unsigned int src_select:4;
++ unsigned int zero_result:1;
++ unsigned int block_fill_en:1;
++ unsigned int crc_gen_en:1;
++ unsigned int crc_xfer_dis:1;
++ unsigned int crc_seed_fetch_dis:1;
++ unsigned int status_write_back_en:1;
++ unsigned int endian_swap_en:1;
++ unsigned int reserved0:2;
++ unsigned int pq_update_xfer_en:1;
++ unsigned int dual_xor_en:1;
++ unsigned int pq_xfer_en:1;
++ unsigned int p_xfer_dis:1;
++ unsigned int reserved1:10;
++ unsigned int relax_order_en:1;
++ unsigned int no_snoop_en:1;
++};
++
++struct iop13xx_adma_byte_count {
++ unsigned int byte_count:24;
++ unsigned int host_if:3;
++ unsigned int reserved:2;
++ unsigned int zero_result_err_q:1;
++ unsigned int zero_result_err:1;
++ unsigned int tx_complete:1;
++};
++
++struct iop13xx_adma_desc_hw {
++ u32 next_desc;
++ union {
++ u32 desc_ctrl;
++ struct iop13xx_adma_desc_ctrl desc_ctrl_field;
++ };
++ union {
++ u32 crc_addr;
++ u32 block_fill_data;
++ u32 q_dest_addr;
++ };
++ union {
++ u32 byte_count;
++ struct iop13xx_adma_byte_count byte_count_field;
++ };
++ union {
++ u32 dest_addr;
++ u32 p_dest_addr;
++ };
++ union {
++ u32 upper_dest_addr;
++ u32 pq_upper_dest_addr;
++ };
++ struct iop13xx_adma_src src[1];
++};
++
++struct iop13xx_adma_desc_dual_xor {
++ u32 next_desc;
++ u32 desc_ctrl;
++ u32 reserved;
++ u32 byte_count;
++ u32 h_dest_addr;
++ u32 h_upper_dest_addr;
++ u32 src0_addr;
++ u32 upper_src0_addr;
++ u32 src1_addr;
++ u32 upper_src1_addr;
++ u32 h_src_addr;
++ u32 h_upper_src_addr;
++ u32 d_src_addr;
++ u32 d_upper_src_addr;
++ u32 d_dest_addr;
++ u32 d_upper_dest_addr;
++};
++
++struct iop13xx_adma_desc_pq_update {
++ u32 next_desc;
++ u32 desc_ctrl;
++ u32 reserved;
++ u32 byte_count;
++ u32 p_dest_addr;
++ u32 p_upper_dest_addr;
++ u32 src0_addr;
++ u32 upper_src0_addr;
++ u32 src1_addr;
++ u32 upper_src1_addr;
++ u32 p_src_addr;
++ u32 p_upper_src_addr;
++ u32 q_src_addr;
++ struct {
++ unsigned int q_upper_src_addr:24;
++ unsigned int q_dmlt:8;
++ };
++ u32 q_dest_addr;
++ u32 q_upper_dest_addr;
++};
++
++static inline int iop_adma_get_max_xor(void)
++{
++ return 16;
++}
++
++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
++{
++ return __raw_readl(ADMA_ADAR(chan));
++}
++
++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
++ u32 next_desc_addr)
++{
++ __raw_writel(next_desc_addr, ADMA_ANDAR(chan));
++}
++
++#define ADMA_STATUS_BUSY (1 << 13)
++
++static inline char iop_chan_is_busy(struct iop_adma_chan *chan)
++{
++ if (__raw_readl(ADMA_ACSR(chan)) &
++ ADMA_STATUS_BUSY)
++ return 1;
++ else
++ return 0;
++}
++
++static inline int
++iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots)
++{
++ return 1;
++}
++#define iop_desc_is_aligned(x, y) 1
++
++static inline int
++iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
++{
++ *slots_per_op = 1;
++ return 1;
++}
++
++#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s)
++
++static inline int
++iop_chan_memset_slot_count(size_t len, int *slots_per_op)
++{
++ *slots_per_op = 1;
++ return 1;
++}
++
++static inline int
++iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
++{
++ int num_slots;
++ /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1
++ * (1 source => 8 bytes) (1 slot => 32 bytes)
++ */
++ num_slots = 1 + (((src_cnt - 1) << 3) >> 5);
++ if (((src_cnt - 1) << 3) & 0x1f)
++ num_slots++;
++
++ *slots_per_op = num_slots;
++
++ return num_slots;
++}
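++
++/*
++ * Worked example: src_cnt == 5 needs (5 - 1) * 8 == 32 bytes of source
++ * fields, exactly one extra 32-byte slot, so num_slots == 2.  For
++ * src_cnt == 2 the 8 bytes of remainder still round up to one extra
++ * slot, again giving num_slots == 2.
++ */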
++
++#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
++#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
++
++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ return hw_desc->dest_addr;
++}
++
++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ return hw_desc->byte_count_field.byte_count;
++}
++
++static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ int src_idx)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ return hw_desc->src[src_idx].src_addr;
++}
++
++static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ return hw_desc->desc_ctrl_field.src_select + 1;
++}
++
++static inline void
++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop13xx_adma_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++ hw_desc->crc_addr = 0;
++}
++
++static inline void
++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop13xx_adma_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++ u_desc_ctrl.field.block_fill_en = 1;
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++ hw_desc->crc_addr = 0;
++}
++
++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
++static inline void
++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop13xx_adma_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.src_select = src_cnt - 1;
++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++ hw_desc->crc_addr = 0;
++
++}
++#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i)
++
++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
++static inline int
++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop13xx_adma_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.src_select = src_cnt - 1;
++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++ u_desc_ctrl.field.zero_result = 1;
++ u_desc_ctrl.field.status_write_back_en = 1;
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++ hw_desc->crc_addr = 0;
++
++ return 1;
++}
++
++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ u32 byte_count)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ hw_desc->byte_count = byte_count;
++}
++
++static inline void
++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
++{
++ int slots_per_op = desc->slots_per_op;
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
++ int i = 0;
++
++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++ hw_desc->byte_count = len;
++ } else {
++ do {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ i += slots_per_op;
++ } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
++
++ if (len) {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iter->byte_count = len;
++ }
++ }
++}
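++
++/*
++ * The helper above spreads a zero-sum operation that exceeds the
++ * per-descriptor byte-count limit across consecutive hardware
++ * descriptors: every full group gets IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT
++ * bytes and the final group takes the remainder.
++ */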
++
++
++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ dma_addr_t addr)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ hw_desc->dest_addr = addr;
++ hw_desc->upper_dest_addr = 0;
++}
++
++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
++ dma_addr_t addr)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ hw_desc->src[0].src_addr = addr;
++ hw_desc->src[0].upper_src_addr = 0;
++}
++
++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
++ int src_idx, dma_addr_t addr)
++{
++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
++ int i = 0;
++
++ do {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iter->src[src_idx].src_addr = addr;
++ iter->src[src_idx].upper_src_addr = 0;
++ slot_cnt -= slots_per_op;
++ if (slot_cnt) {
++ i += slots_per_op;
++ addr += IOP_ADMA_XOR_MAX_BYTE_COUNT;
++ }
++ } while (slot_cnt);
++}
++
++static inline void
++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ iop_desc_init_memcpy(desc, 1);
++ iop_desc_set_byte_count(desc, chan, 0);
++ iop_desc_set_dest_addr(desc, chan, 0);
++ iop_desc_set_memcpy_src_addr(desc, 0);
++}
++
++#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
++
++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
++ u32 next_desc_addr)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ BUG_ON(hw_desc->next_desc);
++ hw_desc->next_desc = next_desc_addr;
++}
++
++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ return hw_desc->next_desc;
++}
++
++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ hw_desc->next_desc = 0;
++}
++
++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
++ u32 val)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ hw_desc->block_fill_data = val;
++}
++
++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
++{
++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++ struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
++ struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
++
++ BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
++
++ if (desc_ctrl.pq_xfer_en)
++ return byte_count.zero_result_err_q;
++ else
++ return byte_count.zero_result_err;
++}
++
++static inline void iop_chan_append(struct iop_adma_chan *chan)
++{
++ u32 adma_accr;
++
++ adma_accr = __raw_readl(ADMA_ACCR(chan));
++ adma_accr |= 0x2;
++ __raw_writel(adma_accr, ADMA_ACCR(chan));
++}
++
++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
++{
++ do { } while (0);
++}
++
++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
++{
++ return __raw_readl(ADMA_ACSR(chan));
++}
++
++static inline void iop_chan_disable(struct iop_adma_chan *chan)
++{
++ u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
++ adma_chan_ctrl &= ~0x1;
++ __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
++}
++
++static inline void iop_chan_enable(struct iop_adma_chan *chan)
++{
++ u32 adma_chan_ctrl;
++
++ adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
++ adma_chan_ctrl |= 0x1;
++ __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
++}
++
++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(ADMA_ACSR(chan));
++ status &= (1 << 12);
++ __raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(ADMA_ACSR(chan));
++ status &= (1 << 11);
++ __raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(ADMA_ACSR(chan));
++ status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3);
++ __raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline int
++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
++{
++ return test_bit(9, &status);
++}
++
++static inline int
++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return test_bit(5, &status);
++}
++
++static inline int
++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return test_bit(4, &status);
++}
++
++static inline int
++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return test_bit(3, &status);
++}
++
++static inline int
++iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++static inline int
++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++static inline int
++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++#endif /* _ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h 2007-12-21 15:36:12.000000000 -0500
+@@ -166,12 +166,22 @@
+ #define IOP13XX_INIT_I2C_1 (1 << 1)
+ #define IOP13XX_INIT_I2C_2 (1 << 2)
+
++/* ADMA selection flags */
++/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */
++#define IOP13XX_INIT_ADMA_DEFAULT (0)
++#define IOP13XX_INIT_ADMA_0 (1 << 0)
++#define IOP13XX_INIT_ADMA_1 (1 << 1)
++#define IOP13XX_INIT_ADMA_2 (1 << 2)
++
++/* Platform devices */
+ #define IQ81340_NUM_UART 2
+ #define IQ81340_NUM_I2C 3
+ #define IQ81340_NUM_PHYS_MAP_FLASH 1
+-#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\
+- IQ81340_NUM_I2C +\
+- IQ81340_NUM_PHYS_MAP_FLASH)
++#define IQ81340_NUM_ADMA 3
++#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \
++ IQ81340_NUM_I2C + \
++ IQ81340_NUM_PHYS_MAP_FLASH + \
++ IQ81340_NUM_ADMA)
+
+ /*========================== PMMR offsets for key registers ============*/
+ #define IOP13XX_ATU0_PMMR_OFFSET 0x00048000
+@@ -444,22 +454,6 @@
+ /*==============================ADMA UNITS===============================*/
+ #define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9))
+ #define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0)
+-#define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs))
+-
+-#define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0)
+-#define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4)
+-#define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8)
+-#define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18)
+-#define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c)
+-#define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20)
+-#define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24)
+-#define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28)
+-#define IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c)
+-#define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30)
+-#define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34)
+-#define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38)
+-#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3))
+-#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3))
+
+ /*==============================XSI BRIDGE===============================*/
+ #define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c)
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,5 @@
++#ifndef IOP32X_ADMA_H
++#define IOP32X_ADMA_H
++#include <asm/hardware/iop3xx-adma.h>
++#endif
++
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,5 @@
++#ifndef IOP33X_ADMA_H
++#define IOP33X_ADMA_H
++#include <asm/hardware/iop3xx-adma.h>
++#endif
++
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,891 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ADMA_H
++#define _ADMA_H
++#include <linux/types.h>
++#include <linux/io.h>
++#include <asm/hardware.h>
++#include <asm/hardware/iop_adma.h>
++
++/* Memory copy units */
++#define DMA_CCR(chan) (chan->mmr_base + 0x0)
++#define DMA_CSR(chan) (chan->mmr_base + 0x4)
++#define DMA_DAR(chan) (chan->mmr_base + 0xc)
++#define DMA_NDAR(chan) (chan->mmr_base + 0x10)
++#define DMA_PADR(chan) (chan->mmr_base + 0x14)
++#define DMA_PUADR(chan) (chan->mmr_base + 0x18)
++#define DMA_LADR(chan) (chan->mmr_base + 0x1c)
++#define DMA_BCR(chan) (chan->mmr_base + 0x20)
++#define DMA_DCR(chan) (chan->mmr_base + 0x24)
++
++/* Application accelerator unit */
++#define AAU_ACR(chan) (chan->mmr_base + 0x0)
++#define AAU_ASR(chan) (chan->mmr_base + 0x4)
++#define AAU_ADAR(chan) (chan->mmr_base + 0x8)
++#define AAU_ANDAR(chan) (chan->mmr_base + 0xc)
++#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2)))
++#define AAU_DAR(chan) (chan->mmr_base + 0x20)
++#define AAU_ABCR(chan) (chan->mmr_base + 0x24)
++#define AAU_ADCR(chan) (chan->mmr_base + 0x28)
++#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
++#define AAU_EDCR0_IDX 8
++#define AAU_EDCR1_IDX 17
++#define AAU_EDCR2_IDX 26
++
++#define DMA0_ID 0
++#define DMA1_ID 1
++#define AAU_ID 2
++
++struct iop3xx_aau_desc_ctrl {
++ unsigned int int_en:1;
++ unsigned int blk1_cmd_ctrl:3;
++ unsigned int blk2_cmd_ctrl:3;
++ unsigned int blk3_cmd_ctrl:3;
++ unsigned int blk4_cmd_ctrl:3;
++ unsigned int blk5_cmd_ctrl:3;
++ unsigned int blk6_cmd_ctrl:3;
++ unsigned int blk7_cmd_ctrl:3;
++ unsigned int blk8_cmd_ctrl:3;
++ unsigned int blk_ctrl:2;
++ unsigned int dual_xor_en:1;
++ unsigned int tx_complete:1;
++ unsigned int zero_result_err:1;
++ unsigned int zero_result_en:1;
++ unsigned int dest_write_en:1;
++};
++
++struct iop3xx_aau_e_desc_ctrl {
++ unsigned int reserved:1;
++ unsigned int blk1_cmd_ctrl:3;
++ unsigned int blk2_cmd_ctrl:3;
++ unsigned int blk3_cmd_ctrl:3;
++ unsigned int blk4_cmd_ctrl:3;
++ unsigned int blk5_cmd_ctrl:3;
++ unsigned int blk6_cmd_ctrl:3;
++ unsigned int blk7_cmd_ctrl:3;
++ unsigned int blk8_cmd_ctrl:3;
++ unsigned int reserved2:7;
++};
++
++struct iop3xx_dma_desc_ctrl {
++ unsigned int pci_transaction:4;
++ unsigned int int_en:1;
++ unsigned int dac_cycle_en:1;
++ unsigned int mem_to_mem_en:1;
++ unsigned int crc_data_tx_en:1;
++ unsigned int crc_gen_en:1;
++ unsigned int crc_seed_dis:1;
++ unsigned int reserved:21;
++ unsigned int crc_tx_complete:1;
++};
++
++struct iop3xx_desc_dma {
++ u32 next_desc;
++ union {
++ u32 pci_src_addr;
++ u32 pci_dest_addr;
++ u32 src_addr;
++ };
++ union {
++ u32 upper_pci_src_addr;
++ u32 upper_pci_dest_addr;
++ };
++ union {
++ u32 local_pci_src_addr;
++ u32 local_pci_dest_addr;
++ u32 dest_addr;
++ };
++ u32 byte_count;
++ union {
++ u32 desc_ctrl;
++ struct iop3xx_dma_desc_ctrl desc_ctrl_field;
++ };
++ u32 crc_addr;
++};
++
++struct iop3xx_desc_aau {
++ u32 next_desc;
++ u32 src[4];
++ u32 dest_addr;
++ u32 byte_count;
++ union {
++ u32 desc_ctrl;
++ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++ };
++ union {
++ u32 src_addr;
++ u32 e_desc_ctrl;
++ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
++ } src_edc[31];
++};
++
++struct iop3xx_aau_gfmr {
++ unsigned int gfmr1:8;
++ unsigned int gfmr2:8;
++ unsigned int gfmr3:8;
++ unsigned int gfmr4:8;
++};
++
++struct iop3xx_desc_pq_xor {
++ u32 next_desc;
++ u32 src[3];
++ union {
++ u32 data_mult1;
++ struct iop3xx_aau_gfmr data_mult1_field;
++ };
++ u32 dest_addr;
++ u32 byte_count;
++ union {
++ u32 desc_ctrl;
++ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++ };
++ union {
++ u32 src_addr;
++ u32 e_desc_ctrl;
++ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
++ u32 data_multiplier;
++ struct iop3xx_aau_gfmr data_mult_field;
++ u32 reserved;
++ } src_edc_gfmr[19];
++};
++
++struct iop3xx_desc_dual_xor {
++ u32 next_desc;
++ u32 src0_addr;
++ u32 src1_addr;
++ u32 h_src_addr;
++ u32 d_src_addr;
++ u32 h_dest_addr;
++ u32 byte_count;
++ union {
++ u32 desc_ctrl;
++ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++ };
++ u32 d_dest_addr;
++};
++
++union iop3xx_desc {
++ struct iop3xx_desc_aau *aau;
++ struct iop3xx_desc_dma *dma;
++ struct iop3xx_desc_pq_xor *pq_xor;
++ struct iop3xx_desc_dual_xor *dual_xor;
++ void *ptr;
++};
++
++static inline int iop_adma_get_max_xor(void)
++{
++ return 32;
++}
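++/* The AAU exposes 32 source address registers (SAR1-SAR32), hence the
++ * fixed maximum of 32 xor sources reported above.
++ */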
++
++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
++{
++ int id = chan->device->id;
++
++ switch (id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return __raw_readl(DMA_DAR(chan));
++ case AAU_ID:
++ return __raw_readl(AAU_ADAR(chan));
++ default:
++ BUG();
++ }
++ return 0;
++}
++
++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
++ u32 next_desc_addr)
++{
++ int id = chan->device->id;
++
++ switch (id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ __raw_writel(next_desc_addr, DMA_NDAR(chan));
++ break;
++ case AAU_ID:
++ __raw_writel(next_desc_addr, AAU_ANDAR(chan));
++ break;
++ }
++}
++
++#define IOP_ADMA_STATUS_BUSY (1 << 10)
++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
++#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
++#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
++
++static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(DMA_CSR(chan));
++ return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
++}
++
++static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
++ int num_slots)
++{
++ /* num_slots will only ever be 1, 2, 4, or 8 */
++ return (desc->idx & (num_slots - 1)) ? 0 : 1;
++}
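++/* e.g. with num_slots == 4, idx 8 is aligned (8 & 3 == 0) while idx 6 is
++ * not (6 & 3 == 2); the test relies on num_slots being a power of two.
++ */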
++
++/* to do: support large (i.e. > hw max) buffer sizes */
++static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
++{
++ *slots_per_op = 1;
++ return 1;
++}
++
++/* to do: support large (i.e. > hw max) buffer sizes */
++static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
++{
++ *slots_per_op = 1;
++ return 1;
++}
++
++static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
++ int *slots_per_op)
++{
++ static const int slot_count_table[] = { 0,
++ 1, 1, 1, 1, /* 01 - 04 */
++ 2, 2, 2, 2, /* 05 - 08 */
++ 4, 4, 4, 4, /* 09 - 12 */
++ 4, 4, 4, 4, /* 13 - 16 */
++ 8, 8, 8, 8, /* 17 - 20 */
++ 8, 8, 8, 8, /* 21 - 24 */
++ 8, 8, 8, 8, /* 25 - 28 */
++ 8, 8, 8, 8, /* 29 - 32 */
++ };
++ *slots_per_op = slot_count_table[src_cnt];
++ return *slots_per_op;
++}
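++/* Larger source counts need a longer hardware descriptor and therefore
++ * more 32-byte slots: up to 4 sources fit in one slot, 5-8 in two,
++ * 9-16 in four, and 17-32 in eight, as encoded in the table above.
++ */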
++
++static inline int
++iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
++{
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return iop_chan_memcpy_slot_count(0, slots_per_op);
++ case AAU_ID:
++ return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
++ default:
++ BUG();
++ }
++ return 0;
++}
++
++static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
++ int *slots_per_op)
++{
++ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
++
++ if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
++ return slot_cnt;
++
++ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
++ while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
++ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
++ slot_cnt += *slots_per_op;
++ }
++
++ if (len)
++ slot_cnt += *slots_per_op;
++
++ return slot_cnt;
++}
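++/* A minimal worked example: a 40 MB xor with at most four sources
++ * (*slots_per_op == 1) needs three descriptors -- 16 MB + 16 MB + 8 MB --
++ * so slot_cnt comes back as 3.
++ */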
++
++/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
++ * descriptors
++ */
++static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
++ int *slots_per_op)
++{
++ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
++
++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
++ return slot_cnt;
++
++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ slot_cnt += *slots_per_op;
++ }
++
++ if (len)
++ slot_cnt += *slots_per_op;
++
++ return slot_cnt;
++}
++
++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return hw_desc.dma->dest_addr;
++ case AAU_ID:
++ return hw_desc.aau->dest_addr;
++ default:
++ BUG();
++ }
++ return 0;
++}
++
++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return hw_desc.dma->byte_count;
++ case AAU_ID:
++ return hw_desc.aau->byte_count;
++ default:
++ BUG();
++ }
++ return 0;
++}
++
++/* translate the src_idx to a descriptor word index */
++static inline int __desc_idx(int src_idx)
++{
++ static const int desc_idx_table[] = { 0, 0, 0, 0,
++ 0, 1, 2, 3,
++ 5, 6, 7, 8,
++ 9, 10, 11, 12,
++ 14, 15, 16, 17,
++ 18, 19, 20, 21,
++ 23, 24, 25, 26,
++ 27, 28, 29, 30,
++ };
++
++ return desc_idx_table[src_idx];
++}
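++/* Example from the table above: sources 0-3 live in iop3xx_desc_aau.src[],
++ * source 4 maps to src_edc[0] and source 8 to src_edc[5], skipping the
++ * words reserved for extended control.
++ */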
++
++static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ int src_idx)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return hw_desc.dma->src_addr;
++ case AAU_ID:
++ break;
++ default:
++ BUG();
++ }
++
++ if (src_idx < 4)
++ return hw_desc.aau->src[src_idx];
++ else
++ return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
++}
++
++static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
++ int src_idx, dma_addr_t addr)
++{
++ if (src_idx < 4)
++ hw_desc->src[src_idx] = addr;
++ else
++ hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
++}
++
++static inline void
++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
++{
++ struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop3xx_dma_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.mem_to_mem_en = 1;
++ u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++ hw_desc->upper_pci_src_addr = 0;
++ hw_desc->crc_addr = 0;
++}
++
++static inline void
++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop3xx_aau_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
++ u_desc_ctrl.field.dest_write_en = 1;
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++}
++
++static inline u32
++iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
++{
++ int i, shift;
++ u32 edcr;
++ union {
++ u32 value;
++ struct iop3xx_aau_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ switch (src_cnt) {
++ case 25 ... 32:
++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++ edcr = 0;
++ shift = 1;
++ for (i = 24; i < src_cnt; i++) {
++ edcr |= (1 << shift);
++ shift += 3;
++ }
++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
++ src_cnt = 24;
++ /* fall through */
++ case 17 ... 24:
++ if (!u_desc_ctrl.field.blk_ctrl) {
++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++ }
++ edcr = 0;
++ shift = 1;
++ for (i = 16; i < src_cnt; i++) {
++ edcr |= (1 << shift);
++ shift += 3;
++ }
++ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
++ src_cnt = 16;
++ /* fall through */
++ case 9 ... 16:
++ if (!u_desc_ctrl.field.blk_ctrl)
++ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
++ edcr = 0;
++ shift = 1;
++ for (i = 8; i < src_cnt; i++) {
++ edcr |= (1 << shift);
++ shift += 3;
++ }
++ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
++ src_cnt = 8;
++ /* fall through */
++ case 2 ... 8:
++ shift = 1;
++ for (i = 0; i < src_cnt; i++) {
++ u_desc_ctrl.value |= (1 << shift);
++ shift += 3;
++ }
++
++ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
++ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
++ }
++
++ u_desc_ctrl.field.dest_write_en = 1;
++ u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++
++ return u_desc_ctrl.value;
++}
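++/* Each source block has a 3-bit blk_cmd_ctrl field starting at bit 1 of
++ * the (extended) descriptor control word, hence the "shift += 3" loops
++ * above; writing 0x1 into a field selects what is presumably the XOR
++ * command for that block.
++ */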
++
++static inline void
++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++ iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
++}
++
++/* return the number of operations */
++static inline int
++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++ struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
++ union {
++ u32 value;
++ struct iop3xx_aau_desc_ctrl field;
++ } u_desc_ctrl;
++ int i, j;
++
++ hw_desc = desc->hw_desc;
++
++ for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
++ i += slots_per_op, j++) {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
++ u_desc_ctrl.field.dest_write_en = 0;
++ u_desc_ctrl.field.zero_result_en = 1;
++ u_desc_ctrl.field.int_en = int_en;
++ iter->desc_ctrl = u_desc_ctrl.value;
++
++ /* for the subsequent descriptors preserve the store queue
++ * and chain them together
++ */
++ if (i) {
++ prev_hw_desc =
++ iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
++ prev_hw_desc->next_desc = (u32) (desc->phys + (i << 5));
++ }
++ }
++
++ return j;
++}
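++/* Each descriptor slot is 32 bytes, so desc->phys + (i << 5) is the bus
++ * address of the i-th slot -- the same stride iop_hw_desc_slot_idx()
++ * uses for the virtual address.
++ */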
++
++static inline void
++iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++ union {
++ u32 value;
++ struct iop3xx_aau_desc_ctrl field;
++ } u_desc_ctrl;
++
++ u_desc_ctrl.value = 0;
++ switch (src_cnt) {
++ case 25 ... 32:
++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++ /* fall through */
++ case 17 ... 24:
++ if (!u_desc_ctrl.field.blk_ctrl) {
++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++ }
++ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
++ /* fall through */
++ case 9 ... 16:
++ if (!u_desc_ctrl.field.blk_ctrl)
++ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
++ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
++ /* fall through */
++ case 1 ... 8:
++ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
++ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
++ }
++
++ u_desc_ctrl.field.dest_write_en = 0;
++ u_desc_ctrl.field.int_en = int_en;
++ hw_desc->desc_ctrl = u_desc_ctrl.value;
++}
++
++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ u32 byte_count)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ hw_desc.dma->byte_count = byte_count;
++ break;
++ case AAU_ID:
++ hw_desc.aau->byte_count = byte_count;
++ break;
++ default:
++ BUG();
++ }
++}
++
++static inline void
++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ iop_desc_init_memcpy(desc, 1);
++ hw_desc.dma->byte_count = 0;
++ hw_desc.dma->dest_addr = 0;
++ hw_desc.dma->src_addr = 0;
++ break;
++ case AAU_ID:
++ iop_desc_init_null_xor(desc, 2, 1);
++ hw_desc.aau->byte_count = 0;
++ hw_desc.aau->dest_addr = 0;
++ hw_desc.aau->src[0] = 0;
++ hw_desc.aau->src[1] = 0;
++ break;
++ default:
++ BUG();
++ }
++}
++
++static inline void
++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
++{
++ int slots_per_op = desc->slots_per_op;
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++ int i = 0;
++
++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++ hw_desc->byte_count = len;
++ } else {
++ do {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++ i += slots_per_op;
++ } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
++
++ if (len) {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iter->byte_count = len;
++ }
++ }
++}
++
++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
++ struct iop_adma_chan *chan,
++ dma_addr_t addr)
++{
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ hw_desc.dma->dest_addr = addr;
++ break;
++ case AAU_ID:
++ hw_desc.aau->dest_addr = addr;
++ break;
++ default:
++ BUG();
++ }
++}
++
++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
++ dma_addr_t addr)
++{
++ struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
++ hw_desc->src_addr = addr;
++}
++
++static inline void
++iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
++ dma_addr_t addr)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++ int i;
++
++ for (i = 0; (slot_cnt -= slots_per_op) >= 0;
++ i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
++ }
++}
++
++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
++ int src_idx, dma_addr_t addr)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++ int i;
++
++ for (i = 0; (slot_cnt -= slots_per_op) >= 0;
++ i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
++ iter = iop_hw_desc_slot_idx(hw_desc, i);
++ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
++ }
++}
++
++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
++ u32 next_desc_addr)
++{
++ /* hw_desc->next_desc is the same location for all channels */
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++ BUG_ON(hw_desc.dma->next_desc);
++ hw_desc.dma->next_desc = next_desc_addr;
++}
++
++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
++{
++ /* hw_desc->next_desc is the same location for all channels */
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++ return hw_desc.dma->next_desc;
++}
++
++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
++{
++ /* hw_desc->next_desc is the same location for all channels */
++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++ hw_desc.dma->next_desc = 0;
++}
++
++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
++ u32 val)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++ hw_desc->src[0] = val;
++}
++
++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
++{
++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++ struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
++
++ BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
++ return desc_ctrl.zero_result_err;
++}
++
++static inline void iop_chan_append(struct iop_adma_chan *chan)
++{
++ u32 dma_chan_ctrl;
++ /* work around dropped interrupts on iop3xx */
++ mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3));
++
++ dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++ dma_chan_ctrl |= 0x2;
++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
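++/* Bit 0 of CCR is the channel-enable bit (see iop_chan_enable/disable
++ * below); bit 1, set by iop_chan_append() above, is taken to be the
++ * chain-resume command.
++ */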
++
++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
++{
++ if (!busy)
++ del_timer(&chan->cleanup_watchdog);
++}
++
++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
++{
++ return __raw_readl(DMA_CSR(chan));
++}
++
++static inline void iop_chan_disable(struct iop_adma_chan *chan)
++{
++ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++ dma_chan_ctrl &= ~1;
++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
++
++static inline void iop_chan_enable(struct iop_adma_chan *chan)
++{
++ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++
++ dma_chan_ctrl |= 1;
++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
++
++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(DMA_CSR(chan));
++ status &= (1 << 9);
++ __raw_writel(status, DMA_CSR(chan));
++}
++
++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(DMA_CSR(chan));
++ status &= (1 << 8);
++ __raw_writel(status, DMA_CSR(chan));
++}
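++/* The two helpers above acknowledge an interrupt by writing its status
++ * bit back to CSR: bit 9 for end-of-transfer and bit 8 for end-of-chain,
++ * as suggested by the eot/eoc names.
++ */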
++
++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
++{
++ u32 status = __raw_readl(DMA_CSR(chan));
++
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
++ break;
++ case AAU_ID:
++ status &= (1 << 5);
++ break;
++ default:
++ BUG();
++ }
++
++ __raw_writel(status, DMA_CSR(chan));
++}
++
++static inline int
++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++static inline int
++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++static inline int
++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return 0;
++}
++
++static inline int
++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ return test_bit(5, &status);
++}
++
++static inline int
++iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return test_bit(2, &status);
++ default:
++ return 0;
++ }
++}
++
++static inline int
++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return test_bit(3, &status);
++ default:
++ return 0;
++ }
++}
++
++static inline int
++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
++{
++ switch (chan->device->id) {
++ case DMA0_ID:
++ case DMA1_ID:
++ return test_bit(1, &status);
++ default:
++ return 0;
++ }
++}
++#endif /* _ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h 2007-12-21 15:36:12.000000000 -0500
+@@ -144,24 +144,9 @@
+ #define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380)
+
+ /* DMA Controller */
+-#define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400)
+-#define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404)
+-#define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c)
+-#define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410)
+-#define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414)
+-#define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418)
+-#define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c)
+-#define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420)
+-#define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424)
+-#define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440)
+-#define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444)
+-#define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c)
+-#define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450)
+-#define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454)
+-#define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458)
+-#define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c)
+-#define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460)
+-#define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464)
++#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \
++ (0x400 + (chan << 6)))
++#define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27)
+
+ /* Peripheral bus interface */
+ #define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680)
+@@ -210,48 +195,8 @@
+ #define IOP_TMR_RATIO_1_1 0x00
+
+ /* Application accelerator unit */
+-#define IOP3XX_AAU_ACR (volatile u32 *)IOP3XX_REG_ADDR(0x0800)
+-#define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804)
+-#define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808)
+-#define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c)
+-#define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810)
+-#define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814)
+-#define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818)
+-#define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c)
+-#define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820)
+-#define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824)
+-#define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828)
+-#define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c)
+-#define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830)
+-#define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834)
+-#define IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838)
+-#define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c)
+-#define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840)
+-#define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844)
+-#define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848)
+-#define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c)
+-#define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850)
+-#define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854)
+-#define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858)
+-#define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c)
+-#define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860)
+-#define IOP3XX_AAU_SAR17 (volatile u32 *)IOP3XX_REG_ADDR(0x0864)
+-#define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868)
+-#define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c)
+-#define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870)
+-#define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874)
+-#define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878)
+-#define IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c)
+-#define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880)
+-#define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884)
+-#define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888)
+-#define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c)
+-#define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890)
+-#define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894)
+-#define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898)
+-#define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c)
+-#define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0)
+-#define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4)
++#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800)
++#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7)
+
+ /* I2C bus interface unit */
+ #define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680)
+@@ -329,6 +274,9 @@
+ asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val));
+ }
+
++extern struct platform_device iop3xx_dma_0_channel;
++extern struct platform_device iop3xx_dma_1_channel;
++extern struct platform_device iop3xx_aau_channel;
+ extern struct platform_device iop3xx_i2c0_device;
+ extern struct platform_device iop3xx_i2c1_device;
+
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,120 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef IOP_ADMA_H
++#define IOP_ADMA_H
++#include <linux/types.h>
++#include <linux/dmaengine.h>
++#include <linux/interrupt.h>
++
++#define IOP_ADMA_SLOT_SIZE 32
++#define IOP_ADMA_THRESHOLD 4
++
++/**
++ * struct iop_adma_device - internal representation of an ADMA device
++ * @pdev: Platform device
++ * @id: HW ADMA Device selector
++ * @dma_desc_pool: base of DMA descriptor region (DMA address)
++ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
++ * @common: embedded struct dma_device
++ */
++struct iop_adma_device {
++ struct platform_device *pdev;
++ int id;
++ dma_addr_t dma_desc_pool;
++ void *dma_desc_pool_virt;
++ struct dma_device common;
++};
++
++/**
++ * struct iop_adma_chan - internal representation of an ADMA channel
++ * @pending: allows batching of hardware operations
++ * @completed_cookie: identifier for the most recently completed operation
++ * @lock: serializes enqueue/dequeue operations to the slot pool
++ * @mmr_base: memory mapped register base
++ * @chain: device chain view of the descriptors
++ * @device: parent device
++ * @common: common dmaengine channel object members
++ * @last_used: placeholder for allocation to continue from where it left off
++ * @all_slots: complete domain of slots usable by the channel
++ * @cleanup_watchdog: workaround for missed interrupts on iop3xx
++ * @slots_allocated: records the actual size of the descriptor slot pool
++ * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
++ */
++struct iop_adma_chan {
++ int pending;
++ dma_cookie_t completed_cookie;
++ spinlock_t lock; /* protects the descriptor slot pool */
++ void __iomem *mmr_base;
++ struct list_head chain;
++ struct iop_adma_device *device;
++ struct dma_chan common;
++ struct iop_adma_desc_slot *last_used;
++ struct list_head all_slots;
++ struct timer_list cleanup_watchdog;
++ int slots_allocated;
++ struct tasklet_struct irq_tasklet;
++};
++
++/**
++ * struct iop_adma_desc_slot - IOP-ADMA software descriptor
++ * @slot_node: node on the iop_adma_chan.all_slots list
++ * @chain_node: node on the iop_adma_chan.chain list
++ * @hw_desc: virtual address of the hardware descriptor chain
++ * @phys: hardware address of the hardware descriptor chain
++ * @group_head: first operation in a transaction
++ * @slot_cnt: total slots used in a transaction (group of operations)
++ * @slots_per_op: number of slots per operation
++ * @idx: pool index
++ * @unmap_src_cnt: number of xor sources
++ * @unmap_len: transaction bytecount
++ * @async_tx: support for the async_tx api
++ * @group_list: list of slots that make up a multi-descriptor transaction,
++ * for example for transfer lengths larger than the supported hw max
++ * @xor_check_result: result of zero sum
++ * @crc32_result: result of the crc calculation
++ */
++struct iop_adma_desc_slot {
++ struct list_head slot_node;
++ struct list_head chain_node;
++ void *hw_desc;
++ dma_addr_t phys;
++ struct iop_adma_desc_slot *group_head;
++ u16 slot_cnt;
++ u16 slots_per_op;
++ u16 idx;
++ u16 unmap_src_cnt;
++ size_t unmap_len;
++ struct dma_async_tx_descriptor async_tx;
++ struct list_head group_list;
++ union {
++ u32 *xor_check_result;
++ u32 *crc32_result;
++ };
++};
++
++struct iop_adma_platform_data {
++ int hw_id;
++ dma_cap_mask_t cap_mask;
++ size_t pool_size;
++};
++
++#define to_iop_sw_desc(addr_hw_desc) \
++ container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
++#define iop_hw_desc_slot_idx(hw_desc, idx) \
++ ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
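++/* iop_hw_desc_slot_idx(): descriptor slots are IOP_ADMA_SLOT_SIZE (32)
++ * bytes apart, so (idx) << 5 is the byte offset of slot idx. */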
++#endif /* IOP_ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/kgdb.h linux-2.6.22-591/include/asm-arm/kgdb.h
+--- linux-2.6.22-570/include/asm-arm/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-arm/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,103 @@
++/*
++ * include/asm-arm/kgdb.h
++ *
++ * ARM KGDB support
++ *
++ * Author: Deepak Saxena <dsaxena@mvista.com>
++ *
++ * Copyright (C) 2002 MontaVista Software Inc.
++ *
++ */
++
++#ifndef __ASM_KGDB_H__
++#define __ASM_KGDB_H__
++
++#include <asm/ptrace.h>
++#include <asm-generic/kgdb.h>
++
++
++/*
++ * GDB assumes that we're a user process being debugged, so
++ * it will send us an SWI command to write into memory as the
++ * debug trap. When an SWI occurs, the next instruction addr is
++ * placed into R14_svc before jumping to the vector trap.
++ * This doesn't work for kernel debugging as we are already in SVC
++ * mode; the trap would clobber the kernel's LR, which is a bad
++ * thing.
++ *
++ * By doing this as an undefined instruction trap, we force a mode
++ * switch from SVC to UND mode, allowing us to save full kernel state.
++ *
++ * We also define a KGDB_COMPILED_BREAK which can be used to compile
++ * in breakpoints. This is important for things like sysrq-G and for
++ * the initial breakpoint from trap_init().
++ *
++ * Note to ARM HW designers: Add real trap support like SH && PPC to
++ * make our lives much much simpler. :)
++ */
++#define BREAK_INSTR_SIZE 4
++#define GDB_BREAKINST 0xef9f0001
++#define KGDB_BREAKINST 0xe7ffdefe
++#define KGDB_COMPILED_BREAK 0xe7ffdeff
++#define CACHE_FLUSH_IS_SAFE 1
++
++#ifndef __ASSEMBLY__
++
++#define BREAKPOINT() asm(".word 0xe7ffdeff")
++
++
++extern void kgdb_handle_bus_error(void);
++extern int kgdb_fault_expected;
++#endif /* !__ASSEMBLY__ */
++
++/*
++ * From Kevin Hilman:
++ *
++ * gdb is expecting the following register layout.
++ *
++ * r0-r15: 1 long word each
++ * f0-f7: unused, 3 long words each !!
++ * fps: unused, 1 long word
++ * cpsr: 1 long word
++ *
++ * Even though f0-f7 and fps are not used, they need to be
++ * present in the registers sent for correct processing in
++ * the host-side gdb.
++ *
++ * In particular, it is crucial that CPSR is in the right place,
++ * otherwise gdb will not be able to correctly interpret stepping over
++ * conditional branches.
++ */
++#define _GP_REGS 16
++#define _FP_REGS 8
++#define _EXTRA_REGS 2
++#define GDB_MAX_REGS (_GP_REGS + (_FP_REGS * 3) + _EXTRA_REGS)
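++/* i.e. 16 GP regs + 8 * 3 words of (unused) FP regs + fps + cpsr = 42 words */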
++
++#define KGDB_MAX_NO_CPUS 1
++#define BUFMAX 400
++#define NUMREGBYTES (GDB_MAX_REGS << 2)
++#define NUMCRITREGBYTES (32 << 2)
++
++#define _R0 0
++#define _R1 1
++#define _R2 2
++#define _R3 3
++#define _R4 4
++#define _R5 5
++#define _R6 6
++#define _R7 7
++#define _R8 8
++#define _R9 9
++#define _R10 10
++#define _FP 11
++#define _IP 12
++#define _SP 13
++#define _LR 14
++#define _PC 15
++#define _CPSR (GDB_MAX_REGS - 1)
++
++/* So that we can denote the end of a frame for tracing, in the simple
++ * case. */
++#define CFI_END_FRAME(func) __CFI_END_FRAME(_PC,_SP,func)
++
++#endif /* __ASM_KGDB_H__ */
+diff -Nurb linux-2.6.22-570/include/asm-arm/system.h linux-2.6.22-591/include/asm-arm/system.h
+--- linux-2.6.22-570/include/asm-arm/system.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-arm/system.h 2007-12-21 15:36:12.000000000 -0500
+@@ -360,6 +360,41 @@
+ extern void disable_hlt(void);
+ extern void enable_hlt(void);
+
++#ifndef CONFIG_SMP
++/*
++ * Atomic compare and exchange.
++ */
++#define __HAVE_ARCH_CMPXCHG 1
++
++extern unsigned long wrong_size_cmpxchg(volatile void *ptr);
++
++static inline unsigned long __cmpxchg(volatile void *ptr,
++ unsigned long old,
++ unsigned long new, int size)
++{
++ unsigned long flags, prev;
++ volatile unsigned long *p = ptr;
++
++ if (size == 4) {
++ local_irq_save(flags);
++ if ((prev = *p) == old)
++ *p = new;
++ local_irq_restore(flags);
++ return prev;
++ } else
++ return wrong_size_cmpxchg(ptr);
++}
++
++#define cmpxchg(ptr,o,n) \
++({ \
++ __typeof__(*(ptr)) _o_ = (o); \
++ __typeof__(*(ptr)) _n_ = (n); \
++ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
++ (unsigned long)_n_, sizeof(*(ptr))); \
++})
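++/*
++ * Illustrative (hypothetical) use: atomically claim a flag word on UP,
++ * e.g. "if (cmpxchg(&flag, 0, 1) == 0) { ...flag was clear, now set... }".
++ * Only 4-byte objects are supported; wrong_size_cmpxchg() is deliberately
++ * left undefined so that other sizes fail at link time.
++ */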
++
++#endif
++
+ #endif /* __ASSEMBLY__ */
+
+ #define arch_align_stack(x) (x)
+diff -Nurb linux-2.6.22-570/include/asm-cris/page.h linux-2.6.22-591/include/asm-cris/page.h
+--- linux-2.6.22-570/include/asm-cris/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-cris/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -20,7 +20,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-generic/kgdb.h linux-2.6.22-591/include/asm-generic/kgdb.h
+--- linux-2.6.22-570/include/asm-generic/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-generic/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,100 @@
++/*
++ * include/asm-generic/kgdb.h
++ *
++ * This provides the assembly level information so that KGDB can provide
++ * a GDB that has been patched with enough information to know to stop
++ * trying to unwind the function.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2005 (c) MontaVista Software, Inc.
++ * 2006 (c) Embedded Alley Solutions, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#ifndef __ASM_GENERIC_KGDB_H__
++#define __ASM_GENERIC_KGDB_H__
++
++#ifdef CONFIG_X86
++/**
++ * kgdb_skipexception - Bail out of KGDB when we've been triggered.
++ * @exception: Exception vector number
++ * @regs: Current &struct pt_regs.
++ *
++ * On some architectures we need to skip a breakpoint exception when
++ * it occurs after a breakpoint has been removed.
++ */
++int kgdb_skipexception(int exception, struct pt_regs *regs);
++#else
++#define kgdb_skipexception(exception, regs) 0
++#endif
++
++#if defined(CONFIG_X86)
++/**
++ * kgdb_post_master_code - Save error vector/code numbers.
++ * @regs: Original pt_regs.
++ * @e_vector: Original error vector.
++ * @err_code: Original error code.
++ *
++ * This is needed on architectures which support SMP and KGDB.
++ * This function is called after all the slave cpus have been put
++ * into a known spin state and the master CPU has control over KGDB.
++ */
++extern void kgdb_post_master_code(struct pt_regs *regs, int e_vector,
++ int err_code);
++
++/**
++ * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
++ * @regs: Current &struct pt_regs.
++ *
++ * This function will be called if the particular architecture must
++ * disable hardware debugging while it is processing gdb packets or
++ * handling exception.
++ */
++extern void kgdb_disable_hw_debug(struct pt_regs *regs);
++#else
++#define kgdb_disable_hw_debug(regs) do { } while (0)
++#define kgdb_post_master_code(regs, v, c) do { } while (0)
++#endif
++
++#ifdef CONFIG_KGDB_ARCH_HAS_SHADOW_INFO
++/**
++ * kgdb_shadowinfo - Get shadowed information on @threadid.
++ * @regs: The &struct pt_regs of the current process.
++ * @buffer: A buffer of %BUFMAX size.
++ * @threadid: The thread id of the shadowed process to get information on.
++ */
++extern void kgdb_shadowinfo(struct pt_regs *regs, char *buffer,
++ unsigned threadid);
++
++/**
++ * kgdb_get_shadow_thread - Get the shadowed &task_struct of @threadid.
++ * @regs: The &struct pt_regs of the current thread.
++ * @threadid: The thread id of the shadowed process to get information on.
++ *
++ * RETURN:
++ * This returns a pointer to the &struct task_struct of the shadowed
++ * thread, @threadid.
++ */
++extern struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs,
++ int threadid);
++
++/**
++ * kgdb_shadow_regs - Return the shadowed registers of @threadid.
++ * @regs: The &struct pt_regs of the current thread.
++ * @threadid: The thread id we want the &struct pt_regs for.
++ *
++ * RETURN:
++ * A pointer to the &struct pt_regs of the shadowed thread @threadid.
++ */
++extern struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid);
++#else
++#define kgdb_shadowinfo(regs, buf, threadid) do { } while (0)
++#define kgdb_get_shadow_thread(regs, threadid) NULL
++#define kgdb_shadow_regs(regs, threadid) NULL
++#endif
++
++#endif /* __ASM_GENERIC_KGDB_H__ */
+diff -Nurb linux-2.6.22-570/include/asm-generic/vmlinux.lds.h linux-2.6.22-591/include/asm-generic/vmlinux.lds.h
+--- linux-2.6.22-570/include/asm-generic/vmlinux.lds.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-generic/vmlinux.lds.h 2007-12-21 15:36:12.000000000 -0500
+@@ -127,6 +127,8 @@
+ *(__ksymtab_strings) \
+ } \
+ \
++ EH_FRAME \
++ \
+ /* Built-in module parameters. */ \
+ __param : AT(ADDR(__param) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___param) = .; \
+@@ -177,6 +179,26 @@
+ *(.kprobes.text) \
+ VMLINUX_SYMBOL(__kprobes_text_end) = .;
+
++#ifdef CONFIG_STACK_UNWIND
++#define EH_FRAME \
++ /* Unwind data binary search table */ \
++ . = ALIGN(8); \
++ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \
++ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \
++ *(.eh_frame_hdr) \
++ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \
++ } \
++ /* Unwind data */ \
++ . = ALIGN(8); \
++ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \
++ VMLINUX_SYMBOL(__start_unwind) = .; \
++ *(.eh_frame) \
++ VMLINUX_SYMBOL(__end_unwind) = .; \
++ }
++#else
++#define EH_FRAME
++#endif
++
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to
+ the beginning of the section so we begin them at 0. */
+diff -Nurb linux-2.6.22-570/include/asm-h8300/page.h linux-2.6.22-591/include/asm-h8300/page.h
+--- linux-2.6.22-570/include/asm-h8300/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-h8300/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -22,7 +22,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-i386/kdebug.h linux-2.6.22-591/include/asm-i386/kdebug.h
+--- linux-2.6.22-570/include/asm-i386/kdebug.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-i386/kdebug.h 2007-12-21 15:36:12.000000000 -0500
+@@ -28,6 +28,7 @@
+ DIE_CALL,
+ DIE_NMI_IPI,
+ DIE_PAGE_FAULT,
++ DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-i386/kgdb.h linux-2.6.22-591/include/asm-i386/kgdb.h
+--- linux-2.6.22-570/include/asm-i386/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-i386/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,51 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++#include <asm-generic/kgdb.h>
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++/************************************************************************/
++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers */
++/* at least NUMREGBYTES*2 are needed for register packets */
++/* Longer buffer is needed to list all threads */
++#define BUFMAX 1024
++
++/* Number of bytes of registers. */
++#define NUMREGBYTES 64
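++/* (16 four-byte registers; see enum regnames below) */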
++/* Number of bytes of registers we need to save for a setjmp/longjmp. */
++#define NUMCRITREGBYTES 24
++
++/*
++ * Note that this register image is in a different order than
++ * the register image that Linux produces at interrupt time.
++ *
++ * Linux's register image is defined by struct pt_regs in ptrace.h.
++ * Just why GDB uses a different order is a historical mystery.
++ */
++enum regnames { _EAX, /* 0 */
++ _ECX, /* 1 */
++ _EDX, /* 2 */
++ _EBX, /* 3 */
++ _ESP, /* 4 */
++ _EBP, /* 5 */
++ _ESI, /* 6 */
++ _EDI, /* 7 */
++ _PC, /* 8 also known as eip */
++ _PS, /* 9 also known as eflags */
++ _CS, /* 10 */
++ _SS, /* 11 */
++ _DS, /* 12 */
++ _ES, /* 13 */
++ _FS, /* 14 */
++ _GS /* 15 */
++};
++
++#define BREAKPOINT() asm(" int $3");
++#define BREAK_INSTR_SIZE 1
++#define CACHE_FLUSH_IS_SAFE 1
++#endif /* _ASM_KGDB_H_ */
++#endif /* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-i386/page.h linux-2.6.22-591/include/asm-i386/page.h
+--- linux-2.6.22-570/include/asm-i386/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-i386/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -34,7 +34,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-i386/unistd.h linux-2.6.22-591/include/asm-i386/unistd.h
+--- linux-2.6.22-570/include/asm-i386/unistd.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-i386/unistd.h 2007-12-21 15:36:12.000000000 -0500
+@@ -329,10 +329,13 @@
+ #define __NR_signalfd 321
+ #define __NR_timerfd 322
+ #define __NR_eventfd 323
++#define __NR_revokeat 324
++#define __NR_frevoke 325
++#define __NR_fallocate 326
+
+ #ifdef __KERNEL__
+
+-#define NR_syscalls 324
++#define NR_syscalls 327
+
+ #define __ARCH_WANT_IPC_PARSE_VERSION
+ #define __ARCH_WANT_OLD_READDIR
+diff -Nurb linux-2.6.22-570/include/asm-i386/unwind.h linux-2.6.22-591/include/asm-i386/unwind.h
+--- linux-2.6.22-570/include/asm-i386/unwind.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-i386/unwind.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,6 +1,95 @@
+ #ifndef _ASM_I386_UNWIND_H
+ #define _ASM_I386_UNWIND_H
+
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/fixmap.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++
++struct unwind_frame_info
++{
++ struct pt_regs regs;
++ struct task_struct *task;
++ unsigned call_frame:1;
++};
++
++#define UNW_PC(frame) (frame)->regs.eip
++#define UNW_SP(frame) (frame)->regs.esp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.ebp
++#define FRAME_RETADDR_OFFSET 4
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0)
++#define STACK_TOP(tsk) ((tsk)->thread.esp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0)
++#endif
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(eax), \
++ PTREGS_INFO(ecx), \
++ PTREGS_INFO(edx), \
++ PTREGS_INFO(ebx), \
++ PTREGS_INFO(esp), \
++ PTREGS_INFO(ebp), \
++ PTREGS_INFO(esi), \
++ PTREGS_INFO(edi), \
++ PTREGS_INFO(eip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++ ((raItem).where == Memory && \
++ !((raItem).value * (dataAlign) + 4))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++ /*const*/ struct pt_regs *regs)
++{
++ if (user_mode_vm(regs))
++ info->regs = *regs;
++ else {
++ memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
++ info->regs.esp = (unsigned long)&regs->esp;
++ info->regs.xss = __KERNEL_DS;
++ }
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.eip = info->task->thread.eip;
++ info->regs.xcs = __KERNEL_CS;
++ __get_user(info->regs.ebp, (long *)info->task->thread.esp);
++ info->regs.esp = info->task->thread.esp;
++ info->regs.xss = __KERNEL_DS;
++ info->regs.xds = __USER_DS;
++ info->regs.xes = __USER_DS;
++ info->regs.xfs = __KERNEL_PERCPU;
++}
++
++extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
++{
++ return user_mode_vm(&info->regs)
++ || info->regs.eip < PAGE_OFFSET
++ || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
++ && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
++ || info->regs.esp < PAGE_OFFSET;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0)
+ #define UNW_SP(frame) ((void)(frame), 0)
+ #define UNW_FP(frame) ((void)(frame), 0)
+@@ -10,4 +99,6 @@
+ return 0;
+ }
+
++#endif
++
+ #endif /* _ASM_I386_UNWIND_H */
+diff -Nurb linux-2.6.22-570/include/asm-ia64/kdebug.h linux-2.6.22-591/include/asm-ia64/kdebug.h
+--- linux-2.6.22-570/include/asm-ia64/kdebug.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ia64/kdebug.h 2007-12-21 15:36:12.000000000 -0500
+@@ -69,6 +69,7 @@
+ DIE_KDEBUG_LEAVE,
+ DIE_KDUMP_ENTER,
+ DIE_KDUMP_LEAVE,
++ DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-ia64/kgdb.h linux-2.6.22-591/include/asm-ia64/kgdb.h
+--- linux-2.6.22-570/include/asm-ia64/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-ia64/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,37 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++#include <linux/threads.h>
++#include <asm-generic/kgdb.h>
++
++/************************************************************************/
++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers */
++/* at least NUMREGBYTES*2 are needed for register packets */
++/* Longer buffer is needed to list all threads */
++#define BUFMAX 1024
++
++/* Number of bytes of registers. We set this to 0 so that certain GDB
++ * packets will fail, forcing the use of others, which are more friendly
++ * on ia64. */
++#define NUMREGBYTES 0
++
++#define NUMCRITREGBYTES (70*8)
++#define JMP_REGS_ALIGNMENT __attribute__ ((aligned (16)))
++
++#define BREAKNUM 0x00003333300LL
++#define KGDBBREAKNUM 0x6665UL
++#define BREAKPOINT() asm volatile ("break.m 0x6665")
++#define BREAK_INSTR_SIZE 16
++#define CACHE_FLUSH_IS_SAFE 1
++
++struct pt_regs;
++extern volatile int kgdb_hwbreak_sstep[NR_CPUS];
++extern void smp_send_nmi_allbutself(void);
++extern void kgdb_wait_ipi(struct pt_regs *);
++#endif /* _ASM_KGDB_H_ */
++#endif /* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-ia64/page.h linux-2.6.22-591/include/asm-ia64/page.h
+--- linux-2.6.22-570/include/asm-ia64/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ia64/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -87,9 +87,10 @@
+ } while (0)
+
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) \
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+ ({ \
+- struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
++ struct page *page = alloc_page_vma( \
++ GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr); \
+ if (page) \
+ flush_dcache_page(page); \
+ page; \
+diff -Nurb linux-2.6.22-570/include/asm-ia64/processor.h linux-2.6.22-591/include/asm-ia64/processor.h
+--- linux-2.6.22-570/include/asm-ia64/processor.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ia64/processor.h 2007-12-21 15:36:12.000000000 -0500
+@@ -295,9 +295,9 @@
+ regs->ar_bspstore = current->thread.rbs_bot; \
+ regs->ar_fpsr = FPSR_DEFAULT; \
+ regs->loadrs = 0; \
+- regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \
++ regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \
+ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \
+- if (unlikely(!current->mm->dumpable)) { \
++ if (unlikely(!get_dumpable(current->mm))) { \
+ /* \
+ * Zap scratch regs to avoid leaking bits between processes with different \
+ * uid/privileges. \
+diff -Nurb linux-2.6.22-570/include/asm-m32r/page.h linux-2.6.22-591/include/asm-m32r/page.h
+--- linux-2.6.22-570/include/asm-m32r/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-m32r/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -15,7 +15,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-m68knommu/page.h linux-2.6.22-591/include/asm-m68knommu/page.h
+--- linux-2.6.22-570/include/asm-m68knommu/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-m68knommu/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -22,7 +22,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-32.h linux-2.6.22-591/include/asm-mips/asmmacro-32.h
+--- linux-2.6.22-570/include/asm-mips/asmmacro-32.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-mips/asmmacro-32.h 2007-12-21 15:36:12.000000000 -0500
+@@ -11,6 +11,28 @@
+ #include <asm/regdef.h>
+ #include <asm/fpregdef.h>
+ #include <asm/mipsregs.h>
++#include <asm/gdb-stub.h>
++
++ .macro fpu_save_double_kgdb stack status tmp1=t0
++ cfc1 \tmp1, fcr31
++ sdc1 $f0, GDB_FR_FPR0(\stack)
++ sdc1 $f2, GDB_FR_FPR2(\stack)
++ sdc1 $f4, GDB_FR_FPR4(\stack)
++ sdc1 $f6, GDB_FR_FPR6(\stack)
++ sdc1 $f8, GDB_FR_FPR8(\stack)
++ sdc1 $f10, GDB_FR_FPR10(\stack)
++ sdc1 $f12, GDB_FR_FPR12(\stack)
++ sdc1 $f14, GDB_FR_FPR14(\stack)
++ sdc1 $f16, GDB_FR_FPR16(\stack)
++ sdc1 $f18, GDB_FR_FPR18(\stack)
++ sdc1 $f20, GDB_FR_FPR20(\stack)
++ sdc1 $f22, GDB_FR_FPR22(\stack)
++ sdc1 $f24, GDB_FR_FPR24(\stack)
++ sdc1 $f26, GDB_FR_FPR26(\stack)
++ sdc1 $f28, GDB_FR_FPR28(\stack)
++ sdc1 $f30, GDB_FR_FPR30(\stack)
++ sw \tmp1, GDB_FR_FSR(\stack)
++ .endm
+
+ .macro fpu_save_double thread status tmp1=t0
+ cfc1 \tmp1, fcr31
+@@ -91,6 +113,27 @@
+ ctc1 \tmp, fcr31
+ .endm
+
++ .macro fpu_restore_double_kgdb stack status tmp=t0
++ lw \tmp, GDB_FR_FSR(\stack)
++ ldc1 $f0, GDB_FR_FPR0(\stack)
++ ldc1 $f2, GDB_FR_FPR2(\stack)
++ ldc1 $f4, GDB_FR_FPR4(\stack)
++ ldc1 $f6, GDB_FR_FPR6(\stack)
++ ldc1 $f8, GDB_FR_FPR8(\stack)
++ ldc1 $f10, GDB_FR_FPR10(\stack)
++ ldc1 $f12, GDB_FR_FPR12(\stack)
++ ldc1 $f14, GDB_FR_FPR14(\stack)
++ ldc1 $f16, GDB_FR_FPR16(\stack)
++ ldc1 $f18, GDB_FR_FPR18(\stack)
++ ldc1 $f20, GDB_FR_FPR20(\stack)
++ ldc1 $f22, GDB_FR_FPR22(\stack)
++ ldc1 $f24, GDB_FR_FPR24(\stack)
++ ldc1 $f26, GDB_FR_FPR26(\stack)
++ ldc1 $f28, GDB_FR_FPR28(\stack)
++ ldc1 $f30, GDB_FR_FPR30(\stack)
++ ctc1 \tmp, fcr31
++ .endm
++
+ .macro fpu_restore_single thread tmp=t0
+ lw \tmp, THREAD_FCR31(\thread)
+ lwc1 $f0, THREAD_FPR0(\thread)
+diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-64.h linux-2.6.22-591/include/asm-mips/asmmacro-64.h
+--- linux-2.6.22-570/include/asm-mips/asmmacro-64.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-mips/asmmacro-64.h 2007-12-21 15:36:12.000000000 -0500
+@@ -12,6 +12,7 @@
+ #include <asm/regdef.h>
+ #include <asm/fpregdef.h>
+ #include <asm/mipsregs.h>
++#include <asm/gdb-stub.h>
+
+ .macro fpu_save_16even thread tmp=t0
+ cfc1 \tmp, fcr31
+@@ -53,6 +54,46 @@
+ sdc1 $f31, THREAD_FPR31(\thread)
+ .endm
+
++ .macro fpu_save_16odd_kgdb stack
++ sdc1 $f1, GDB_FR_FPR1(\stack)
++ sdc1 $f3, GDB_FR_FPR3(\stack)
++ sdc1 $f5, GDB_FR_FPR5(\stack)
++ sdc1 $f7, GDB_FR_FPR7(\stack)
++ sdc1 $f9, GDB_FR_FPR9(\stack)
++ sdc1 $f11, GDB_FR_FPR11(\stack)
++ sdc1 $f13, GDB_FR_FPR13(\stack)
++ sdc1 $f15, GDB_FR_FPR15(\stack)
++ sdc1 $f17, GDB_FR_FPR17(\stack)
++ sdc1 $f19, GDB_FR_FPR19(\stack)
++ sdc1 $f21, GDB_FR_FPR21(\stack)
++ sdc1 $f23, GDB_FR_FPR23(\stack)
++ sdc1 $f25, GDB_FR_FPR25(\stack)
++ sdc1 $f27, GDB_FR_FPR27(\stack)
++ sdc1 $f29, GDB_FR_FPR29(\stack)
++ sdc1 $f31, GDB_FR_FPR31(\stack)
++ .endm
++
++ .macro fpu_save_16even_kgdb stack tmp=t0
++ cfc1 \tmp, fcr31
++ sdc1 $f0, GDB_FR_FPR0(\stack)
++ sdc1 $f2, GDB_FR_FPR2(\stack)
++ sdc1 $f4, GDB_FR_FPR4(\stack)
++ sdc1 $f6, GDB_FR_FPR6(\stack)
++ sdc1 $f8, GDB_FR_FPR8(\stack)
++ sdc1 $f10, GDB_FR_FPR10(\stack)
++ sdc1 $f12, GDB_FR_FPR12(\stack)
++ sdc1 $f14, GDB_FR_FPR14(\stack)
++ sdc1 $f16, GDB_FR_FPR16(\stack)
++ sdc1 $f18, GDB_FR_FPR18(\stack)
++ sdc1 $f20, GDB_FR_FPR20(\stack)
++ sdc1 $f22, GDB_FR_FPR22(\stack)
++ sdc1 $f24, GDB_FR_FPR24(\stack)
++ sdc1 $f26, GDB_FR_FPR26(\stack)
++ sdc1 $f28, GDB_FR_FPR28(\stack)
++ sdc1 $f30, GDB_FR_FPR30(\stack)
++ sw \tmp, GDB_FR_FSR(\stack)
++ .endm
++
+ .macro fpu_save_double thread status tmp
+ sll \tmp, \status, 5
+ bgez \tmp, 2f
+@@ -61,6 +102,15 @@
+ fpu_save_16even \thread \tmp
+ .endm
+
++ .macro fpu_save_double_kgdb stack status tmp
++ sll \tmp, \status, 5
++ bgez \tmp, 2f
++ nop
++ fpu_save_16odd_kgdb \stack
++2:
++ fpu_save_16even_kgdb \stack \tmp
++ .endm
++
+ .macro fpu_restore_16even thread tmp=t0
+ lw \tmp, THREAD_FCR31(\thread)
+ ldc1 $f0, THREAD_FPR0(\thread)
+@@ -101,6 +151,46 @@
+ ldc1 $f31, THREAD_FPR31(\thread)
+ .endm
+
++ .macro fpu_restore_16even_kgdb stack tmp=t0
++ lw \tmp, GDB_FR_FSR(\stack)
++ ldc1 $f0, GDB_FR_FPR0(\stack)
++ ldc1 $f2, GDB_FR_FPR2(\stack)
++ ldc1 $f4, GDB_FR_FPR4(\stack)
++ ldc1 $f6, GDB_FR_FPR6(\stack)
++ ldc1 $f8, GDB_FR_FPR8(\stack)
++ ldc1 $f10, GDB_FR_FPR10(\stack)
++ ldc1 $f12, GDB_FR_FPR12(\stack)
++ ldc1 $f14, GDB_FR_FPR14(\stack)
++ ldc1 $f16, GDB_FR_FPR16(\stack)
++ ldc1 $f18, GDB_FR_FPR18(\stack)
++ ldc1 $f20, GDB_FR_FPR20(\stack)
++ ldc1 $f22, GDB_FR_FPR22(\stack)
++ ldc1 $f24, GDB_FR_FPR24(\stack)
++ ldc1 $f26, GDB_FR_FPR26(\stack)
++ ldc1 $f28, GDB_FR_FPR28(\stack)
++ ldc1 $f30, GDB_FR_FPR30(\stack)
++ ctc1 \tmp, fcr31
++ .endm
++
++ .macro fpu_restore_16odd_kgdb stack
++ ldc1 $f1, GDB_FR_FPR1(\stack)
++ ldc1 $f3, GDB_FR_FPR3(\stack)
++ ldc1 $f5, GDB_FR_FPR5(\stack)
++ ldc1 $f7, GDB_FR_FPR7(\stack)
++ ldc1 $f9, GDB_FR_FPR9(\stack)
++ ldc1 $f11, GDB_FR_FPR11(\stack)
++ ldc1 $f13, GDB_FR_FPR13(\stack)
++ ldc1 $f15, GDB_FR_FPR15(\stack)
++ ldc1 $f17, GDB_FR_FPR17(\stack)
++ ldc1 $f19, GDB_FR_FPR19(\stack)
++ ldc1 $f21, GDB_FR_FPR21(\stack)
++ ldc1 $f23, GDB_FR_FPR23(\stack)
++ ldc1 $f25, GDB_FR_FPR25(\stack)
++ ldc1 $f27, GDB_FR_FPR27(\stack)
++ ldc1 $f29, GDB_FR_FPR29(\stack)
++ ldc1 $f31, GDB_FR_FPR31(\stack)
++ .endm
++
+ .macro fpu_restore_double thread status tmp
+ sll \tmp, \status, 5
+ bgez \tmp, 1f # 16 register mode?
+@@ -109,6 +199,15 @@
+ 1: fpu_restore_16even \thread \tmp
+ .endm
+
++ .macro fpu_restore_double_kgdb stack status tmp
++ sll \tmp, \status, 5
++ bgez \tmp, 1f # 16 register mode?
++ nop
++
++ fpu_restore_16odd_kgdb \stack
++1: fpu_restore_16even_kgdb \stack \tmp
++ .endm
++
+ .macro cpu_save_nonscratch thread
+ LONG_S s0, THREAD_REG16(\thread)
+ LONG_S s1, THREAD_REG17(\thread)
+diff -Nurb linux-2.6.22-570/include/asm-mips/kdebug.h linux-2.6.22-591/include/asm-mips/kdebug.h
+--- linux-2.6.22-570/include/asm-mips/kdebug.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-mips/kdebug.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1 +1,30 @@
+-#include <asm-generic/kdebug.h>
++/*
++ *
++ * Copyright (C) 2004 MontaVista Software Inc.
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2 of the License, or (at your
++ * option) any later version.
++ *
++ */
++#ifndef _MIPS_KDEBUG_H
++#define _MIPS_KDEBUG_H
++
++#include <linux/notifier.h>
++
++struct pt_regs;
++
++extern struct atomic_notifier_head mips_die_head;
++
++enum die_val {
++ DIE_OOPS = 1,
++ DIE_PANIC,
++ DIE_DIE,
++ DIE_KERNELDEBUG,
++ DIE_TRAP,
++ DIE_PAGE_FAULT,
++};
++
++#endif /* _MIPS_KDEBUG_H */
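
A minimal sketch of how a client would hook the notifier chain declared above
(the handler name is hypothetical; mips_die_head and the die_val values come
from this hunk, the notifier API is the standard kernel one):

    #include <linux/kernel.h>
    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int sample_die_handler(struct notifier_block *self,
                                  unsigned long val, void *data)
    {
            if (val == DIE_OOPS)
                    printk(KERN_INFO "sample: oops notification received\n");
            return NOTIFY_DONE;
    }

    static struct notifier_block sample_die_nb = {
            .notifier_call = sample_die_handler,
    };

    /* e.g. from an init function:
     *   atomic_notifier_chain_register(&mips_die_head, &sample_die_nb);
     */
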
+diff -Nurb linux-2.6.22-570/include/asm-mips/kgdb.h linux-2.6.22-591/include/asm-mips/kgdb.h
+--- linux-2.6.22-570/include/asm-mips/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-mips/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,41 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++#include <asm/sgidefs.h>
++#include <asm-generic/kgdb.h>
++
++#ifndef __ASSEMBLY__
++#if (_MIPS_ISA == _MIPS_ISA_MIPS1) || (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS32)
++
++typedef u32 gdb_reg_t;
++
++#elif (_MIPS_ISA == _MIPS_ISA_MIPS3) || (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
++
++#ifdef CONFIG_32BIT
++typedef u32 gdb_reg_t;
++#else /* !CONFIG_32BIT */
++typedef u64 gdb_reg_t;
++#endif
++#else
++#error "Need to set typedef for gdb_reg_t"
++#endif /* _MIPS_ISA */
++
++#define BUFMAX 2048
++#define NUMREGBYTES (90*sizeof(gdb_reg_t))
++#define NUMCRITREGBYTES (12*sizeof(gdb_reg_t))
++#define BREAK_INSTR_SIZE 4
++#define BREAKPOINT() __asm__ __volatile__( \
++ ".globl breakinst\n\t" \
++ ".set\tnoreorder\n\t" \
++ "nop\n" \
++ "breakinst:\tbreak\n\t" \
++ "nop\n\t" \
++ ".set\treorder")
++#define CACHE_FLUSH_IS_SAFE 0
++
++extern int kgdb_early_setup;
++
++#endif /* !__ASSEMBLY__ */
++#endif /* _ASM_KGDB_H_ */
++#endif /* __KERNEL__ */
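
For reference, a hedged sketch of how the constants above combine in practice
(illustrative only; the real consumers are in the generic kgdb core):

    #include <asm/kgdb.h>

    /* NUMREGBYTES sizes the register image: 90 gdb_reg_t slots. */
    static gdb_reg_t regs_image[NUMREGBYTES / sizeof(gdb_reg_t)];

    static void example_force_break(void)
    {
            /* Emits a 'break' at the global label breakinst, wrapped in
             * nops with reordering disabled, as defined above. */
            BREAKPOINT();
    }
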
+diff -Nurb linux-2.6.22-570/include/asm-mips/ptrace.h linux-2.6.22-591/include/asm-mips/ptrace.h
+--- linux-2.6.22-570/include/asm-mips/ptrace.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-mips/ptrace.h 2007-12-21 15:36:12.000000000 -0500
+@@ -28,7 +28,7 @@
+ * system call/exception. As usual the registers k0/k1 aren't being saved.
+ */
+ struct pt_regs {
+-#ifdef CONFIG_32BIT
++#if defined(CONFIG_32BIT) || defined(CONFIG_KGDB)
+ /* Pad bytes for argument save space on the stack. */
+ unsigned long pad0[6];
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/cputable.h linux-2.6.22-591/include/asm-powerpc/cputable.h
+--- linux-2.6.22-570/include/asm-powerpc/cputable.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/cputable.h 2007-12-21 15:36:12.000000000 -0500
+@@ -111,7 +111,7 @@
+ /* CPU kernel features */
+
+ /* Retain the 32b definitions all use bottom half of word */
+-#define CPU_FTR_SPLIT_ID_CACHE ASM_CONST(0x0000000000000001)
++#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000000000000001)
+ #define CPU_FTR_L2CR ASM_CONST(0x0000000000000002)
+ #define CPU_FTR_SPEC7450 ASM_CONST(0x0000000000000004)
+ #define CPU_FTR_ALTIVEC ASM_CONST(0x0000000000000008)
+@@ -135,6 +135,7 @@
+ #define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000)
+ #define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000)
+ #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x0000000000800000)
++#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x0000000001000000)
+
+ /*
+ * Add the 64-bit processor unique features in the top half of the word;
+@@ -154,7 +155,6 @@
+ #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000)
+ #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000)
+ #define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000)
+-#define CPU_FTR_COHERENT_ICACHE LONG_ASM_CONST(0x0000020000000000)
+ #define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000)
+ #define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000)
+ #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000)
+@@ -206,164 +206,163 @@
+ !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \
+ !defined(CONFIG_BOOKE))
+
+-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE)
+-#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE | \
++ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
++#define CPU_FTRS_603 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_604 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE | \
+ CPU_FTR_PPC_LE)
+-#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_740 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_740 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750CL (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750CL (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_NO_DPM | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_NO_DPM | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750GX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750GX (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7400 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447A (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7448 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_PPC_LE)
+-#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+-#define CPU_FTRS_G2_LE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_G2_LE (CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS)
+-#define CPU_FTRS_E300 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_COMMON)
+-#define CPU_FTRS_E300C2 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \
+ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
+-#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | \
+ CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE)
+-#define CPU_FTRS_8XX (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB)
+-#define CPU_FTRS_40X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+- CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_44X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+- CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E500 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+- CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E500_2 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_8XX (CPU_FTR_USE_TB)
++#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
++ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
++#define CPU_FTRS_E500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_E500_2 (CPU_FTR_USE_TB | \
+ CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN)
+ #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
+
+ /* 64-bit CPUs */
+-#define CPU_FTRS_POWER3 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_RS64 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \
+ CPU_FTR_MMCRA | CPU_FTR_CTRL)
+-#define CPU_FTRS_POWER4 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_MMCRA)
+-#define CPU_FTRS_PPC970 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA)
+-#define CPU_FTRS_POWER5 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ CPU_FTR_PURR)
+-#define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR)
+-#define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_CELL (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG)
+-#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
+ CPU_FTR_PURR | CPU_FTR_REAL_LE)
+-#define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | \
+ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
+
+ #ifdef __powerpc64__
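
The net effect of the hunk above is an inverted sense for the cache bits: a
split, non-coherent I/D cache is now the unflagged default, and coherent or
unified caches are the exceptions that carry a feature bit. Cache-management
code would then test the new bits roughly like this (hedged sketch;
cpu_has_feature() is the standard powerpc accessor, the call site is assumed):

    #include <asm/cputable.h>

    static inline int icache_flush_needed(void)
    {
            /* Only split, non-coherent parts need an explicit icache
             * flush after instructions are modified. */
            return !cpu_has_feature(CPU_FTR_COHERENT_ICACHE);
    }
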
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/floppy.h linux-2.6.22-591/include/asm-powerpc/floppy.h
+--- linux-2.6.22-570/include/asm-powerpc/floppy.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/floppy.h 2007-12-21 15:36:12.000000000 -0500
+@@ -29,7 +29,7 @@
+ #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL);
+
+ #include <linux/pci.h>
+-#include <asm/ppc-pci.h> /* for ppc64_isabridge_dev */
++#include <asm/ppc-pci.h> /* for isa_bridge_pcidev */
+
+ #define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io)
+
+@@ -139,12 +139,12 @@
+ if (bus_addr
+ && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
+ /* different from last time -- unmap prev */
+- pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir);
++ pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir);
+ bus_addr = 0;
+ }
+
+ if (!bus_addr) /* need to map it */
+- bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir);
++ bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir);
+
+ /* remember this one as prev */
+ prev_addr = addr;
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/io.h linux-2.6.22-591/include/asm-powerpc/io.h
+--- linux-2.6.22-570/include/asm-powerpc/io.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/io.h 2007-12-21 15:36:12.000000000 -0500
+@@ -607,9 +607,9 @@
+ *
+ * * iounmap undoes such a mapping and can be hooked
+ *
+- * * __ioremap_explicit (and the pending __iounmap_explicit) are low level
+- * functions to create hand-made mappings for use only by the PCI code
+- * and cannot currently be hooked.
++ * * __ioremap_at (and the pending __iounmap_at) are low level functions to
++ * create hand-made mappings for use only by the PCI code and cannot
++ * currently be hooked. Must be page aligned.
+ *
+ * * __ioremap is the low level implementation used by ioremap and
+ * ioremap_flags and cannot be hooked (but can be used by a hook on one
+@@ -629,19 +629,9 @@
+ unsigned long flags);
+ extern void __iounmap(volatile void __iomem *addr);
+
+-extern int __ioremap_explicit(phys_addr_t p_addr, unsigned long v_addr,
++extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
+ unsigned long size, unsigned long flags);
+-extern int __iounmap_explicit(volatile void __iomem *start,
+- unsigned long size);
+-
+-extern void __iomem * reserve_phb_iospace(unsigned long size);
+-
+-/* Those are more 32 bits only functions */
+-extern unsigned long iopa(unsigned long addr);
+-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__;
+-extern void io_block_mapping(unsigned long virt, phys_addr_t phys,
+- unsigned int size, int flags);
+-
++extern void __iounmap_at(void *ea, unsigned long size);
+
+ /*
+ * When CONFIG_PPC_INDIRECT_IO is set, we use the generic iomap implementation
+@@ -651,8 +641,8 @@
+ */
+ #define HAVE_ARCH_PIO_SIZE 1
+ #define PIO_OFFSET 0x00000000UL
+-#define PIO_MASK 0x3fffffffUL
+-#define PIO_RESERVED 0x40000000UL
++#define PIO_MASK (FULL_IO_SIZE - 1)
++#define PIO_RESERVED (FULL_IO_SIZE)
+
+ #define mmio_read16be(addr) readw_be(addr)
+ #define mmio_read32be(addr) readl_be(addr)
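
A hedged sketch of calling the replacement helpers declared above (the page
flags are the usual powerpc ones and are an assumption here; the real callers
live in the PCI I/O mapping code):

    #include <asm/io.h>
    #include <asm/pgtable.h>

    /* Map a host bridge I/O window at a caller-chosen, page-aligned EA. */
    static void __iomem *map_io_window(phys_addr_t pa, void *ea,
                                       unsigned long size)
    {
            return __ioremap_at(pa, ea, size,
                                _PAGE_NO_CACHE | _PAGE_GUARDED);
    }

    /* Teardown is the mirror call: __iounmap_at(ea, size); */
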
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/kgdb.h linux-2.6.22-591/include/asm-powerpc/kgdb.h
+--- linux-2.6.22-570/include/asm-powerpc/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-powerpc/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,75 @@
++/*
++ * include/asm-powerpc/kgdb.h
++ *
++ * The PowerPC (32/64) specific defines / externs for KGDB. Based on
++ * the previous 32bit and 64bit specific files, which had the following
++ * copyrights:
++ *
++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
++ * PPC Mods (C) 2004 Tom Rini (trini@mvista.com)
++ * PPC Mods (C) 2003 John Whitney (john.whitney@timesys.com)
++ * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu)
++ *
++ *
++ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2006 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++#ifdef __KERNEL__
++#ifndef __POWERPC_KGDB_H__
++#define __POWERPC_KGDB_H__
++
++#include <asm-generic/kgdb.h>
++
++#ifndef __ASSEMBLY__
++
++#define BREAK_INSTR_SIZE 4
++#define BUFMAX ((NUMREGBYTES * 2) + 512)
++#define OUTBUFMAX ((NUMREGBYTES * 2) + 512)
++#define BREAKPOINT() asm(".long 0x7d821008"); /* twge r2, r2 */
++#define CACHE_FLUSH_IS_SAFE 1
++
++/* The number of bytes of registers we have to save depends on a few
++ * things. For 64bit we default to not including vector registers and
++ * vector state registers. */
++#ifdef CONFIG_PPC64
++/*
++ * 64 bit (8 byte) registers:
++ * 32 gpr, 32 fpr, nip, msr, link, ctr
++ * 32 bit (4 byte) registers:
++ * ccr, xer, fpscr
++ */
++#define NUMREGBYTES ((68 * 8) + (3 * 4))
++#if 0
++/* The following adds in vector registers and vector state registers. */
++/* 128 bit (16 byte) registers:
++ * 32 vr
++ * 64 bit (8 byte) registers:
++ * 32 gpr, 32 fpr, nip, msr, link, ctr
++ * 32 bit (4 byte) registers:
++ * ccr, xer, fpscr, vscr, vrsave
++ */
++#define NUMREGBYTES ((128 * 16) + (68 * 8) + (5 * 4))
++#endif
++#define NUMCRITREGBYTES 184
++#else /* CONFIG_PPC32 */
++/* On non-E500 family PPC32 we determine the size by picking the last
++ * register we need, but on E500 we skip sections so we list what we
++ * need to store, and add it up. */
++#ifndef CONFIG_E500
++#define MAXREG (PT_FPSCR+1)
++#else
++/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr */
++#define MAXREG ((32*2)+6+2+1)
++#endif
++#define NUMREGBYTES (MAXREG * sizeof(int))
++/* CR/LR, R1, R2, R13-R31 inclusive. */
++#define NUMCRITREGBYTES (23 * sizeof(int))
++#endif /* 32/64 */
++#endif /* !(__ASSEMBLY__) */
++#endif /* !__POWERPC_KGDB_H__ */
++#endif /* __KERNEL__ */
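
As a sanity check on the 64-bit arithmetic above: 32 GPRs, 32 FPRs, nip, msr,
link and ctr make 68 eight-byte registers (544 bytes); ccr, xer and fpscr add
three four-byte registers, for 556 bytes total. A throwaway compile-time
assertion (illustrative only):

    #include <asm/kgdb.h>

    #ifdef CONFIG_PPC64
    /* Fails to compile if the register-buffer arithmetic ever drifts. */
    extern char nrb_check[(NUMREGBYTES == (68 * 8) + (3 * 4)) ? 1 : -1];
    #endif
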
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/lppaca.h linux-2.6.22-591/include/asm-powerpc/lppaca.h
+--- linux-2.6.22-570/include/asm-powerpc/lppaca.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/lppaca.h 2007-12-21 15:36:12.000000000 -0500
+@@ -98,7 +98,7 @@
+ u64 saved_gpr5; // Saved GPR5 x30-x37
+
+ u8 reserved4; // Reserved x38-x38
+- u8 cpuctls_task_attrs; // Task attributes for cpuctls x39-x39
++ u8 donate_dedicated_cpu; // Donate dedicated CPU cycles x39-x39
+ u8 fpregs_in_use; // FP regs in use x3A-x3A
+ u8 pmcregs_in_use; // PMC regs in use x3B-x3B
+ volatile u32 saved_decr; // Saved Decr Value x3C-x3F
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,91 @@
++#ifndef _ASM_POWERPC_MMU_HASH32_H_
++#define _ASM_POWERPC_MMU_HASH32_H_
++/*
++ * 32-bit hash table MMU support
++ */
++
++/*
++ * BATs
++ */
++
++/* Block size masks */
++#define BL_128K 0x000
++#define BL_256K 0x001
++#define BL_512K 0x003
++#define BL_1M 0x007
++#define BL_2M 0x00F
++#define BL_4M 0x01F
++#define BL_8M 0x03F
++#define BL_16M 0x07F
++#define BL_32M 0x0FF
++#define BL_64M 0x1FF
++#define BL_128M 0x3FF
++#define BL_256M 0x7FF
++
++/* BAT Access Protection */
++#define BPP_XX 0x00 /* No access */
++#define BPP_RX 0x01 /* Read only */
++#define BPP_RW 0x02 /* Read/write */
++
++#ifndef __ASSEMBLY__
++struct ppc_bat {
++ struct {
++ unsigned long bepi:15; /* Effective page index (virtual address) */
++ unsigned long :4; /* Unused */
++ unsigned long bl:11; /* Block size mask */
++ unsigned long vs:1; /* Supervisor valid */
++ unsigned long vp:1; /* User valid */
++ } batu; /* Upper register */
++ struct {
++ unsigned long brpn:15; /* Real page index (physical address) */
++ unsigned long :10; /* Unused */
++ unsigned long w:1; /* Write-thru cache */
++ unsigned long i:1; /* Cache inhibit */
++ unsigned long m:1; /* Memory coherence */
++ unsigned long g:1; /* Guarded (MBZ in IBAT) */
++ unsigned long :1; /* Unused */
++ unsigned long pp:2; /* Page access protections */
++ } batl; /* Lower register */
++};
++#endif /* !__ASSEMBLY__ */
++
++/*
++ * Hash table
++ */
++
++/* Values for PP (assumes Ks=0, Kp=1) */
++#define PP_RWXX 0 /* Supervisor read/write, User none */
++#define PP_RWRX 1 /* Supervisor read/write, User read */
++#define PP_RWRW 2 /* Supervisor read/write, User read/write */
++#define PP_RXRX 3 /* Supervisor read, User read */
++
++#ifndef __ASSEMBLY__
++
++/* Hardware Page Table Entry */
++struct hash_pte {
++ unsigned long v:1; /* Entry is valid */
++ unsigned long vsid:24; /* Virtual segment identifier */
++ unsigned long h:1; /* Hash algorithm indicator */
++ unsigned long api:6; /* Abbreviated page index */
++ unsigned long rpn:20; /* Real (physical) page number */
++ unsigned long :3; /* Unused */
++ unsigned long r:1; /* Referenced */
++ unsigned long c:1; /* Changed */
++ unsigned long w:1; /* Write-thru cache mode */
++ unsigned long i:1; /* Cache inhibited */
++ unsigned long m:1; /* Memory coherence */
++ unsigned long g:1; /* Guarded */
++ unsigned long :1; /* Unused */
++ unsigned long pp:2; /* Page protection */
++};
++
++typedef struct {
++ unsigned long id;
++ unsigned long vdso_base;
++} mm_context_t;
++
++typedef unsigned long phys_addr_t;
++
++#endif /* !__ASSEMBLY__ */
++
++#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
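
A hedged sketch of filling the bitfield structures above for a 256MB
supervisor read/write mapping (values are illustrative; the real BAT setup is
in the 32-bit mm code):

    #include <asm/mmu.h>

    static void example_fill_bat(struct ppc_bat *bat,
                                 unsigned long virt, unsigned long phys)
    {
            bat->batu.bepi = virt >> 17;    /* top 15 bits of the EA */
            bat->batu.bl   = BL_256M;       /* 256MB block size mask */
            bat->batu.vs   = 1;             /* valid in supervisor mode */
            bat->batu.vp   = 0;             /* not valid in user mode */

            bat->batl.brpn = phys >> 17;    /* top 15 bits of the RA */
            bat->batl.m    = 1;             /* memory coherent */
            bat->batl.pp   = BPP_RW;        /* read/write access */
    }
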
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h 2007-12-21 15:36:12.000000000 -0500
+@@ -103,12 +103,12 @@
+
+ #ifndef __ASSEMBLY__
+
+-typedef struct {
++struct hash_pte {
+ unsigned long v;
+ unsigned long r;
+-} hpte_t;
++};
+
+-extern hpte_t *htab_address;
++extern struct hash_pte *htab_address;
+ extern unsigned long htab_size_bytes;
+ extern unsigned long htab_hash_mask;
+
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu.h linux-2.6.22-591/include/asm-powerpc/mmu.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/mmu.h 2007-12-21 15:36:12.000000000 -0500
+@@ -5,6 +5,9 @@
+ #ifdef CONFIG_PPC64
+ /* 64-bit classic hash table MMU */
+ # include <asm/mmu-hash64.h>
++#elif defined(CONFIG_PPC_STD_MMU)
++/* 32-bit classic hash table MMU */
++# include <asm/mmu-hash32.h>
+ #elif defined(CONFIG_44x)
+ /* 44x-style software loaded TLB */
+ # include <asm/mmu-44x.h>
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci-bridge.h linux-2.6.22-591/include/asm-powerpc/pci-bridge.h
+--- linux-2.6.22-570/include/asm-powerpc/pci-bridge.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/pci-bridge.h 2007-12-21 15:36:12.000000000 -0500
+@@ -31,6 +31,7 @@
+ int last_busno;
+
+ void __iomem *io_base_virt;
++ void *io_base_alloc;
+ resource_size_t io_base_phys;
+
+ /* Some machines have a non 1:1 mapping of
+@@ -70,19 +71,22 @@
+ int devfn; /* pci device and function number */
+ int class_code; /* pci device class */
+
+-#ifdef CONFIG_PPC_PSERIES
++ struct pci_controller *phb; /* for pci devices */
++ struct iommu_table *iommu_table; /* for phb's or bridges */
++ struct pci_dev *pcidev; /* back-pointer to the pci device */
++ struct device_node *node; /* back-pointer to the device_node */
++
++ int pci_ext_config_space; /* for pci devices */
++
++#ifdef CONFIG_EEH
+ int eeh_mode; /* See eeh.h for possible EEH_MODEs */
+ int eeh_config_addr;
+ int eeh_pe_config_addr; /* new-style partition endpoint address */
+ int eeh_check_count; /* # times driver ignored error */
+ int eeh_freeze_count; /* # times this device froze up. */
+-#endif
+- int pci_ext_config_space; /* for pci devices */
+- struct pci_controller *phb; /* for pci devices */
+- struct iommu_table *iommu_table; /* for phb's or bridges */
+- struct pci_dev *pcidev; /* back-pointer to the pci device */
+- struct device_node *node; /* back-pointer to the device_node */
++ int eeh_false_positives; /* # times this device reported #ff's */
+ u32 config_space[16]; /* saved PCI config space */
++#endif
+ };
+
+ /* Get the pointer to a device_node's pci_dn */
+@@ -164,6 +168,11 @@
+ }
+ #endif
+
++extern void isa_bridge_find_early(struct pci_controller *hose);
++
++extern int pcibios_unmap_io_space(struct pci_bus *bus);
++extern int pcibios_map_io_space(struct pci_bus *bus);
++
+ /* Return values for ppc_md.pci_probe_mode function */
+ #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
+ #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci.h linux-2.6.22-591/include/asm-powerpc/pci.h
+--- linux-2.6.22-570/include/asm-powerpc/pci.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/pci.h 2007-12-21 15:36:12.000000000 -0500
+@@ -220,10 +220,6 @@
+ return root;
+ }
+
+-extern int unmap_bus_range(struct pci_bus *bus);
+-
+-extern int remap_bus_range(struct pci_bus *bus);
+-
+ extern void pcibios_fixup_device_resources(struct pci_dev *dev,
+ struct pci_bus *bus);
+
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h 2007-12-21 15:36:12.000000000 -0500
+@@ -6,11 +6,7 @@
+ #ifndef __ASSEMBLY__
+ #include <linux/sched.h>
+ #include <linux/threads.h>
+-#include <asm/processor.h> /* For TASK_SIZE */
+-#include <asm/mmu.h>
+-#include <asm/page.h>
+ #include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */
+-struct mm_struct;
+
+ extern unsigned long va_to_phys(unsigned long address);
+ extern pte_t *va_to_pte(unsigned long address);
+@@ -488,14 +484,6 @@
+ #define pfn_pte(pfn, prot) __pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) |\
+ pgprot_val(prot))
+ #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
+-
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern unsigned long empty_zero_page[1024];
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+-
+ #endif /* __ASSEMBLY__ */
+
+ #define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
+@@ -734,10 +722,6 @@
+ #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+ #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+
+-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+-
+-extern void paging_init(void);
+-
+ /*
+ * Encode and decode a swap entry.
+ * Note that the bits we use in a PTE for representing a swap entry
+@@ -755,40 +739,6 @@
+ #define pte_to_pgoff(pte) (pte_val(pte) >> 3)
+ #define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
+
+-/* CONFIG_APUS */
+-/* For virtual address to physical address conversion */
+-extern void cache_clear(__u32 addr, int length);
+-extern void cache_push(__u32 addr, int length);
+-extern int mm_end_of_chunk (unsigned long addr, int len);
+-extern unsigned long iopa(unsigned long addr);
+-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__;
+-
+-/* Values for nocacheflag and cmode */
+-/* These are not used by the APUS kernel_map, but prevents
+- compilation errors. */
+-#define KERNELMAP_FULL_CACHING 0
+-#define KERNELMAP_NOCACHE_SER 1
+-#define KERNELMAP_NOCACHE_NONSER 2
+-#define KERNELMAP_NO_COPYBACK 3
+-
+-/*
+- * Map some physical address range into the kernel address space.
+- */
+-extern unsigned long kernel_map(unsigned long paddr, unsigned long size,
+- int nocacheflag, unsigned long *memavailp );
+-
+-/*
+- * Set cache mode of (kernel space) address range.
+- */
+-extern void kernel_set_cachemode (unsigned long address, unsigned long size,
+- unsigned int cmode);
+-
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-#define kern_addr_valid(addr) (1)
+-
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+- remap_pfn_range(vma, vaddr, pfn, size, prot)
+-
+ /*
+ * No page table caches to initialise
+ */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h 2007-12-21 15:36:12.000000000 -0500
+@@ -7,11 +7,7 @@
+
+ #ifndef __ASSEMBLY__
+ #include <linux/stddef.h>
+-#include <asm/processor.h> /* For TASK_SIZE */
+-#include <asm/mmu.h>
+-#include <asm/page.h>
+ #include <asm/tlbflush.h>
+-struct mm_struct;
+ #endif /* __ASSEMBLY__ */
+
+ #ifdef CONFIG_PPC_64K_PAGES
+@@ -27,7 +23,7 @@
+ */
+ #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+-#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
++#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+ #if TASK_SIZE_USER64 > PGTABLE_RANGE
+ #error TASK_SIZE_USER64 exceeds pagetable range
+@@ -37,19 +33,28 @@
+ #error TASK_SIZE_USER64 exceeds user VSID range
+ #endif
+
++
+ /*
+ * Define the address range of the vmalloc VM area.
+ */
+ #define VMALLOC_START ASM_CONST(0xD000000000000000)
+-#define VMALLOC_SIZE ASM_CONST(0x80000000000)
++#define VMALLOC_SIZE (PGTABLE_RANGE >> 1)
+ #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
+
+ /*
+- * Define the address range of the imalloc VM area.
+- */
+-#define PHBS_IO_BASE VMALLOC_END
+-#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
+-#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
++ * Define the address ranges for MMIO and IO space :
++ *
++ * ISA_IO_BASE = VMALLOC_END, 64K reserved area
++ * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
++ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
++ */
++#define FULL_IO_SIZE 0x80000000ul
++#define ISA_IO_BASE (VMALLOC_END)
++#define ISA_IO_END (VMALLOC_END + 0x10000ul)
++#define PHB_IO_BASE (ISA_IO_END)
++#define PHB_IO_END (VMALLOC_END + FULL_IO_SIZE)
++#define IOREMAP_BASE (PHB_IO_END)
++#define IOREMAP_END (VMALLOC_START + PGTABLE_RANGE)
+
+ /*
+ * Region IDs
+@@ -134,16 +139,6 @@
+ #define __S110 PAGE_SHARED_X
+ #define __S111 PAGE_SHARED_X
+
+-#ifndef __ASSEMBLY__
+-
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+-#endif /* __ASSEMBLY__ */
+-
+ #ifdef CONFIG_HUGETLB_PAGE
+
+ #define HAVE_ARCH_UNMAPPED_AREA
+@@ -442,10 +437,6 @@
+ #define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+-extern pgd_t swapper_pg_dir[];
+-
+-extern void paging_init(void);
+-
+ /* Encode and de-code a swap entry */
+ #define __swp_type(entry) (((entry).val >> 1) & 0x3f)
+ #define __swp_offset(entry) ((entry).val >> 8)
+@@ -456,17 +447,6 @@
+ #define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
+ #define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
+
+-/*
+- * kern_addr_valid is intended to indicate whether an address is a valid
+- * kernel address. Most 32-bit archs define it as always true (like this)
+- * but most 64-bit archs actually perform a test. What should we do here?
+- * The only use is in fs/ncpfs/dir.c
+- */
+-#define kern_addr_valid(addr) (1)
+-
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+- remap_pfn_range(vma, vaddr, pfn, size, prot)
+-
+ void pgtable_cache_init(void);
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable.h linux-2.6.22-591/include/asm-powerpc/pgtable.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/pgtable.h 2007-12-21 15:36:12.000000000 -0500
+@@ -2,6 +2,13 @@
+ #define _ASM_POWERPC_PGTABLE_H
+ #ifdef __KERNEL__
+
++#ifndef __ASSEMBLY__
++#include <asm/processor.h> /* For TASK_SIZE */
++#include <asm/mmu.h>
++#include <asm/page.h>
++struct mm_struct;
++#endif /* !__ASSEMBLY__ */
++
+ #if defined(CONFIG_PPC64)
+ # include <asm/pgtable-ppc64.h>
+ #else
+@@ -9,6 +16,27 @@
+ #endif
+
+ #ifndef __ASSEMBLY__
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern unsigned long empty_zero_page[];
++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
++
++extern pgd_t swapper_pg_dir[];
++
++extern void paging_init(void);
++
++/*
++ * kern_addr_valid is intended to indicate whether an address is a valid
++ * kernel address. Most 32-bit archs define it as always true (like this)
++ * but most 64-bit archs actually perform a test. What should we do here?
++ */
++#define kern_addr_valid(addr) (1)
++
++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
++ remap_pfn_range(vma, vaddr, pfn, size, prot)
++
+ #include <asm-generic/pgtable.h>
+ #endif /* __ASSEMBLY__ */
+
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/ppc-pci.h linux-2.6.22-591/include/asm-powerpc/ppc-pci.h
+--- linux-2.6.22-570/include/asm-powerpc/ppc-pci.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/ppc-pci.h 2007-12-21 15:36:12.000000000 -0500
+@@ -26,7 +26,7 @@
+
+ extern void find_and_init_phbs(void);
+
+-extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */
++extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */
+
+ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
+ #define BUID_HI(buid) ((buid) >> 32)
+@@ -47,8 +47,8 @@
+ extern unsigned long get_phb_buid (struct device_node *);
+ extern int rtas_setup_phb(struct pci_controller *phb);
+
+-/* From pSeries_pci.h */
+-extern void pSeries_final_fixup(void);
++/* From iSeries PCI */
++extern void iSeries_pcibios_init(void);
+
+ extern unsigned long pci_probe_only;
+
+@@ -139,6 +139,9 @@
+ */
+ struct device_node * find_device_pe(struct device_node *dn);
+
++void eeh_sysfs_add_device(struct pci_dev *pdev);
++void eeh_sysfs_remove_device(struct pci_dev *pdev);
++
+ #endif /* CONFIG_EEH */
+
+ #else /* CONFIG_PCI */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/ptrace.h linux-2.6.22-591/include/asm-powerpc/ptrace.h
+--- linux-2.6.22-570/include/asm-powerpc/ptrace.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/ptrace.h 2007-12-21 15:36:12.000000000 -0500
+@@ -92,6 +92,11 @@
+ set_thread_flag(TIF_NOERROR); \
+ } while(0)
+
++struct task_struct;
++extern unsigned long ptrace_get_reg(struct task_struct *task, int regno);
++extern int ptrace_put_reg(struct task_struct *task, int regno,
++ unsigned long data);
++
+ /*
+ * We use the least-significant bit of the trap field to indicate
+ * whether we have saved the full set of registers, or only a
+@@ -158,9 +163,7 @@
+
+ #define PT_NIP 32
+ #define PT_MSR 33
+-#ifdef __KERNEL__
+ #define PT_ORIG_R3 34
+-#endif
+ #define PT_CTR 35
+ #define PT_LNK 36
+ #define PT_XER 37
+@@ -169,11 +172,12 @@
+ #define PT_MQ 39
+ #else
+ #define PT_SOFTE 39
++#endif
+ #define PT_TRAP 40
+ #define PT_DAR 41
+ #define PT_DSISR 42
+ #define PT_RESULT 43
+-#endif
++#define PT_REGS_COUNT 44
+
+ #define PT_FPR0 48 /* each FP reg occupies 2 slots in this space */
+
+@@ -229,7 +233,17 @@
+ #define PTRACE_GET_DEBUGREG 25
+ #define PTRACE_SET_DEBUGREG 26
+
+-/* Additional PTRACE requests implemented on PowerPC. */
++/* (new) PTRACE requests using the same numbers as x86 and the same
++ * argument ordering. Additionally, they support more registers too
++ */
++#define PTRACE_GETREGS 12
++#define PTRACE_SETREGS 13
++#define PTRACE_GETFPREGS 14
++#define PTRACE_SETFPREGS 15
++#define PTRACE_GETREGS64 22
++#define PTRACE_SETREGS64 23
++
++/* (old) PTRACE requests with inverted arguments */
+ #define PPC_PTRACE_GETREGS 0x99 /* Get GPRs 0 - 31 */
+ #define PPC_PTRACE_SETREGS 0x98 /* Set GPRs 0 - 31 */
+ #define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */
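
From userspace the new-style requests take the destination buffer in the data
argument, matching x86 (hedged sketch; the request value 12 and PT_REGS_COUNT
of 44 come from the hunk above, the wrapper name is made up):

    #include <sys/ptrace.h>
    #include <sys/types.h>

    #define MY_PTRACE_GETREGS 12    /* PTRACE_GETREGS, per the hunk above */

    static long fetch_gp_regs(pid_t pid, unsigned long gregs[44])
    {
            /* addr is unused; the register block lands in gregs */
            return ptrace(MY_PTRACE_GETREGS, pid, (void *)0, gregs);
    }
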
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/syscalls.h linux-2.6.22-591/include/asm-powerpc/syscalls.h
+--- linux-2.6.22-570/include/asm-powerpc/syscalls.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/syscalls.h 2007-12-21 15:36:12.000000000 -0500
+@@ -43,16 +43,9 @@
+
+ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
+ size_t sigsetsize);
+-
+-#ifndef __powerpc64__
+-asmlinkage long sys_sigaltstack(const stack_t __user *uss,
+- stack_t __user *uoss, int r5, int r6, int r7, int r8,
+- struct pt_regs *regs);
+-#else /* __powerpc64__ */
+ asmlinkage long sys_sigaltstack(const stack_t __user *uss,
+ stack_t __user *uoss, unsigned long r5, unsigned long r6,
+ unsigned long r7, unsigned long r8, struct pt_regs *regs);
+-#endif /* __powerpc64__ */
+
+ #endif /* __KERNEL__ */
+ #endif /* __ASM_POWERPC_SYSCALLS_H */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/systbl.h linux-2.6.22-591/include/asm-powerpc/systbl.h
+--- linux-2.6.22-570/include/asm-powerpc/systbl.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/asm-powerpc/systbl.h 2007-12-21 15:36:12.000000000 -0500
+@@ -312,3 +312,4 @@
+ COMPAT_SYS_SPU(timerfd)
+ SYSCALL_SPU(eventfd)
+ COMPAT_SYS_SPU(sync_file_range2)
++COMPAT_SYS(fallocate)
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/thread_info.h linux-2.6.22-591/include/asm-powerpc/thread_info.h
+--- linux-2.6.22-570/include/asm-powerpc/thread_info.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/thread_info.h 2007-12-21 15:36:12.000000000 -0500
+@@ -113,8 +113,8 @@
+ #define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling
+ TIF_NEED_RESCHED */
+ #define TIF_32BIT 5 /* 32 bit binary */
+-#define TIF_RUNLATCH 6 /* Is the runlatch enabled? */
+-#define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */
++#define TIF_PERFMON_WORK 6 /* work for pfm_handle_work() */
++#define TIF_PERFMON_CTXSW 7 /* perfmon needs ctxsw calls */
+ #define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */
+ #define TIF_SINGLESTEP 9 /* singlestepping active */
+ #define TIF_MEMDIE 10
+@@ -123,6 +123,8 @@
+ #define TIF_NOERROR 14 /* Force successful syscall return */
+ #define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */
+ #define TIF_FREEZE 16 /* Freezing for suspend */
++#define TIF_RUNLATCH 17 /* Is the runlatch enabled? */
++#define TIF_ABI_PENDING 18 /* 32/64 bit switch needed */
+
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+@@ -131,8 +133,8 @@
+ #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+ #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+ #define _TIF_32BIT (1<<TIF_32BIT)
+-#define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
+-#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
++#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
++#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
+ #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+ #define _TIF_SECCOMP (1<<TIF_SECCOMP)
+@@ -140,6 +142,8 @@
+ #define _TIF_NOERROR (1<<TIF_NOERROR)
+ #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+ #define _TIF_FREEZE (1<<TIF_FREEZE)
++#define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
++#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
+ #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
+
+ #define _TIF_USER_WORK_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/tlbflush.h linux-2.6.22-591/include/asm-powerpc/tlbflush.h
+--- linux-2.6.22-570/include/asm-powerpc/tlbflush.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-powerpc/tlbflush.h 2007-12-21 15:36:12.000000000 -0500
+@@ -155,6 +155,11 @@
+ {
+ }
+
++/* Private function for use by PCI IO mapping code */
++extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
++ unsigned long end);
++
++
+ #endif
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/unistd.h linux-2.6.22-591/include/asm-powerpc/unistd.h
+--- linux-2.6.22-570/include/asm-powerpc/unistd.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/asm-powerpc/unistd.h 2007-12-21 15:36:12.000000000 -0500
+@@ -331,10 +331,11 @@
+ #define __NR_timerfd 306
+ #define __NR_eventfd 307
+ #define __NR_sync_file_range2 308
++#define __NR_fallocate 309
+
+ #ifdef __KERNEL__
+
+-#define __NR_syscalls 309
++#define __NR_syscalls 310
+
+ #define __NR__exit __NR_exit
+ #define NR_syscalls __NR_syscalls
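
For completeness, a hedged userspace sketch of invoking the new syscall number
directly (glibc had no fallocate wrapper at this point; on 32-bit the 64-bit
offsets are split across register pairs, which this sketch glosses over):

    #include <unistd.h>
    #include <sys/syscall.h>

    #define MY_NR_fallocate 309     /* from the hunk above */

    static long my_fallocate(int fd, int mode, long long off, long long len)
    {
            return syscall(MY_NR_fallocate, fd, mode, off, len);
    }
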
+diff -Nurb linux-2.6.22-570/include/asm-ppc/kgdb.h linux-2.6.22-591/include/asm-ppc/kgdb.h
+--- linux-2.6.22-570/include/asm-ppc/kgdb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ppc/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,57 +1,18 @@
+-/*
+- * kgdb.h: Defines and declarations for serial line source level
+- * remote debugging of the Linux kernel using gdb.
+- *
+- * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu)
+- *
+- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+- */
+ #ifdef __KERNEL__
+-#ifndef _PPC_KGDB_H
+-#define _PPC_KGDB_H
+-
++#ifndef __PPC_KGDB_H__
++#define __PPC_KGDB_H__
++#include <asm-powerpc/kgdb.h>
+ #ifndef __ASSEMBLY__
+-
+-/* Things specific to the gen550 backend. */
+-struct uart_port;
+-
+-extern void gen550_progress(char *, unsigned short);
+-extern void gen550_kgdb_map_scc(void);
+-extern void gen550_init(int, struct uart_port *);
+-
+-/* Things specific to the pmac backend. */
+-extern void zs_kgdb_hook(int tty_num);
+-
+-/* To init the kgdb engine. (called by serial hook)*/
+-extern void set_debug_traps(void);
+-
+-/* To enter the debugger explicitly. */
+-extern void breakpoint(void);
+-
+-/* For taking exceptions
++ /* For taking exceptions,
+ * these are defined in traps.c
+ */
+-extern int (*debugger)(struct pt_regs *regs);
++struct pt_regs;
++extern void (*debugger)(struct pt_regs *regs);
+ extern int (*debugger_bpt)(struct pt_regs *regs);
+ extern int (*debugger_sstep)(struct pt_regs *regs);
+ extern int (*debugger_iabr_match)(struct pt_regs *regs);
+ extern int (*debugger_dabr_match)(struct pt_regs *regs);
+ extern void (*debugger_fault_handler)(struct pt_regs *regs);
+-
+-/* What we bring to the party */
+-int kgdb_bpt(struct pt_regs *regs);
+-int kgdb_sstep(struct pt_regs *regs);
+-void kgdb(struct pt_regs *regs);
+-int kgdb_iabr_match(struct pt_regs *regs);
+-int kgdb_dabr_match(struct pt_regs *regs);
+-
+-/*
+- * external low-level support routines (ie macserial.c)
+- */
+-extern void kgdb_interruptible(int); /* control interrupts from serial */
+-extern void putDebugChar(char); /* write a single character */
+-extern char getDebugChar(void); /* read and return a single char */
+-
+-#endif /* !(__ASSEMBLY__) */
+-#endif /* !(_PPC_KGDB_H) */
++#endif /* !__ASSEMBLY__ */
++#endif /* __PPC_KGDB_H__ */
+ #endif /* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-ppc/machdep.h linux-2.6.22-591/include/asm-ppc/machdep.h
+--- linux-2.6.22-570/include/asm-ppc/machdep.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ppc/machdep.h 2007-12-21 15:36:12.000000000 -0500
+@@ -72,9 +72,7 @@
+ unsigned long (*find_end_of_memory)(void);
+ void (*setup_io_mappings)(void);
+
+- void (*early_serial_map)(void);
+ void (*progress)(char *, unsigned short);
+- void (*kgdb_map_scc)(void);
+
+ unsigned char (*nvram_read_val)(int addr);
+ void (*nvram_write_val)(int addr, unsigned char val);
+diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60.h linux-2.6.22-591/include/asm-ppc/mv64x60.h
+--- linux-2.6.22-570/include/asm-ppc/mv64x60.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ppc/mv64x60.h 2007-12-21 15:36:12.000000000 -0500
+@@ -348,6 +348,8 @@
+
+ void mv64x60_progress_init(u32 base);
+ void mv64x60_mpsc_progress(char *s, unsigned short hex);
++struct platform_device * mv64x60_early_get_pdev_data(const char *name,
++ int id, int remove);
+
+ extern struct mv64x60_32bit_window
+ gt64260_32bit_windows[MV64x60_32BIT_WIN_COUNT];
+diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h
+--- linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -57,7 +57,8 @@
+ #define MV64x60_IRQ_I2C 37
+ #define MV64x60_IRQ_BRG 39
+ #define MV64x60_IRQ_MPSC_0 40
+-#define MV64x60_IRQ_MPSC_1 42
++#define MV64360_IRQ_MPSC_1 41
++#define GT64260_IRQ_MPSC_1 42
+ #define MV64x60_IRQ_COMM 43
+ #define MV64x60_IRQ_P0_GPP_0_7 56
+ #define MV64x60_IRQ_P0_GPP_8_15 57
+diff -Nurb linux-2.6.22-570/include/asm-s390/page.h linux-2.6.22-591/include/asm-s390/page.h
+--- linux-2.6.22-570/include/asm-s390/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-s390/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -64,7 +64,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-sh/kgdb.h linux-2.6.22-591/include/asm-sh/kgdb.h
+--- linux-2.6.22-570/include/asm-sh/kgdb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-sh/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -2,78 +2,41 @@
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+- * Based on original code by Glenn Engel, Jim Kingdon,
+- * David Grothe <dave@gcom.com>, Tigran Aivazian, <tigran@sco.com> and
+- * Amit S. Kale <akale@veritas.com>
++ * Based on a file that was modified or based on files by: Glenn Engel,
++ * Jim Kingdon, David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
++ * Amit S. Kale <akale@veritas.com>, sh-stub.c from Ben Lee and
++ * Steve Chamberlain, Henry Bell <henry.bell@st.com>
+ *
+- * Super-H port based on sh-stub.c (Ben Lee and Steve Chamberlain) by
+- * Henry Bell <henry.bell@st.com>
+- *
+- * Header file for low-level support for remote debug using GDB.
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
+ *
+ */
+
+ #ifndef __KGDB_H
+ #define __KGDB_H
+
+-#include <asm/ptrace.h>
+-#include <asm/cacheflush.h>
++#include <asm-generic/kgdb.h>
+
+-struct console;
++/* Based on sh-gdb.c from gdb-6.1: Glenn Engel at HP,
++ Ben Lee and Steve Chamberlain */
++#define NUMREGBYTES 112 /* 92 */
++#define NUMCRITREGBYTES (9 << 2)
++#define BUFMAX 400
+
+-/* Same as pt_regs but has vbr in place of syscall_nr */
++#ifndef __ASSEMBLY__
+ struct kgdb_regs {
+ unsigned long regs[16];
+ unsigned long pc;
+ unsigned long pr;
+- unsigned long sr;
+ unsigned long gbr;
++ unsigned long vbr;
+ unsigned long mach;
+ unsigned long macl;
+- unsigned long vbr;
++ unsigned long sr;
+ };
+
+-/* State info */
+-extern char kgdb_in_gdb_mode;
+-extern int kgdb_done_init;
+-extern int kgdb_enabled;
+-extern int kgdb_nofault; /* Ignore bus errors (in gdb mem access) */
+-extern int kgdb_halt; /* Execute initial breakpoint at startup */
+-extern char in_nmi; /* Debounce flag to prevent NMI reentry*/
+-
+-/* SCI */
+-extern int kgdb_portnum;
+-extern int kgdb_baud;
+-extern char kgdb_parity;
+-extern char kgdb_bits;
+-
+-/* Init and interface stuff */
+-extern int kgdb_init(void);
+-extern int (*kgdb_getchar)(void);
+-extern void (*kgdb_putchar)(int);
+-
+-/* Trap functions */
+-typedef void (kgdb_debug_hook_t)(struct pt_regs *regs);
+-typedef void (kgdb_bus_error_hook_t)(void);
+-extern kgdb_debug_hook_t *kgdb_debug_hook;
+-extern kgdb_bus_error_hook_t *kgdb_bus_err_hook;
+-
+-/* Console */
+-void kgdb_console_write(struct console *co, const char *s, unsigned count);
+-extern int kgdb_console_setup(struct console *, char *);
+-
+-/* Prototypes for jmp fns */
+-#define _JBLEN 9
+-typedef int jmp_buf[_JBLEN];
+-extern void longjmp(jmp_buf __jmpb, int __retval);
+-extern int setjmp(jmp_buf __jmpb);
+-
+-/* Forced breakpoint */
+-#define breakpoint() \
+-do { \
+- if (kgdb_enabled) \
+- __asm__ __volatile__("trapa #0x3c"); \
+-} while (0)
++#define BREAKPOINT() asm("trapa #0xff");
++#define BREAK_INSTR_SIZE 2
++#define CACHE_FLUSH_IS_SAFE 1
+
+ /* KGDB should be able to flush all kernel text space */
+ #if defined(CONFIG_CPU_SH4)
+@@ -100,4 +63,5 @@
+ {
+ return hexchars[x & 0xf];
+ }
++#endif /* !__ASSEMBLY__ */
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-sh/system.h linux-2.6.22-591/include/asm-sh/system.h
+--- linux-2.6.22-570/include/asm-sh/system.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-sh/system.h 2007-12-21 15:36:12.000000000 -0500
+@@ -264,6 +264,45 @@
+ #define instruction_size(insn) (2)
+ #endif
+
++static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old,
++ unsigned long new)
++{
++ __u32 retval;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ retval = *m;
++ if (retval == old)
++ *m = new;
++ local_irq_restore(flags); /* implies memory barrier */
++ return retval;
++}
++
++/* This function doesn't exist, so you'll get a linker error
++ * if something tries to do an invalid cmpxchg(). */
++extern void __cmpxchg_called_with_bad_pointer(void);
++
++#define __HAVE_ARCH_CMPXCHG 1
++
++static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
++ unsigned long new, int size)
++{
++ switch (size) {
++ case 4:
++ return __cmpxchg_u32(ptr, old, new);
++ }
++ __cmpxchg_called_with_bad_pointer();
++ return old;
++}
++
++#define cmpxchg(ptr,o,n) \
++ ({ \
++ __typeof__(*(ptr)) _o_ = (o); \
++ __typeof__(*(ptr)) _n_ = (n); \
++ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
++ (unsigned long)_n_, sizeof(*(ptr))); \
++ })
++
+ /* XXX
+ * disable hlt during certain critical i/o operations
+ */
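
The implementation above is the classic uniprocessor-safe pattern: disable
interrupts, compare, conditionally store. A hedged usage sketch built on the
cmpxchg() just defined (the helper name is made up):

    /* Retry-loop increment; safe wherever this cmpxchg() is atomic. */
    static void cmpxchg_inc(volatile int *v)
    {
            int old;

            do {
                    old = *v;
            } while (cmpxchg(v, old, old + 1) != old);
    }
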
+diff -Nurb linux-2.6.22-570/include/asm-um/thread_info.h linux-2.6.22-591/include/asm-um/thread_info.h
+--- linux-2.6.22-570/include/asm-um/thread_info.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-um/thread_info.h 2007-12-21 15:36:12.000000000 -0500
+@@ -52,10 +52,21 @@
+ return ti;
+ }
+
++#ifdef CONFIG_DEBUG_STACK_USAGE
++
++#define alloc_thread_info(tsk) \
++ ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
++ CONFIG_KERNEL_STACK_ORDER))
++#else
++
+ /* thread information allocation */
+ #define alloc_thread_info(tsk) \
+- ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL))
+-#define free_thread_info(ti) kfree(ti)
++ ((struct thread_info *) __get_free_pages(GFP_KERNEL, \
++ CONFIG_KERNEL_STACK_ORDER))
++#endif
++
++#define free_thread_info(ti) \
++ free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+
+ #endif
+
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/kdebug.h linux-2.6.22-591/include/asm-x86_64/kdebug.h
+--- linux-2.6.22-570/include/asm-x86_64/kdebug.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-x86_64/kdebug.h 2007-12-21 15:36:12.000000000 -0500
+@@ -23,6 +23,7 @@
+ DIE_CALL,
+ DIE_NMI_IPI,
+ DIE_PAGE_FAULT,
++ DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+
+ extern void printk_address(unsigned long address);
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/kgdb.h linux-2.6.22-591/include/asm-x86_64/kgdb.h
+--- linux-2.6.22-570/include/asm-x86_64/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/asm-x86_64/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,52 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++#include <asm-generic/kgdb.h>
++
++/*
++ * Note that this register image is in a different order than
++ * the register image that Linux produces at interrupt time.
++ *
++ * Linux's register image is defined by struct pt_regs in ptrace.h.
++ * Just why GDB uses a different order is a historical mystery.
++ */
++#define _RAX 0
++#define _RDX 1
++#define _RCX 2
++#define _RBX 3
++#define _RSI 4
++#define _RDI 5
++#define _RBP 6
++#define _RSP 7
++#define _R8 8
++#define _R9 9
++#define _R10 10
++#define _R11 11
++#define _R12 12
++#define _R13 13
++#define _R14 14
++#define _R15 15
++#define _PC 16
++#define _PS 17
++
++/* Number of bytes of registers. */
++#define NUMREGBYTES ((_PS+1)*8)
++#define NUMCRITREGBYTES (8 * 8) /* 8 registers. */
++
++#ifndef __ASSEMBLY__
++/* BUFMAX defines the maximum number of characters in inbound/outbound
++ * buffers; at least NUMREGBYTES*2 are needed for register packets, and
++ * a longer buffer is needed to list all threads. */
++#define BUFMAX 1024
++#define BREAKPOINT() asm(" int $3");
++#define CHECK_EXCEPTION_STACK() ((&__get_cpu_var(init_tss))[0].ist[0])
++#define BREAK_INSTR_SIZE 1
++#define CACHE_FLUSH_IS_SAFE 1
++#endif /* !__ASSEMBLY__ */
++#endif /* _ASM_KGDB_H_ */
++#endif /* __KERNEL__ */
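
A hedged sketch of the reordering the comment above describes, copying a
kernel pt_regs into GDB's slot order (illustrative; the full conversion lives
in the arch kgdb support code):

    #include <asm/kgdb.h>
    #include <asm/ptrace.h>

    static void pt_regs_to_gdb(unsigned long *gdb, struct pt_regs *regs)
    {
            gdb[_RAX] = regs->rax;  /* GDB slot 0; not first in pt_regs */
            gdb[_RDX] = regs->rdx;
            gdb[_RBX] = regs->rbx;
            gdb[_RSP] = regs->rsp;
            gdb[_PC]  = regs->rip;
            gdb[_PS]  = regs->eflags;
    }
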
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/page.h linux-2.6.22-591/include/asm-x86_64/page.h
+--- linux-2.6.22-570/include/asm-x86_64/page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-x86_64/page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -48,7 +48,8 @@
+ #define clear_user_page(page, vaddr, pg) clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ /*
+ * These are used to make use of C type-checking..
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/proto.h linux-2.6.22-591/include/asm-x86_64/proto.h
+--- linux-2.6.22-570/include/asm-x86_64/proto.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-x86_64/proto.h 2007-12-21 15:36:12.000000000 -0500
+@@ -75,8 +75,6 @@
+ extern void early_quirks(void);
+ extern void check_efer(void);
+
+-extern int unhandled_signal(struct task_struct *tsk, int sig);
+-
+ extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
+ extern unsigned long table_start, table_end;
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/system.h linux-2.6.22-591/include/asm-x86_64/system.h
+--- linux-2.6.22-570/include/asm-x86_64/system.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-x86_64/system.h 2007-12-21 15:36:12.000000000 -0500
+@@ -22,7 +22,9 @@
+
+ /* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev,next,last) \
+- asm volatile(SAVE_CONTEXT \
++ asm volatile(".globl __switch_to_begin\n\t" \
++ "__switch_to_begin:\n\t" \
++ SAVE_CONTEXT \
+ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
+ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
+ "call __switch_to\n\t" \
+@@ -34,6 +36,8 @@
+ "movq %%rax,%%rdi\n\t" \
+ "jc ret_from_fork\n\t" \
+ RESTORE_CONTEXT \
++ "\n.globl __switch_to_end\n\t" \
++ "__switch_to_end:\n\t" \
+ : "=a" (last) \
+ : [next] "S" (next), [prev] "D" (prev), \
+ [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/unistd.h linux-2.6.22-591/include/asm-x86_64/unistd.h
+--- linux-2.6.22-570/include/asm-x86_64/unistd.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/asm-x86_64/unistd.h 2007-12-21 15:36:12.000000000 -0500
+@@ -630,6 +630,8 @@
+ __SYSCALL(__NR_timerfd, sys_timerfd)
+ #define __NR_eventfd 284
+ __SYSCALL(__NR_eventfd, sys_eventfd)
++#define __NR_fallocate 285
++__SYSCALL(__NR_fallocate, sys_fallocate)
+
+ #ifndef __NO_STUBS
+ #define __ARCH_WANT_OLD_READDIR
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/unwind.h linux-2.6.22-591/include/asm-x86_64/unwind.h
+--- linux-2.6.22-570/include/asm-x86_64/unwind.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/asm-x86_64/unwind.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,6 +1,100 @@
+ #ifndef _ASM_X86_64_UNWIND_H
+ #define _ASM_X86_64_UNWIND_H
+
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <asm/vsyscall.h>
++
++struct unwind_frame_info
++{
++ struct pt_regs regs;
++ struct task_struct *task;
++ unsigned call_frame:1;
++};
++
++#define UNW_PC(frame) (frame)->regs.rip
++#define UNW_SP(frame) (frame)->regs.rsp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame) (frame)->regs.rbp
++#define FRAME_RETADDR_OFFSET 8
++#define FRAME_LINK_OFFSET 0
++#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
++#define STACK_TOP(tsk) ((tsk)->thread.rsp0)
++#endif
++/* Might need to account for the special exception and interrupt handling
++ stacks here, since normally
++ EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
++ but the construct is needed only for getting across the stack switch to
++ the interrupt stack - thus considering the IRQ stack itself is unnecessary,
++ and the overhead of comparing against all exception handling stacks seems
++ * undesirable. */
++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++ PTREGS_INFO(rax), \
++ PTREGS_INFO(rdx), \
++ PTREGS_INFO(rcx), \
++ PTREGS_INFO(rbx), \
++ PTREGS_INFO(rsi), \
++ PTREGS_INFO(rdi), \
++ PTREGS_INFO(rbp), \
++ PTREGS_INFO(rsp), \
++ PTREGS_INFO(r8), \
++ PTREGS_INFO(r9), \
++ PTREGS_INFO(r10), \
++ PTREGS_INFO(r11), \
++ PTREGS_INFO(r12), \
++ PTREGS_INFO(r13), \
++ PTREGS_INFO(r14), \
++ PTREGS_INFO(r15), \
++ PTREGS_INFO(rip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++ ((raItem).where == Memory && \
++ !((raItem).value * (dataAlign) + 8))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++ /*const*/ struct pt_regs *regs)
++{
++ info->regs = *regs;
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++ extern const char thread_return[];
++
++ memset(&info->regs, 0, sizeof(info->regs));
++ info->regs.rip = (unsigned long)thread_return;
++ info->regs.cs = __KERNEL_CS;
++ __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
++ info->regs.rsp = info->task->thread.rsp;
++ info->regs.ss = __KERNEL_DS;
++}
++
++extern int arch_unwind_init_running(struct unwind_frame_info *,
++ int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
++{
++ return user_mode(&info->regs)
++ || (long)info->regs.rip >= 0
++ || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
++ || (long)info->regs.rsp >= 0;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0UL)
+ #define UNW_SP(frame) ((void)(frame), 0UL)
+
+@@ -9,4 +103,6 @@
+ return 0;
+ }
+
++#endif
++
+ #endif /* _ASM_X86_64_UNWIND_H */
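A hedged sketch of consuming the CONFIG_STACK_UNWIND interface added above, using only the accessors declared in this header; the demo_* name is illustrative.

#include <asm/unwind.h>
#include <linux/kernel.h>

/* Illustrative: seed a frame from trap-time registers and report where
 * the unwinder would start, via UNW_PC/UNW_SP from this header. */
static void demo_show_unwind_start(struct pt_regs *regs)
{
	struct unwind_frame_info info;

	arch_unw_init_frame_info(&info, regs);
	printk(KERN_DEBUG "unwind start: rip=%016lx rsp=%016lx (%s)\n",
	       UNW_PC(&info), UNW_SP(&info),
	       arch_unw_user_mode(&info) ? "user" : "kernel");
}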
+diff -Nurb linux-2.6.22-570/include/linux/Kbuild linux-2.6.22-591/include/linux/Kbuild
+--- linux-2.6.22-570/include/linux/Kbuild 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/Kbuild 2007-12-21 15:36:12.000000000 -0500
+@@ -91,7 +91,6 @@
+ header-y += in_route.h
+ header-y += ioctl.h
+ header-y += ipmi_msgdefs.h
+-header-y += ip_mp_alg.h
+ header-y += ipsec.h
+ header-y += ipx.h
+ header-y += irda.h
+diff -Nurb linux-2.6.22-570/include/linux/acpi.h linux-2.6.22-591/include/linux/acpi.h
+--- linux-2.6.22-570/include/linux/acpi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/acpi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -206,11 +206,8 @@
+ {
+ return max_cstate;
+ }
+-static inline void acpi_set_cstate_limit(unsigned int new_limit)
+-{
+- max_cstate = new_limit;
+- return;
+-}
++extern void (*acpi_do_set_cstate_limit)(void);
++extern void acpi_set_cstate_limit(unsigned int new_limit);
+ #else
+ static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
+ static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
+diff -Nurb linux-2.6.22-570/include/linux/async_tx.h linux-2.6.22-591/include/linux/async_tx.h
+--- linux-2.6.22-570/include/linux/async_tx.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/async_tx.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,156 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ASYNC_TX_H_
++#define _ASYNC_TX_H_
++#include <linux/dmaengine.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++
++/**
++ * dma_chan_ref - object used to manage dma channels received from the
++ * dmaengine core.
++ * @chan - the channel being tracked
++ * @node - node for the channel to be placed on async_tx_master_list
++ * @rcu - for list_del_rcu
++ * @count - number of times this channel is listed in the pool
++ * (for channels with multiple capabilities)
++ */
++struct dma_chan_ref {
++ struct dma_chan *chan;
++ struct list_head node;
++ struct rcu_head rcu;
++ atomic_t count;
++};
++
++/**
++ * async_tx_flags - modifiers for the async_* calls
++ * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where
++ * the destination address is not a source. The asynchronous case handles
++ * this implicitly; the synchronous case needs to zero the destination block.
++ * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is
++ * also one of the source addresses. In the synchronous case the destination
++ * address is an implied source, whereas in the asynchronous case it must be listed
++ * as a source. The destination address must be the first address in the source
++ * array.
++ * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
++ * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
++ * dependency chain
++ * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
++ * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
++ * take an atomic mapping (KM_USER0) on the source page(s)
++ * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
++ * take an atomic mapping (KM_USER0) on the dest page(s)
++ */
++enum async_tx_flags {
++ ASYNC_TX_XOR_ZERO_DST = (1 << 0),
++ ASYNC_TX_XOR_DROP_DST = (1 << 1),
++ ASYNC_TX_ASSUME_COHERENT = (1 << 2),
++ ASYNC_TX_ACK = (1 << 3),
++ ASYNC_TX_DEP_ACK = (1 << 4),
++ ASYNC_TX_KMAP_SRC = (1 << 5),
++ ASYNC_TX_KMAP_DST = (1 << 6),
++};
++
++#ifdef CONFIG_DMA_ENGINE
++void async_tx_issue_pending_all(void);
++enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
++void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
++struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++ enum dma_transaction_type tx_type);
++#else
++static inline void async_tx_issue_pending_all(void)
++{
++ do { } while (0);
++}
++
++static inline enum dma_status
++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
++{
++ return DMA_SUCCESS;
++}
++
++static inline void
++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
++{
++ /* nothing to run without a dma engine; matches the declaration above */
++ do { } while (0);
++}
++
++static inline struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++ enum dma_transaction_type tx_type)
++{
++ return NULL;
++}
++#endif
++
++/**
++ * async_tx_sync_epilog - actions to take if an operation is run synchronously
++ * @flags: async_tx flags
++ * @depend_tx: transaction depends on depend_tx
++ * @cb_fn: function to call when the transaction completes
++ * @cb_fn_param: parameter to pass to the callback routine
++ */
++static inline void
++async_tx_sync_epilog(unsigned long flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param)
++{
++ if (cb_fn)
++ cb_fn(cb_fn_param);
++
++ if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
++ async_tx_ack(depend_tx);
++}
++
++void
++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_xor(struct page *dest, struct page **src_list, unsigned int offset,
++ int src_cnt, size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_xor_zero_sum(struct page *dest, struct page **src_list,
++ unsigned int offset, int src_cnt, size_t len,
++ u32 *result, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
++ unsigned int src_offset, size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_memset(struct page *dest, int val, unsigned int offset,
++ size_t len, enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_trigger_callback(enum async_tx_flags flags,
++ struct dma_async_tx_descriptor *depend_tx,
++ dma_async_tx_callback cb_fn, void *cb_fn_param);
++#endif /* _ASYNC_TX_H_ */
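A hedged usage sketch of the API declared above: submit one offloaded page copy with a completion callback, then flush to hardware. The demo_* names are illustrative; the assumption that a NULL descriptor means the synchronous fallback already ran (and invoked the callback via async_tx_sync_epilog()) follows the flag documentation in this header.

#include <linux/async_tx.h>
#include <linux/completion.h>
#include <linux/mm.h>

static void demo_copy_done(void *param)
{
	complete(param);
}

/* Illustrative: offload one page copy, then wait for it. */
static void demo_offload_copy(struct page *dst, struct page *src)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct dma_async_tx_descriptor *tx;

	tx = async_memcpy(dst, src, 0, 0, PAGE_SIZE, ASYNC_TX_ACK,
			  NULL, demo_copy_done, &done);
	/* A NULL tx is assumed to mean the synchronous fallback ran and
	 * the callback has already fired; otherwise push to hardware. */
	if (tx)
		async_tx_issue_pending_all();
	wait_for_completion(&done);
}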
+diff -Nurb linux-2.6.22-570/include/linux/configfs.h linux-2.6.22-591/include/linux/configfs.h
+--- linux-2.6.22-570/include/linux/configfs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/configfs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -75,7 +75,6 @@
+ extern void config_item_init_type_name(struct config_item *item,
+ const char *name,
+ struct config_item_type *type);
+-extern void config_item_cleanup(struct config_item *);
+
+ extern struct config_item * config_item_get(struct config_item *);
+ extern void config_item_put(struct config_item *);
+@@ -157,6 +156,7 @@
+ struct config_item *(*make_item)(struct config_group *group, const char *name);
+ struct config_group *(*make_group)(struct config_group *group, const char *name);
+ int (*commit_item)(struct config_item *item);
++ void (*disconnect_notify)(struct config_group *group, struct config_item *item);
+ void (*drop_item)(struct config_group *group, struct config_item *item);
+ };
+
+@@ -175,6 +175,11 @@
+ int configfs_register_subsystem(struct configfs_subsystem *subsys);
+ void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+
++/* These functions can sleep and can alloc with GFP_KERNEL */
++/* WARNING: These cannot be called underneath configfs callbacks!! */
++int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
++void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target);
++
+ #endif /* __KERNEL__ */
+
+ #endif /* _CONFIGFS_H_ */
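A hedged sketch of pairing the configfs_depend_item()/configfs_undepend_item() calls introduced above, honoring the warning that they must not be called from configfs callbacks; the subsystem and item pointers are assumed to come from the caller, and the demo_* name is illustrative.

#include <linux/configfs.h>

/* Illustrative: pin a config_item while the kernel uses it, so that
 * userspace cannot remove it until we undepend. */
static int demo_use_item(struct configfs_subsystem *subsys,
			 struct config_item *item)
{
	int ret = configfs_depend_item(subsys, item);

	if (ret)
		return ret;

	/* ... work with the item; it cannot vanish underneath us ... */

	configfs_undepend_item(subsys, item);
	return 0;
}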
+diff -Nurb linux-2.6.22-570/include/linux/container.h linux-2.6.22-591/include/linux/container.h
+--- linux-2.6.22-570/include/linux/container.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/container.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,295 @@
++#ifndef _LINUX_CONTAINER_H
++#define _LINUX_CONTAINER_H
++/*
++ * container interface
++ *
++ * Copyright (C) 2003 BULL SA
++ * Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/kref.h>
++#include <linux/cpumask.h>
++#include <linux/nodemask.h>
++#include <linux/rcupdate.h>
++
++#ifdef CONFIG_CONTAINERS
++
++struct containerfs_root;
++struct inode;
++
++extern int container_init_early(void);
++extern int container_init(void);
++extern void container_init_smp(void);
++extern void container_lock(void);
++extern void container_unlock(void);
++extern void container_fork(struct task_struct *p);
++extern void container_fork_callbacks(struct task_struct *p);
++extern void container_exit(struct task_struct *p, int run_callbacks);
++
++extern struct file_operations proc_container_operations;
++
++/* Define the enumeration of all container subsystems */
++#define SUBSYS(_x) _x ## _subsys_id,
++enum container_subsys_id {
++#include <linux/container_subsys.h>
++ CONTAINER_SUBSYS_COUNT
++};
++#undef SUBSYS
++
++/* Per-subsystem/per-container state maintained by the system. */
++struct container_subsys_state {
++ /* The container that this subsystem is attached to. Useful
++ * for subsystems that want to know about the container
++ * hierarchy structure */
++ struct container *container;
++
++ /* State maintained by the container system to allow
++ * subsystems to be "busy". Should be accessed via css_get()
++ * and css_put() */
++
++ atomic_t refcnt;
++};
++
++/*
++ * Call css_get() to hold a reference on the container.
++ *
++ */
++
++static inline void css_get(struct container_subsys_state *css)
++{
++ atomic_inc(&css->refcnt);
++}
++/*
++ * css_put() should be called to release a reference taken by
++ * css_get()
++ */
++void css_put(struct container_subsys_state *css);
++
++struct container {
++ unsigned long flags; /* "unsigned long" so bitops work */
++
++ /* count users of this container. >0 means busy, but doesn't
++ * necessarily indicate the number of tasks in the
++ * container */
++ atomic_t count;
++
++ /*
++ * We link our 'sibling' struct into our parent's 'children'.
++ * Our children link their 'sibling' into our 'children'.
++ */
++ struct list_head sibling; /* my parent's children */
++ struct list_head children; /* my children */
++
++ struct container *parent; /* my parent */
++ struct dentry *dentry; /* container fs entry */
++
++ /* Private pointers for each registered subsystem */
++ struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
++
++ struct containerfs_root *root;
++ struct container *top_container;
++
++ /*
++ * List of cg_container_links pointing at css_groups with
++ * tasks in this container. Protected by css_group_lock
++ */
++ struct list_head css_groups;
++
++ /*
++ * Linked list running through all containers that can
++ * potentially be reaped by the release agent. Protected by
++ * container_mutex
++ */
++ struct list_head release_list;
++};
++
++/* A css_group is a structure holding pointers to a set of
++ * container_subsys_state objects. This saves space in the task struct
++ * object and speeds up fork()/exit(), since a single inc/dec and a
++ * list_add()/del() can bump the reference count on the entire
++ * container set for a task.
++ */
++
++struct css_group {
++
++ /* Reference count */
++ struct kref ref;
++
++ /*
++ * List running through all container groups. Protected by
++ * css_group_lock
++ */
++ struct list_head list;
++
++ /*
++ * List running through all tasks using this container
++ * group. Protected by css_group_lock
++ */
++ struct list_head tasks;
++
++ /*
++ * List of cg_container_link objects on link chains from
++ * containers referenced from this css_group. Protected by
++ * css_group_lock
++ */
++ struct list_head cg_links;
++
++ /* Set of subsystem states, one for each subsystem. NULL for
++ * subsystems that aren't part of this hierarchy. These
++ * pointers reduce the number of dereferences required to get
++ * from a task to its state for a given container, but result
++ * in increased space usage if tasks are in wildly different
++ * groupings across different hierarchies. This array is
++ * immutable after creation */
++ struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
++
++};
++
++/* struct cftype:
++ *
++ * Most files in the container filesystem have very simple read/write
++ * handling, which a common function takes care of. Nevertheless, some cases
++ * (e.g. reading the tasks file) are special, so this structure is defined
++ * for every kind of file.
++ *
++ *
++ * When reading/writing to a file:
++ * - the container to use is in file->f_dentry->d_parent->d_fsdata
++ * - the 'cftype' of the file is file->f_dentry->d_fsdata
++ */
++
++#define MAX_CFTYPE_NAME 64
++struct cftype {
++ /* By convention, the name should begin with the name of the
++ * subsystem, followed by a period */
++ char name[MAX_CFTYPE_NAME];
++ int private;
++ int (*open) (struct inode *inode, struct file *file);
++ ssize_t (*read) (struct container *cont, struct cftype *cft,
++ struct file *file,
++ char __user *buf, size_t nbytes, loff_t *ppos);
++ /*
++ * read_uint() is a shortcut for the common case of returning a
++ * single integer. Use it in place of read()
++ */
++ u64 (*read_uint) (struct container *cont, struct cftype *cft);
++ ssize_t (*write) (struct container *cont, struct cftype *cft,
++ struct file *file,
++ const char __user *buf, size_t nbytes, loff_t *ppos);
++ int (*release) (struct inode *inode, struct file *file);
++};
++
++/* Add a new file to the given container directory. Should only be
++ * called by subsystems from within a populate() method */
++int container_add_file(struct container *cont, const struct cftype *cft);
++
++/* Add a set of new files to the given container directory. Should
++ * only be called by subsystems from within a populate() method */
++int container_add_files(struct container *cont, const struct cftype cft[],
++ int count);
++
++int container_is_removed(const struct container *cont);
++
++int container_path(const struct container *cont, char *buf, int buflen);
++
++int container_task_count(const struct container *cont);
++
++/* Return true if the container is a descendant of the current container */
++int container_is_descendant(const struct container *cont);
++
++/* Container subsystem type. See Documentation/containers.txt for details */
++
++struct container_subsys {
++ int (*create)(struct container_subsys *ss,
++ struct container *cont);
++ void (*destroy)(struct container_subsys *ss, struct container *cont);
++ int (*can_attach)(struct container_subsys *ss,
++ struct container *cont, struct task_struct *tsk);
++ void (*attach)(struct container_subsys *ss, struct container *cont,
++ struct container *old_cont, struct task_struct *tsk);
++ void (*fork)(struct container_subsys *ss, struct task_struct *task);
++ void (*exit)(struct container_subsys *ss, struct task_struct *task);
++ int (*populate)(struct container_subsys *ss,
++ struct container *cont);
++ void (*post_clone)(struct container_subsys *ss, struct container *cont);
++ void (*bind)(struct container_subsys *ss, struct container *root);
++ int subsys_id;
++ int active;
++ int early_init;
++#define MAX_CONTAINER_TYPE_NAMELEN 32
++ const char *name;
++
++ /* Protected by RCU */
++ struct containerfs_root *root;
++
++ struct list_head sibling;
++
++ void *private;
++};
++
++#define SUBSYS(_x) extern struct container_subsys _x ## _subsys;
++#include <linux/container_subsys.h>
++#undef SUBSYS
++
++static inline struct container_subsys_state *container_subsys_state(
++ struct container *cont, int subsys_id)
++{
++ return cont->subsys[subsys_id];
++}
++
++static inline struct container_subsys_state *task_subsys_state(
++ struct task_struct *task, int subsys_id)
++{
++ return rcu_dereference(task->containers->subsys[subsys_id]);
++}
++
++static inline struct container* task_container(struct task_struct *task,
++ int subsys_id)
++{
++ return task_subsys_state(task, subsys_id)->container;
++}
++
++int container_path(const struct container *cont, char *buf, int buflen);
++
++int container_clone(struct task_struct *tsk, struct container_subsys *ss);
++
++/* A container_iter should be treated as an opaque object */
++struct container_iter {
++ struct list_head *cg_link;
++ struct list_head *task;
++};
++
++/* To iterate across the tasks in a container:
++ *
++ * 1) call container_iter_start to initialize an iterator
++ *
++ * 2) call container_iter_next() to retrieve member tasks until it
++ * returns NULL or until you want to end the iteration
++ *
++ * 3) call container_iter_end() to destroy the iterator.
++ */
++void container_iter_start(struct container *cont, struct container_iter *it);
++struct task_struct *container_iter_next(struct container *cont,
++ struct container_iter *it);
++void container_iter_end(struct container *cont, struct container_iter *it);
++
++void container_set_release_agent_path(struct container_subsys *ss,
++ const char *path);
++
++#else /* !CONFIG_CONTAINERS */
++
++static inline int container_init_early(void) { return 0; }
++static inline int container_init(void) { return 0; }
++static inline void container_init_smp(void) {}
++static inline void container_fork(struct task_struct *p) {}
++static inline void container_fork_callbacks(struct task_struct *p) {}
++static inline void container_exit(struct task_struct *p, int callbacks) {}
++
++static inline void container_lock(void) {}
++static inline void container_unlock(void) {}
++
++#endif /* !CONFIG_CONTAINERS */
++
++#endif /* _LINUX_CONTAINER_H */
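A hedged sketch of the iteration protocol documented above (start/next/end), equivalent in spirit to container_task_count(); it assumes CONFIG_CONTAINERS and that the caller already holds a reference on the container, and the demo_* name is illustrative.

#include <linux/container.h>

/* Illustrative: walk a container's member tasks with container_iter. */
static int demo_count_tasks(struct container *cont)
{
	struct container_iter it;
	struct task_struct *tsk;
	int n = 0;

	container_iter_start(cont, &it);
	while ((tsk = container_iter_next(cont, &it)) != NULL)
		n++;
	container_iter_end(cont, &it);
	return n;
}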
+diff -Nurb linux-2.6.22-570/include/linux/container_subsys.h linux-2.6.22-591/include/linux/container_subsys.h
+--- linux-2.6.22-570/include/linux/container_subsys.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/container_subsys.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,32 @@
++/* Add subsystem definitions of the form SUBSYS(<name>) in this
++ * file. Surround each one by a line of comment markers so that
++ * patches don't collide
++ */
++
++/* */
++
++/* */
++
++#ifdef CONFIG_CONTAINER_CPUACCT
++SUBSYS(cpuacct)
++#endif
++
++/* */
++
++#ifdef CONFIG_CPUSETS
++SUBSYS(cpuset)
++#endif
++
++/* */
++
++#ifdef CONFIG_CONTAINER_DEBUG
++SUBSYS(debug)
++#endif
++
++/* */
++
++#ifdef CONFIG_CONTAINER_NS
++SUBSYS(ns)
++#endif
++
++/* */
+diff -Nurb linux-2.6.22-570/include/linux/cpu_acct.h linux-2.6.22-591/include/linux/cpu_acct.h
+--- linux-2.6.22-570/include/linux/cpu_acct.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/cpu_acct.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,14 @@
++
++#ifndef _LINUX_CPU_ACCT_H
++#define _LINUX_CPU_ACCT_H
++
++#include <linux/container.h>
++#include <asm/cputime.h>
++
++#ifdef CONFIG_CONTAINER_CPUACCT
++extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
++#else
++static inline void cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
++#endif
++
++#endif
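The stub pattern above lets callers charge unconditionally; a one-line sketch (the demo_* name is illustrative):

#include <linux/cpu_acct.h>

/* Illustrative accounting call site: compiles away to the empty stub
 * above when CONFIG_CONTAINER_CPUACCT is off. */
static void demo_account(struct task_struct *p, cputime_t t)
{
	cpuacct_charge(p, t);
}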
+diff -Nurb linux-2.6.22-570/include/linux/cpuidle.h linux-2.6.22-591/include/linux/cpuidle.h
+--- linux-2.6.22-570/include/linux/cpuidle.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/cpuidle.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,189 @@
++/*
++ * cpuidle.h - a generic framework for CPU idle power management
++ *
++ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Shaohua Li <shaohua.li@intel.com>
++ * Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#ifndef _LINUX_CPUIDLE_H
++#define _LINUX_CPUIDLE_H
++
++#include <linux/percpu.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/kobject.h>
++#include <linux/completion.h>
++
++#define CPUIDLE_STATE_MAX 8
++#define CPUIDLE_NAME_LEN 16
++
++struct cpuidle_device;
++
++
++/****************************
++ * CPUIDLE DEVICE INTERFACE *
++ ****************************/
++
++struct cpuidle_state {
++ char name[CPUIDLE_NAME_LEN];
++ void *driver_data;
++
++ unsigned int flags;
++ unsigned int exit_latency; /* in US */
++ unsigned int power_usage; /* in mW */
++ unsigned int target_residency; /* in US */
++
++ unsigned int usage;
++ unsigned int time; /* in US */
++
++ int (*enter) (struct cpuidle_device *dev,
++ struct cpuidle_state *state);
++};
++
++/* Idle State Flags */
++#define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */
++#define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */
++#define CPUIDLE_FLAG_SHALLOW (0x10) /* low latency, minimal savings */
++#define CPUIDLE_FLAG_BALANCED (0x20) /* medium latency, moderate savings */
++#define CPUIDLE_FLAG_DEEP (0x40) /* high latency, large savings */
++
++#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
++
++/**
++ * cpuidle_get_statedata - retrieves private driver state data
++ * @state: the state
++ */
++static inline void * cpuidle_get_statedata(struct cpuidle_state *state)
++{
++ return state->driver_data;
++}
++
++/**
++ * cpuidle_set_statedata - stores private driver state data
++ * @state: the state
++ * @data: the private data
++ */
++static inline void
++cpuidle_set_statedata(struct cpuidle_state *state, void *data)
++{
++ state->driver_data = data;
++}
++
++struct cpuidle_state_kobj {
++ struct cpuidle_state *state;
++ struct completion kobj_unregister;
++ struct kobject kobj;
++};
++
++struct cpuidle_device {
++ unsigned int status;
++ int cpu;
++
++ int last_residency;
++ int state_count;
++ struct cpuidle_state states[CPUIDLE_STATE_MAX];
++ struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
++ struct cpuidle_state *last_state;
++
++ struct list_head device_list;
++ struct kobject kobj;
++ struct completion kobj_unregister;
++ void *governor_data;
++};
++
++DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
++
++/* Device Status Flags */
++#define CPUIDLE_STATUS_DETECTED (0x1)
++#define CPUIDLE_STATUS_DRIVER_ATTACHED (0x2)
++#define CPUIDLE_STATUS_GOVERNOR_ATTACHED (0x4)
++#define CPUIDLE_STATUS_DOIDLE (CPUIDLE_STATUS_DETECTED | \
++ CPUIDLE_STATUS_DRIVER_ATTACHED | \
++ CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++
++/**
++ * cpuidle_get_last_residency - retrieves the last state's residency time
++ * @dev: the target CPU
++ *
++ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set
++ */
++static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
++{
++ return dev->last_residency;
++}
++
++
++/****************************
++ * CPUIDLE DRIVER INTERFACE *
++ ****************************/
++
++struct cpuidle_driver {
++ char name[CPUIDLE_NAME_LEN];
++ struct list_head driver_list;
++
++ int (*init) (struct cpuidle_device *dev);
++ void (*exit) (struct cpuidle_device *dev);
++ int (*redetect) (struct cpuidle_device *dev);
++
++ int (*bm_check) (void);
++
++ struct module *owner;
++};
++
++#ifdef CONFIG_CPU_IDLE
++
++extern int cpuidle_register_driver(struct cpuidle_driver *drv);
++extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
++extern int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv);
++extern int cpuidle_force_redetect_devices(struct cpuidle_driver *drv);
++
++#else
++
++static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
++{return 0;}
++static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
++static inline int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv)
++{return 0;}
++static inline int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
++{return 0;}
++
++#endif
++
++/******************************
++ * CPUIDLE GOVERNOR INTERFACE *
++ ******************************/
++
++struct cpuidle_governor {
++ char name[CPUIDLE_NAME_LEN];
++ struct list_head governor_list;
++
++ int (*init) (struct cpuidle_device *dev);
++ void (*exit) (struct cpuidle_device *dev);
++ void (*scan) (struct cpuidle_device *dev);
++
++ int (*select) (struct cpuidle_device *dev);
++ void (*reflect) (struct cpuidle_device *dev);
++
++ struct module *owner;
++};
++
++#ifdef CONFIG_CPU_IDLE
++
++extern int cpuidle_register_governor(struct cpuidle_governor *gov);
++extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
++extern int cpuidle_get_bm_activity(void);
++
++#else
++
++static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
++{return 0;}
++static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
++static inline int cpuidle_get_bm_activity(void)
++{return 0;}
++
++#endif
++
++#endif /* _LINUX_CPUIDLE_H */
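A hedged sketch of the driver-side contract: fill in one idle state at init time and register the driver. Field usage follows the struct definitions above; the int return of ->enter being the time spent in the state (us, consistent with last_residency) is an assumption, and all demo_* names are illustrative.

#include <linux/cpuidle.h>

static int demo_enter(struct cpuidle_device *dev,
		      struct cpuidle_state *state)
{
	/* architecture-specific idle entry would go here */
	return 0;	/* assumed: residency in the state, in us */
}

static int demo_init(struct cpuidle_device *dev)
{
	struct cpuidle_state *st = &dev->states[0];

	strcpy(st->name, "C1-demo");	/* fits CPUIDLE_NAME_LEN */
	st->exit_latency = 1;		/* us */
	st->target_residency = 2;	/* us */
	st->flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_SHALLOW;
	st->enter = demo_enter;
	dev->state_count = 1;
	return 0;
}

static struct cpuidle_driver demo_driver = {
	.name	= "demo_idle",
	.init	= demo_init,
	.owner	= THIS_MODULE,
};

/* registration: cpuidle_register_driver(&demo_driver); */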
+diff -Nurb linux-2.6.22-570/include/linux/cpuset.h linux-2.6.22-591/include/linux/cpuset.h
+--- linux-2.6.22-570/include/linux/cpuset.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/cpuset.h 2007-12-21 15:36:12.000000000 -0500
+@@ -11,6 +11,7 @@
+ #include <linux/sched.h>
+ #include <linux/cpumask.h>
+ #include <linux/nodemask.h>
++#include <linux/container.h>
+
+ #ifdef CONFIG_CPUSETS
+
+@@ -19,8 +20,6 @@
+ extern int cpuset_init_early(void);
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+-extern void cpuset_fork(struct task_struct *p);
+-extern void cpuset_exit(struct task_struct *p);
+ extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+@@ -75,13 +74,13 @@
+
+ extern void cpuset_track_online_nodes(void);
+
++extern int current_cpuset_is_being_rebound(void);
++
+ #else /* !CONFIG_CPUSETS */
+
+ static inline int cpuset_init_early(void) { return 0; }
+ static inline int cpuset_init(void) { return 0; }
+ static inline void cpuset_init_smp(void) {}
+-static inline void cpuset_fork(struct task_struct *p) {}
+-static inline void cpuset_exit(struct task_struct *p) {}
+
+ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
+ {
+@@ -146,6 +145,11 @@
+
+ static inline void cpuset_track_online_nodes(void) {}
+
++static inline int current_cpuset_is_being_rebound(void)
++{
++ return 0;
++}
++
+ #endif /* !CONFIG_CPUSETS */
+
+ #endif /* _LINUX_CPUSET_H */
+diff -Nurb linux-2.6.22-570/include/linux/device.h linux-2.6.22-591/include/linux/device.h
+--- linux-2.6.22-570/include/linux/device.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/device.h 2007-12-21 15:36:14.000000000 -0500
+@@ -200,6 +200,8 @@
+
+ int (*suspend)(struct device *, pm_message_t state);
+ int (*resume)(struct device *);
++
++ const struct shadow_dir_operations *shadow_ops;
+ };
+
+ extern int __must_check class_register(struct class *);
+@@ -238,7 +240,6 @@
+ * @devt: for internal use by the driver core only.
+ * @node: for internal use by the driver core only.
+ * @kobj: for internal use by the driver core only.
+- * @devt_attr: for internal use by the driver core only.
+ * @groups: optional additional groups to be created
+ * @dev: if set, a symlink to the struct device is created in the sysfs
+ * directory for this struct class device.
+@@ -263,8 +264,6 @@
+ struct kobject kobj;
+ struct class * class; /* required */
+ dev_t devt; /* dev_t, creates the sysfs "dev" */
+- struct class_device_attribute *devt_attr;
+- struct class_device_attribute uevent_attr;
+ struct device * dev; /* not necessary, but nice to have */
+ void * class_data; /* class-specific data */
+ struct class_device *parent; /* parent of this child device, if there is one */
+@@ -419,8 +418,6 @@
+ struct device_type *type;
+ unsigned is_registered:1;
+ unsigned uevent_suppress:1;
+- struct device_attribute uevent_attr;
+- struct device_attribute *devt_attr;
+
+ struct semaphore sem; /* semaphore to synchronize calls to
+ * its driver.
+diff -Nurb linux-2.6.22-570/include/linux/dmaengine.h linux-2.6.22-591/include/linux/dmaengine.h
+--- linux-2.6.22-570/include/linux/dmaengine.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/dmaengine.h 2007-12-21 15:36:12.000000000 -0500
+@@ -21,29 +21,40 @@
+ #ifndef DMAENGINE_H
+ #define DMAENGINE_H
+
+-#ifdef CONFIG_DMA_ENGINE
+-
+ #include <linux/device.h>
+ #include <linux/uio.h>
+ #include <linux/kref.h>
+ #include <linux/completion.h>
+ #include <linux/rcupdate.h>
++#include <linux/dma-mapping.h>
+
+ /**
+- * enum dma_event - resource PNP/power managment events
++ * enum dma_state - resource PNP/power management state
+ * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
+ * @DMA_RESOURCE_RESUME: DMA device returning to full power
+- * @DMA_RESOURCE_ADDED: DMA device added to the system
++ * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
+ * @DMA_RESOURCE_REMOVED: DMA device removed from the system
+ */
+-enum dma_event {
++enum dma_state {
+ DMA_RESOURCE_SUSPEND,
+ DMA_RESOURCE_RESUME,
+- DMA_RESOURCE_ADDED,
++ DMA_RESOURCE_AVAILABLE,
+ DMA_RESOURCE_REMOVED,
+ };
+
+ /**
++ * enum dma_state_client - state of the channel in the client
++ * @DMA_ACK: client would like to use, or was using this channel
++ * @DMA_DUP: client has already seen this channel, or is not using this channel
++ * @DMA_NAK: client does not want to see any more channels
++ */
++enum dma_state_client {
++ DMA_ACK,
++ DMA_DUP,
++ DMA_NAK,
++};
++
++/**
+ * typedef dma_cookie_t - an opaque DMA cookie
+ *
+ * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
+@@ -65,6 +76,31 @@
+ };
+
+ /**
++ * enum dma_transaction_type - DMA transaction types/indexes
++ */
++enum dma_transaction_type {
++ DMA_MEMCPY,
++ DMA_XOR,
++ DMA_PQ_XOR,
++ DMA_DUAL_XOR,
++ DMA_PQ_UPDATE,
++ DMA_ZERO_SUM,
++ DMA_PQ_ZERO_SUM,
++ DMA_MEMSET,
++ DMA_MEMCPY_CRC32C,
++ DMA_INTERRUPT,
++};
++
++/* last transaction type for creation of the capabilities mask */
++#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
++
++/**
++ * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
++ * See linux/cpumask.h
++ */
++typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
++
++/**
+ * struct dma_chan_percpu - the per-CPU part of struct dma_chan
+ * @refcount: local_t used for open-coded "bigref" counting
+ * @memcpy_count: transaction counter
+@@ -80,7 +116,6 @@
+
+ /**
+ * struct dma_chan - devices supply DMA channels, clients use them
+- * @client: ptr to the client user of this chan, will be %NULL when unused
+ * @device: ptr to the dma device who supplies this channel, always !%NULL
+ * @cookie: last cookie value returned to client
+ * @chan_id: channel ID for sysfs
+@@ -88,12 +123,10 @@
+ * @refcount: kref, used in "bigref" slow-mode
+ * @slow_ref: indicates that the DMA channel is free
+ * @rcu: the DMA channel's RCU head
+- * @client_node: used to add this to the client chan list
+ * @device_node: used to add this to the device chan list
+ * @local: per-cpu pointer to a struct dma_chan_percpu
+ */
+ struct dma_chan {
+- struct dma_client *client;
+ struct dma_device *device;
+ dma_cookie_t cookie;
+
+@@ -105,11 +138,11 @@
+ int slow_ref;
+ struct rcu_head rcu;
+
+- struct list_head client_node;
+ struct list_head device_node;
+ struct dma_chan_percpu *local;
+ };
+
++
+ void dma_chan_cleanup(struct kref *kref);
+
+ static inline void dma_chan_get(struct dma_chan *chan)
+@@ -134,27 +167,68 @@
+
+ /*
+ * typedef dma_event_callback - function pointer to a DMA event callback
+- */
+-typedef void (*dma_event_callback) (struct dma_client *client,
+- struct dma_chan *chan, enum dma_event event);
++ * For each channel added to the system this routine is called for each client.
++ * If the client would like to use the channel it returns DMA_ACK to signal
++ * the dmaengine core to take out a reference on the channel and its
++ * corresponding device. A client must not 'ack' an available channel more
++ * than once. When a channel is removed all clients are notified. If a client
++ * is using the channel it must 'ack' the removal. A client must not 'ack' a
++ * removed channel more than once.
++ * @client - 'this' pointer for the client context
++ * @chan - channel to be acted upon
++ * @state - available or removed
++ */
++struct dma_client;
++typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
++ struct dma_chan *chan, enum dma_state state);
+
+ /**
+ * struct dma_client - info on the entity making use of DMA services
+ * @event_callback: func ptr to call when something happens
+- * @chan_count: number of chans allocated
+- * @chans_desired: number of chans requested. Can be +/- chan_count
+- * @lock: protects access to the channels list
+- * @channels: the list of DMA channels allocated
++ * @cap_mask: only return channels that satisfy the requested capabilities
++ * a value of zero corresponds to any capability
+ * @global_node: list_head for global dma_client_list
+ */
+ struct dma_client {
+ dma_event_callback event_callback;
+- unsigned int chan_count;
+- unsigned int chans_desired;
++ dma_cap_mask_t cap_mask;
++ struct list_head global_node;
++};
+
++typedef void (*dma_async_tx_callback)(void *dma_async_param);
++/**
++ * struct dma_async_tx_descriptor - async transaction descriptor
++ * @cookie: tracking cookie for this transaction, set to -EBUSY if
++ * this tx is sitting on a dependency list
++ * @ack: the descriptor can not be reused until the client acknowledges
++ * receipt, i.e. has had a chance to establish any dependency chains
++ * @callback: routine to call after this operation is complete
++ * @callback_param: general parameter to pass to the callback routine
++ * @chan: target channel for this operation
++ * @tx_submit: execute an operation
++ * @tx_set_dest: set a destination address in a hardware descriptor
++ * @tx_set_src: set a source address in a hardware descriptor
++ * @depend_list: at completion this list of transactions are submitted
++ * @depend_node: allow this transaction to be executed after another
++ * transaction has completed
++ * @parent: pointer to the next level up in the dependency chain
++ * @lock: protect the dependency list
++ */
++struct dma_async_tx_descriptor {
++ dma_cookie_t cookie;
++ int ack;
++ dma_async_tx_callback callback;
++ void *callback_param;
++ struct dma_chan *chan;
++ dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
++ void (*tx_set_dest)(dma_addr_t addr,
++ struct dma_async_tx_descriptor *tx, int index);
++ void (*tx_set_src)(dma_addr_t addr,
++ struct dma_async_tx_descriptor *tx, int index);
++ struct list_head depend_list;
++ struct list_head depend_node;
++ struct dma_async_tx_descriptor *parent;
+ spinlock_t lock;
+- struct list_head channels;
+- struct list_head global_node;
+ };
+
+ /**
+@@ -162,141 +236,130 @@
+ * @chancnt: how many DMA channels are supported
+ * @channels: the list of struct dma_chan
+ * @global_node: list_head for global dma_device_list
++ * @cap_mask: one or more dma_capability flags
++ * @max_xor: maximum number of xor sources, 0 if no capability
+ * @refcount: reference count
+ * @done: IO completion struct
+ * @dev_id: unique device ID
++ * @dev: struct device reference for dma mapping api
+ * @device_alloc_chan_resources: allocate resources and return the
+ * number of allocated descriptors
+ * @device_free_chan_resources: release DMA channel's resources
+- * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
+- * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
+- * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
+- * @device_memcpy_complete: poll the status of an IOAT DMA transaction
+- * @device_memcpy_issue_pending: push appended descriptors to hardware
++ * @device_prep_dma_memcpy: prepares a memcpy operation
++ * @device_prep_dma_xor: prepares a xor operation
++ * @device_prep_dma_zero_sum: prepares a zero_sum operation
++ * @device_prep_dma_memset: prepares a memset operation
++ * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
++ * @device_dependency_added: async_tx notifies the channel about new deps
++ * @device_issue_pending: push pending transactions to hardware
+ */
+ struct dma_device {
+
+ unsigned int chancnt;
+ struct list_head channels;
+ struct list_head global_node;
++ dma_cap_mask_t cap_mask;
++ int max_xor;
+
+ struct kref refcount;
+ struct completion done;
+
+ int dev_id;
++ struct device *dev;
+
+ int (*device_alloc_chan_resources)(struct dma_chan *chan);
+ void (*device_free_chan_resources)(struct dma_chan *chan);
+- dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan,
+- void *dest, void *src, size_t len);
+- dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan,
+- struct page *page, unsigned int offset, void *kdata,
+- size_t len);
+- dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan,
+- struct page *dest_pg, unsigned int dest_off,
+- struct page *src_pg, unsigned int src_off, size_t len);
+- enum dma_status (*device_memcpy_complete)(struct dma_chan *chan,
++
++ struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
++ struct dma_chan *chan, size_t len, int int_en);
++ struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
++ struct dma_chan *chan, unsigned int src_cnt, size_t len,
++ int int_en);
++ struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
++ struct dma_chan *chan, unsigned int src_cnt, size_t len,
++ u32 *result, int int_en);
++ struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
++ struct dma_chan *chan, int value, size_t len, int int_en);
++ struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
++ struct dma_chan *chan);
++
++ void (*device_dependency_added)(struct dma_chan *chan);
++ enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *last,
+ dma_cookie_t *used);
+- void (*device_memcpy_issue_pending)(struct dma_chan *chan);
++ void (*device_issue_pending)(struct dma_chan *chan);
+ };
+
+ /* --- public DMA engine API --- */
+
+-struct dma_client *dma_async_client_register(dma_event_callback event_callback);
++void dma_async_client_register(struct dma_client *client);
+ void dma_async_client_unregister(struct dma_client *client);
+-void dma_async_client_chan_request(struct dma_client *client,
+- unsigned int number);
++void dma_async_client_chan_request(struct dma_client *client);
++dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
++ void *dest, void *src, size_t len);
++dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
++ struct page *page, unsigned int offset, void *kdata, size_t len);
++dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
++ struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
++ unsigned int src_off, size_t len);
++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
++ struct dma_chan *chan);
+
+-/**
+- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+- * @chan: DMA channel to offload copy to
+- * @dest: destination address (virtual)
+- * @src: source address (virtual)
+- * @len: length
+- *
+- * Both @dest and @src must be mappable to a bus address according to the
+- * DMA mapping API rules for streaming mappings.
+- * Both @dest and @src must stay memory resident (kernel memory or locked
+- * user space pages).
+- */
+-static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
+- void *dest, void *src, size_t len)
++static inline void
++async_tx_ack(struct dma_async_tx_descriptor *tx)
+ {
+- int cpu = get_cpu();
+- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+- per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+- put_cpu();
+-
+- return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len);
++ tx->ack = 1;
+ }
+
+-/**
+- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+- * @chan: DMA channel to offload copy to
+- * @page: destination page
+- * @offset: offset in page to copy to
+- * @kdata: source address (virtual)
+- * @len: length
+- *
+- * Both @page/@offset and @kdata must be mappable to a bus address according
+- * to the DMA mapping API rules for streaming mappings.
+- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+- * locked user space pages)
+- */
+-static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
+- struct page *page, unsigned int offset, void *kdata, size_t len)
++#define first_dma_cap(mask) __first_dma_cap(&(mask))
++static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
+ {
+- int cpu = get_cpu();
+- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+- per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+- put_cpu();
++ return min_t(int, DMA_TX_TYPE_END,
++ find_first_bit(srcp->bits, DMA_TX_TYPE_END));
++}
+
+- return chan->device->device_memcpy_buf_to_pg(chan, page, offset,
+- kdata, len);
++#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
++static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
++{
++ return min_t(int, DMA_TX_TYPE_END,
++ find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
+ }
+
+-/**
+- * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+- * @chan: DMA channel to offload copy to
+- * @dest_pg: destination page
+- * @dest_off: offset in page to copy to
+- * @src_pg: source page
+- * @src_off: offset in page to copy from
+- * @len: length
+- *
+- * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+- * address according to the DMA mapping API rules for streaming mappings.
+- * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+- * (kernel memory or locked user space pages).
+- */
+-static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
+- struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
+- unsigned int src_off, size_t len)
++#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
++static inline void
++__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
+ {
+- int cpu = get_cpu();
+- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+- per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+- put_cpu();
++ set_bit(tx_type, dstp->bits);
++}
+
+- return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off,
+- src_pg, src_off, len);
++#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
++static inline int
++__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
++{
++ return test_bit(tx_type, srcp->bits);
+ }
+
++#define for_each_dma_cap_mask(cap, mask) \
++ for ((cap) = first_dma_cap(mask); \
++ (cap) < DMA_TX_TYPE_END; \
++ (cap) = next_dma_cap((cap), (mask)))
++
+ /**
+- * dma_async_memcpy_issue_pending - flush pending copies to HW
++ * dma_async_issue_pending - flush pending transactions to HW
+ * @chan: target DMA channel
+ *
+ * This allows drivers to push copies to HW in batches,
+ * reducing MMIO writes where possible.
+ */
+-static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
++static inline void dma_async_issue_pending(struct dma_chan *chan)
+ {
+- return chan->device->device_memcpy_issue_pending(chan);
++ return chan->device->device_issue_pending(chan);
+ }
+
++#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
++
+ /**
+- * dma_async_memcpy_complete - poll for transaction completion
++ * dma_async_is_tx_complete - poll for transaction completion
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ * @last: returns last completed cookie, can be NULL
+@@ -306,12 +369,15 @@
+ * internal state and can be used with dma_async_is_complete() to check
+ * the status of multiple cookies without re-checking hardware state.
+ */
+-static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan,
++static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
+ {
+- return chan->device->device_memcpy_complete(chan, cookie, last, used);
++ return chan->device->device_is_tx_complete(chan, cookie, last, used);
+ }
+
++#define dma_async_memcpy_complete(chan, cookie, last, used)\
++ dma_async_is_tx_complete(chan, cookie, last, used)
++
+ /**
+ * dma_async_is_complete - test a cookie against chan state
+ * @cookie: transaction identifier to test status of
+@@ -334,6 +400,7 @@
+ return DMA_IN_PROGRESS;
+ }
+
++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
+
+ /* --- DMA device --- */
+
+@@ -362,5 +429,4 @@
+ struct dma_pinned_list *pinned_list, struct page *page,
+ unsigned int offset, size_t len);
+
+-#endif /* CONFIG_DMA_ENGINE */
+ #endif /* DMAENGINE_H */
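A hedged sketch of the reworked client API above: the event callback acks the first available channel and its later removal, and dups everything else, per the ack/dup contract documented at dma_event_callback. All demo_* names are illustrative.

#include <linux/dmaengine.h>

static struct dma_chan *demo_chan;

/* Illustrative event callback: claim one memcpy-capable channel. */
static enum dma_state_client
demo_event(struct dma_client *client, struct dma_chan *chan,
	   enum dma_state state)
{
	if (state == DMA_RESOURCE_AVAILABLE && !demo_chan) {
		demo_chan = chan;
		return DMA_ACK;	/* take a reference */
	}
	if (state == DMA_RESOURCE_REMOVED && chan == demo_chan) {
		demo_chan = NULL;
		return DMA_ACK;	/* ack the removal */
	}
	return DMA_DUP;
}

static struct dma_client demo_client = {
	.event_callback = demo_event,
};

/* Setup and a polled copy would look like:
 *	dma_cap_set(DMA_MEMCPY, demo_client.cap_mask);
 *	dma_async_client_register(&demo_client);
 *	dma_async_client_chan_request(&demo_client);
 * then:
 *	cookie = dma_async_memcpy_buf_to_buf(demo_chan, dst, src, len);
 *	dma_sync_wait(demo_chan, cookie);
 */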
+diff -Nurb linux-2.6.22-570/include/linux/etherdevice.h linux-2.6.22-591/include/linux/etherdevice.h
+--- linux-2.6.22-570/include/linux/etherdevice.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/etherdevice.h 2007-12-21 15:36:12.000000000 -0500
+@@ -40,12 +40,6 @@
+ struct hh_cache *hh);
+
+ extern struct net_device *alloc_etherdev(int sizeof_priv);
+-static inline void eth_copy_and_sum (struct sk_buff *dest,
+- const unsigned char *src,
+- int len, int base)
+-{
+- memcpy (dest->data, src, len);
+-}
+
+ /**
+ * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
+diff -Nurb linux-2.6.22-570/include/linux/freezer.h linux-2.6.22-591/include/linux/freezer.h
+--- linux-2.6.22-570/include/linux/freezer.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/freezer.h 2007-12-21 15:36:12.000000000 -0500
+@@ -115,6 +115,14 @@
+ return !!(p->flags & PF_FREEZER_SKIP);
+ }
+
++/*
++ * Tell the freezer that the current task should be frozen by it
++ */
++static inline void set_freezable(void)
++{
++ current->flags &= ~PF_NOFREEZE;
++}
++
+ #else
+ static inline int frozen(struct task_struct *p) { return 0; }
+ static inline int freezing(struct task_struct *p) { return 0; }
+@@ -130,4 +138,5 @@
+ static inline void freezer_do_not_count(void) {}
+ static inline void freezer_count(void) {}
+ static inline int freezer_should_skip(struct task_struct *p) { return 0; }
++static inline void set_freezable(void) {}
+ #endif
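A hedged sketch of the new helper in context: kernel threads start with PF_NOFREEZE, so a freezer-aware kthread opts in with set_freezable() and then parks in try_to_freeze() on each pass. The demo_* name is illustrative.

#include <linux/freezer.h>
#include <linux/kthread.h>

/* Illustrative freezer-aware kernel thread loop. */
static int demo_thread(void *unused)
{
	set_freezable();	/* clear PF_NOFREEZE for this task */

	while (!kthread_should_stop()) {
		try_to_freeze();
		/* ... periodic work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}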
+diff -Nurb linux-2.6.22-570/include/linux/fs.h linux-2.6.22-591/include/linux/fs.h
+--- linux-2.6.22-570/include/linux/fs.h 2007-12-21 15:36:06.000000000 -0500
++++ linux-2.6.22-591/include/linux/fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -283,6 +283,17 @@
+ #define SYNC_FILE_RANGE_WRITE 2
+ #define SYNC_FILE_RANGE_WAIT_AFTER 4
+
++/*
++ * sys_fallocate modes
++ * Currently sys_fallocate supports two modes:
++ * FA_ALLOCATE : the preallocate mode, which an application may use to
++ * request (pre)allocation of blocks.
++ * FA_DEALLOCATE: This is the deallocate mode, which can be used to free
++ * the preallocated blocks.
++ */
++#define FA_ALLOCATE 0x1
++#define FA_DEALLOCATE 0x2
++
+ #ifdef __KERNEL__
+
+ #include <linux/linkage.h>
+@@ -300,6 +311,7 @@
+ #include <linux/init.h>
+ #include <linux/pid.h>
+ #include <linux/mutex.h>
++#include <linux/sysctl.h>
+
+ #include <asm/atomic.h>
+ #include <asm/semaphore.h>
+@@ -1139,6 +1151,7 @@
+ int (*flock) (struct file *, int, struct file_lock *);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
++ int (*revoke)(struct file *, struct address_space *);
+ };
+
+ struct inode_operations {
+@@ -1164,6 +1177,8 @@
+ ssize_t (*listxattr) (struct dentry *, char *, size_t);
+ int (*removexattr) (struct dentry *, const char *);
+ void (*truncate_range)(struct inode *, loff_t, loff_t);
++ long (*fallocate)(struct inode *inode, int mode, loff_t offset,
++ loff_t len);
+ int (*sync_flags) (struct inode *);
+ };
+
+@@ -1809,6 +1824,13 @@
+ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ size_t len, unsigned int flags);
+
++/* fs/revoke.c */
++#ifdef CONFIG_MMU
++extern int generic_file_revoke(struct file *, struct address_space *);
++#else
++#define generic_file_revoke NULL
++#endif
++
+ extern void
+ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+ extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
+@@ -2077,5 +2099,9 @@
+ { }
+ #endif /* CONFIG_SECURITY */
+
++int proc_nr_files(ctl_table *table, int write, struct file *filp,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_FS_H */
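A hedged userspace sketch of the interface added above: since FA_ALLOCATE/FA_DEALLOCATE sit outside __KERNEL__, an application at this point would invoke the raw syscall (no glibc wrapper is assumed to exist). This is 64-bit only; 32-bit ABIs split loff_t arguments differently, and __NR_fallocate is assumed to come from the arch's installed unistd.h.

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

#define FA_ALLOCATE 0x1		/* mirrors the definition above */

/* Illustrative: preallocate 64 MiB starting at offset 0. */
static int demo_preallocate(int fd)
{
	return syscall(__NR_fallocate, fd, FA_ALLOCATE,
		       (loff_t)0, (loff_t)64 * 1024 * 1024);
}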
+diff -Nurb linux-2.6.22-570/include/linux/fs_stack.h linux-2.6.22-591/include/linux/fs_stack.h
+--- linux-2.6.22-570/include/linux/fs_stack.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/fs_stack.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,19 @@
++/*
++ * Copyright (c) 2006-2007 Erez Zadok
++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2006-2007 Stony Brook University
++ * Copyright (c) 2006-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
+ #ifndef _LINUX_FS_STACK_H
+ #define _LINUX_FS_STACK_H
+
+-/* This file defines generic functions used primarily by stackable
++/*
++ * This file defines generic functions used primarily by stackable
+ * filesystems; none of these functions require i_mutex to be held.
+ */
+
+@@ -11,7 +23,8 @@
+ extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
+ int (*get_nlinks)(struct inode *));
+
+-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
++extern void fsstack_copy_inode_size(struct inode *dst,
++ const struct inode *src);
+
+ /* inlines */
+ static inline void fsstack_copy_attr_atime(struct inode *dest,
+diff -Nurb linux-2.6.22-570/include/linux/gfp.h linux-2.6.22-591/include/linux/gfp.h
+--- linux-2.6.22-570/include/linux/gfp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/gfp.h 2007-12-21 15:36:12.000000000 -0500
+@@ -30,6 +30,9 @@
+ * cannot handle allocation failures.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
++ *
++ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
++ * mechanism or reclaimed
+ */
+ #define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */
+ #define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */
+@@ -45,15 +48,21 @@
+ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+ #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
++#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
++#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */
+
+-#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
++#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+ /* if you forget to add the bitmask here kernel will crash, period */
+ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \
+- __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
++ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \
++ __GFP_RECLAIMABLE|__GFP_MOVABLE)
++
++/* This mask makes up all the page movable related flags */
++#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+
+ /* This equals 0, but use constants in case they ever change */
+ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
+@@ -62,9 +71,20 @@
+ #define GFP_NOIO (__GFP_WAIT)
+ #define GFP_NOFS (__GFP_WAIT | __GFP_IO)
+ #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
++#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \
++ __GFP_RECLAIMABLE)
+ #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+ #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
+ __GFP_HIGHMEM)
++#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
++ __GFP_HARDWALL | __GFP_HIGHMEM | \
++ __GFP_MOVABLE)
++#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
++#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
++ __GFP_HARDWALL | __GFP_MOVABLE)
++#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
++ __GFP_HARDWALL | __GFP_HIGHMEM | \
++ __GFP_MOVABLE)
+
+ #ifdef CONFIG_NUMA
+ #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+@@ -99,6 +119,12 @@
+ return ZONE_NORMAL;
+ }
+
++static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags)
++{
++ BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
++ return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags;
++}
++
+ /*
+ * There is only one page-allocator function, and two main namespaces to
+ * it. The alloc_page*() variants return 'struct page *' and as such
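The set_migrateflags() helper added above replaces any existing mobility hint in a gfp mask. As a minimal sketch of the intended calling convention (the function name example_alloc_movable_page() is hypothetical):

    #include <linux/gfp.h>

    /* Hypothetical caller: tag an allocation as movable so the
     * allocator can group it with other movable pages. */
    static struct page *example_alloc_movable_page(gfp_t base_gfp)
    {
            /* Clear any prior GFP_MOVABLE_MASK bits, then set __GFP_MOVABLE. */
            gfp_t gfp = set_migrateflags(base_gfp, __GFP_MOVABLE);

            return alloc_page(gfp);
    }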
+diff -Nurb linux-2.6.22-570/include/linux/highmem.h linux-2.6.22-591/include/linux/highmem.h
+--- linux-2.6.22-570/include/linux/highmem.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/highmem.h 2007-12-21 15:36:12.000000000 -0500
+@@ -73,10 +73,27 @@
+ }
+
+ #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
++/**
++ * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
++ * @movableflags: The GFP flags related to the page's future ability to move, such as __GFP_MOVABLE
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA, but the caller is expected
++ * to specify via movableflags whether the page will be movable in the
++ * future or not.
++ *
++ * An architecture may override this function by defining
++ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing its own
++ * implementation.
++ */
+ static inline struct page *
+-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
++__alloc_zeroed_user_highpage(gfp_t movableflags,
++ struct vm_area_struct *vma,
++ unsigned long vaddr)
+ {
+- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
++ struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
++ vma, vaddr);
+
+ if (page)
+ clear_user_highpage(page, vaddr);
+@@ -85,6 +102,36 @@
+ }
+ #endif
+
++/**
++ * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA that the caller knows will
++ * not be movable in the future using move_pages() or reclaim. If it
++ * is known that the page can move, use alloc_zeroed_user_highpage_movable().
++ */
++static inline struct page *
++alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
++{
++ return __alloc_zeroed_user_highpage(0, vma, vaddr);
++}
++
++/**
++ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA that the caller knows will
++ * be able to migrate in the future using move_pages() or be reclaimed.
++ */
++static inline struct page *
++alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
++ unsigned long vaddr)
++{
++ return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
++}
++
+ static inline void clear_highpage(struct page *page)
+ {
+ void *kaddr = kmap_atomic(page, KM_USER0);
+diff -Nurb linux-2.6.22-570/include/linux/hugetlb.h linux-2.6.22-591/include/linux/hugetlb.h
+--- linux-2.6.22-570/include/linux/hugetlb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/hugetlb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -15,6 +15,7 @@
+ }
+
+ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
++int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
+ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
+ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
+@@ -29,6 +30,8 @@
+ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+
+ extern unsigned long max_huge_pages;
++extern unsigned long hugepages_treat_as_movable;
++extern gfp_t htlb_alloc_mask;
+ extern const unsigned long hugetlb_zero, hugetlb_infinity;
+ extern int sysctl_hugetlb_shm_group;
+
+diff -Nurb linux-2.6.22-570/include/linux/idr.h linux-2.6.22-591/include/linux/idr.h
+--- linux-2.6.22-570/include/linux/idr.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/idr.h 2007-12-21 15:36:14.000000000 -0500
+@@ -83,4 +83,33 @@
+ void idr_destroy(struct idr *idp);
+ void idr_init(struct idr *idp);
+
++
++/*
++ * IDA - IDR based id allocator, use when translation from id to
++ * pointer isn't necessary.
++ */
++#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */
++#define IDA_BITMAP_LONGS (128 / sizeof(long) - 1)
++#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8)
++
++struct ida_bitmap {
++ long nr_busy;
++ unsigned long bitmap[IDA_BITMAP_LONGS];
++};
++
++struct ida {
++ struct idr idr;
++ struct ida_bitmap *free_bitmap;
++};
++
++#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, }
++#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
++
++int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
++int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
++int ida_get_new(struct ida *ida, int *p_id);
++void ida_remove(struct ida *ida, int id);
++void ida_destroy(struct ida *ida);
++void ida_init(struct ida *ida);
++
+ #endif /* __IDR_H__ */
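The new IDA calls mirror the existing idr convention: preallocate with ida_pre_get(), then retry ida_get_new() while it reports -EAGAIN, since another caller may consume the preallocated chunk in between. A minimal sketch, assuming a statically defined allocator named example_ida:

    #include <linux/idr.h>
    #include <linux/gfp.h>
    #include <linux/errno.h>

    static DEFINE_IDA(example_ida);

    static int example_get_id(int *id)
    {
            int ret;

            do {
                    if (!ida_pre_get(&example_ida, GFP_KERNEL))
                            return -ENOMEM;
                    ret = ida_get_new(&example_ida, id);
            } while (ret == -EAGAIN);

            return ret;     /* 0 on success, -ENOSPC if ids exhausted */
    }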
+diff -Nurb linux-2.6.22-570/include/linux/if_bridge.h linux-2.6.22-591/include/linux/if_bridge.h
+--- linux-2.6.22-570/include/linux/if_bridge.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/if_bridge.h 2007-12-21 15:36:14.000000000 -0500
+@@ -104,7 +104,7 @@
+
+ #include <linux/netdevice.h>
+
+-extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
++extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+ extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+ struct sk_buff *skb);
+ extern int (*br_should_route_hook)(struct sk_buff **pskb);
+diff -Nurb linux-2.6.22-570/include/linux/if_link.h linux-2.6.22-591/include/linux/if_link.h
+--- linux-2.6.22-570/include/linux/if_link.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/if_link.h 2007-12-21 15:36:12.000000000 -0500
+@@ -76,6 +76,8 @@
+ #define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
++ IFLA_LINKINFO,
++#define IFLA_LINKINFO IFLA_LINKINFO
+ __IFLA_MAX
+ };
+
+@@ -140,4 +142,49 @@
+ __u32 retrans_time;
+ };
+
++enum
++{
++ IFLA_INFO_UNSPEC,
++ IFLA_INFO_KIND,
++ IFLA_INFO_DATA,
++ IFLA_INFO_XSTATS,
++ __IFLA_INFO_MAX,
++};
++
++#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
++
++/* VLAN section */
++
++enum
++{
++ IFLA_VLAN_UNSPEC,
++ IFLA_VLAN_ID,
++ IFLA_VLAN_FLAGS,
++ IFLA_VLAN_EGRESS_QOS,
++ IFLA_VLAN_INGRESS_QOS,
++ __IFLA_VLAN_MAX,
++};
++
++#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
++
++struct ifla_vlan_flags {
++ __u32 flags;
++ __u32 mask;
++};
++
++enum
++{
++ IFLA_VLAN_QOS_UNSPEC,
++ IFLA_VLAN_QOS_MAPPING,
++ __IFLA_VLAN_QOS_MAX
++};
++
++#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
++
++struct ifla_vlan_qos_mapping
++{
++ __u32 from;
++ __u32 to;
++};
++
+ #endif /* _LINUX_IF_LINK_H */
+diff -Nurb linux-2.6.22-570/include/linux/if_pppox.h linux-2.6.22-591/include/linux/if_pppox.h
+--- linux-2.6.22-570/include/linux/if_pppox.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/if_pppox.h 2007-12-21 15:36:14.000000000 -0500
+@@ -160,7 +160,7 @@
+ struct module;
+
+ struct pppox_proto {
+- int (*create)(struct socket *sock);
++ int (*create)(struct net *net, struct socket *sock);
+ int (*ioctl)(struct socket *sock, unsigned int cmd,
+ unsigned long arg);
+ struct module *owner;
+diff -Nurb linux-2.6.22-570/include/linux/if_tun.h linux-2.6.22-591/include/linux/if_tun.h
+--- linux-2.6.22-570/include/linux/if_tun.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/if_tun.h 2007-12-21 15:36:12.000000000 -0500
+@@ -36,6 +36,7 @@
+ unsigned long flags;
+ int attached;
+ uid_t owner;
++ gid_t group;
+
+ wait_queue_head_t read_wait;
+ struct sk_buff_head readq;
+@@ -78,6 +79,7 @@
+ #define TUNSETPERSIST _IOW('T', 203, int)
+ #define TUNSETOWNER _IOW('T', 204, int)
+ #define TUNSETLINK _IOW('T', 205, int)
++#define TUNSETGROUP _IOW('T', 206, int)
+
+ /* TUNSETIFF ifr flags */
+ #define IFF_TUN 0x0001
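From userspace, TUNSETGROUP is used like the existing TUNSETOWNER ioctl, passing the id by value. A hedged sketch, assuming an already-configured tun device descriptor:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Restrict a tun device to the given group. */
    static int example_set_tun_group(int tun_fd, gid_t gid)
    {
            /* TUNSETGROUP takes the gid by value, like TUNSETOWNER. */
            return ioctl(tun_fd, TUNSETGROUP, gid);
    }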
+diff -Nurb linux-2.6.22-570/include/linux/if_vlan.h linux-2.6.22-591/include/linux/if_vlan.h
+--- linux-2.6.22-570/include/linux/if_vlan.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/if_vlan.h 2007-12-21 15:36:14.000000000 -0500
+@@ -62,7 +62,7 @@
+ #define VLAN_VID_MASK 0xfff
+
+ /* found in socket.c */
+-extern void vlan_ioctl_set(int (*hook)(void __user *));
++extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
+
+ #define VLAN_NAME "vlan"
+
+@@ -99,7 +99,7 @@
+ }
+
+ struct vlan_priority_tci_mapping {
+- unsigned long priority;
++ u32 priority;
+ unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
+ * at provisioning time.
+ * ((skb->priority << 13) & 0xE000)
+@@ -112,7 +112,10 @@
+ /** This will be the mapping that correlates skb->priority to
+ * 3 bits of VLAN QOS tags...
+ */
+- unsigned long ingress_priority_map[8];
++ unsigned int nr_ingress_mappings;
++ u32 ingress_priority_map[8];
++
++ unsigned int nr_egress_mappings;
+ struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
+
+ unsigned short vlan_id; /* The VLAN Identifier for this interface. */
+@@ -395,6 +398,10 @@
+ GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */
+ };
+
++enum vlan_flags {
++ VLAN_FLAG_REORDER_HDR = 0x1,
++};
++
+ enum vlan_name_types {
+ VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */
+ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */
+diff -Nurb linux-2.6.22-570/include/linux/inetdevice.h linux-2.6.22-591/include/linux/inetdevice.h
+--- linux-2.6.22-570/include/linux/inetdevice.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/inetdevice.h 2007-12-21 15:36:14.000000000 -0500
+@@ -17,8 +17,6 @@
+ DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1);
+ };
+
+-extern struct ipv4_devconf ipv4_devconf;
+-
+ struct in_device
+ {
+ struct net_device *dev;
+@@ -44,7 +42,7 @@
+ };
+
+ #define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1])
+-#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr)
++#define IPV4_DEVCONF_ALL(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf), attr)
+
+ static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+ {
+@@ -71,14 +69,14 @@
+ ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val))
+
+ #define IN_DEV_ANDCONF(in_dev, attr) \
+- (IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr))
++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) && IN_DEV_CONF_GET((in_dev), attr))
+ #define IN_DEV_ORCONF(in_dev, attr) \
+- (IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr))
++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) || IN_DEV_CONF_GET((in_dev), attr))
+ #define IN_DEV_MAXCONF(in_dev, attr) \
+- (max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr)))
++ (max(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr), IN_DEV_CONF_GET((in_dev), attr)))
+
+ #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING)
+-#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL(MC_FORWARDING) && \
++#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, MC_FORWARDING) && \
+ IPV4_DEVCONF((in_dev)->cnf, \
+ MC_FORWARDING))
+ #define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER)
+@@ -127,15 +125,15 @@
+ extern int register_inetaddr_notifier(struct notifier_block *nb);
+ extern int unregister_inetaddr_notifier(struct notifier_block *nb);
+
+-extern struct net_device *ip_dev_find(__be32 addr);
++extern struct net_device *ip_dev_find(struct net *net, __be32 addr);
+ extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
+-extern int devinet_ioctl(unsigned int cmd, void __user *);
++extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+ extern void devinet_init(void);
+-extern struct in_device *inetdev_by_index(int);
++extern struct in_device *inetdev_by_index(struct net *, int);
+ extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
+-extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope);
++extern __be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope);
+ extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
+-extern void inet_forward_change(void);
++extern void inet_forward_change(struct net *net);
+
+ static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
+ {
+diff -Nurb linux-2.6.22-570/include/linux/init_task.h linux-2.6.22-591/include/linux/init_task.h
+--- linux-2.6.22-570/include/linux/init_task.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/init_task.h 2007-12-21 15:36:14.000000000 -0500
+@@ -8,6 +8,8 @@
+ #include <linux/lockdep.h>
+ #include <linux/ipc.h>
+ #include <linux/pid_namespace.h>
++#include <linux/user_namespace.h>
++#include <net/net_namespace.h>
+
+ #define INIT_FDTABLE \
+ { \
+@@ -77,7 +79,9 @@
+ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \
+ .uts_ns = &init_uts_ns, \
+ .mnt_ns = NULL, \
++ .net_ns = &init_net, \
+ INIT_IPC_NS(ipc_ns) \
++ .user_ns = &init_user_ns, \
+ }
+
+ #define INIT_SIGHAND(sighand) { \
+diff -Nurb linux-2.6.22-570/include/linux/io.h linux-2.6.22-591/include/linux/io.h
+--- linux-2.6.22-570/include/linux/io.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/io.h 2007-12-21 15:36:12.000000000 -0500
+@@ -63,32 +63,7 @@
+ void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset,
+ unsigned long size);
+ void devm_iounmap(struct device *dev, void __iomem *addr);
+-
+-/**
+- * check_signature - find BIOS signatures
+- * @io_addr: mmio address to check
+- * @signature: signature block
+- * @length: length of signature
+- *
+- * Perform a signature comparison with the mmio address io_addr. This
+- * address should have been obtained by ioremap.
+- * Returns 1 on a match.
+- */
+-
+-static inline int check_signature(const volatile void __iomem *io_addr,
+- const unsigned char *signature, int length)
+-{
+- int retval = 0;
+- do {
+- if (readb(io_addr) != *signature)
+- goto out;
+- io_addr++;
+- signature++;
+- length--;
+- } while (length);
+- retval = 1;
+-out:
+- return retval;
+-}
++int check_signature(const volatile void __iomem *io_addr,
++ const unsigned char *signature, int length);
+
+ #endif /* _LINUX_IO_H */
+diff -Nurb linux-2.6.22-570/include/linux/ip_mp_alg.h linux-2.6.22-591/include/linux/ip_mp_alg.h
+--- linux-2.6.22-570/include/linux/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,22 +0,0 @@
+-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#ifndef _LINUX_IP_MP_ALG_H
+-#define _LINUX_IP_MP_ALG_H
+-
+-enum ip_mp_alg {
+- IP_MP_ALG_NONE,
+- IP_MP_ALG_RR,
+- IP_MP_ALG_DRR,
+- IP_MP_ALG_RANDOM,
+- IP_MP_ALG_WRANDOM,
+- __IP_MP_ALG_MAX
+-};
+-
+-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
+-
+-#endif /* _LINUX_IP_MP_ALG_H */
+-
+diff -Nurb linux-2.6.22-570/include/linux/ipc.h linux-2.6.22-591/include/linux/ipc.h
+--- linux-2.6.22-570/include/linux/ipc.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/ipc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -93,6 +93,7 @@
+
+ #ifdef CONFIG_SYSVIPC
+ #define INIT_IPC_NS(ns) .ns = &init_ipc_ns,
++extern void free_ipc_ns(struct kref *kref);
+ extern struct ipc_namespace *copy_ipcs(unsigned long flags,
+ struct ipc_namespace *ns);
+ #else
+@@ -104,13 +105,9 @@
+ }
+ #endif
+
+-#ifdef CONFIG_IPC_NS
+-extern void free_ipc_ns(struct kref *kref);
+-#endif
+-
+ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+ {
+-#ifdef CONFIG_IPC_NS
++#ifdef CONFIG_SYSVIPC
+ if (ns)
+ kref_get(&ns->kref);
+ #endif
+@@ -119,7 +116,7 @@
+
+ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ {
+-#ifdef CONFIG_IPC_NS
++#ifdef CONFIG_SYSVIPC
+ kref_put(&ns->kref, free_ipc_ns);
+ #endif
+ }
+@@ -127,5 +124,3 @@
+ #endif /* __KERNEL__ */
+
+ #endif /* _LINUX_IPC_H */
+-
+-
+diff -Nurb linux-2.6.22-570/include/linux/ipv6.h linux-2.6.22-591/include/linux/ipv6.h
+--- linux-2.6.22-570/include/linux/ipv6.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/ipv6.h 2007-12-21 15:36:12.000000000 -0500
+@@ -247,7 +247,7 @@
+ __u16 lastopt;
+ __u32 nhoff;
+ __u16 flags;
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ __u16 dsthao;
+ #endif
+
+diff -Nurb linux-2.6.22-570/include/linux/kgdb.h linux-2.6.22-591/include/linux/kgdb.h
+--- linux-2.6.22-570/include/linux/kgdb.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/kgdb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,291 @@
++/*
++ * include/linux/kgdb.h
++ *
++ * This provides the hooks and functions that KGDB needs to share between
++ * the core, I/O and arch-specific portions.
++ *
++ * Author: Amit Kale <amitkale@linsyssoft.com> and
++ * Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2001-2004 (c) Amit S. Kale and 2003-2005 (c) MontaVista Software, Inc.
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++#ifdef __KERNEL__
++#ifndef _KGDB_H_
++#define _KGDB_H_
++
++#include <asm/atomic.h>
++
++#ifdef CONFIG_KGDB
++#include <asm/kgdb.h>
++#include <linux/serial_8250.h>
++#include <linux/linkage.h>
++#include <linux/init.h>
++
++#ifndef CHECK_EXCEPTION_STACK
++#define CHECK_EXCEPTION_STACK() 1
++#endif
++
++struct tasklet_struct;
++struct pt_regs;
++struct task_struct;
++struct uart_port;
++
++#ifdef CONFIG_KGDB_CONSOLE
++extern struct console kgdbcons;
++#endif
++
++/* To enter the debugger explicitly. */
++extern void breakpoint(void);
++extern int kgdb_connected;
++extern int kgdb_may_fault;
++extern struct tasklet_struct kgdb_tasklet_breakpoint;
++
++extern atomic_t kgdb_setting_breakpoint;
++extern atomic_t cpu_doing_single_step;
++extern atomic_t kgdb_sync_softlockup[NR_CPUS];
++
++extern struct task_struct *kgdb_usethread, *kgdb_contthread;
++
++enum kgdb_bptype {
++ bp_breakpoint = '0',
++ bp_hardware_breakpoint,
++ bp_write_watchpoint,
++ bp_read_watchpoint,
++ bp_access_watchpoint
++};
++
++enum kgdb_bpstate {
++ bp_none = 0,
++ bp_removed,
++ bp_set,
++ bp_active
++};
++
++struct kgdb_bkpt {
++ unsigned long bpt_addr;
++ unsigned char saved_instr[BREAK_INSTR_SIZE];
++ enum kgdb_bptype type;
++ enum kgdb_bpstate state;
++};
++
++/* The maximum number of KGDB I/O modules that can be loaded */
++#define MAX_KGDB_IO_HANDLERS 3
++
++#ifndef MAX_BREAKPOINTS
++#define MAX_BREAKPOINTS 1000
++#endif
++
++#define KGDB_HW_BREAKPOINT 1
++
++/* Required functions. */
++/**
++ * kgdb_arch_init - Perform any architecture specific initialization.
++ *
++ * This function will handle the initialization of any architecture
++ * specific hooks.
++ */
++extern int kgdb_arch_init(void);
++
++/**
++ * regs_to_gdb_regs - Convert ptrace regs to GDB regs
++ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
++ * @regs: The &struct pt_regs of the current process.
++ *
++ * Convert the pt_regs in @regs into the format for registers that
++ * GDB expects, stored in @gdb_regs.
++ */
++extern void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs);
++
++/**
++ * sleeping_regs_to_gdb_regs - Convert ptrace regs to GDB regs
++ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
++ * @p: The &struct task_struct of the desired process.
++ *
++ * Convert the register values of the sleeping process in @p to
++ * the format that GDB expects.
++ * This function is called when kgdb does not have access to the
++ * &struct pt_regs and therefore it should fill the gdb registers
++ * @gdb_regs with what has been saved in &struct thread_struct
++ * thread field during switch_to.
++ */
++extern void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
++ struct task_struct *p);
++
++/**
++ * gdb_regs_to_regs - Convert GDB regs to ptrace regs.
++ * @gdb_regs: A pointer to hold the registers we've received from GDB.
++ * @regs: A pointer to a &struct pt_regs to hold these values in.
++ *
++ * Convert the GDB regs in @gdb_regs into the pt_regs, and store them
++ * in @regs.
++ */
++extern void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs);
++
++/**
++ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
++ * @vector: The error vector of the exception that happened.
++ * @signo: The signal number of the exception that happened.
++ * @err_code: The error code of the exception that happened.
++ * @remcom_in_buffer: The buffer of the packet we have read.
++ * @remcom_out_buffer: The buffer, of %BUFMAX to write a packet into.
++ * @regs: The &struct pt_regs of the current process.
++ *
++ * This function MUST handle the 'c' and 's' command packets,
++ * as well as packets to set / remove a hardware breakpoint, if used.
++ * If there are additional packets which the hardware needs to handle,
++ * they are handled here. The code should return -1 if it wants to
++ * process more packets, and a %0 or %1 if it wants to exit from the
++ * kgdb hook.
++ */
++extern int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++ char *remcom_in_buffer,
++ char *remcom_out_buffer,
++ struct pt_regs *regs);
++
++/**
++ * kgdb_roundup_cpus - Get other CPUs into a holding pattern
++ * @flags: Current IRQ state
++ *
++ * On SMP systems, we need to get the attention of the other CPUs
++ * and get them into a known state. This should do what is needed
++ * to get the other CPUs to call kgdb_wait(). Note that on some arches,
++ * the NMI approach is not used for rounding up all the CPUs. For example,
++ * in the case of MIPS, smp_call_function() is used to round up CPUs. In
++ * this case, we have to make sure that interrupts are enabled before
++ * calling smp_call_function(). The argument to this function is
++ * the flags that will be used when restoring the interrupts. There is a
++ * local_irq_save() call before kgdb_roundup_cpus().
++ *
++ * On non-SMP systems, this is not called.
++ */
++extern void kgdb_roundup_cpus(unsigned long flags);
++
++#ifndef JMP_REGS_ALIGNMENT
++#define JMP_REGS_ALIGNMENT
++#endif
++
++extern unsigned long kgdb_fault_jmp_regs[];
++
++/**
++ * kgdb_fault_setjmp - Store state in case we fault.
++ * @curr_context: An array to store state into.
++ *
++ * Certain functions may try to access memory, and in doing so may
++ * cause a fault. When this happens, we trap it, restore state to
++ * this call, and let ourselves know that something bad has happened.
++ */
++extern asmlinkage int kgdb_fault_setjmp(unsigned long *curr_context);
++
++/**
++ * kgdb_fault_longjmp - Restore state when we have faulted.
++ * @curr_context: The previously stored state.
++ *
++ * When something bad does happen, this function is called to
++ * restore the known good state, and set the return value to 1, so
++ * we know something bad happened.
++ */
++extern asmlinkage void kgdb_fault_longjmp(unsigned long *curr_context);
++
++/* Optional functions. */
++extern int kgdb_validate_break_address(unsigned long addr);
++extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr);
++extern int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle);
++
++/**
++ * struct kgdb_arch - Describe architecture specific values.
++ * @gdb_bpt_instr: The instruction to trigger a breakpoint.
++ * @flags: Flags for the breakpoint, currently just %KGDB_HW_BREAKPOINT.
++ * @shadowth: A value of %1 indicates we shadow information on processes.
++ * @set_breakpoint: Allow an architecture to specify how to set a software
++ * breakpoint.
++ * @remove_breakpoint: Allow an architecture to specify how to remove a
++ * software breakpoint.
++ * @set_hw_breakpoint: Allow an architecture to specify how to set a hardware
++ * breakpoint.
++ * @remove_hw_breakpoint: Allow an architecture to specify how to remove a
++ * hardware breakpoint.
++ *
++ * The @shadowth flag is an option to shadow information not retrievable by
++ * gdb otherwise. This is deprecated in favor of a binutils which supports
++ * CFI macros.
++ */
++struct kgdb_arch {
++ unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE];
++ unsigned long flags;
++ unsigned shadowth;
++ int (*set_breakpoint) (unsigned long, char *);
++ int (*remove_breakpoint)(unsigned long, char *);
++ int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype);
++ int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype);
++ void (*remove_all_hw_break)(void);
++ void (*correct_hw_break)(void);
++};
++
++/* Thread reference */
++typedef unsigned char threadref[8];
++
++/**
++ * struct kgdb_io - Describe the interface for an I/O driver to talk with KGDB.
++ * @read_char: Pointer to a function that will return one char.
++ * @write_char: Pointer to a function that will write one char.
++ * @flush: Pointer to a function that will flush any pending writes.
++ * @init: Pointer to a function that will initialize the device.
++ * @late_init: Pointer to a function that will do any setup that has
++ * other dependencies.
++ * @pre_exception: Pointer to a function that will do any prep work for
++ * the I/O driver.
++ * @post_exception: Pointer to a function that will do any cleanup work
++ * for the I/O driver.
++ *
++ * The @init and @late_init function pointers allow for an I/O driver
++ * such as a serial driver to fully initialize the port with @init and
++ * be called very early, yet safely call request_irq() later in the boot
++ * sequence.
++ *
++ * @init is allowed to return a non-zero value to indicate failure.
++ * If this is called early on, then KGDB will try again when it would call
++ * @late_init. If it has failed later in boot as well, the user will be
++ * notified.
++ */
++struct kgdb_io {
++ int (*read_char) (void);
++ void (*write_char) (u8);
++ void (*flush) (void);
++ int (*init) (void);
++ void (*late_init) (void);
++ void (*pre_exception) (void);
++ void (*post_exception) (void);
++};
++
++extern struct kgdb_io kgdb_io_ops;
++extern struct kgdb_arch arch_kgdb_ops;
++extern int kgdb_initialized;
++
++extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
++extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
++
++extern void __init kgdb8250_add_port(int i, struct uart_port *serial_req);
++extern void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *serial_req);
++
++extern int kgdb_hex2long(char **ptr, long *long_val);
++extern char *kgdb_mem2hex(char *mem, char *buf, int count);
++extern char *kgdb_hex2mem(char *buf, char *mem, int count);
++extern int kgdb_get_mem(char *addr, unsigned char *buf, int count);
++extern int kgdb_set_mem(char *addr, unsigned char *buf, int count);
++
++int kgdb_isremovedbreak(unsigned long addr);
++
++extern int kgdb_handle_exception(int ex_vector, int signo, int err_code,
++ struct pt_regs *regs);
++extern void kgdb_nmihook(int cpu, void *regs);
++extern int debugger_step;
++extern atomic_t debugger_active;
++#else
++/* Stubs for when KGDB is not set. */
++static const atomic_t debugger_active = ATOMIC_INIT(0);
++#endif /* CONFIG_KGDB */
++#endif /* _KGDB_H_ */
++#endif /* __KERNEL__ */
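An I/O driver hooks into KGDB by filling in a struct kgdb_io and registering it. A minimal sketch, assuming hypothetical polled hardware accessors; only the mandatory read/write callbacks are wired up:

    #include <linux/kgdb.h>
    #include <linux/init.h>

    static int example_read_char(void)
    {
            return 0;       /* stand-in: poll the hardware for one char */
    }

    static void example_write_char(u8 c)
    {
            /* stand-in: emit one char to the hardware */
    }

    static struct kgdb_io example_io_ops = {
            .read_char  = example_read_char,
            .write_char = example_write_char,
    };

    static int __init example_kgdb_io_init(void)
    {
            return kgdb_register_io_module(&example_io_ops);
    }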
+diff -Nurb linux-2.6.22-570/include/linux/kmod.h linux-2.6.22-591/include/linux/kmod.h
+--- linux-2.6.22-570/include/linux/kmod.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/kmod.h 2007-12-21 15:36:12.000000000 -0500
+@@ -36,13 +36,57 @@
+ #define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
+
+ struct key;
+-extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[],
+- struct key *session_keyring, int wait);
++struct file;
++struct subprocess_info;
++
++/* Allocate a subprocess_info structure */
++struct subprocess_info *call_usermodehelper_setup(char *path,
++ char **argv, char **envp);
++
++/* Set various pieces of state into the subprocess_info structure */
++void call_usermodehelper_setkeys(struct subprocess_info *info,
++ struct key *session_keyring);
++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
++ struct file **filp);
++void call_usermodehelper_setcleanup(struct subprocess_info *info,
++ void (*cleanup)(char **argv, char **envp));
++
++enum umh_wait {
++ UMH_NO_WAIT = -1, /* don't wait at all */
++ UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */
++ UMH_WAIT_PROC = 1, /* wait for the process to complete */
++};
++
++/* Actually execute the sub-process */
++int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
++
++/* Free the subprocess_info. This is only needed if you're not going
++ to call call_usermodehelper_exec */
++void call_usermodehelper_freeinfo(struct subprocess_info *info);
+
+ static inline int
+-call_usermodehelper(char *path, char **argv, char **envp, int wait)
++call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
+ {
+- return call_usermodehelper_keys(path, argv, envp, NULL, wait);
++ struct subprocess_info *info;
++
++ info = call_usermodehelper_setup(path, argv, envp);
++ if (info == NULL)
++ return -ENOMEM;
++ return call_usermodehelper_exec(info, wait);
++}
++
++static inline int
++call_usermodehelper_keys(char *path, char **argv, char **envp,
++ struct key *session_keyring, enum umh_wait wait)
++{
++ struct subprocess_info *info;
++
++ info = call_usermodehelper_setup(path, argv, envp);
++ if (info == NULL)
++ return -ENOMEM;
++
++ call_usermodehelper_setkeys(info, session_keyring);
++ return call_usermodehelper_exec(info, wait);
+ }
+
+ extern void usermodehelper_init(void);
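With the setup/exec split, simple callers can keep using the call_usermodehelper() wrapper and pick one of the new umh_wait modes. A sketch (the helper path is hypothetical):

    #include <linux/kmod.h>

    static int example_run_helper(void)
    {
            char *argv[] = { "/sbin/example-helper", NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
                             NULL };

            /* UMH_WAIT_PROC: block until the helper process exits. */
            return call_usermodehelper("/sbin/example-helper", argv, envp,
                                       UMH_WAIT_PROC);
    }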
+diff -Nurb linux-2.6.22-570/include/linux/kobject.h linux-2.6.22-591/include/linux/kobject.h
+--- linux-2.6.22-570/include/linux/kobject.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/kobject.h 2007-12-21 15:36:14.000000000 -0500
+@@ -55,7 +55,7 @@
+ struct kobject * parent;
+ struct kset * kset;
+ struct kobj_type * ktype;
+- struct dentry * dentry;
++ struct sysfs_dirent * sd;
+ wait_queue_head_t poll;
+ };
+
+@@ -71,13 +71,9 @@
+ extern void kobject_cleanup(struct kobject *);
+
+ extern int __must_check kobject_add(struct kobject *);
+-extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *);
+ extern void kobject_del(struct kobject *);
+
+ extern int __must_check kobject_rename(struct kobject *, const char *new_name);
+-extern int __must_check kobject_shadow_rename(struct kobject *kobj,
+- struct dentry *new_parent,
+- const char *new_name);
+ extern int __must_check kobject_move(struct kobject *, struct kobject *);
+
+ extern int __must_check kobject_register(struct kobject *);
+diff -Nurb linux-2.6.22-570/include/linux/ktime.h linux-2.6.22-591/include/linux/ktime.h
+--- linux-2.6.22-570/include/linux/ktime.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/ktime.h 2007-12-21 15:36:12.000000000 -0500
+@@ -279,6 +279,16 @@
+ return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+ }
+
++static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
++{
++ return ktime_to_us(ktime_sub(later, earlier));
++}
++
++static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
++{
++ return ktime_add_ns(kt, usec * 1000);
++}
++
+ /*
+ * The resolution of the clocks. The resolution value is returned in
+ * the clock_getres() system call to give application programmers an
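The two helpers added above combine naturally for simple latency measurements. A sketch:

    #include <linux/ktime.h>
    #include <linux/hrtimer.h>

    /* Return how long op() took, in microseconds. */
    static s64 example_time_operation(void (*op)(void))
    {
            ktime_t start = ktime_get();

            op();
            return ktime_us_delta(ktime_get(), start);
    }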
+diff -Nurb linux-2.6.22-570/include/linux/magic.h linux-2.6.22-591/include/linux/magic.h
+--- linux-2.6.22-570/include/linux/magic.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/magic.h 2007-12-21 15:36:12.000000000 -0500
+@@ -36,8 +36,12 @@
+ #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
+ #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
+ #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
++#define REVOKEFS_MAGIC 0x5245564B /* REVK */
++
++#define UNIONFS_SUPER_MAGIC 0xf15f083d
+
+ #define SMB_SUPER_MAGIC 0x517B
+ #define USBDEVICE_SUPER_MAGIC 0x9fa2
++#define CONTAINER_SUPER_MAGIC 0x27e0eb
+
+ #endif /* __LINUX_MAGIC_H__ */
+diff -Nurb linux-2.6.22-570/include/linux/mempolicy.h linux-2.6.22-591/include/linux/mempolicy.h
+--- linux-2.6.22-570/include/linux/mempolicy.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mempolicy.h 2007-12-21 15:36:12.000000000 -0500
+@@ -148,18 +148,10 @@
+ const nodemask_t *new);
+ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+ extern void mpol_fix_fork_child_flag(struct task_struct *p);
+-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
+-
+-#ifdef CONFIG_CPUSETS
+-#define current_cpuset_is_being_rebound() \
+- (cpuset_being_rebound == current->cpuset)
+-#else
+-#define current_cpuset_is_being_rebound() 0
+-#endif
+
+ extern struct mempolicy default_policy;
+ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+- unsigned long addr);
++ unsigned long addr, gfp_t gfp_flags);
+ extern unsigned slab_node(struct mempolicy *policy);
+
+ extern enum zone_type policy_zone;
+@@ -173,8 +165,6 @@
+ int do_migrate_pages(struct mm_struct *mm,
+ const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+
+-extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */
+-
+ #else
+
+ struct mempolicy {};
+@@ -253,12 +243,10 @@
+ {
+ }
+
+-#define set_cpuset_being_rebound(x) do {} while (0)
+-
+ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+- unsigned long addr)
++ unsigned long addr, gfp_t gfp_flags)
+ {
+- return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
++ return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
+ }
+
+ static inline int do_migrate_pages(struct mm_struct *mm,
+diff -Nurb linux-2.6.22-570/include/linux/mm.h linux-2.6.22-591/include/linux/mm.h
+--- linux-2.6.22-570/include/linux/mm.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mm.h 2007-12-21 15:36:14.000000000 -0500
+@@ -42,6 +42,8 @@
+
+ #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+
++#define VM_REVOKED 0x20000000 /* Mapping has been revoked */
++
+ /*
+ * Linux kernel virtual memory manager primitives.
+ * The idea being to have a "virtual" mm in the same way
+@@ -170,6 +172,13 @@
+ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
+ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
+
++#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated,
++ * e.g. truncate or invalidate_inode_*.
++ * In this case, do_no_page must
++ * return with the page locked.
++ */
++#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */
++
+ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+ #endif
+@@ -192,6 +201,25 @@
+ */
+ extern pgprot_t protection_map[16];
+
++#define FAULT_FLAG_WRITE 0x01
++#define FAULT_FLAG_NONLINEAR 0x02
++
++/*
++ * fault_data is filled in by the pagefault handler and passed to the
++ * vma's ->fault function. That function is responsible for filling in
++ * 'type', which is the type of fault if a page is returned, or the type
++ * of error if NULL is returned.
++ *
++ * pgoff should be used in favour of address, if possible. If pgoff is
++ * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
++ * nonlinear mapping support.
++ */
++struct fault_data {
++ unsigned long address;
++ pgoff_t pgoff;
++ unsigned int flags;
++ int type;
++};
+
+ /*
+ * These are the virtual MM functions - opening of an area, closing and
+@@ -201,9 +229,15 @@
+ struct vm_operations_struct {
+ void (*open)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct * area);
+- struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
+- unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
+- int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
++ struct page *(*fault)(struct vm_area_struct *vma,
++ struct fault_data *fdata);
++ struct page *(*nopage)(struct vm_area_struct *area,
++ unsigned long address, int *type);
++ unsigned long (*nopfn)(struct vm_area_struct *area,
++ unsigned long address);
++ int (*populate)(struct vm_area_struct *area, unsigned long address,
++ unsigned long len, pgprot_t prot, unsigned long pgoff,
++ int nonblock);
+
+ /* notification that a previously read-only page is about to become
+ * writable, if an error is returned it will cause a SIGBUS */
+@@ -656,7 +690,6 @@
+ */
+ #define NOPAGE_SIGBUS (NULL)
+ #define NOPAGE_OOM ((struct page *) (-1))
+-#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */
+
+ /*
+ * Error return values for the *_nopfn functions
+@@ -744,6 +777,16 @@
+ struct vm_area_struct *start_vma, unsigned long start_addr,
+ unsigned long end_addr, unsigned long *nr_accounted,
+ struct zap_details *);
++
++struct mm_walk {
++ int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
++ int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
++ int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
++ int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
++};
++
++int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private);
+ void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+ unsigned long end, unsigned long floor, unsigned long ceiling);
+ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
+@@ -1058,6 +1101,7 @@
+ extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
+ struct rb_node **, struct rb_node *);
++extern void __unlink_file_vma(struct vm_area_struct *);
+ extern void unlink_file_vma(struct vm_area_struct *);
+ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
+ unsigned long addr, unsigned long len, pgoff_t pgoff);
+@@ -1097,9 +1141,11 @@
+ loff_t lstart, loff_t lend);
+
+ /* generic vm_area_ops exported for stackable file systems */
+-extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
+-extern int filemap_populate(struct vm_area_struct *, unsigned long,
+- unsigned long, pgprot_t, unsigned long, int);
++extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
++extern struct page * __deprecated_for_modules
++filemap_nopage(struct vm_area_struct *, unsigned long, int *);
++extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
++ unsigned long, unsigned long, pgprot_t, unsigned long, int);
+
+ /* mm/page-writeback.c */
+ int write_one_page(struct page *page, int wait);
+@@ -1199,6 +1245,7 @@
+ void __user *, size_t *, loff_t *);
+ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+ unsigned long lru_pages);
++extern void drop_pagecache_sb(struct super_block *);
+ void drop_pagecache(void);
+ void drop_slab(void);
+
+diff -Nurb linux-2.6.22-570/include/linux/mmc/card.h linux-2.6.22-591/include/linux/mmc/card.h
+--- linux-2.6.22-570/include/linux/mmc/card.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mmc/card.h 2007-12-21 15:36:12.000000000 -0500
+@@ -72,6 +72,7 @@
+ #define MMC_STATE_READONLY (1<<1) /* card is read-only */
+ #define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */
+ #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */
++#define MMC_STATE_LOCKED (1<<4) /* card is currently locked */
+ u32 raw_cid[4]; /* raw card CID */
+ u32 raw_csd[4]; /* raw card CSD */
+ u32 raw_scr[2]; /* raw card SCR */
+@@ -89,11 +90,16 @@
+ #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY)
+ #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED)
+ #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR)
++#define mmc_card_locked(c) ((c)->state & MMC_STATE_LOCKED)
++
++#define mmc_card_lockable(c) (((c)->csd.cmdclass & CCC_LOCK_CARD) && \
++ ((c)->host->caps & MMC_CAP_BYTEBLOCK))
+
+ #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT)
+ #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
+ #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
+ #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
++#define mmc_card_set_locked(c) ((c)->state |= MMC_STATE_LOCKED)
+
+ #define mmc_card_name(c) ((c)->cid.prod_name)
+ #define mmc_card_id(c) ((c)->dev.bus_id)
+diff -Nurb linux-2.6.22-570/include/linux/mmc/mmc.h linux-2.6.22-591/include/linux/mmc/mmc.h
+--- linux-2.6.22-570/include/linux/mmc/mmc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mmc/mmc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -253,5 +253,13 @@
+ #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */
+ #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */
+
++/*
++ * MMC_LOCK_UNLOCK modes
++ */
++#define MMC_LOCK_MODE_ERASE (1<<3)
++#define MMC_LOCK_MODE_UNLOCK (1<<2)
++#define MMC_LOCK_MODE_CLR_PWD (1<<1)
++#define MMC_LOCK_MODE_SET_PWD (1<<0)
++
+ #endif /* MMC_MMC_PROTOCOL_H */
+
+diff -Nurb linux-2.6.22-570/include/linux/mmzone.h linux-2.6.22-591/include/linux/mmzone.h
+--- linux-2.6.22-570/include/linux/mmzone.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mmzone.h 2007-12-21 15:36:12.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/seqlock.h>
+ #include <linux/nodemask.h>
++#include <linux/pageblock-flags.h>
+ #include <asm/atomic.h>
+ #include <asm/page.h>
+
+@@ -24,8 +25,24 @@
+ #endif
+ #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++#define MIGRATE_UNMOVABLE 0
++#define MIGRATE_RECLAIMABLE 1
++#define MIGRATE_MOVABLE 2
++#define MIGRATE_TYPES 3
++#else
++#define MIGRATE_UNMOVABLE 0
++#define MIGRATE_UNRECLAIMABLE 0
++#define MIGRATE_MOVABLE 0
++#define MIGRATE_TYPES 1
++#endif
++
++#define for_each_migratetype_order(order, type) \
++ for (order = 0; order < MAX_ORDER; order++) \
++ for (type = 0; type < MIGRATE_TYPES; type++)
++
+ struct free_area {
+- struct list_head free_list;
++ struct list_head free_list[MIGRATE_TYPES];
+ unsigned long nr_free;
+ };
+
+@@ -213,6 +230,14 @@
+ #endif
+ struct free_area free_area[MAX_ORDER];
+
++#ifndef CONFIG_SPARSEMEM
++ /*
++ * Flags for a MAX_ORDER_NR_PAGES block. See pageblock-flags.h.
++ * In SPARSEMEM, this map is stored in struct mem_section
++ */
++ unsigned long *pageblock_flags;
++#endif /* CONFIG_SPARSEMEM */
++
+
+ ZONE_PADDING(_pad1_)
+
+@@ -468,6 +493,7 @@
+ void get_zone_counts(unsigned long *active, unsigned long *inactive,
+ unsigned long *free);
+ void build_all_zonelists(void);
++void raise_kswapd_order(unsigned int order);
+ void wakeup_kswapd(struct zone *zone, int order);
+ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags);
+@@ -662,6 +688,9 @@
+ #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)
+ #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
+
++#define SECTION_BLOCKFLAGS_BITS \
++ ((SECTION_SIZE_BITS - (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS)
++
+ #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
+ #error Allocator MAX_ORDER exceeds SECTION_SIZE
+ #endif
+@@ -681,6 +710,7 @@
+ * before using it wrong.
+ */
+ unsigned long section_mem_map;
++ DECLARE_BITMAP(pageblock_flags, SECTION_BLOCKFLAGS_BITS);
+ };
+
+ #ifdef CONFIG_SPARSEMEM_EXTREME
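With free lists split per migrate type, walks over a zone's free areas use the new iterator. A sketch (the caller is assumed to hold zone->lock):

    #include <linux/mmzone.h>
    #include <linux/list.h>

    static unsigned long example_count_free_blocks(struct zone *zone)
    {
            unsigned long count = 0;
            unsigned int order, type;

            for_each_migratetype_order(order, type) {
                    struct list_head *entry;

                    list_for_each(entry,
                                  &zone->free_area[order].free_list[type])
                            count++;
            }
            return count;
    }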
+diff -Nurb linux-2.6.22-570/include/linux/mnt_namespace.h linux-2.6.22-591/include/linux/mnt_namespace.h
+--- linux-2.6.22-570/include/linux/mnt_namespace.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/mnt_namespace.h 2007-12-21 15:36:12.000000000 -0500
+@@ -14,7 +14,7 @@
+ int event;
+ };
+
+-extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *,
++extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
+ struct fs_struct *);
+ extern void __put_mnt_ns(struct mnt_namespace *ns);
+
+diff -Nurb linux-2.6.22-570/include/linux/module.h linux-2.6.22-591/include/linux/module.h
+--- linux-2.6.22-570/include/linux/module.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/module.h 2007-12-21 15:36:12.000000000 -0500
+@@ -227,8 +227,17 @@
+ MODULE_STATE_LIVE,
+ MODULE_STATE_COMING,
+ MODULE_STATE_GOING,
++ MODULE_STATE_GONE,
+ };
+
++#ifdef CONFIG_KGDB
++#define MAX_SECTNAME 31
++struct mod_section {
++ void *address;
++ char name[MAX_SECTNAME + 1];
++};
++#endif
++
+ /* Similar stuff for section attributes. */
+ struct module_sect_attr
+ {
+@@ -256,6 +265,13 @@
+ /* Unique handle for this module */
+ char name[MODULE_NAME_LEN];
+
++#ifdef CONFIG_KGDB
++ /* keep kgdb info at the beginning so that gdb doesn't have a chance to
++ * miss out on any fields */
++ unsigned long num_sections;
++ struct mod_section *mod_sections;
++#endif
++
+ /* Sysfs stuff. */
+ struct module_kobject mkobj;
+ struct module_param_attrs *param_attrs;
+diff -Nurb linux-2.6.22-570/include/linux/namei.h linux-2.6.22-591/include/linux/namei.h
+--- linux-2.6.22-570/include/linux/namei.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/namei.h 2007-12-21 15:36:12.000000000 -0500
+@@ -3,6 +3,7 @@
+
+ #include <linux/dcache.h>
+ #include <linux/linkage.h>
++#include <linux/mount.h>
+
+ struct vfsmount;
+
+@@ -81,9 +82,16 @@
+ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
+ extern void release_open_intent(struct nameidata *);
+
+-extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
++extern struct dentry * lookup_one_len_nd(const char *, struct dentry *,
++ int, struct nameidata *);
+ extern struct dentry *lookup_one_len_kern(const char *, struct dentry *, int);
+
++static inline struct dentry *lookup_one_len(const char *name,
++ struct dentry *dir, int len)
++{
++ return lookup_one_len_nd(name, dir, len, NULL);
++}
++
+ extern int follow_down(struct vfsmount **, struct dentry **);
+ extern int follow_up(struct vfsmount **, struct dentry **);
+
+@@ -100,4 +108,16 @@
+ return nd->saved_names[nd->depth];
+ }
+
++static inline void pathget(struct path *path)
++{
++ mntget(path->mnt);
++ dget(path->dentry);
++}
++
++static inline void pathput(struct path *path)
++{
++ dput(path->dentry);
++ mntput(path->mnt);
++}
++
+ #endif /* _LINUX_NAMEI_H */
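pathget()/pathput() simply take and drop the vfsmount and dentry references as a pair. A sketch of balanced use:

    #include <linux/namei.h>

    static void example_borrow_path(struct path *path)
    {
            pathget(path);          /* pin path->mnt and path->dentry */
            /* ... safely use path->dentry and path->mnt here ... */
            pathput(path);          /* drop both references again */
    }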
+diff -Nurb linux-2.6.22-570/include/linux/net.h linux-2.6.22-591/include/linux/net.h
+--- linux-2.6.22-570/include/linux/net.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/net.h 2007-12-21 15:36:14.000000000 -0500
+@@ -23,6 +23,7 @@
+
+ struct poll_table_struct;
+ struct inode;
++struct net;
+
+ #define NPROTO 34 /* should be enough for now.. */
+
+@@ -170,7 +171,7 @@
+
+ struct net_proto_family {
+ int family;
+- int (*create)(struct socket *sock, int protocol);
++ int (*create)(struct net *net, struct socket *sock, int protocol);
+ struct module *owner;
+ };
+
+diff -Nurb linux-2.6.22-570/include/linux/netdevice.h linux-2.6.22-591/include/linux/netdevice.h
+--- linux-2.6.22-570/include/linux/netdevice.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/linux/netdevice.h 2007-12-21 15:36:14.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <linux/percpu.h>
+ #include <linux/dmaengine.h>
+
++struct net;
+ struct vlan_group;
+ struct ethtool_ops;
+ struct netpoll_info;
+@@ -314,9 +315,10 @@
+ /* Net device features */
+ unsigned long features;
+ #define NETIF_F_SG 1 /* Scatter/gather IO. */
+-#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */
++#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */
+ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
+ #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */
++#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */
+ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */
+ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */
+ #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */
+@@ -325,6 +327,7 @@
+ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
+ #define NETIF_F_GSO 2048 /* Enable software GSO. */
+ #define NETIF_F_LLTX 4096 /* LockLess TX */
++#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */
+
+ /* Segmentation offload features */
+ #define NETIF_F_GSO_SHIFT 16
+@@ -338,8 +341,11 @@
+ /* List of features with software fallbacks. */
+ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
+
++
+ #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
+-#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
++#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
++#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
++#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+
+ struct net_device *next_sched;
+
+@@ -533,6 +539,9 @@
+ void (*poll_controller)(struct net_device *dev);
+ #endif
+
++ /* Network namespace this network device is inside */
++ struct net *nd_net;
++
+ /* bridge stuff */
+ struct net_bridge_port *br_port;
+
+@@ -540,13 +549,16 @@
+ struct device dev;
+ /* space for optional statistics and wireless sysfs groups */
+ struct attribute_group *sysfs_groups[3];
++
++ /* rtnetlink link ops */
++ const struct rtnl_link_ops *rtnl_link_ops;
+ };
+ #define to_net_dev(d) container_of(d, struct net_device, dev)
+
+ #define NETDEV_ALIGN 32
+ #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1)
+
+-static inline void *netdev_priv(struct net_device *dev)
++static inline void *netdev_priv(const struct net_device *dev)
+ {
+ return (char *)dev + ((sizeof(struct net_device)
+ + NETDEV_ALIGN_CONST)
+@@ -576,45 +588,48 @@
+ #include <linux/interrupt.h>
+ #include <linux/notifier.h>
+
+-extern struct net_device loopback_dev; /* The loopback */
+-extern struct list_head dev_base_head; /* All devices */
+ extern rwlock_t dev_base_lock; /* Device list lock */
+
+-#define for_each_netdev(d) \
+- list_for_each_entry(d, &dev_base_head, dev_list)
+-#define for_each_netdev_safe(d, n) \
+- list_for_each_entry_safe(d, n, &dev_base_head, dev_list)
+-#define for_each_netdev_continue(d) \
+- list_for_each_entry_continue(d, &dev_base_head, dev_list)
+-#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
+-
+-static inline struct net_device *next_net_device(struct net_device *dev)
+-{
+- struct list_head *lh;
+
+- lh = dev->dev_list.next;
+- return lh == &dev_base_head ? NULL : net_device_entry(lh);
+-}
++#define for_each_netdev(net, d) \
++ list_for_each_entry(d, &(net)->dev_base_head, dev_list)
++#define for_each_netdev_safe(net, d, n) \
++ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
++#define for_each_netdev_continue(net, d) \
++ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
++#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
+
+-static inline struct net_device *first_net_device(void)
+-{
+- return list_empty(&dev_base_head) ? NULL :
+- net_device_entry(dev_base_head.next);
+-}
++#define next_net_device(d) \
++({ \
++ struct net_device *dev = d; \
++ struct list_head *lh; \
++ struct net *net; \
++ \
++ net = dev->nd_net; \
++ lh = dev->dev_list.next; \
++ lh == &net->dev_base_head ? NULL : net_device_entry(lh); \
++})
++
++#define first_net_device(N) \
++({ \
++ struct net *NET = (N); \
++ list_empty(&NET->dev_base_head) ? NULL : \
++ net_device_entry(NET->dev_base_head.next); \
++})
+
+ extern int netdev_boot_setup_check(struct net_device *dev);
+ extern unsigned long netdev_boot_base(const char *prefix, int unit);
+-extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+-extern struct net_device *dev_getfirstbyhwtype(unsigned short type);
+-extern struct net_device *__dev_getfirstbyhwtype(unsigned short type);
++extern struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr);
++extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
++extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
+ extern void dev_add_pack(struct packet_type *pt);
+ extern void dev_remove_pack(struct packet_type *pt);
+ extern void __dev_remove_pack(struct packet_type *pt);
+
+-extern struct net_device *dev_get_by_flags(unsigned short flags,
++extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags,
+ unsigned short mask);
+-extern struct net_device *dev_get_by_name(const char *name);
+-extern struct net_device *__dev_get_by_name(const char *name);
++extern struct net_device *dev_get_by_name(struct net *net, const char *name);
++extern struct net_device *__dev_get_by_name(struct net *net, const char *name);
+ extern int dev_alloc_name(struct net_device *dev, const char *name);
+ extern int dev_open(struct net_device *dev);
+ extern int dev_close(struct net_device *dev);
+@@ -625,9 +640,9 @@
+ extern void synchronize_net(void);
+ extern int register_netdevice_notifier(struct notifier_block *nb);
+ extern int unregister_netdevice_notifier(struct notifier_block *nb);
+-extern int call_netdevice_notifiers(unsigned long val, void *v);
+-extern struct net_device *dev_get_by_index(int ifindex);
+-extern struct net_device *__dev_get_by_index(int ifindex);
++extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
++extern struct net_device *dev_get_by_index(struct net *net, int ifindex);
++extern struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+ extern int dev_restart(struct net_device *dev);
+ #ifdef CONFIG_NETPOLL_TRAP
+ extern int netpoll_trap(void);
+@@ -732,11 +747,13 @@
+ #define HAVE_NETIF_RECEIVE_SKB 1
+ extern int netif_receive_skb(struct sk_buff *skb);
+ extern int dev_valid_name(const char *name);
+-extern int dev_ioctl(unsigned int cmd, void __user *);
+-extern int dev_ethtool(struct ifreq *);
++extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
++extern int dev_ethtool(struct net *net, struct ifreq *);
+ extern unsigned dev_get_flags(const struct net_device *);
+ extern int dev_change_flags(struct net_device *, unsigned);
+ extern int dev_change_name(struct net_device *, char *);
++extern int dev_change_net_namespace(struct net_device *,
++ struct net *, const char *);
+ extern int dev_set_mtu(struct net_device *, int);
+ extern int dev_set_mac_address(struct net_device *,
+ struct sockaddr *);
+@@ -1006,7 +1023,7 @@
+ extern void netdev_state_change(struct net_device *dev);
+ extern void netdev_features_change(struct net_device *dev);
+ /* Load a device via the kmod */
+-extern void dev_load(const char *name);
++extern void dev_load(struct net *net, const char *name);
+ extern void dev_mcast_init(void);
+ extern int netdev_max_backlog;
+ extern int weight_p;
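
The hunk above converts the global device-list walkers into per-namespace macros: each iterator now takes the struct net whose dev_base_head should be walked, and next_net_device()/first_net_device() become statement-expression macros that resolve the namespace from the device itself. A minimal sketch of a caller, assuming the init_net object and the dev_base_lock locking convention of the contemporaneous net-namespace work (neither is part of this hunk):

#include <linux/netdevice.h>

static void show_devices(struct net *net)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);	/* stabilizes the per-net device list */
	for_each_netdev(net, dev)
		printk(KERN_DEBUG "%s: ifindex %d\n", dev->name, dev->ifindex);
	read_unlock(&dev_base_lock);
}

/* e.g. show_devices(&init_net); from process context */
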
+diff -Nurb linux-2.6.22-570/include/linux/netfilter/x_tables.h linux-2.6.22-591/include/linux/netfilter/x_tables.h
+--- linux-2.6.22-570/include/linux/netfilter/x_tables.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/netfilter/x_tables.h 2007-12-21 15:36:14.000000000 -0500
+@@ -289,7 +289,7 @@
+ unsigned int size, const char *table, unsigned int hook,
+ unsigned short proto, int inv_proto);
+
+-extern int xt_register_table(struct xt_table *table,
++extern int xt_register_table(struct net *net, struct xt_table *table,
+ struct xt_table_info *bootstrap,
+ struct xt_table_info *newinfo);
+ extern void *xt_unregister_table(struct xt_table *table);
+@@ -306,7 +306,7 @@
+ extern int xt_find_revision(int af, const char *name, u8 revision, int target,
+ int *err);
+
+-extern struct xt_table *xt_find_table_lock(int af, const char *name);
++extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name);
+ extern void xt_table_unlock(struct xt_table *t);
+
+ extern int xt_proto_init(int af);
+diff -Nurb linux-2.6.22-570/include/linux/netfilter.h linux-2.6.22-591/include/linux/netfilter.h
+--- linux-2.6.22-570/include/linux/netfilter.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/netfilter.h 2007-12-21 15:36:14.000000000 -0500
+@@ -362,11 +362,6 @@
+ #endif
+ }
+
+-#ifdef CONFIG_PROC_FS
+-#include <linux/proc_fs.h>
+-extern struct proc_dir_entry *proc_net_netfilter;
+-#endif
+-
+ #else /* !CONFIG_NETFILTER */
+ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+ #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
+diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h
+--- linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h 2007-12-21 15:36:14.000000000 -0500
+@@ -292,7 +292,7 @@
+ #include <linux/init.h>
+ extern void ipt_init(void) __init;
+
+-extern int ipt_register_table(struct xt_table *table,
++extern int ipt_register_table(struct net *net, struct xt_table *table,
+ const struct ipt_replace *repl);
+ extern void ipt_unregister_table(struct xt_table *table);
+
+diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4.h linux-2.6.22-591/include/linux/netfilter_ipv4.h
+--- linux-2.6.22-570/include/linux/netfilter_ipv4.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/netfilter_ipv4.h 2007-12-21 15:36:14.000000000 -0500
+@@ -75,7 +75,7 @@
+ #define SO_ORIGINAL_DST 80
+
+ #ifdef __KERNEL__
+-extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
++extern int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type);
+ extern int ip_xfrm_me_harder(struct sk_buff **pskb);
+ extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
+diff -Nurb linux-2.6.22-570/include/linux/netlink.h linux-2.6.22-591/include/linux/netlink.h
+--- linux-2.6.22-570/include/linux/netlink.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/linux/netlink.h 2007-12-21 15:36:14.000000000 -0500
+@@ -21,12 +21,14 @@
+ #define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+ #define NETLINK_GENERIC 16
+-/* leave room for NETLINK_DM (DM Events) */
++#define NETLINK_DM 17 /* Device Mapper */
+ #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+ #define NETLINK_ECRYPTFS 19
+
+ #define MAX_LINKS 32
+
++struct net;
++
+ struct sockaddr_nl
+ {
+ sa_family_t nl_family; /* AF_NETLINK */
+@@ -157,7 +159,8 @@
+ #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
+
+
+-extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
++extern struct sock *netlink_kernel_create(struct net *net,
++					int unit, unsigned int groups,
+ void (*input)(struct sock *sk, int len),
+ struct mutex *cb_mutex,
+ struct module *module);
+@@ -204,6 +207,7 @@
+
+ struct netlink_notify
+ {
++ struct net *net;
+ int pid;
+ int protocol;
+ };
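
With the change above, netlink_kernel_create() now names the namespace the kernel socket lives in, and struct netlink_notify carries a net pointer so notifier consumers can tell namespaces apart. A sketch of a module creating a socket in the initial namespace, assuming init_net from the surrounding namespace patches:

#include <linux/netlink.h>
#include <net/sock.h>

static struct sock *nl_sk;

static void nl_input(struct sock *sk, int len)
{
	/* dequeue skbs from sk->sk_receive_queue and dispatch here */
}

static int __init nl_example_init(void)
{
	/* pin the kernel socket to the initial namespace */
	nl_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK, 0,
				      nl_input, NULL, THIS_MODULE);
	return nl_sk ? 0 : -ENOMEM;
}
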
+diff -Nurb linux-2.6.22-570/include/linux/netpoll.h linux-2.6.22-591/include/linux/netpoll.h
+--- linux-2.6.22-570/include/linux/netpoll.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/netpoll.h 2007-12-21 15:36:12.000000000 -0500
+@@ -16,7 +16,7 @@
+ struct net_device *dev;
+ char dev_name[IFNAMSIZ];
+ const char *name;
+- void (*rx_hook)(struct netpoll *, int, char *, int);
++ void (*rx_hook)(struct netpoll *, int, char *, int, struct sk_buff *);
+
+ u32 local_ip, remote_ip;
+ u16 local_port, remote_port;
+diff -Nurb linux-2.6.22-570/include/linux/nfs4.h linux-2.6.22-591/include/linux/nfs4.h
+--- linux-2.6.22-570/include/linux/nfs4.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs4.h 2007-12-21 15:36:12.000000000 -0500
+@@ -15,6 +15,7 @@
+
+ #include <linux/types.h>
+
++#define NFS4_BITMAP_SIZE 2
+ #define NFS4_VERIFIER_SIZE 8
+ #define NFS4_STATEID_SIZE 16
+ #define NFS4_FHSIZE 128
+diff -Nurb linux-2.6.22-570/include/linux/nfs4_mount.h linux-2.6.22-591/include/linux/nfs4_mount.h
+--- linux-2.6.22-570/include/linux/nfs4_mount.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs4_mount.h 2007-12-21 15:36:12.000000000 -0500
+@@ -65,6 +65,7 @@
+ #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */
+ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */
+ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */
++#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */
+ #define NFS4_MOUNT_FLAGMASK 0xFFFF
+
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/nfs_fs.h linux-2.6.22-591/include/linux/nfs_fs.h
+--- linux-2.6.22-570/include/linux/nfs_fs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs_fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -30,7 +30,9 @@
+ #ifdef __KERNEL__
+
+ #include <linux/in.h>
++#include <linux/kref.h>
+ #include <linux/mm.h>
++#include <linux/namei.h>
+ #include <linux/pagemap.h>
+ #include <linux/rbtree.h>
+ #include <linux/rwsem.h>
+@@ -69,9 +71,8 @@
+
+ struct nfs4_state;
+ struct nfs_open_context {
+- atomic_t count;
+- struct vfsmount *vfsmnt;
+- struct dentry *dentry;
++ struct kref kref;
++ struct path path;
+ struct rpc_cred *cred;
+ struct nfs4_state *state;
+ fl_owner_t lockowner;
+@@ -156,12 +157,9 @@
+ * This is the list of dirty unwritten pages.
+ */
+ spinlock_t req_lock;
+- struct list_head dirty;
+- struct list_head commit;
+ struct radix_tree_root nfs_page_tree;
+
+- unsigned int ndirty,
+- ncommit,
++ unsigned long ncommit,
+ npages;
+
+ /* Open contexts for shared mmap writes */
+diff -Nurb linux-2.6.22-570/include/linux/nfs_fs_sb.h linux-2.6.22-591/include/linux/nfs_fs_sb.h
+--- linux-2.6.22-570/include/linux/nfs_fs_sb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs_fs_sb.h 2007-12-21 15:36:12.000000000 -0500
+@@ -16,7 +16,6 @@
+ #define NFS_CS_INITING 1 /* busy initialising */
+ int cl_nfsversion; /* NFS protocol version */
+ unsigned long cl_res_state; /* NFS resources state */
+-#define NFS_CS_RPCIOD 0 /* - rpciod started */
+ #define NFS_CS_CALLBACK 1 /* - callback started */
+ #define NFS_CS_IDMAP 2 /* - idmap started */
+ #define NFS_CS_RENEWD 3 /* - renewd started */
+diff -Nurb linux-2.6.22-570/include/linux/nfs_mount.h linux-2.6.22-591/include/linux/nfs_mount.h
+--- linux-2.6.22-570/include/linux/nfs_mount.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/nfs_mount.h 2007-12-21 15:36:12.000000000 -0500
+@@ -62,6 +62,7 @@
+ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */
+ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */
+ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
++#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
+ #define NFS_MOUNT_TAGGED 0x8000 /* context tagging */
+ #define NFS_MOUNT_FLAGMASK 0xFFFF
+
+diff -Nurb linux-2.6.22-570/include/linux/nfs_page.h linux-2.6.22-591/include/linux/nfs_page.h
+--- linux-2.6.22-570/include/linux/nfs_page.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs_page.h 2007-12-21 15:36:12.000000000 -0500
+@@ -16,12 +16,13 @@
+ #include <linux/sunrpc/auth.h>
+ #include <linux/nfs_xdr.h>
+
+-#include <asm/atomic.h>
++#include <linux/kref.h>
+
+ /*
+ * Valid flags for the radix tree
+ */
+-#define NFS_PAGE_TAG_WRITEBACK 0
++#define NFS_PAGE_TAG_LOCKED 0
++#define NFS_PAGE_TAG_COMMIT 1
+
+ /*
+ * Valid flags for a dirty buffer
+@@ -33,8 +34,7 @@
+
+ struct nfs_inode;
+ struct nfs_page {
+- struct list_head wb_list, /* Defines state of page: */
+- *wb_list_head; /* read/write/commit */
++ struct list_head wb_list; /* Defines state of page: */
+ struct page *wb_page; /* page to read in/write out */
+ struct nfs_open_context *wb_context; /* File state context info */
+ atomic_t wb_complete; /* i/os we're waiting for */
+@@ -42,7 +42,7 @@
+ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
+ wb_pgbase, /* Start of page data */
+ wb_bytes; /* Length of request */
+- atomic_t wb_count; /* reference count */
++ struct kref wb_kref; /* reference count */
+ unsigned long wb_flags;
+ struct nfs_writeverf wb_verf; /* Commit cookie */
+ };
+@@ -71,8 +71,8 @@
+ extern void nfs_release_request(struct nfs_page *req);
+
+
+-extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
+- pgoff_t idx_start, unsigned int npages);
++extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
++ pgoff_t idx_start, unsigned int npages, int tag);
+ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ struct inode *inode,
+ int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+@@ -84,12 +84,11 @@
+ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+ extern int nfs_wait_on_request(struct nfs_page *);
+ extern void nfs_unlock_request(struct nfs_page *req);
+-extern int nfs_set_page_writeback_locked(struct nfs_page *req);
+-extern void nfs_clear_page_writeback(struct nfs_page *req);
++extern void nfs_clear_page_tag_locked(struct nfs_page *req);
+
+
+ /*
+- * Lock the page of an asynchronous request without incrementing the wb_count
++ * Lock the page of an asynchronous request without getting a new reference
+ */
+ static inline int
+ nfs_lock_request_dontget(struct nfs_page *req)
+@@ -98,14 +97,14 @@
+ }
+
+ /*
+- * Lock the page of an asynchronous request
++ * Lock the page of an asynchronous request and take a reference
+ */
+ static inline int
+ nfs_lock_request(struct nfs_page *req)
+ {
+ if (test_and_set_bit(PG_BUSY, &req->wb_flags))
+ return 0;
+- atomic_inc(&req->wb_count);
++ kref_get(&req->wb_kref);
+ return 1;
+ }
+
+@@ -118,7 +117,6 @@
+ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+ {
+ list_add_tail(&req->wb_list, head);
+- req->wb_list_head = head;
+ }
+
+
+@@ -132,7 +130,6 @@
+ if (list_empty(&req->wb_list))
+ return;
+ list_del_init(&req->wb_list);
+- req->wb_list_head = NULL;
+ }
+
+ static inline struct nfs_page *
+diff -Nurb linux-2.6.22-570/include/linux/nfs_xdr.h linux-2.6.22-591/include/linux/nfs_xdr.h
+--- linux-2.6.22-570/include/linux/nfs_xdr.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/nfs_xdr.h 2007-12-21 15:36:12.000000000 -0500
+@@ -144,6 +144,7 @@
+ nfs4_stateid delegation;
+ __u32 do_recall;
+ __u64 maxsize;
++ __u32 attrset[NFS4_BITMAP_SIZE];
+ };
+
+ /*
+diff -Nurb linux-2.6.22-570/include/linux/nsproxy.h linux-2.6.22-591/include/linux/nsproxy.h
+--- linux-2.6.22-570/include/linux/nsproxy.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/nsproxy.h 2007-12-21 15:36:14.000000000 -0500
+@@ -10,6 +10,12 @@
+ struct ipc_namespace;
+ struct pid_namespace;
+
++#ifdef CONFIG_CONTAINER_NS
++int ns_container_clone(struct task_struct *tsk);
++#else
++static inline int ns_container_clone(struct task_struct *tsk) { return 0; }
++#endif
++
+ /*
+ * A structure to contain pointers to all per-process
+ * namespaces - fs (mount), uts, network, sysvipc, etc.
+@@ -29,10 +35,12 @@
+ struct ipc_namespace *ipc_ns;
+ struct mnt_namespace *mnt_ns;
+ struct pid_namespace *pid_ns;
++ struct user_namespace *user_ns;
++ struct net *net_ns;
+ };
+ extern struct nsproxy init_nsproxy;
+
+-int copy_namespaces(int flags, struct task_struct *tsk);
++int copy_namespaces(unsigned long flags, struct task_struct *tsk);
+ struct nsproxy *copy_nsproxy(struct nsproxy *orig);
+ void get_task_namespaces(struct task_struct *tsk);
+ void free_nsproxy(struct nsproxy *ns);
+diff -Nurb linux-2.6.22-570/include/linux/pageblock-flags.h linux-2.6.22-591/include/linux/pageblock-flags.h
+--- linux-2.6.22-570/include/linux/pageblock-flags.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/pageblock-flags.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,52 @@
++/*
++ * Macros for manipulating and testing flags related to a
++ * MAX_ORDER_NR_PAGES block of pages.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation version 2 of the License
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (C) IBM Corporation, 2006
++ *
++ * Original author, Mel Gorman
++ * Major cleanups and reduction of bit operations, Andy Whitcroft
++ */
++#ifndef PAGEBLOCK_FLAGS_H
++#define PAGEBLOCK_FLAGS_H
++
++#include <linux/types.h>
++
++/* Macro to aid the definition of ranges of bits */
++#define PB_range(name, required_bits) \
++ name, name ## _end = (name + required_bits) - 1
++
++/* Bit indices that affect a whole block of pages */
++enum pageblock_bits {
++ PB_range(PB_migrate, 2), /* 2 bits required for migrate types */
++ NR_PAGEBLOCK_BITS
++};
++
++/* Forward declaration */
++struct page;
++
++/* Declarations for getting and setting flags. See mm/page_alloc.c */
++unsigned long get_pageblock_flags_group(struct page *page,
++ int start_bitidx, int end_bitidx);
++void set_pageblock_flags_group(struct page *page, unsigned long flags,
++ int start_bitidx, int end_bitidx);
++
++#define get_pageblock_flags(page) \
++ get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
++#define set_pageblock_flags(page) \
++ set_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
++
++#endif /* PAGEBLOCK_FLAGS_H */
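
For reference, PB_range(PB_migrate, 2) expands to "PB_migrate, PB_migrate_end = (PB_migrate + 2) - 1", i.e. enum values 0 and 1, so NR_PAGEBLOCK_BITS is 2. A sketch of how page-allocator code reads and writes the migrate-type bits through the declarations above; the wrapper names are hypothetical, mm/page_alloc.c defines its own equivalents:

#include <linux/pageblock-flags.h>

static inline int sketch_get_migratetype(struct page *page)
{
	return (int)get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
}

static inline void sketch_set_migratetype(struct page *page, int mt)
{
	set_pageblock_flags_group(page, (unsigned long)mt,
				  PB_migrate, PB_migrate_end);
}
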
+diff -Nurb linux-2.6.22-570/include/linux/pci_ids.h linux-2.6.22-591/include/linux/pci_ids.h
+--- linux-2.6.22-570/include/linux/pci_ids.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/linux/pci_ids.h 2007-12-21 15:36:12.000000000 -0500
+@@ -2003,6 +2003,7 @@
+
+ #define PCI_VENDOR_ID_ENE 0x1524
+ #define PCI_DEVICE_ID_ENE_CB712_SD 0x0550
++#define PCI_DEVICE_ID_ENE_CB712_SD_2 0x0551
+ #define PCI_DEVICE_ID_ENE_1211 0x1211
+ #define PCI_DEVICE_ID_ENE_1225 0x1225
+ #define PCI_DEVICE_ID_ENE_1410 0x1410
+diff -Nurb linux-2.6.22-570/include/linux/pid_namespace.h linux-2.6.22-591/include/linux/pid_namespace.h
+--- linux-2.6.22-570/include/linux/pid_namespace.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/pid_namespace.h 2007-12-21 15:36:12.000000000 -0500
+@@ -29,7 +29,7 @@
+ kref_get(&ns->kref);
+ }
+
+-extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns);
++extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+ extern void free_pid_ns(struct kref *kref);
+
+ static inline void put_pid_ns(struct pid_namespace *ns)
+diff -Nurb linux-2.6.22-570/include/linux/pnp.h linux-2.6.22-591/include/linux/pnp.h
+--- linux-2.6.22-570/include/linux/pnp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/pnp.h 2007-12-21 15:36:12.000000000 -0500
+@@ -335,6 +335,10 @@
+ int (*set)(struct pnp_dev *dev, struct pnp_resource_table *res);
+ int (*disable)(struct pnp_dev *dev);
+
++ /* protocol specific suspend/resume */
++ int (*suspend)(struct pnp_dev *dev, pm_message_t state);
++ int (*resume)(struct pnp_dev *dev);
++
+ /* used by pnp layer only (look but don't touch) */
+ unsigned char number; /* protocol number*/
+ struct device dev; /* link to driver model */
+diff -Nurb linux-2.6.22-570/include/linux/prctl.h linux-2.6.22-591/include/linux/prctl.h
+--- linux-2.6.22-570/include/linux/prctl.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/prctl.h 2007-12-21 15:36:12.000000000 -0500
+@@ -59,4 +59,8 @@
+ # define PR_ENDIAN_LITTLE 1 /* True little endian mode */
+ # define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */
+
++/* Get/set process seccomp mode */
++#define PR_GET_SECCOMP 21
++#define PR_SET_SECCOMP 22
++
+ #endif /* _LINUX_PRCTL_H */
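
The two new commands expose seccomp through prctl(2). A userspace sketch of entering strict mode (mode 1), after which only read, write, exit and sigreturn remain callable:

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>

#ifndef PR_SET_SECCOMP
#define PR_GET_SECCOMP	21
#define PR_SET_SECCOMP	22
#endif

int main(void)
{
	printf("seccomp mode: %d\n", (int)prctl(PR_GET_SECCOMP));
	if (prctl(PR_SET_SECCOMP, 1, 0, 0, 0) != 0) {
		perror("PR_SET_SECCOMP");
		return 1;
	}
	/* glibc's normal exit path may call exit_group(), which strict
	 * mode does not allow, so leave via the plain exit syscall. */
	syscall(SYS_exit, 0);
	return 0;
}
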
+diff -Nurb linux-2.6.22-570/include/linux/proc_fs.h linux-2.6.22-591/include/linux/proc_fs.h
+--- linux-2.6.22-570/include/linux/proc_fs.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/proc_fs.h 2007-12-21 15:36:14.000000000 -0500
+@@ -86,8 +86,6 @@
+
+ extern struct proc_dir_entry proc_root;
+ extern struct proc_dir_entry *proc_root_fs;
+-extern struct proc_dir_entry *proc_net;
+-extern struct proc_dir_entry *proc_net_stat;
+ extern struct proc_dir_entry *proc_bus;
+ extern struct proc_dir_entry *proc_root_driver;
+ extern struct proc_dir_entry *proc_root_kcore;
+@@ -105,7 +103,6 @@
+ unsigned long task_vsize(struct mm_struct *);
+ int task_statm(struct mm_struct *, int *, int *, int *, int *);
+ char *task_mem(struct mm_struct *, char *);
+-void clear_refs_smap(struct mm_struct *mm);
+
+ struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+ void de_put(struct proc_dir_entry *de);
+@@ -113,6 +110,10 @@
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ struct proc_dir_entry *parent);
+ extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
++static inline void remove_proc_pde(struct proc_dir_entry *pde)
++{
++	remove_proc_entry(pde->name, pde->parent);
++}
+
+ extern struct vfsmount *proc_mnt;
+ extern int proc_fill_super(struct super_block *,void *,int);
+@@ -182,42 +183,18 @@
+ return res;
+ }
+
+-static inline struct proc_dir_entry *proc_net_create(const char *name,
+- mode_t mode, get_info_t *get_info)
+-{
+- return create_proc_info_entry(name,mode,proc_net,get_info);
+-}
+-
+-static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
+- mode_t mode, const struct file_operations *fops)
+-{
+- struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net);
+- if (res)
+- res->proc_fops = fops;
+- return res;
+-}
+-
+-static inline void proc_net_remove(const char *name)
+-{
+- remove_proc_entry(name,proc_net);
+-}
+-
+ #else
+
+ #define proc_root_driver NULL
+-#define proc_net NULL
+ #define proc_bus NULL
+
+-#define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
+-#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
+-static inline void proc_net_remove(const char *name) {}
+-
+ static inline void proc_flush_task(struct task_struct *task) { }
+
+ static inline struct proc_dir_entry *create_proc_entry(const char *name,
+ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+
+ #define remove_proc_entry(name, parent) do {} while (0)
++#define remove_proc_pde(PDE) do {} while (0)
+
+ static inline struct proc_dir_entry *proc_symlink(const char *name,
+ struct proc_dir_entry *parent,const char *dest) {return NULL;}
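
The new remove_proc_pde() helper above removes an entry by its own handle, saving callers from carrying the name and parent around. A minimal sketch (entry name hypothetical):

static struct proc_dir_entry *example_pde;

static int __init example_init(void)
{
	example_pde = create_proc_entry("example_stats", 0444, NULL);
	return example_pde ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	if (example_pde)
		remove_proc_pde(example_pde);	/* == remove_proc_entry(name, parent) */
}
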
+diff -Nurb linux-2.6.22-570/include/linux/raid/raid5.h linux-2.6.22-591/include/linux/raid/raid5.h
+--- linux-2.6.22-570/include/linux/raid/raid5.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/raid/raid5.h 2007-12-21 15:36:12.000000000 -0500
+@@ -116,13 +116,46 @@
+ * attach a request to an active stripe (add_stripe_bh())
+ * lockdev attach-buffer unlockdev
+ * handle a stripe (handle_stripe())
+- * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io
++ * lockstripe clrSTRIPE_HANDLE ...
++ * (lockdev check-buffers unlockdev) ..
++ * change-state ..
++ * record io/ops needed unlockstripe schedule io/ops
+ * release an active stripe (release_stripe())
+ * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
+ *
+ * The refcount counts each thread that have activated the stripe,
+ * plus raid5d if it is handling it, plus one for each active request
+- * on a cached buffer.
++ * on a cached buffer, plus one if the stripe is undergoing stripe
++ * operations.
++ *
++ * Stripe operations are performed outside the stripe lock,
++ * the stripe operations are:
++ * -copying data between the stripe cache and user application buffers
++ * -computing blocks to save a disk access, or to recover a missing block
++ * -updating the parity on a write operation (reconstruct write and
++ * read-modify-write)
++ * -checking parity correctness
++ * -running i/o to disk
++ * These operations are carried out by raid5_run_ops, which uses the async_tx
++ * api to (optionally) offload operations to dedicated hardware engines.
++ * When requesting an operation, handle_stripe sets the pending bit for the
++ * operation and increments the count. raid5_run_ops is then run whenever
++ * the count is non-zero.
++ * There are some critical dependencies between the operations that prevent some
++ * from being requested while another is in flight.
++ * 1/ Parity check operations destroy the in-cache version of the parity block,
++ * so we prevent parity-dependent operations like writes and compute_blocks
++ * from starting while a check is in progress. Some dma engines can perform
++ * the check without damaging the parity block; in these cases the parity
++ * block is re-marked up to date (assuming the check was successful) and is
++ * not re-read from disk.
++ * 2/ When a write operation is requested, we immediately lock the affected
++ * blocks and mark them as not up to date. This causes new read requests
++ * to be held off, as well as parity checks and compute block operations.
++ * 3/ Once a compute block operation has been requested, handle_stripe treats
++ * that block as if it is up to date. raid5_run_ops guarantees that any
++ * operation that is dependent on the compute block result is initiated after
++ * the compute block completes.
+ */
+
+ struct stripe_head {
+@@ -136,15 +169,46 @@
+ spinlock_t lock;
+ int bm_seq; /* sequence number for bitmap flushes */
+ int disks; /* disks in stripe */
++ /* stripe_operations
++ * @pending - pending ops flags (set for request->issue->complete)
++ * @ack - submitted ops flags (set for issue->complete)
++ * @complete - completed ops flags (set for complete)
++ * @target - STRIPE_OP_COMPUTE_BLK target
++ * @count - raid5_run_ops is set to run when this is non-zero
++ */
++ struct stripe_operations {
++ unsigned long pending;
++ unsigned long ack;
++ unsigned long complete;
++ int target;
++ int count;
++ u32 zero_sum_result;
++ } ops;
+ struct r5dev {
+ struct bio req;
+ struct bio_vec vec;
+ struct page *page;
+- struct bio *toread, *towrite, *written;
++ struct bio *toread, *read, *towrite, *written;
+ sector_t sector; /* sector of this page */
+ unsigned long flags;
+ } dev[1]; /* allocated with extra space depending of RAID geometry */
+ };
++
++/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
++ * for handle_stripe. It is only valid under spin_lock(sh->lock);
++ */
++struct stripe_head_state {
++ int syncing, expanding, expanded;
++ int locked, uptodate, to_read, to_write, failed, written;
++ int to_fill, compute, req_compute, non_overwrite, dirty;
++ int failed_num;
++};
++
++/* r6_state - extra state data only relevant to r6 */
++struct r6_state {
++ int p_failed, q_failed, qd_idx, failed_num[2];
++};
++
+ /* Flags */
+ #define R5_UPTODATE 0 /* page contains current data */
+ #define R5_LOCKED 1 /* IO has been submitted on "req" */
+@@ -158,6 +222,15 @@
+ #define R5_ReWrite 9 /* have tried to over-write the readerror */
+
+ #define R5_Expanded 10 /* This block now has post-expand data */
++#define R5_Wantcompute 11 /* compute_block in progress treat as
++ * uptodate
++ */
++#define R5_Wantfill 12 /* dev->toread contains a bio that needs
++ * filling
++ */
++#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from
++ * other "towrites"
++ */
+ /*
+ * Write method
+ */
+@@ -180,6 +253,24 @@
+ #define STRIPE_EXPAND_SOURCE 10
+ #define STRIPE_EXPAND_READY 11
+ /*
++ * Operations flags (in issue order)
++ */
++#define STRIPE_OP_BIOFILL 0
++#define STRIPE_OP_COMPUTE_BLK 1
++#define STRIPE_OP_PREXOR 2
++#define STRIPE_OP_BIODRAIN 3
++#define STRIPE_OP_POSTXOR 4
++#define STRIPE_OP_CHECK 5
++#define STRIPE_OP_IO 6
++
++/* modifiers to the base operations
++ * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
++ * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
++ */
++#define STRIPE_OP_MOD_REPAIR_PD 7
++#define STRIPE_OP_MOD_DMA_CHECK 8
++
++/*
+ * Plugging:
+ *
+ * To improve write throughput, we need to delay the handling of some
+diff -Nurb linux-2.6.22-570/include/linux/raid/xor.h linux-2.6.22-591/include/linux/raid/xor.h
+--- linux-2.6.22-570/include/linux/raid/xor.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/raid/xor.h 2007-12-21 15:36:12.000000000 -0500
+@@ -3,9 +3,10 @@
+
+ #include <linux/raid/md.h>
+
+-#define MAX_XOR_BLOCKS 5
++#define MAX_XOR_BLOCKS 4
+
+-extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
++extern void xor_blocks(unsigned int count, unsigned int bytes,
++ void *dest, void **srcs);
+
+ struct xor_block_template {
+ struct xor_block_template *next;
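
The hunk above replaces xor_block() with xor_blocks(), separating the destination from the source blocks and capping sources at MAX_XOR_BLOCKS (now 4) per call; callers with more sources loop. A sketch of that convention (helper name hypothetical):

static void xor_all(unsigned int bytes, void *dest, void **srcs,
		    unsigned int src_cnt)
{
	while (src_cnt > 0) {
		unsigned int n = min_t(unsigned int, src_cnt, MAX_XOR_BLOCKS);

		xor_blocks(n, bytes, dest, srcs);	/* dest ^= srcs[0..n-1] */
		srcs += n;
		src_cnt -= n;
	}
}
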
+diff -Nurb linux-2.6.22-570/include/linux/reboot.h linux-2.6.22-591/include/linux/reboot.h
+--- linux-2.6.22-570/include/linux/reboot.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/reboot.h 2007-12-21 15:36:12.000000000 -0500
+@@ -67,6 +67,11 @@
+
+ void ctrl_alt_del(void);
+
++#define POWEROFF_CMD_PATH_LEN 256
++extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
++
++extern int orderly_poweroff(bool force);
++
+ /*
+ * Emergency restart, callable from an interrupt handler.
+ */
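
orderly_poweroff() spawns the command held in poweroff_cmd (tunable via the kernel.poweroff_cmd sysctl added later in this patch); with force set, it falls back to an immediate power-off if spawning the helper fails. A sketch of a thermal-style caller:

#include <linux/reboot.h>

static void on_critical_trip(void)
{
	printk(KERN_EMERG "critical temperature reached, shutting down\n");
	orderly_poweroff(true);	/* force power-off if the helper fails */
}
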
+diff -Nurb linux-2.6.22-570/include/linux/revoked_fs_i.h linux-2.6.22-591/include/linux/revoked_fs_i.h
+--- linux-2.6.22-570/include/linux/revoked_fs_i.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/revoked_fs_i.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,18 @@
++#ifndef _LINUX_REVOKED_FS_I_H
++#define _LINUX_REVOKED_FS_I_H
++
++struct revokefs_inode_info {
++ struct task_struct *owner;
++ struct file *file;
++ unsigned int fd;
++ struct inode vfs_inode;
++};
++
++static inline struct revokefs_inode_info *revokefs_i(struct inode *inode)
++{
++ return container_of(inode, struct revokefs_inode_info, vfs_inode);
++}
++
++void make_revoked_inode(struct inode *, int);
++
++#endif
+diff -Nurb linux-2.6.22-570/include/linux/rtnetlink.h linux-2.6.22-591/include/linux/rtnetlink.h
+--- linux-2.6.22-570/include/linux/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/rtnetlink.h 2007-12-21 15:36:14.000000000 -0500
+@@ -261,7 +261,7 @@
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION,
+- RTA_MP_ALGO,
++ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ __RTA_MAX
+ };
+@@ -570,15 +570,21 @@
+ }
+
+ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
++extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
++ struct rtattr *rta, int len);
+
+ #define rtattr_parse_nested(tb, max, rta) \
+ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
+
+-extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+-extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+-extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
++#define rtattr_parse_nested_compat(tb, max, rta, data, len) \
++({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
++ __rtattr_parse_nested_compat(tb, max, rta, len); })
++
++extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
++extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
++extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ struct nlmsghdr *nlh, gfp_t flags);
+-extern void rtnl_set_sk_err(u32 group, int error);
++extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
+ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
+ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
+ u32 id, u32 ts, u32 tsage, long expires,
+@@ -638,6 +644,18 @@
+ ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
+ (skb)->len; })
+
++#define RTA_NEST_COMPAT(skb, type, attrlen, data) \
++({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
++ RTA_PUT(skb, type, attrlen, data); \
++ RTA_NEST(skb, type); \
++ __start; })
++
++#define RTA_NEST_COMPAT_END(skb, start) \
++({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \
++ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
++ RTA_NEST_END(skb, __nest); \
++ (skb)->len; })
++
+ #define RTA_NEST_CANCEL(skb, start) \
+ ({ if (start) \
+ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+diff -Nurb linux-2.6.22-570/include/linux/sched.h linux-2.6.22-591/include/linux/sched.h
+--- linux-2.6.22-570/include/linux/sched.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/sched.h 2007-12-21 15:36:14.000000000 -0500
+@@ -26,7 +26,9 @@
+ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */
+ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */
+ #define CLONE_NEWIPC 0x08000000 /* New ipcs */
++#define CLONE_NEWUSER 0x20000000 /* New user namespace */
+ #define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */
++#define CLONE_NEWNET 0x40000000 /* New network namespace */
+
+ /*
+ * Scheduling policies
+@@ -266,6 +268,7 @@
+ asmlinkage void schedule(void);
+
+ struct nsproxy;
++struct user_namespace;
+
+ /* Maximum number of active map areas.. This is a random (large) number */
+ #define DEFAULT_MAX_MAP_COUNT 65536
+@@ -325,6 +328,27 @@
+ (mm)->hiwater_vm = (mm)->total_vm; \
+ } while (0)
+
++extern void set_dumpable(struct mm_struct *mm, int value);
++extern int get_dumpable(struct mm_struct *mm);
++
++/* mm flags */
++/* dumpable bits */
++#define MMF_DUMPABLE 0 /* core dump is permitted */
++#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
++#define MMF_DUMPABLE_BITS 2
++
++/* coredump filter bits */
++#define MMF_DUMP_ANON_PRIVATE 2
++#define MMF_DUMP_ANON_SHARED 3
++#define MMF_DUMP_MAPPED_PRIVATE 4
++#define MMF_DUMP_MAPPED_SHARED 5
++#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
++#define MMF_DUMP_FILTER_BITS 4
++#define MMF_DUMP_FILTER_MASK \
++ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
++#define MMF_DUMP_FILTER_DEFAULT \
++ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
++
+ struct mm_struct {
+ struct vm_area_struct * mmap; /* list of VMAs */
+ struct rb_root mm_rb;
+@@ -383,7 +407,7 @@
+ unsigned int token_priority;
+ unsigned int last_interval;
+
+- unsigned char dumpable:2;
++ unsigned long flags; /* Must use atomic bitops to access the bits */
+
+ /* coredumping support */
+ int core_waiters;
+@@ -757,9 +781,6 @@
+ #endif
+ };
+
+-extern int partition_sched_domains(cpumask_t *partition1,
+- cpumask_t *partition2);
+-
+ /*
+ * Maximum cache size the migration-costs auto-tuning code will
+ * search from:
+@@ -770,8 +791,6 @@
+
+
+ struct io_context; /* See blkdev.h */
+-struct cpuset;
+-
+ #define NGROUPS_SMALL 32
+ #define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
+ struct group_info {
+@@ -912,7 +931,7 @@
+ unsigned int rt_priority;
+ cputime_t utime, stime;
+ unsigned long nvcsw, nivcsw; /* context switch counts */
+- struct timespec start_time;
++ struct timespec start_time, real_start_time;
+ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+ unsigned long min_flt, maj_flt;
+
+@@ -1067,11 +1086,16 @@
+ short il_next;
+ #endif
+ #ifdef CONFIG_CPUSETS
+- struct cpuset *cpuset;
+ nodemask_t mems_allowed;
+ int cpuset_mems_generation;
+ int cpuset_mem_spread_rotor;
+ #endif
++#ifdef CONFIG_CONTAINERS
++ /* Container info protected by css_group_lock */
++ struct css_group *containers;
++ /* cg_list protected by css_group_lock and tsk->alloc_lock */
++ struct list_head cg_list;
++#endif
+ struct robust_list_head __user *robust_list;
+ #ifdef CONFIG_COMPAT
+ struct compat_robust_list_head __user *compat_robust_list;
+@@ -1514,7 +1538,8 @@
+ /*
+ * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4(). Also used in procfs. Also
+- * pins the final release of task.io_context. Also protects ->cpuset.
++ * pins the final release of task.io_context. Also protects ->cpuset and
++ * ->container.subsys[].
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+diff -Nurb linux-2.6.22-570/include/linux/seccomp.h linux-2.6.22-591/include/linux/seccomp.h
+--- linux-2.6.22-570/include/linux/seccomp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/seccomp.h 2007-12-21 15:36:12.000000000 -0500
+@@ -4,8 +4,6 @@
+
+ #ifdef CONFIG_SECCOMP
+
+-#define NR_SECCOMP_MODES 1
+-
+ #include <linux/thread_info.h>
+ #include <asm/seccomp.h>
+
+@@ -23,6 +21,9 @@
+ return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP));
+ }
+
++extern long prctl_get_seccomp(void);
++extern long prctl_set_seccomp(unsigned long);
++
+ #else /* CONFIG_SECCOMP */
+
+ typedef struct { } seccomp_t;
+@@ -34,6 +35,16 @@
+ return 0;
+ }
+
++static inline long prctl_get_seccomp(void)
++{
++ return -EINVAL;
++}
++
++static inline long prctl_set_seccomp(unsigned long arg2)
++{
++ return -EINVAL;
++}
++
+ #endif /* CONFIG_SECCOMP */
+
+ #endif /* _LINUX_SECCOMP_H */
+diff -Nurb linux-2.6.22-570/include/linux/security.h linux-2.6.22-591/include/linux/security.h
+--- linux-2.6.22-570/include/linux/security.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/security.h 2007-12-21 15:36:12.000000000 -0500
+@@ -71,6 +71,7 @@
+ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
+ extern int cap_netlink_recv(struct sk_buff *skb, int cap);
+
++extern unsigned long mmap_min_addr;
+ /*
+ * Values used in the task_security_ops calls
+ */
+@@ -1241,8 +1242,9 @@
+ int (*file_ioctl) (struct file * file, unsigned int cmd,
+ unsigned long arg);
+ int (*file_mmap) (struct file * file,
+- unsigned long reqprot,
+- unsigned long prot, unsigned long flags);
++ unsigned long reqprot, unsigned long prot,
++ unsigned long flags, unsigned long addr,
++ unsigned long addr_only);
+ int (*file_mprotect) (struct vm_area_struct * vma,
+ unsigned long reqprot,
+ unsigned long prot);
+@@ -1814,9 +1816,12 @@
+
+ static inline int security_file_mmap (struct file *file, unsigned long reqprot,
+ unsigned long prot,
+- unsigned long flags)
++ unsigned long flags,
++ unsigned long addr,
++ unsigned long addr_only)
+ {
+- return security_ops->file_mmap (file, reqprot, prot, flags);
++ return security_ops->file_mmap (file, reqprot, prot, flags, addr,
++ addr_only);
+ }
+
+ static inline int security_file_mprotect (struct vm_area_struct *vma,
+@@ -2489,7 +2494,9 @@
+
+ static inline int security_file_mmap (struct file *file, unsigned long reqprot,
+ unsigned long prot,
+- unsigned long flags)
++ unsigned long flags,
++ unsigned long addr,
++ unsigned long addr_only)
+ {
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/include/linux/serial_8250.h linux-2.6.22-591/include/linux/serial_8250.h
+--- linux-2.6.22-570/include/linux/serial_8250.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/serial_8250.h 2007-12-21 15:36:12.000000000 -0500
+@@ -57,6 +57,7 @@
+
+ int serial8250_register_port(struct uart_port *);
+ void serial8250_unregister_port(int line);
++void serial8250_unregister_by_port(struct uart_port *port);
+ void serial8250_suspend_port(int line);
+ void serial8250_resume_port(int line);
+
+diff -Nurb linux-2.6.22-570/include/linux/signal.h linux-2.6.22-591/include/linux/signal.h
+--- linux-2.6.22-570/include/linux/signal.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/signal.h 2007-12-21 15:36:12.000000000 -0500
+@@ -238,12 +238,15 @@
+ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+ extern long do_sigpending(void __user *, unsigned long);
+ extern int sigprocmask(int, sigset_t *, sigset_t *);
++extern int show_unhandled_signals;
+
+ struct pt_regs;
+ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+
+ extern struct kmem_cache *sighand_cachep;
+
++int unhandled_signal(struct task_struct *tsk, int sig);
++
+ /*
+ * In POSIX a signal is sent either to a specific thread (Linux task)
+ * or to the process as a whole (Linux thread group). How the signal
+diff -Nurb linux-2.6.22-570/include/linux/skbuff.h linux-2.6.22-591/include/linux/skbuff.h
+--- linux-2.6.22-570/include/linux/skbuff.h 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/include/linux/skbuff.h 2007-12-21 15:36:12.000000000 -0500
+@@ -147,8 +147,8 @@
+
+ /* We divide dataref into two halves. The higher 16 bits hold references
+ * to the payload part of skb->data. The lower 16 bits hold references to
+- * the entire skb->data. It is up to the users of the skb to agree on
+- * where the payload starts.
++ * the entire skb->data. A clone of a headerless skb holds the length of
++ * the header in skb->hdr_len.
+ *
+ * All users must obey the rule that the skb->data reference count must be
+ * greater than or equal to the payload reference count.
+@@ -206,6 +206,7 @@
+ * @len: Length of actual data
+ * @data_len: Data length
+ * @mac_len: Length of link layer header
++ * @hdr_len: writable header length of cloned skb
+ * @csum: Checksum (must include start/offset pair)
+ * @csum_start: Offset from skb->head where checksumming should start
+ * @csum_offset: Offset from csum_start where checksum should be stored
+@@ -260,8 +261,9 @@
+ char cb[48];
+
+ unsigned int len,
+- data_len,
+- mac_len;
++ data_len;
++ __u16 mac_len,
++ hdr_len;
+ union {
+ __wsum csum;
+ struct {
+@@ -1323,6 +1325,20 @@
+ }
+
+ /**
++ * skb_clone_writable - is the header of a clone writable
++ * @skb: buffer to check
++ * @len: length up to which to write
++ *
++ * Returns true if modifying the header part of the cloned buffer
++ * does not require the data to be copied.
++ */
++static inline int skb_clone_writable(struct sk_buff *skb, int len)
++{
++ return !skb_header_cloned(skb) &&
++ skb_headroom(skb) + len <= skb->hdr_len;
++}
++
++/**
+ * skb_cow - copy header of skb when it is required
+ * @skb: buffer to cow
+ * @headroom: needed headroom
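
skb_clone_writable() above lets header-rewriting paths skip a copy when a clone shares only payload. A sketch of the idiomatic check, assuming len counts the bytes from skb->data the caller will modify (the helper name is hypothetical):

static int make_header_writable(struct sk_buff *skb, unsigned int len)
{
	if (skb_cloned(skb) && !skb_clone_writable(skb, len) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;	/* bytes [0, len) of the header are now private */
}
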
+diff -Nurb linux-2.6.22-570/include/linux/slab.h linux-2.6.22-591/include/linux/slab.h
+--- linux-2.6.22-570/include/linux/slab.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/slab.h 2007-12-21 15:36:12.000000000 -0500
+@@ -26,12 +26,14 @@
+ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */
+ #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */
+ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */
+-#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
+ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */
+ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
+ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
+ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */
+
++/* The following flags affect the page allocator grouping pages by mobility */
++#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
++#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
+ /*
+ * struct kmem_cache related prototypes
+ */
+diff -Nurb linux-2.6.22-570/include/linux/socket.h linux-2.6.22-591/include/linux/socket.h
+--- linux-2.6.22-570/include/linux/socket.h 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/include/linux/socket.h 2007-12-21 15:36:14.000000000 -0500
+@@ -24,7 +24,6 @@
+ #include <linux/types.h> /* pid_t */
+ #include <linux/compiler.h> /* __user */
+
+-extern int sysctl_somaxconn;
+ #ifdef CONFIG_PROC_FS
+ struct seq_file;
+ extern void socket_seq_show(struct seq_file *seq);
+diff -Nurb linux-2.6.22-570/include/linux/string.h linux-2.6.22-591/include/linux/string.h
+--- linux-2.6.22-570/include/linux/string.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/string.h 2007-12-21 15:36:12.000000000 -0500
+@@ -105,8 +105,12 @@
+ #endif
+
+ extern char *kstrdup(const char *s, gfp_t gfp);
++extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+ extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+
++extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
++extern void argv_free(char **argv);
++
+ #ifdef __cplusplus
+ }
+ #endif
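
argv_split()/argv_free(), declared above, turn a command-line string into a NULL-terminated vector, which is roughly how the orderly_poweroff() implementation runs poweroff_cmd. A sketch under that assumption (helper name hypothetical):

#include <linux/string.h>
#include <linux/kmod.h>

static int run_cmdline(const char *cmdline)
{
	static char *envp[] = { "HOME=/",
				"PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
	int argc, ret;
	char **argv = argv_split(GFP_KERNEL, cmdline, &argc);

	if (!argv)
		return -ENOMEM;
	ret = call_usermodehelper(argv[0], argv, envp, 0 /* don't wait */);
	argv_free(argv);
	return ret;
}
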
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth.h linux-2.6.22-591/include/linux/sunrpc/auth.h
+--- linux-2.6.22-570/include/linux/sunrpc/auth.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/sunrpc/auth.h 2007-12-21 15:36:12.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/sunrpc/xdr.h>
+
+ #include <asm/atomic.h>
++#include <linux/rcupdate.h>
+
+ /* size of the nodename buffer */
+ #define UNX_MAXNODENAME 32
+@@ -31,22 +32,28 @@
+ /*
+ * Client user credentials
+ */
++struct rpc_auth;
++struct rpc_credops;
+ struct rpc_cred {
+ struct hlist_node cr_hash; /* hash chain */
+- struct rpc_credops * cr_ops;
+- unsigned long cr_expire; /* when to gc */
+- atomic_t cr_count; /* ref count */
+- unsigned short cr_flags; /* various flags */
++ struct list_head cr_lru; /* lru garbage collection */
++ struct rcu_head cr_rcu;
++ struct rpc_auth * cr_auth;
++ const struct rpc_credops *cr_ops;
+ #ifdef RPC_DEBUG
+ unsigned long cr_magic; /* 0x0f4aa4f0 */
+ #endif
++ unsigned long cr_expire; /* when to gc */
++ unsigned long cr_flags; /* various flags */
++ atomic_t cr_count; /* ref count */
+
+ uid_t cr_uid;
+
+ /* per-flavor data */
+ };
+-#define RPCAUTH_CRED_NEW 0x0001
+-#define RPCAUTH_CRED_UPTODATE 0x0002
++#define RPCAUTH_CRED_NEW 0
++#define RPCAUTH_CRED_UPTODATE 1
++#define RPCAUTH_CRED_HASHED 2
+
+ #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
+
+@@ -57,10 +64,10 @@
+ #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1)
+ struct rpc_cred_cache {
+ struct hlist_head hashtable[RPC_CREDCACHE_NR];
+- unsigned long nextgc; /* next garbage collection */
+- unsigned long expire; /* cache expiry interval */
++ spinlock_t lock;
+ };
+
++struct rpc_authops;
+ struct rpc_auth {
+ unsigned int au_cslack; /* call cred size estimate */
+ /* guess at number of u32's auth adds before
+@@ -70,7 +77,7 @@
+ unsigned int au_verfsize;
+
+ unsigned int au_flags; /* various flags */
+- struct rpc_authops * au_ops; /* operations */
++ const struct rpc_authops *au_ops; /* operations */
+ rpc_authflavor_t au_flavor; /* pseudoflavor (note may
+ * differ from the flavor in
+ * au_ops->au_flavor in gss
+@@ -116,17 +123,19 @@
+ void *, __be32 *, void *);
+ };
+
+-extern struct rpc_authops authunix_ops;
+-extern struct rpc_authops authnull_ops;
+-#ifdef CONFIG_SUNRPC_SECURE
+-extern struct rpc_authops authdes_ops;
+-#endif
++extern const struct rpc_authops authunix_ops;
++extern const struct rpc_authops authnull_ops;
++
++void __init rpc_init_authunix(void);
++void __init rpcauth_init_module(void);
++void __exit rpcauth_remove_module(void);
+
+-int rpcauth_register(struct rpc_authops *);
+-int rpcauth_unregister(struct rpc_authops *);
++int rpcauth_register(const struct rpc_authops *);
++int rpcauth_unregister(const struct rpc_authops *);
+ struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
+-void rpcauth_destroy(struct rpc_auth *);
++void rpcauth_release(struct rpc_auth *);
+ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
++void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
+ struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
+ struct rpc_cred * rpcauth_bindcred(struct rpc_task *);
+ void rpcauth_holdcred(struct rpc_task *);
+@@ -139,8 +148,9 @@
+ int rpcauth_refreshcred(struct rpc_task *);
+ void rpcauth_invalcred(struct rpc_task *);
+ int rpcauth_uptodatecred(struct rpc_task *);
+-int rpcauth_init_credcache(struct rpc_auth *, unsigned long);
+-void rpcauth_free_credcache(struct rpc_auth *);
++int rpcauth_init_credcache(struct rpc_auth *);
++void rpcauth_destroy_credcache(struct rpc_auth *);
++void rpcauth_clear_credcache(struct rpc_cred_cache *);
+
+ static inline
+ struct rpc_cred * get_rpccred(struct rpc_cred *cred)
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth_gss.h linux-2.6.22-591/include/linux/sunrpc/auth_gss.h
+--- linux-2.6.22-570/include/linux/sunrpc/auth_gss.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/sunrpc/auth_gss.h 2007-12-21 15:36:12.000000000 -0500
+@@ -85,11 +85,6 @@
+ struct gss_upcall_msg *gc_upcall;
+ };
+
+-#define gc_uid gc_base.cr_uid
+-#define gc_count gc_base.cr_count
+-#define gc_flags gc_base.cr_flags
+-#define gc_expire gc_base.cr_expire
+-
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */
+
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/clnt.h linux-2.6.22-591/include/linux/sunrpc/clnt.h
+--- linux-2.6.22-570/include/linux/sunrpc/clnt.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/sunrpc/clnt.h 2007-12-21 15:36:12.000000000 -0500
+@@ -24,8 +24,10 @@
+ * The high-level client handle
+ */
+ struct rpc_clnt {
+- atomic_t cl_count; /* Number of clones */
+- atomic_t cl_users; /* number of references */
++ struct kref cl_kref; /* Number of references */
++ struct list_head cl_clients; /* Global list of clients */
++ struct list_head cl_tasks; /* List of tasks */
++ spinlock_t cl_lock; /* spinlock */
+ struct rpc_xprt * cl_xprt; /* transport */
+ struct rpc_procinfo * cl_procinfo; /* procedure info */
+ u32 cl_prog, /* RPC program number */
+@@ -41,10 +43,7 @@
+ unsigned int cl_softrtry : 1,/* soft timeouts */
+ cl_intr : 1,/* interruptible */
+ cl_discrtry : 1,/* disconnect before retry */
+- cl_autobind : 1,/* use getport() */
+- cl_oneshot : 1,/* dispose after use */
+- cl_dead : 1,/* abandoned */
+- cl_tag : 1;/* context tagging */
++ cl_autobind : 1;/* use getport() */
+
+ struct rpc_rtt * cl_rtt; /* RTO estimator data */
+
+@@ -111,17 +110,15 @@
+ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0)
+ #define RPC_CLNT_CREATE_INTR (1UL << 1)
+ #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2)
+-#define RPC_CLNT_CREATE_ONESHOT (1UL << 3)
+-#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4)
+-#define RPC_CLNT_CREATE_NOPING (1UL << 5)
+-#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6)
++#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3)
++#define RPC_CLNT_CREATE_NOPING (1UL << 4)
++#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5)
+
+ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
+ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
+ struct rpc_program *, int);
+ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
+-int rpc_shutdown_client(struct rpc_clnt *);
+-int rpc_destroy_client(struct rpc_clnt *);
++void rpc_shutdown_client(struct rpc_clnt *);
+ void rpc_release_client(struct rpc_clnt *);
+ int rpcb_register(u32, u32, int, unsigned short, int *);
+ void rpcb_getport(struct rpc_task *);
+@@ -133,13 +130,14 @@
+ void *calldata);
+ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
+ int flags);
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
++ int flags);
+ void rpc_restart_call(struct rpc_task *);
+ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
+ size_t rpc_max_payload(struct rpc_clnt *);
+ void rpc_force_rebind(struct rpc_clnt *);
+-int rpc_ping(struct rpc_clnt *clnt, int flags);
+ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
+ char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/gss_api.h linux-2.6.22-591/include/linux/sunrpc/gss_api.h
+--- linux-2.6.22-570/include/linux/sunrpc/gss_api.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/sunrpc/gss_api.h 2007-12-21 15:36:12.000000000 -0500
+@@ -77,7 +77,7 @@
+ struct module *gm_owner;
+ struct xdr_netobj gm_oid;
+ char *gm_name;
+- struct gss_api_ops *gm_ops;
++ const struct gss_api_ops *gm_ops;
+ /* pseudoflavors supported by this mechanism: */
+ int gm_pf_num;
+ struct pf_desc * gm_pfs;
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -23,9 +23,11 @@
+ void *private;
+ struct list_head pipe;
+ struct list_head in_upcall;
++ struct list_head in_downcall;
+ int pipelen;
+ int nreaders;
+ int nwriters;
++ int nkern_readwriters;
+ wait_queue_head_t waitq;
+ #define RPC_PIPE_WAIT_FOR_OPEN 1
+ int flags;
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/sched.h linux-2.6.22-591/include/linux/sunrpc/sched.h
+--- linux-2.6.22-570/include/linux/sunrpc/sched.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/sunrpc/sched.h 2007-12-21 15:36:12.000000000 -0500
+@@ -110,11 +110,6 @@
+ if (!list_empty(head) && \
+ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
+
+-/* .. and walking list of all tasks */
+-#define alltask_for_each(task, pos, head) \
+- list_for_each(pos, head) \
+- if ((task=list_entry(pos, struct rpc_task, tk_task)),1)
+-
+ typedef void (*rpc_action)(struct rpc_task *);
+
+ struct rpc_call_ops {
+diff -Nurb linux-2.6.22-570/include/linux/syscalls.h linux-2.6.22-591/include/linux/syscalls.h
+--- linux-2.6.22-570/include/linux/syscalls.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/syscalls.h 2007-12-21 15:36:12.000000000 -0500
+@@ -110,6 +110,9 @@
+ asmlinkage long sys_capset(cap_user_header_t header,
+ const cap_user_data_t data);
+ asmlinkage long sys_personality(u_long personality);
++asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
++ loff_t offset, loff_t nbytes);
++
+
+ asmlinkage long sys_sigpending(old_sigset_t __user *set);
+ asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
+@@ -612,7 +615,11 @@
+ asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
+ const struct itimerspec __user *utmr);
+ asmlinkage long sys_eventfd(unsigned int count);
++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
+
+ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+
++asmlinkage long sys_revokeat(int dfd, const char __user *filename);
++asmlinkage long sys_frevoke(unsigned int fd);
++
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/sysctl.h linux-2.6.22-591/include/linux/sysctl.h
+--- linux-2.6.22-570/include/linux/sysctl.h 2007-12-21 15:36:02.000000000 -0500
++++ linux-2.6.22-591/include/linux/sysctl.h 2007-12-21 15:36:14.000000000 -0500
+@@ -31,6 +31,7 @@
+
+ struct file;
+ struct completion;
++struct net;
+
+ #define CTL_MAXNAME 10 /* how many path components do we allow in a
+ call to sysctl? In other words, what is
+@@ -166,6 +167,7 @@
+ KERN_MAX_LOCK_DEPTH=74,
+ KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
+ KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
++ KERN_POWEROFF_CMD=77, /* string: poweroff command line */
+ };
+
+
+@@ -208,6 +210,7 @@
+ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
+ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
+ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
++ VM_HUGETLB_TREAT_MOVABLE=36, /* Allocate hugepages from ZONE_MOVABLE */
+
+ /* s390 vm cmm sysctls */
+ VM_CMM_PAGES=1111,
+@@ -843,6 +846,9 @@
+ };
+
+ /* CTL_DEBUG names: */
++enum {
++ DEBUG_UNHANDLED_SIGNALS = 1,
++};
+
+ /* CTL_DEV names: */
+ enum {
+@@ -980,6 +986,7 @@
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen);
+
++extern ctl_handler sysctl_data;
+ extern ctl_handler sysctl_string;
+ extern ctl_handler sysctl_intvec;
+ extern ctl_handler sysctl_jiffies;
+@@ -1056,6 +1063,12 @@
+
+ void unregister_sysctl_table(struct ctl_table_header * table);
+
++#ifdef CONFIG_NET
++extern struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table);
++extern void unregister_net_sysctl_table(struct ctl_table_header *header);
++extern ctl_table net_root_table[];
++#endif
++
+ #else /* __KERNEL__ */
+
+ #endif /* __KERNEL__ */
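
register_net_sysctl_table() above attaches a table to one struct net, so each namespace gets its own registration. Note that a table whose .data points at a global still shares the value across namespaces; truly per-net data needs a per-net copy of the table. A sketch, assuming CTL_UNNUMBERED is available in this tree:

static int example_tunable = 1;

static struct ctl_table example_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,	/* assumed available */
		.procname	= "example_tunable",
		.data		= &example_tunable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

static struct ctl_table_header *example_hdr;

static int example_net_init(struct net *net)
{
	example_hdr = register_net_sysctl_table(net, example_table);
	return example_hdr ? 0 : -ENOMEM;
}

/* teardown: unregister_net_sysctl_table(example_hdr); */
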
+diff -Nurb linux-2.6.22-570/include/linux/sysdev.h linux-2.6.22-591/include/linux/sysdev.h
+--- linux-2.6.22-570/include/linux/sysdev.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/sysdev.h 2007-12-21 15:36:12.000000000 -0500
+@@ -101,8 +101,7 @@
+
+ #define _SYSDEV_ATTR(_name,_mode,_show,_store) \
+ { \
+- .attr = { .name = __stringify(_name), .mode = _mode, \
+- .owner = THIS_MODULE }, \
++ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ }
+diff -Nurb linux-2.6.22-570/include/linux/sysfs.h linux-2.6.22-591/include/linux/sysfs.h
+--- linux-2.6.22-570/include/linux/sysfs.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/sysfs.h 2007-12-21 15:36:14.000000000 -0500
+@@ -19,9 +19,11 @@
+
+ struct kobject;
+ struct module;
+-struct nameidata;
+-struct dentry;
+
++/* FIXME
++ * The *owner field is no longer used, but is left in place
++ * until the tree gets cleaned up fully.
++ */
+ struct attribute {
+ const char * name;
+ struct module * owner;
+@@ -41,13 +43,13 @@
+ */
+
+ #define __ATTR(_name,_mode,_show,_store) { \
+- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \
++ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ }
+
+ #define __ATTR_RO(_name) { \
+- .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
++ .attr = { .name = __stringify(_name), .mode = 0444 }, \
+ .show = _name##_show, \
+ }
+
+@@ -61,8 +63,10 @@
+ struct attribute attr;
+ size_t size;
+ void *private;
+- ssize_t (*read)(struct kobject *, char *, loff_t, size_t);
+- ssize_t (*write)(struct kobject *, char *, loff_t, size_t);
++ ssize_t (*read)(struct kobject *, struct bin_attribute *,
++ char *, loff_t, size_t);
++ ssize_t (*write)(struct kobject *, struct bin_attribute *,
++ char *, loff_t, size_t);
+ int (*mmap)(struct kobject *, struct bin_attribute *attr,
+ struct vm_area_struct *vma);
+ };
+@@ -72,12 +76,23 @@
+ ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
+ };
+
++struct shadow_dir_operations {
++ const void *(*current_tag)(void);
++ const void *(*kobject_tag)(struct kobject *kobj);
++};
++
++#define SYSFS_TYPE_MASK 0x00ff
+ #define SYSFS_ROOT 0x0001
+ #define SYSFS_DIR 0x0002
+ #define SYSFS_KOBJ_ATTR 0x0004
+ #define SYSFS_KOBJ_BIN_ATTR 0x0008
+ #define SYSFS_KOBJ_LINK 0x0020
+-#define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
++#define SYSFS_SHADOW_DIR 0x0040
++#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
++
++#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
++#define SYSFS_FLAG_REMOVED 0x0100
++#define SYSFS_FLAG_SHADOWED 0x0200
+
+ #ifdef CONFIG_SYSFS
+
+@@ -85,13 +100,13 @@
+ void (*func)(void *), void *data, struct module *owner);
+
+ extern int __must_check
+-sysfs_create_dir(struct kobject *, struct dentry *);
++sysfs_create_dir(struct kobject *);
+
+ extern void
+ sysfs_remove_dir(struct kobject *);
+
+ extern int __must_check
+-sysfs_rename_dir(struct kobject *, struct dentry *, const char *new_name);
++sysfs_rename_dir(struct kobject *kobj, const char *new_name);
+
+ extern int __must_check
+ sysfs_move_dir(struct kobject *, struct kobject *);
+@@ -114,6 +129,13 @@
+ extern void
+ sysfs_remove_link(struct kobject *, const char * name);
+
++extern int
++sysfs_rename_link(struct kobject *kobj, struct kobject *target,
++ const char *old_name, const char *new_name);
++
++extern void
++sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name);
++
+ int __must_check sysfs_create_bin_file(struct kobject *kobj,
+ struct bin_attribute *attr);
+ void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
+@@ -128,11 +150,7 @@
+
+ void sysfs_notify(struct kobject * k, char *dir, char *attr);
+
+-
+-extern int sysfs_make_shadowed_dir(struct kobject *kobj,
+- void * (*follow_link)(struct dentry *, struct nameidata *));
+-extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj);
+-extern void sysfs_remove_shadow_dir(struct dentry *dir);
++int sysfs_enable_shadowing(struct kobject *, const struct shadow_dir_operations *);
+
+ extern int __must_check sysfs_init(void);
+
+@@ -144,7 +162,7 @@
+ return -ENOSYS;
+ }
+
+-static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow)
++static inline int sysfs_create_dir(struct kobject * kobj)
+ {
+ return 0;
+ }
+@@ -154,9 +172,7 @@
+ ;
+ }
+
+-static inline int sysfs_rename_dir(struct kobject * k,
+- struct dentry *new_parent,
+- const char *new_name)
++static inline int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+ {
+ return 0;
+ }
+@@ -195,6 +211,17 @@
+ ;
+ }
+
++static inline int
++sysfs_rename_link(struct kobject * k, struct kobject *t,
++ const char *old_name, const char * new_name)
++{
++ return 0;
++}
++
++static inline void
++sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name)
++{
++}
+
+ static inline int sysfs_create_bin_file(struct kobject * k, struct bin_attribute * a)
+ {
+@@ -231,8 +258,8 @@
+ {
+ }
+
+-static inline int sysfs_make_shadowed_dir(struct kobject *kobj,
+- void * (*follow_link)(struct dentry *, struct nameidata *))
++static inline int sysfs_enable_shadowing(struct kobject *kobj,
++ const struct shadow_dir_operations *shadow_ops)
+ {
+ return 0;
+ }
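
Note on the sysfs.h/sysdev.h hunks above: the .owner initializer disappears from the attribute macros, and the bin_attribute read/write callbacks now take the attribute as an explicit argument. A minimal sketch of a read handler against the new signature; the eeprom names and backing buffer are illustrative, not from this patch:

    #include <linux/sysfs.h>
    #include <linux/string.h>

    static char eeprom_image[256];      /* illustrative backing store */

    static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *attr,
                               char *buf, loff_t off, size_t count)
    {
            /* 'attr' is passed in now, so one handler can serve several
             * attributes, e.g. via attr->private or attr->size. */
            if (off >= attr->size)
                    return 0;
            if (off + count > attr->size)
                    count = attr->size - off;
            memcpy(buf, eeprom_image + off, count);
            return count;
    }

    static struct bin_attribute eeprom_attr = {
            .attr = { .name = "eeprom", .mode = 0444 }, /* no .owner */
            .size = sizeof(eeprom_image),
            .read = eeprom_read,
    };
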
+diff -Nurb linux-2.6.22-570/include/linux/taskstats.h linux-2.6.22-591/include/linux/taskstats.h
+--- linux-2.6.22-570/include/linux/taskstats.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/taskstats.h 2007-12-21 15:36:12.000000000 -0500
+@@ -31,7 +31,7 @@
+ */
+
+
+-#define TASKSTATS_VERSION 4
++#define TASKSTATS_VERSION 5
+ #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
+ * in linux/sched.h */
+
+@@ -149,6 +149,9 @@
+ __u64 read_bytes; /* bytes of read I/O */
+ __u64 write_bytes; /* bytes of write I/O */
+ __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
++
++ __u64 nvcsw; /* voluntary_ctxt_switches */
++ __u64 nivcsw; /* nonvoluntary_ctxt_switches */
+ };
+
+
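Note: TASKSTATS_VERSION is bumped to 5 because the structure grows two tail fields. A hedged userspace sketch of consuming them, assuming ts has already been filled in from a taskstats genetlink reply:

    #include <stdio.h>
    #include <linux/taskstats.h>

    static void print_ctxt_switches(const struct taskstats *ts)
    {
            /* The new fields are only meaningful from version 5 on. */
            if (ts->version < 5)
                    return;
            printf("voluntary: %llu, involuntary: %llu\n",
                   (unsigned long long)ts->nvcsw,
                   (unsigned long long)ts->nivcsw);
    }
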
+diff -Nurb linux-2.6.22-570/include/linux/tick.h linux-2.6.22-591/include/linux/tick.h
+--- linux-2.6.22-570/include/linux/tick.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/tick.h 2007-12-21 15:36:12.000000000 -0500
+@@ -40,6 +40,7 @@
+ * @idle_sleeps: Number of idle calls, where the sched tick was stopped
+ * @idle_entrytime: Time when the idle call was entered
+ * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
++ * @sleep_length: Duration of the current idle sleep
+ */
+ struct tick_sched {
+ struct hrtimer sched_timer;
+@@ -52,6 +53,7 @@
+ unsigned long idle_sleeps;
+ ktime_t idle_entrytime;
+ ktime_t idle_sleeptime;
++ ktime_t sleep_length;
+ unsigned long last_jiffies;
+ unsigned long next_jiffies;
+ ktime_t idle_expires;
+@@ -100,10 +102,18 @@
+ extern void tick_nohz_stop_sched_tick(void);
+ extern void tick_nohz_restart_sched_tick(void);
+ extern void tick_nohz_update_jiffies(void);
++extern ktime_t tick_nohz_get_sleep_length(void);
++extern unsigned long tick_nohz_get_idle_jiffies(void);
+ # else
+ static inline void tick_nohz_stop_sched_tick(void) { }
+ static inline void tick_nohz_restart_sched_tick(void) { }
+ static inline void tick_nohz_update_jiffies(void) { }
++static inline ktime_t tick_nohz_get_sleep_length(void)
++{
++ ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
++
++ return len;
++}
+ # endif /* !NO_HZ */
+
+ #endif
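
Note: the new sleep_length field and tick_nohz_get_sleep_length() let idle code ask how long the tick is expected to stay stopped. A sketch of how an idle governor might bound its state choice with it; the exit_latency_us table is an assumption, not part of this patch:

    #include <linux/tick.h>
    #include <linux/ktime.h>

    /* Pick the deepest idle state whose exit latency still fits into
     * the time we expect to sleep. */
    static int pick_idle_state(const unsigned int *exit_latency_us, int nstates)
    {
            s64 sleep_us = ktime_to_ns(tick_nohz_get_sleep_length())
                           / NSEC_PER_USEC;
            int i;

            for (i = nstates - 1; i > 0; i--)
                    if (exit_latency_us[i] < sleep_us)
                            break;
            return i;
    }
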
+diff -Nurb linux-2.6.22-570/include/linux/time.h linux-2.6.22-591/include/linux/time.h
+--- linux-2.6.22-570/include/linux/time.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/linux/time.h 2007-12-21 15:36:14.000000000 -0500
+@@ -116,6 +116,8 @@
+ extern unsigned int alarm_setitimer(unsigned int seconds);
+ extern int do_getitimer(int which, struct itimerval *value);
+ extern void getnstimeofday(struct timespec *tv);
++extern void getboottime(struct timespec *ts);
++extern void monotonic_to_bootbased(struct timespec *ts);
+
+ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
+ extern int timekeeping_is_continuous(void);
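
Note: getboottime() and monotonic_to_bootbased() translate CLOCK_MONOTONIC timestamps into ones that also count time spent in suspend. A sketch, assuming kernel context and ktime_get_ts() for the monotonic reading:

    #include <linux/kernel.h>
    #include <linux/time.h>
    #include <linux/hrtimer.h>

    static void report_bootbased_uptime(void)
    {
            struct timespec ts;

            ktime_get_ts(&ts);              /* CLOCK_MONOTONIC */
            monotonic_to_bootbased(&ts);    /* add time slept in suspend */
            printk(KERN_INFO "boot-based uptime: %ld.%09ld\n",
                   ts.tv_sec, ts.tv_nsec);
    }
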
+diff -Nurb linux-2.6.22-570/include/linux/union_fs.h linux-2.6.22-591/include/linux/union_fs.h
+--- linux-2.6.22-570/include/linux/union_fs.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/union_fs.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,29 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _LINUX_UNION_FS_H
++#define _LINUX_UNION_FS_H
++
++#define UNIONFS_VERSION "2.0"
++/*
++ * DEFINITIONS FOR USER AND KERNEL CODE:
++ */
++# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
++# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
++
++/* We don't support normal remount, but unionctl uses it. */
++# define UNIONFS_REMOUNT_MAGIC 0x4a5a4380
++
++/* should be at least LAST_USED_UNIONFS_PERMISSION<<1 */
++#define MAY_NFSRO 16
++
++#endif /* _LINUX_UNION_FS_H */
++
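
Note: the two ioctls are the only user-visible interface this new header defines. A hedged userspace sketch of issuing UNIONFS_IOCTL_INCGEN; the mount point and the exact argument contract are assumptions based on the _IOR(int) encoding above:

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/union_fs.h>

    int main(void)
    {
            int gen, fd = open("/mnt/union", O_RDONLY);

            if (fd < 0)
                    return 1;
            if (ioctl(fd, UNIONFS_IOCTL_INCGEN, &gen) == 0)
                    printf("new generation: %d\n", gen);
            return 0;
    }
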
+diff -Nurb linux-2.6.22-570/include/linux/unwind.h linux-2.6.22-591/include/linux/unwind.h
+--- linux-2.6.22-570/include/linux/unwind.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/unwind.h 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,63 @@
+
+ struct module;
+
++#ifdef CONFIG_STACK_UNWIND
++
++#include <asm/unwind.h>
++
++#ifndef ARCH_UNWIND_SECTION_NAME
++#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
++#endif
++
++/*
++ * Initialize unwind support.
++ */
++extern void unwind_init(void);
++extern void unwind_setup(void);
++
++#ifdef CONFIG_MODULES
++
++extern void *unwind_add_table(struct module *,
++ const void *table_start,
++ unsigned long table_size);
++
++extern void unwind_remove_table(void *handle, int init_only);
++
++#endif
++
++extern int unwind_init_frame_info(struct unwind_frame_info *,
++ struct task_struct *,
++ /*const*/ struct pt_regs *);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++extern int unwind_init_blocked(struct unwind_frame_info *,
++ struct task_struct *);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++extern int unwind_init_running(struct unwind_frame_info *,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg);
++
++/*
++ * Unwind to the previous frame. Returns 0 if successful, negative
++ * number in case of an error.
++ */
++extern int unwind(struct unwind_frame_info *);
++
++/*
++ * Unwind until the return pointer is in user-land (or until an error
++ * occurs). Returns 0 if successful, negative number in case of
++ * error.
++ */
++extern int unwind_to_user(struct unwind_frame_info *);
++
++#else
++
+ struct unwind_frame_info {};
+
+ static inline void unwind_init(void) {}
+@@ -28,12 +85,12 @@
+ return NULL;
+ }
+
++#endif
++
+ static inline void unwind_remove_table(void *handle, int init_only)
+ {
+ }
+
+-#endif
+-
+ static inline int unwind_init_frame_info(struct unwind_frame_info *info,
+ struct task_struct *tsk,
+ const struct pt_regs *regs)
+@@ -65,4 +122,6 @@
+ return -ENOSYS;
+ }
+
++#endif
++
+ #endif /* _LINUX_UNWIND_H */
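
Note: with CONFIG_STACK_UNWIND the header now exposes a real unwinder, while the stubs keep callers compiling without it. A sketch of the intended call pattern for a blocked task; the frame-inspection step is arch-specific and left as a comment:

    #include <linux/unwind.h>
    #include <linux/sched.h>

    static void dump_blocked_task(struct task_struct *tsk)
    {
            struct unwind_frame_info info;

            if (unwind_init_blocked(&info, tsk) != 0)
                    return;
            /* Walk outward one frame at a time until unwind() fails. */
            while (unwind(&info) == 0) {
                    /* arch-specific: read PC/SP out of 'info' and print */
            }
    }
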
+diff -Nurb linux-2.6.22-570/include/linux/usb.h linux-2.6.22-591/include/linux/usb.h
+--- linux-2.6.22-570/include/linux/usb.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/usb.h 2007-12-21 15:36:14.000000000 -0500
+@@ -146,6 +146,10 @@
+ * active alternate setting */
+ unsigned num_altsetting; /* number of alternate settings */
+
++ /* If there is an interface association descriptor then it will list
++ * the associated interfaces */
++ struct usb_interface_assoc_descriptor *intf_assoc;
++
+ int minor; /* minor number this interface is
+ * bound to */
+ enum usb_interface_condition condition; /* state of binding */
+@@ -175,6 +179,7 @@
+
+ /* this maximum is arbitrary */
+ #define USB_MAXINTERFACES 32
++#define USB_MAXIADS (USB_MAXINTERFACES/2)
+
+ /**
+ * struct usb_interface_cache - long-term representation of a device interface
+@@ -245,6 +250,11 @@
+ struct usb_config_descriptor desc;
+
+ char *string; /* iConfiguration string, if present */
++
++ /* List of any Interface Association Descriptors in this
++ * configuration. */
++ struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS];
++
+ /* the interfaces associated with this configuration,
+ * stored in no particular order */
+ struct usb_interface *interface[USB_MAXINTERFACES];
+diff -Nurb linux-2.6.22-570/include/linux/user_namespace.h linux-2.6.22-591/include/linux/user_namespace.h
+--- linux-2.6.22-570/include/linux/user_namespace.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/linux/user_namespace.h 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,61 @@
++#ifndef _LINUX_USER_NAMESPACE_H
++#define _LINUX_USER_NAMESPACE_H
++
++#include <linux/kref.h>
++#include <linux/nsproxy.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++
++#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
++#define UIDHASH_SZ (1 << UIDHASH_BITS)
++
++struct user_namespace {
++ struct kref kref;
++ struct list_head uidhash_table[UIDHASH_SZ];
++ struct user_struct *root_user;
++};
++
++extern struct user_namespace init_user_ns;
++
++#ifdef CONFIG_USER_NS
++
++static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
++{
++ if (ns)
++ kref_get(&ns->kref);
++ return ns;
++}
++
++extern struct user_namespace *copy_user_ns(int flags,
++ struct user_namespace *old_ns);
++extern void free_user_ns(struct kref *kref);
++
++static inline void put_user_ns(struct user_namespace *ns)
++{
++ if (ns)
++ kref_put(&ns->kref, free_user_ns);
++}
++
++#else
++
++static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
++{
++ return &init_user_ns;
++}
++
++static inline struct user_namespace *copy_user_ns(int flags,
++ struct user_namespace *old_ns)
++{
++ if (flags & CLONE_NEWUSER)
++ return ERR_PTR(-EINVAL);
++
++ return NULL;
++}
++
++static inline void put_user_ns(struct user_namespace *ns)
++{
++}
++
++#endif
++
++#endif /* _LINUX_USER_NAMESPACE_H */
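
Note: get_user_ns()/put_user_ns() follow the usual kref pattern, and the !CONFIG_USER_NS stubs collapse everything onto init_user_ns. A sketch of the expected usage when an object caches a namespace reference; struct foo is illustrative:

    #include <linux/user_namespace.h>

    struct foo {
            struct user_namespace *user_ns;
    };

    static void foo_attach(struct foo *f, struct user_namespace *ns)
    {
            f->user_ns = get_user_ns(ns);   /* take a reference */
    }

    static void foo_release(struct foo *f)
    {
            put_user_ns(f->user_ns);        /* freed on the last put */
    }
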
+diff -Nurb linux-2.6.22-570/include/linux/utsname.h linux-2.6.22-591/include/linux/utsname.h
+--- linux-2.6.22-570/include/linux/utsname.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/utsname.h 2007-12-21 15:36:12.000000000 -0500
+@@ -48,26 +48,14 @@
+ kref_get(&ns->kref);
+ }
+
+-#ifdef CONFIG_UTS_NS
+-extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns);
++extern struct uts_namespace *copy_utsname(unsigned long flags,
++ struct uts_namespace *ns);
+ extern void free_uts_ns(struct kref *kref);
+
+ static inline void put_uts_ns(struct uts_namespace *ns)
+ {
+ kref_put(&ns->kref, free_uts_ns);
+ }
+-#else
+-static inline struct uts_namespace *copy_utsname(int flags,
+- struct uts_namespace *ns)
+-{
+- return ns;
+-}
+-
+-static inline void put_uts_ns(struct uts_namespace *ns)
+-{
+-}
+-#endif
+-
+ static inline struct new_utsname *utsname(void)
+ {
+ return &current->nsproxy->uts_ns->name;
+diff -Nurb linux-2.6.22-570/include/linux/vmalloc.h linux-2.6.22-591/include/linux/vmalloc.h
+--- linux-2.6.22-570/include/linux/vmalloc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/linux/vmalloc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -65,9 +65,10 @@
+ unsigned long flags, int node,
+ gfp_t gfp_mask);
+ extern struct vm_struct *remove_vm_area(void *addr);
++
+ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
+ struct page ***pages);
+-extern void unmap_vm_area(struct vm_struct *area);
++extern void unmap_kernel_range(unsigned long addr, unsigned long size);
+
+ /*
+ * Internals. Don't use..
+diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-591/include/net/addrconf.h
+--- linux-2.6.22-570/include/net/addrconf.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/net/addrconf.h 2007-12-21 15:36:12.000000000 -0500
+@@ -61,7 +61,7 @@
+ extern int ipv6_chk_addr(struct in6_addr *addr,
+ struct net_device *dev,
+ int strict);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ extern int ipv6_chk_home_addr(struct in6_addr *addr);
+ #endif
+ extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr,
+diff -Nurb linux-2.6.22-570/include/net/af_unix.h linux-2.6.22-591/include/net/af_unix.h
+--- linux-2.6.22-570/include/net/af_unix.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/net/af_unix.h 2007-12-21 15:36:14.000000000 -0500
+@@ -91,12 +91,11 @@
+ #define unix_sk(__sk) ((struct unix_sock *)__sk)
+
+ #ifdef CONFIG_SYSCTL
+-extern int sysctl_unix_max_dgram_qlen;
+-extern void unix_sysctl_register(void);
+-extern void unix_sysctl_unregister(void);
++extern void unix_sysctl_register(struct net *net);
++extern void unix_sysctl_unregister(struct net *net);
+ #else
+-static inline void unix_sysctl_register(void) {}
+-static inline void unix_sysctl_unregister(void) {}
++static inline void unix_sysctl_register(struct net *net) {}
++static inline void unix_sysctl_unregister(struct net *net) {}
+ #endif
+ #endif
+ #endif
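
Note: the af_unix sysctl hooks now take a struct net, matching the move of sysctl_unix_max_dgram_qlen into struct net (defined later in this patch). A sketch of the expected pernet init/exit pairing, under that assumption:

    #include <net/net_namespace.h>
    #include <net/af_unix.h>

    static int unix_net_init(struct net *net)
    {
            net->sysctl_unix_max_dgram_qlen = 10;   /* per-namespace default */
            unix_sysctl_register(net);
            return 0;
    }

    static void unix_net_exit(struct net *net)
    {
            unix_sysctl_unregister(net);
    }
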
+diff -Nurb linux-2.6.22-570/include/net/arp.h linux-2.6.22-591/include/net/arp.h
+--- linux-2.6.22-570/include/net/arp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/arp.h 2007-12-21 15:36:14.000000000 -0500
+@@ -11,7 +11,7 @@
+
+ extern void arp_init(void);
+ extern int arp_find(unsigned char *haddr, struct sk_buff *skb);
+-extern int arp_ioctl(unsigned int cmd, void __user *arg);
++extern int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+ extern void arp_send(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
+ unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th);
+diff -Nurb linux-2.6.22-570/include/net/dst.h linux-2.6.22-591/include/net/dst.h
+--- linux-2.6.22-570/include/net/dst.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/dst.h 2007-12-21 15:36:12.000000000 -0500
+@@ -47,7 +47,6 @@
+ #define DST_NOXFRM 2
+ #define DST_NOPOLICY 4
+ #define DST_NOHASH 8
+-#define DST_BALANCED 0x10
+ unsigned long expires;
+
+ unsigned short header_len; /* more space at head required */
+diff -Nurb linux-2.6.22-570/include/net/fib_rules.h linux-2.6.22-591/include/net/fib_rules.h
+--- linux-2.6.22-570/include/net/fib_rules.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/fib_rules.h 2007-12-21 15:36:14.000000000 -0500
+@@ -56,12 +56,12 @@
+ int (*fill)(struct fib_rule *, struct sk_buff *,
+ struct nlmsghdr *,
+ struct fib_rule_hdr *);
+- u32 (*default_pref)(void);
++ u32 (*default_pref)(struct fib_rules_ops *ops);
+ size_t (*nlmsg_payload)(struct fib_rule *);
+
+ /* Called after modifications to the rules set, must flush
+ * the route cache if one exists. */
+- void (*flush_cache)(void);
++ void (*flush_cache)(struct fib_rules_ops *ops);
+
+ int nlgroup;
+ const struct nla_policy *policy;
+@@ -101,8 +101,8 @@
+ return frh->table;
+ }
+
+-extern int fib_rules_register(struct fib_rules_ops *);
+-extern int fib_rules_unregister(struct fib_rules_ops *);
++extern int fib_rules_register(struct net *net, struct fib_rules_ops *);
++extern int fib_rules_unregister(struct net *net, struct fib_rules_ops *);
+
+ extern int fib_rules_lookup(struct fib_rules_ops *,
+ struct flowi *, int flags,
+diff -Nurb linux-2.6.22-570/include/net/flow.h linux-2.6.22-591/include/net/flow.h
+--- linux-2.6.22-570/include/net/flow.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/flow.h 2007-12-21 15:36:14.000000000 -0500
+@@ -8,9 +8,11 @@
+ #define _NET_FLOW_H
+
+ #include <linux/in6.h>
++#include <net/net_namespace.h>
+ #include <asm/atomic.h>
+
+ struct flowi {
++ struct net *fl_net;
+ int oif;
+ int iif;
+ __u32 mark;
+@@ -67,20 +69,16 @@
+
+ __be32 spi;
+
+-#ifdef CONFIG_IPV6_MIP6
+ struct {
+ __u8 type;
+ } mht;
+-#endif
+ } uli_u;
+ #define fl_ip_sport uli_u.ports.sport
+ #define fl_ip_dport uli_u.ports.dport
+ #define fl_icmp_type uli_u.icmpt.type
+ #define fl_icmp_code uli_u.icmpt.code
+ #define fl_ipsec_spi uli_u.spi
+-#ifdef CONFIG_IPV6_MIP6
+ #define fl_mh_type uli_u.mht.type
+-#endif
+ __u32 secid; /* used by xfrm; see secid.txt */
+ } __attribute__((__aligned__(BITS_PER_LONG/8)));
+
+diff -Nurb linux-2.6.22-570/include/net/inet6_hashtables.h linux-2.6.22-591/include/net/inet6_hashtables.h
+--- linux-2.6.22-570/include/net/inet6_hashtables.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/inet6_hashtables.h 2007-12-21 15:36:14.000000000 -0500
+@@ -62,31 +62,31 @@
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum,
+- const int dif);
++ const int dif, struct net *net);
+
+ extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+ const struct in6_addr *daddr,
+ const unsigned short hnum,
+- const int dif);
++ const int dif, struct net *net);
+
+ static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
+- daddr, hnum, dif);
++ daddr, hnum, dif, net);
+ if (sk)
+ return sk;
+
+- return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
++ return inet6_lookup_listener(hashinfo, daddr, hnum, dif, net);
+ }
+
+ extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+- const int dif);
++ const int dif, struct net *net);
+ #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
+ #endif /* _INET6_HASHTABLES_H */
+diff -Nurb linux-2.6.22-570/include/net/inet_hashtables.h linux-2.6.22-591/include/net/inet_hashtables.h
+--- linux-2.6.22-570/include/net/inet_hashtables.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/inet_hashtables.h 2007-12-21 15:36:14.000000000 -0500
+@@ -75,6 +75,7 @@
+ * ports are created in O(1) time? I thought so. ;-) -DaveM
+ */
+ struct inet_bind_bucket {
++ struct net *net;
+ unsigned short port;
+ signed short fastreuse;
+ struct hlist_node node;
+@@ -138,34 +139,35 @@
+ extern struct inet_bind_bucket *
+ inet_bind_bucket_create(struct kmem_cache *cachep,
+ struct inet_bind_hashbucket *head,
++ struct net *net,
+ const unsigned short snum);
+ extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
+ struct inet_bind_bucket *tb);
+
+-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
++static inline int inet_bhashfn(struct net *net, const __u16 lport, const int bhash_size)
+ {
+- return lport & (bhash_size - 1);
++ return (((unsigned long)net) ^ lport) & (bhash_size - 1);
+ }
+
+ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+ const unsigned short snum);
+
+ /* These can have wildcards, don't try too hard. */
+-static inline int inet_lhashfn(const unsigned short num)
++static inline int inet_lhashfn(struct net *net, const unsigned short num)
+ {
+- return num & (INET_LHTABLE_SIZE - 1);
++ return (((unsigned long)net) ^ num) & (INET_LHTABLE_SIZE - 1);
+ }
+
+ static inline int inet_sk_listen_hashfn(const struct sock *sk)
+ {
+- return inet_lhashfn(inet_sk(sk)->num);
++ return inet_lhashfn(sk->sk_net, inet_sk(sk)->num);
+ }
+
+ /* Caller must disable local BH processing. */
+ static inline void __inet_inherit_port(struct inet_hashinfo *table,
+ struct sock *sk, struct sock *child)
+ {
+- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
++ const int bhash = inet_bhashfn(sk->sk_net, inet_sk(child)->num, table->bhash_size);
+ struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ struct inet_bind_bucket *tb;
+
+@@ -274,12 +276,13 @@
+ extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ const __be32 daddr,
+ const unsigned short hnum,
+- const int dif);
++ const int dif, struct net *net);
+
+ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+- __be32 daddr, __be16 dport, int dif)
++ __be32 daddr, __be16 dport,
++ int dif, struct net *net)
+ {
+- return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif);
++ return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif, net);
+ }
+
+ /* Socket demux engine toys. */
+@@ -313,30 +316,34 @@
+ (((__force __u64)(__be32)(__daddr)) << 32) | \
+ ((__force __u64)(__be32)(__saddr)));
+ #endif /* __BIG_ENDIAN */
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\
+ (((__sk)->sk_hash == (__hash)) && \
+ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
+ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \
+- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++ ((__sk)->sk_net == __net))
++#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\
+ (((__sk)->sk_hash == (__hash)) && \
+ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
+ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
+- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++ ((__sk)->sk_net == __net))
+ #else /* 32-bit arch */
+ #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net) \
+ (((__sk)->sk_hash == (__hash)) && \
+ (inet_sk(__sk)->daddr == (__saddr)) && \
+ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
+ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \
+- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++ ((__sk)->sk_net == __net))
++#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __net) \
+ (((__sk)->sk_hash == (__hash)) && \
+ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
+ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
+ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
+- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++ ((__sk)->sk_net == __net))
+ #endif /* 64-bit arch */
+
+ /*
+@@ -349,7 +356,7 @@
+ __inet_lookup_established(struct inet_hashinfo *hashinfo,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const u16 hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+@@ -358,19 +365,19 @@
+ /* Optimize here for direct hit, only listening connections can
+ * have wildcards anyways.
+ */
+- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
++ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+
+ prefetch(head->chain.first);
+ read_lock(&head->lock);
+ sk_for_each(sk, node, &head->chain) {
+- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net))
+ goto hit; /* You sunk my battleship! */
+ }
+
+ /* Must check for a TIME_WAIT'er before going to listener hash. */
+ sk_for_each(sk, node, &head->twchain) {
+- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net))
+ goto hit;
+ }
+ sk = NULL;
+@@ -386,32 +393,32 @@
+ inet_lookup_established(struct inet_hashinfo *hashinfo,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ return __inet_lookup_established(hashinfo, saddr, sport, daddr,
+- ntohs(dport), dif);
++ ntohs(dport), dif, net);
+ }
+
+ static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ u16 hnum = ntohs(dport);
+ struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
+- hnum, dif);
+- return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif);
++ hnum, dif, net);
++ return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif, net);
+ }
+
+ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const __be16 dport,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk;
+
+ local_bh_disable();
+- sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif);
++ sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif, net);
+ local_bh_enable();
+
+ return sk;
+diff -Nurb linux-2.6.22-570/include/net/inet_sock.h linux-2.6.22-591/include/net/inet_sock.h
+--- linux-2.6.22-570/include/net/inet_sock.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/inet_sock.h 2007-12-21 15:36:14.000000000 -0500
+@@ -171,10 +171,12 @@
+ extern u32 inet_ehash_secret;
+ extern void build_ehash_secret(void);
+
+-static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
++static inline unsigned int inet_ehashfn(struct net *net,
++ const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport)
+ {
+- return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr,
++ return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr ^
++ (__force __u32) ((unsigned long)net),
+ ((__u32) lport) << 16 | (__force __u32)fport,
+ inet_ehash_secret);
+ }
+@@ -187,7 +189,7 @@
+ const __be32 faddr = inet->daddr;
+ const __be16 fport = inet->dport;
+
+- return inet_ehashfn(laddr, lport, faddr, fport);
++ return inet_ehashfn(sk->sk_net, laddr, lport, faddr, fport);
+ }
+
+ #endif /* _INET_SOCK_H */
+diff -Nurb linux-2.6.22-570/include/net/inet_timewait_sock.h linux-2.6.22-591/include/net/inet_timewait_sock.h
+--- linux-2.6.22-570/include/net/inet_timewait_sock.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/net/inet_timewait_sock.h 2007-12-21 15:36:14.000000000 -0500
+@@ -115,6 +115,7 @@
+ #define tw_refcnt __tw_common.skc_refcnt
+ #define tw_hash __tw_common.skc_hash
+ #define tw_prot __tw_common.skc_prot
++#define tw_net __tw_common.skc_net
+ #define tw_xid __tw_common.skc_xid
+ #define tw_vx_info __tw_common.skc_vx_info
+ #define tw_nid __tw_common.skc_nid
+diff -Nurb linux-2.6.22-570/include/net/inetpeer.h linux-2.6.22-591/include/net/inetpeer.h
+--- linux-2.6.22-570/include/net/inetpeer.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/inetpeer.h 2007-12-21 15:36:14.000000000 -0500
+@@ -15,6 +15,8 @@
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
+
++struct net;
++
+ struct inet_peer
+ {
+ /* group together avl_left,avl_right,v4daddr to speedup lookups */
+@@ -22,7 +24,11 @@
+ __be32 v4daddr; /* peer's address */
+ __u16 avl_height;
+ __u16 ip_id_count; /* IP ID for the next packet */
+- struct inet_peer *unused_next, **unused_prevp;
++ union {
++ struct inet_peer *unused_next;
++ struct net *net;
++ } u;
++ struct inet_peer **unused_prevp;
+ __u32 dtime; /* the time of last use of not
+ * referenced entries */
+ atomic_t refcnt;
+@@ -34,7 +40,7 @@
+ void inet_initpeers(void) __init;
+
+ /* can be called with or without local BH being disabled */
+-struct inet_peer *inet_getpeer(__be32 daddr, int create);
++struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create);
+
+ /* can be called from BH context or outside */
+ extern void inet_putpeer(struct inet_peer *p);
+diff -Nurb linux-2.6.22-570/include/net/ip.h linux-2.6.22-591/include/net/ip.h
+--- linux-2.6.22-570/include/net/ip.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/ip.h 2007-12-21 15:36:14.000000000 -0500
+@@ -149,13 +149,6 @@
+ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ unsigned int len);
+
+-struct ipv4_config
+-{
+- int log_martians;
+- int no_pmtu_disc;
+-};
+-
+-extern struct ipv4_config ipv4_config;
+ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+ #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field)
+ #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field)
+@@ -171,27 +164,6 @@
+ extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
+ extern void snmp_mib_free(void *ptr[2]);
+
+-extern int sysctl_local_port_range[2];
+-extern int sysctl_ip_default_ttl;
+-extern int sysctl_ip_nonlocal_bind;
+-
+-/* From ip_fragment.c */
+-extern int sysctl_ipfrag_high_thresh;
+-extern int sysctl_ipfrag_low_thresh;
+-extern int sysctl_ipfrag_time;
+-extern int sysctl_ipfrag_secret_interval;
+-extern int sysctl_ipfrag_max_dist;
+-
+-/* From inetpeer.c */
+-extern int inet_peer_threshold;
+-extern int inet_peer_minttl;
+-extern int inet_peer_maxttl;
+-extern int inet_peer_gc_mintime;
+-extern int inet_peer_gc_maxtime;
+-
+-/* From ip_output.c */
+-extern int sysctl_ip_dynaddr;
+-
+ extern void ipfrag_init(void);
+
+ #ifdef CONFIG_INET
+@@ -332,8 +304,6 @@
+ };
+
+ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
+-extern int ip_frag_nqueues;
+-extern atomic_t ip_frag_mem;
+
+ /*
+ * Functions provided by ip_forward.c
+@@ -392,5 +362,6 @@
+ #endif
+
+ extern struct ctl_table ipv4_table[];
++extern struct ctl_table multi_ipv4_table[];
+
+ #endif /* _IP_H */
+diff -Nurb linux-2.6.22-570/include/net/ip_fib.h linux-2.6.22-591/include/net/ip_fib.h
+--- linux-2.6.22-570/include/net/ip_fib.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/ip_fib.h 2007-12-21 15:36:14.000000000 -0500
+@@ -39,7 +39,6 @@
+ int fc_mx_len;
+ int fc_mp_len;
+ u32 fc_flow;
+- u32 fc_mp_alg;
+ u32 fc_nlflags;
+ struct nl_info fc_nlinfo;
+ };
+@@ -89,6 +88,7 @@
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+ u32 fib_mp_alg;
+ #endif
++ struct net * fib_net;
+ struct fib_nh fib_nh[0];
+ #define fib_dev fib_nh[0].nh_dev
+ };
+@@ -103,10 +103,6 @@
+ unsigned char nh_sel;
+ unsigned char type;
+ unsigned char scope;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- __be32 network;
+- __be32 netmask;
+-#endif
+ struct fib_info *fi;
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ struct fib_rule *r;
+@@ -145,14 +141,6 @@
+ #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
+ #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
+
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-#define FIB_RES_NETWORK(res) ((res).network)
+-#define FIB_RES_NETMASK(res) ((res).netmask)
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-#define FIB_RES_NETWORK(res) (0)
+-#define FIB_RES_NETMASK(res) (0)
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
+-
+ struct fib_table {
+ struct hlist_node tb_hlist;
+ u32 tb_id;
+@@ -171,43 +159,43 @@
+
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+
+-extern struct fib_table *ip_fib_local_table;
+-extern struct fib_table *ip_fib_main_table;
+-
+-static inline struct fib_table *fib_get_table(u32 id)
++static inline struct fib_table *fib_get_table(struct net *net, u32 id)
+ {
+ if (id != RT_TABLE_LOCAL)
+- return ip_fib_main_table;
+- return ip_fib_local_table;
++ return net->ip_fib_main_table;
++ return net->ip_fib_local_table;
+ }
+
+-static inline struct fib_table *fib_new_table(u32 id)
++static inline struct fib_table *fib_new_table(struct net *net, u32 id)
+ {
+- return fib_get_table(id);
++ return fib_get_table(net, id);
+ }
+
+ static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
+ {
+- if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) &&
+- ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res))
++ struct net *net = flp->fl_net;
++ struct fib_table *local_table = net->ip_fib_local_table;
++ struct fib_table *main_table = net->ip_fib_main_table;
++ if (local_table->tb_lookup(local_table, flp, res) &&
++ main_table->tb_lookup(main_table, flp, res))
+ return -ENETUNREACH;
+ return 0;
+ }
+
+ static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
+ {
++ struct net *net = flp->fl_net;
++ struct fib_table *main_table = net->ip_fib_main_table;
+ if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+- ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res);
++ main_table->tb_select_default(main_table, flp, res);
+ }
+
+ #else /* CONFIG_IP_MULTIPLE_TABLES */
+-#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
+-#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
+
+ extern int fib_lookup(struct flowi *flp, struct fib_result *res);
+
+-extern struct fib_table *fib_new_table(u32 id);
+-extern struct fib_table *fib_get_table(u32 id);
++extern struct fib_table *fib_new_table(struct net *net, u32 id);
++extern struct fib_table *fib_get_table(struct net *net, u32 id);
+ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
+
+ #endif /* CONFIG_IP_MULTIPLE_TABLES */
+@@ -223,15 +211,17 @@
+
+ /* Exported by fib_semantics.c */
+ extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
+-extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
++extern int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force);
+ extern int fib_sync_up(struct net_device *dev);
+ extern __be32 __fib_res_prefsrc(struct fib_result *res);
+
+ /* Exported by fib_hash.c */
+ extern struct fib_table *fib_hash_init(u32 id);
++extern void fib_hash_exit(struct fib_table *tb);
+
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+-extern void __init fib4_rules_init(void);
++extern void fib4_rules_init(struct net * net);
++extern void fib4_rules_exit(struct net * net);
+
+ #ifdef CONFIG_NET_CLS_ROUTE
+ extern u32 fib_rules_tclass(struct fib_result *res);
+@@ -274,8 +264,11 @@
+ }
+
+ #ifdef CONFIG_PROC_FS
+-extern int fib_proc_init(void);
+-extern void fib_proc_exit(void);
++extern int fib_proc_init(struct net * net);
++extern void fib_proc_exit(struct net * net);
+ #endif
+
++extern int fib_info_init(struct net *net);
++extern void fib_info_exit(struct net *net);
++
+ #endif /* _NET_FIB_H */
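
Note: the FIB helpers above now key off flp->fl_net or an explicit struct net argument instead of the removed ip_fib_local_table/ip_fib_main_table globals. A sketch of a lookup scoped to one namespace:

    #include <net/ip_fib.h>
    #include <net/flow.h>

    static int lookup_in_ns(struct net *net, __be32 daddr,
                            struct fib_result *res)
    {
            struct flowi fl = {
                    .fl_net = net,  /* the namespace is part of the key */
                    .nl_u = { .ip4_u = { .daddr = daddr } },
            };

            return fib_lookup(&fl, res);    /* -ENETUNREACH if no match */
    }
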
+diff -Nurb linux-2.6.22-570/include/net/ip_mp_alg.h linux-2.6.22-591/include/net/ip_mp_alg.h
+--- linux-2.6.22-570/include/net/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,96 +0,0 @@
+-/* ip_mp_alg.h: IPV4 multipath algorithm support.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#ifndef _NET_IP_MP_ALG_H
+-#define _NET_IP_MP_ALG_H
+-
+-#include <linux/ip_mp_alg.h>
+-#include <net/flow.h>
+-#include <net/route.h>
+-
+-struct fib_nh;
+-
+-struct ip_mp_alg_ops {
+- void (*mp_alg_select_route)(const struct flowi *flp,
+- struct rtable *rth, struct rtable **rp);
+- void (*mp_alg_flush)(void);
+- void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask,
+- unsigned char prefixlen,
+- const struct fib_nh *nh);
+- void (*mp_alg_remove)(struct rtable *rth);
+-};
+-
+-extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
+-extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
+-
+-extern struct ip_mp_alg_ops *ip_mp_alg_table[];
+-
+-static inline int multipath_select_route(const struct flowi *flp,
+- struct rtable *rth,
+- struct rtable **rp)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+- /* mp_alg_select_route _MUST_ be implemented */
+- if (ops && (rth->u.dst.flags & DST_BALANCED)) {
+- ops->mp_alg_select_route(flp, rth, rp);
+- return 1;
+- }
+-#endif
+- return 0;
+-}
+-
+-static inline void multipath_flush(void)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- int i;
+-
+- for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
+- struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
+-
+- if (ops && ops->mp_alg_flush)
+- ops->mp_alg_flush();
+- }
+-#endif
+-}
+-
+-static inline void multipath_set_nhinfo(struct rtable *rth,
+- __be32 network, __be32 netmask,
+- unsigned char prefixlen,
+- const struct fib_nh *nh)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+- if (ops && ops->mp_alg_set_nhinfo)
+- ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
+-#endif
+-}
+-
+-static inline void multipath_remove(struct rtable *rth)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+- if (ops && ops->mp_alg_remove &&
+- (rth->u.dst.flags & DST_BALANCED))
+- ops->mp_alg_remove(rth);
+-#endif
+-}
+-
+-static inline int multipath_comparekeys(const struct flowi *flp1,
+- const struct flowi *flp2)
+-{
+- return flp1->fl4_dst == flp2->fl4_dst &&
+- flp1->fl4_src == flp2->fl4_src &&
+- flp1->oif == flp2->oif &&
+- flp1->mark == flp2->mark &&
+- !((flp1->fl4_tos ^ flp2->fl4_tos) &
+- (IPTOS_RT_MASK | RTO_ONLINK));
+-}
+-
+-#endif /* _NET_IP_MP_ALG_H */
+diff -Nurb linux-2.6.22-570/include/net/llc_conn.h linux-2.6.22-591/include/net/llc_conn.h
+--- linux-2.6.22-570/include/net/llc_conn.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/llc_conn.h 2007-12-21 15:36:14.000000000 -0500
+@@ -93,7 +93,7 @@
+ return skb->cb[sizeof(skb->cb) - 1];
+ }
+
+-extern struct sock *llc_sk_alloc(int family, gfp_t priority,
++extern struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority,
+ struct proto *prot);
+ extern void llc_sk_free(struct sock *sk);
+
+diff -Nurb linux-2.6.22-570/include/net/mip6.h linux-2.6.22-591/include/net/mip6.h
+--- linux-2.6.22-570/include/net/mip6.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/mip6.h 2007-12-21 15:36:12.000000000 -0500
+@@ -54,8 +54,4 @@
+ #define IP6_MH_TYPE_BERROR 7 /* Binding Error */
+ #define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR
+
+-extern int mip6_init(void);
+-extern void mip6_fini(void);
+-extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb);
+-
+ #endif
+diff -Nurb linux-2.6.22-570/include/net/neighbour.h linux-2.6.22-591/include/net/neighbour.h
+--- linux-2.6.22-570/include/net/neighbour.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/neighbour.h 2007-12-21 15:36:14.000000000 -0500
+@@ -34,6 +34,7 @@
+
+ struct neigh_parms
+ {
++ struct net *net;
+ struct net_device *dev;
+ struct neigh_parms *next;
+ int (*neigh_setup)(struct neighbour *);
+@@ -126,6 +127,7 @@
+ struct pneigh_entry
+ {
+ struct pneigh_entry *next;
++ struct net *net;
+ struct net_device *dev;
+ u8 flags;
+ u8 key[0];
+@@ -187,6 +189,7 @@
+ const void *pkey,
+ struct net_device *dev);
+ extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl,
++ struct net *net,
+ const void *pkey);
+ extern struct neighbour * neigh_create(struct neigh_table *tbl,
+ const void *pkey,
+@@ -205,21 +208,24 @@
+ struct net_device *dev);
+
+ extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
++extern struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net);
+ extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
+ extern void neigh_parms_destroy(struct neigh_parms *parms);
+ extern unsigned long neigh_rand_reach_time(unsigned long base);
+
+ extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+ struct sk_buff *skb);
+-extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat);
+-extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev);
++extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat);
++extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev);
+
+ extern void neigh_app_ns(struct neighbour *n);
+ extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
+ extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
+ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
+
+-struct neigh_seq_state {
++struct neigh_seq_state
++{
++ struct net *net;
+ struct neigh_table *tbl;
+ void *(*neigh_sub_iter)(struct neigh_seq_state *state,
+ struct neighbour *n, loff_t *pos);
+diff -Nurb linux-2.6.22-570/include/net/net_namespace.h linux-2.6.22-591/include/net/net_namespace.h
+--- linux-2.6.22-570/include/net/net_namespace.h 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/include/net/net_namespace.h 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,236 @@
++/*
++ * Operations on the network namespace
++ */
++#ifndef __NET_NET_NAMESPACE_H
++#define __NET_NET_NAMESPACE_H
++
++#include <asm/atomic.h>
++#include <linux/workqueue.h>
++#include <linux/list.h>
++#include <linux/proc_fs.h>
++#include <linux/sysctl.h>
++#include <linux/netdevice.h>
++#include <linux/timer.h>
++
++struct sock;
++struct xt_af_pernet;
++struct ipv4_devconf;
++struct neigh_parms;
++struct inet_peer;
++struct xt_table;
++struct net {
++ atomic_t count; /* To decide when the network namespace
++ * should go
++ */
++ atomic_t use_count; /* For references we destroy on demand */
++ struct list_head list; /* list of network namespace structures */
++ struct work_struct work; /* work struct for freeing */
++
++#ifdef CONFIG_PROC_FS
++ struct proc_dir_entry *proc_net;
++ struct proc_dir_entry *proc_net_stat;
++ struct proc_dir_entry proc_net_root;
++# ifdef CONFIG_NETFILTER
++ struct proc_dir_entry *proc_net_netfilter;
++# endif
++#endif
++#ifdef CONFIG_SYSCTL
++ struct ctl_table_header net_table_header;
++#endif
++ struct net_device loopback_dev; /* The loopback */
++ struct list_head dev_base_head; /* All devices */
++
++ struct hlist_head *dev_name_head;
++ struct hlist_head *dev_index_head;
++
++ struct sock * rtnl; /* rtnetlink socket */
++
++
++ /* core netfilter */
++ struct xt_af_pernet * xtn;
++
++ /* core fib_rules */
++ struct list_head rules_ops;
++ spinlock_t rules_mod_lock;
++
++#ifdef CONFIG_XFRM
++ u32 sysctl_xfrm_aevent_etime;
++ u32 sysctl_xfrm_aevent_rseqth;
++ int sysctl_xfrm_larval_drop;
++ u32 sysctl_xfrm_acq_expires;
++#endif /* CONFIG_XFRM */
++
++ int sysctl_somaxconn;
++
++#ifdef CONFIG_PACKET
++ /* List of all packet sockets. */
++ rwlock_t packet_sklist_lock;
++ struct hlist_head packet_sklist;
++#endif /* CONFIG_PACKET */
++#ifdef CONFIG_UNIX
++ int sysctl_unix_max_dgram_qlen;
++ void * unix_sysctl;
++#endif /* CONFIG_UNIX */
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ void * fib4_table;
++#endif /* CONFIG_IP_MULTIPLE_TABLES */
++#ifdef CONFIG_IP_FIB_HASH
++ int fn_hash_last_dflt;
++#endif
++#ifdef CONFIG_IP_FIB_TRIE
++ int trie_last_dflt;
++#endif
++#ifndef CONFIG_IP_MULTIPLE_TABLES
++ struct fib_table *ip_fib_local_table;
++ struct fib_table *ip_fib_main_table;
++#endif
++ struct hlist_head *ip_fib_table_hash;
++ struct sock *nlfl;
++
++ /* fib_semantics */
++ struct hlist_head *fib_info_hash;
++ struct hlist_head *fib_info_laddrhash;
++ unsigned int fib_info_hash_size;
++ unsigned int fib_info_cnt;
++ struct hlist_head *fib_info_devhash;
++
++ /* af_inet.c */
++ int sysctl_ip_nonlocal_bind; /* __read_mostly */
++ int sysctl_ip_default_ttl; /* __read_mostly */
++ int sysctl_ipfrag_high_thresh;
++ int sysctl_ipfrag_low_thresh;
++ int sysctl_ipfrag_time;
++ int sysctl_ipfrag_secret_interval;
++ int sysctl_ipfrag_max_dist;
++ int sysctl_ipv4_no_pmtu_disc;
++ int sysctl_local_port_range[2];
++ int sysctl_ip_dynaddr;
++ int sysctl_tcp_timestamps; /* __read_mostly */
++ int sysctl_tcp_window_scaling; /* __read_mostly */
++ /* inetpeer.c */
++ int inet_peer_threshold;
++ int inet_peer_minttl;
++ int inet_peer_maxttl;
++ int inet_peer_gc_mintime;
++ int inet_peer_gc_maxtime;
++
++ /* devinet */
++ struct ipv4_devconf *ipv4_devconf;
++ struct ipv4_devconf *ipv4_devconf_dflt;
++
++ /* arp.c */
++ struct neigh_parms *arp_neigh_parms_default;
++
++ /* icmp.c */
++ struct socket **__icmp_socket;
++
++ /* inetpeer.c */
++ struct inet_peer *peer_root;
++ int peer_total;
++ struct inet_peer *inet_peer_unused_head;
++ struct inet_peer **inet_peer_unused_tailp;
++ struct timer_list peer_periodic_timer;
++
++ /* ip_fragment.c */
++ struct hlist_head *ipq_hash;
++ u32 ipfrag_hash_rnd;
++ struct list_head ipq_lru_list;
++ int ip_frag_nqueues;
++ atomic_t ip_frag_mem;
++ struct timer_list ipfrag_secret_timer;
++
++ /* udp.c */
++ int udp_port_rover;
++
++ /* iptable_filter.c */
++ struct xt_table *ip_packet_filter;
++};
++
++extern struct net init_net;
++extern struct list_head net_namespace_list;
++
++extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);
++extern void __put_net(struct net *net);
++
++static inline struct net *get_net(struct net *net)
++{
++ atomic_inc(&net->count);
++ return net;
++}
++
++static inline void put_net(struct net *net)
++{
++ if (atomic_dec_and_test(&net->count))
++ __put_net(net);
++}
++
++static inline struct net *hold_net(struct net *net)
++{
++ atomic_inc(&net->use_count);
++ return net;
++}
++
++static inline void release_net(struct net *net)
++{
++ atomic_dec(&net->use_count);
++}
++
++extern void net_lock(void);
++extern void net_unlock(void);
++
++#define for_each_net(VAR) \
++ list_for_each_entry(VAR, &net_namespace_list, list)
++
++
++struct pernet_operations {
++ struct list_head list;
++ int (*init)(struct net *net);
++ void (*exit)(struct net *net);
++};
++
++extern int register_pernet_subsys(struct pernet_operations *);
++extern void unregister_pernet_subsys(struct pernet_operations *);
++extern int register_pernet_device(struct pernet_operations *);
++extern void unregister_pernet_device(struct pernet_operations *);
++
++#ifdef CONFIG_PROC_FS
++static inline struct net *PDE_NET(struct proc_dir_entry *pde)
++{
++ return pde->parent->data;
++}
++
++static inline struct net *PROC_NET(const struct inode *inode)
++{
++ return PDE_NET(PDE(inode));
++}
++
++static inline struct proc_dir_entry *proc_net_create(struct net *net,
++ const char *name, mode_t mode, get_info_t *get_info)
++{
++ return create_proc_info_entry(name,mode, net->proc_net, get_info);
++}
++
++static inline struct proc_dir_entry *proc_net_fops_create(struct net *net,
++ const char *name, mode_t mode, const struct file_operations *fops)
++{
++ struct proc_dir_entry *res =
++ create_proc_entry(name, mode, net->proc_net);
++ if (res)
++ res->proc_fops = fops;
++ return res;
++}
++
++static inline void proc_net_remove(struct net *net, const char *name)
++{
++ remove_proc_entry(name, net->proc_net);
++}
++
++#else
++
++#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
++#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; })
++static inline void proc_net_remove(struct net *net, const char *name) {}
++
++#endif /* CONFIG_PROC_FS */
++
++#endif /* __NET_NET_NAMESPACE_H */
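
Note: struct pernet_operations is the registration point the per-namespace conversions in the rest of this patch hang off; init runs for every namespace that comes up, exit for every one that dies. A minimal sketch of a subsystem using it; the foo names are illustrative:

    #include <linux/init.h>
    #include <net/net_namespace.h>

    static int foo_net_init(struct net *net)
    {
            /* set up this subsystem's per-namespace state, e.g. proc
             * entries under net->proc_net */
            return 0;
    }

    static void foo_net_exit(struct net *net)
    {
            /* tear that state down; runs once per dying namespace */
    }

    static struct pernet_operations foo_net_ops = {
            .init = foo_net_init,
            .exit = foo_net_exit,
    };

    static int __init foo_init(void)
    {
            return register_pernet_subsys(&foo_net_ops);
    }
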
+diff -Nurb linux-2.6.22-570/include/net/netlink.h linux-2.6.22-591/include/net/netlink.h
+--- linux-2.6.22-570/include/net/netlink.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/netlink.h 2007-12-21 15:36:14.000000000 -0500
+@@ -118,6 +118,9 @@
+ * Nested Attributes Construction:
+ * nla_nest_start(skb, type) start a nested attribute
+ * nla_nest_end(skb, nla) finalize a nested attribute
++ * nla_nest_compat_start(skb, type, start a nested compat attribute
++ * len, data)
++ * nla_nest_compat_end(skb, type) finalize a nested compat attribute
+ * nla_nest_cancel(skb, nla) cancel nested attribute construction
+ *
+ * Attribute Length Calculations:
+@@ -152,6 +155,7 @@
+ * nla_find_nested() find attribute in nested attributes
+ * nla_parse() parse and validate stream of attrs
+ * nla_parse_nested() parse nested attributes
++ * nla_parse_nested_compat() parse nested compat attributes
+ * nla_for_each_attr() loop over all attributes
+ * nla_for_each_nested() loop over the nested attributes
+ *=========================================================================
+@@ -170,6 +174,7 @@
+ NLA_FLAG,
+ NLA_MSECS,
+ NLA_NESTED,
++ NLA_NESTED_COMPAT,
+ NLA_NUL_STRING,
+ NLA_BINARY,
+ __NLA_TYPE_MAX,
+@@ -190,6 +195,7 @@
+ * NLA_NUL_STRING Maximum length of string (excluding NUL)
+ * NLA_FLAG Unused
+ * NLA_BINARY Maximum length of attribute payload
++ * NLA_NESTED_COMPAT Exact length of structure payload
+ * All other Exact length of attribute payload
+ *
+ * Example:
+@@ -212,6 +218,7 @@
+ struct nl_info {
+ struct nlmsghdr *nlh;
+ u32 pid;
++ struct net *net;
+ };
+
+ extern void netlink_run_queue(struct sock *sk, unsigned int *qlen,
+@@ -733,6 +740,39 @@
+ {
+ return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
+ }
++
++/**
++ * nla_parse_nested_compat - parse nested compat attributes
++ * @tb: destination array with maxtype+1 elements
++ * @maxtype: maximum attribute type to be expected
++ * @nla: attribute containing the nested attributes
++ * @data: pointer to point to contained structure
++ * @len: length of contained structure
++ * @policy: validation policy
++ *
++ * Parse a nested compat attribute. The compat attribute contains a structure
++ * and optionally a set of nested attributes. On success the data pointer
++ * points to the nested data and tb contains the parsed attributes
++ * (see nla_parse).
++ */
++static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
++ struct nlattr *nla,
++ const struct nla_policy *policy,
++ int len)
++{
++ if (nla_len(nla) < len)
++ return -1;
++ if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
++ return nla_parse_nested(tb, maxtype,
++ nla_data(nla) + NLA_ALIGN(len),
++ policy);
++ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
++ return 0;
++}
++
++#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \
++({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \
++ __nla_parse_nested_compat(tb, maxtype, nla, policy, len); })
+ /**
+ * nla_put_u8 - Add a u8 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+@@ -965,6 +1005,51 @@
+ }
+
+ /**
++ * nla_nest_compat_start - Start a new level of nested compat attributes
++ * @skb: socket buffer to add attributes to
++ * @attrtype: attribute type of container
++ * @attrlen: length of structure
++ * @data: pointer to structure
++ *
++ * Start a nested compat attribute that contains both a structure and
++ * a set of nested attributes.
++ *
++ * Returns the container attribute
++ */
++static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb,
++ int attrtype, int attrlen,
++ const void *data)
++{
++ struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
++
++ if (nla_put(skb, attrtype, attrlen, data) < 0)
++ return NULL;
++ if (nla_nest_start(skb, attrtype) == NULL) {
++ nlmsg_trim(skb, start);
++ return NULL;
++ }
++ return start;
++}
++
++/**
++ * nla_nest_compat_end - Finalize nesting of compat attributes
++ * @skb: socket buffer the attributes are stored in
++ * @start: container attribute
++ *
++ * Corrects the container attribute header to include all the
++ * appended attributes.
++ *
++ * Returns the total data length of the skb.
++ */
++static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
++{
++ struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len);
++
++ start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
++ return nla_nest_end(skb, nest);
++}
++
++/**
+ * nla_nest_cancel - Cancel nesting of attributes
+ * @skb: socket buffer the message is stored in
+ * @start: container attribute
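
Note: the compat-nesting helpers pair a fixed structure with an optional set of nested attributes in one container, as the kernel-doc above describes. A sketch of the dump side; MYATTR_CFG and struct my_cfg are made-up names:

    #include <net/netlink.h>

    struct my_cfg { __u32 flags; };     /* illustrative fixed part */

    static int dump_cfg(struct sk_buff *skb, const struct my_cfg *cfg)
    {
            struct nlattr *start;

            start = nla_nest_compat_start(skb, 1 /* MYATTR_CFG */,
                                          sizeof(*cfg), cfg);
            if (start == NULL)
                    return -EMSGSIZE;
            /* optional nested attributes would be nla_put()s here */
            return nla_nest_compat_end(skb, start);
    }
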
+diff -Nurb linux-2.6.22-570/include/net/pkt_cls.h linux-2.6.22-591/include/net/pkt_cls.h
+--- linux-2.6.22-570/include/net/pkt_cls.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/pkt_cls.h 2007-12-21 15:36:14.000000000 -0500
+@@ -2,6 +2,7 @@
+ #define __NET_PKT_CLS_H
+
+ #include <linux/pkt_cls.h>
++#include <net/net_namespace.h>
+ #include <net/sch_generic.h>
+ #include <net/act_api.h>
+
+@@ -357,7 +358,7 @@
+ if (indev[0]) {
+ if (!skb->iif)
+ return 0;
+- dev = __dev_get_by_index(skb->iif);
++ dev = __dev_get_by_index(&init_net, skb->iif);
+ if (!dev || strcmp(indev, dev->name))
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/include/net/protocol.h linux-2.6.22-591/include/net/protocol.h
+--- linux-2.6.22-570/include/net/protocol.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/protocol.h 2007-12-21 15:36:14.000000000 -0500
+@@ -86,6 +86,7 @@
+ #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */
+ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */
+ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */
++#define INET_PROTOSW_NETNS 0x08 /* Multiple namespaces support? */
+
+ extern struct net_protocol *inet_protocol_base;
+ extern struct net_protocol *inet_protos[MAX_INET_PROTOS];
+diff -Nurb linux-2.6.22-570/include/net/raw.h linux-2.6.22-591/include/net/raw.h
+--- linux-2.6.22-570/include/net/raw.h 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/include/net/raw.h 2007-12-21 15:36:14.000000000 -0500
+@@ -34,7 +34,7 @@
+ extern rwlock_t raw_v4_lock;
+
+
+-extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
++extern struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr,
+ int dif, int tag);
+
+diff -Nurb linux-2.6.22-570/include/net/rawv6.h linux-2.6.22-591/include/net/rawv6.h
+--- linux-2.6.22-570/include/net/rawv6.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/rawv6.h 2007-12-21 15:36:12.000000000 -0500
+@@ -3,6 +3,8 @@
+
+ #ifdef __KERNEL__
+
++#include <net/protocol.h>
++
+ #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS
+ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
+ extern rwlock_t raw_v6_lock;
+@@ -23,6 +25,13 @@
+ int type, int code,
+ int offset, __be32 info);
+
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
++int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
++ struct sk_buff *skb));
++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
++ struct sk_buff *skb));
++#endif
++
+ #endif
+
+ #endif
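A registration sketch for the new MH-filter hooks; the filter's return-value convention is defined by the raw-socket delivery code in net/ipv6/raw.c, so only the module wiring is shown and the mip6_ names are illustrative:

static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
        /* validate the Mobility Header here; see net/ipv6/raw.c for
         * how the return value steers delivery to this raw socket */
        return 1;
}

static int __init mip6_init(void)
{
        return rawv6_mh_filter_register(mip6_mh_filter);
}

static void __exit mip6_exit(void)
{
        rawv6_mh_filter_unregister(mip6_mh_filter);
}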
+diff -Nurb linux-2.6.22-570/include/net/route.h linux-2.6.22-591/include/net/route.h
+--- linux-2.6.22-570/include/net/route.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/net/route.h 2007-12-21 15:36:14.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <net/dst.h>
+ #include <net/inetpeer.h>
+ #include <net/flow.h>
++#include <net/sock.h>
+ #include <net/inet_sock.h>
+ #include <linux/in_route.h>
+ #include <linux/rtnetlink.h>
+@@ -66,7 +67,6 @@
+
+ unsigned rt_flags;
+ __u16 rt_type;
+- __u16 rt_multipath_alg;
+
+ __be32 rt_dst; /* Path destination */
+ __be32 rt_src; /* Path source */
+@@ -123,9 +123,9 @@
+ extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
+ extern void ip_rt_send_redirect(struct sk_buff *skb);
+
+-extern unsigned inet_addr_type(__be32 addr);
++extern unsigned inet_addr_type(struct net *net, __be32 addr);
+ extern void ip_rt_multicast_event(struct in_device *);
+-extern int ip_rt_ioctl(unsigned int cmd, void __user *arg);
++extern int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+ extern void ip_rt_get_source(u8 *src, struct rtable *rt);
+ extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+@@ -154,7 +154,8 @@
+ __be16 sport, __be16 dport, struct sock *sk,
+ int flags)
+ {
+- struct flowi fl = { .oif = oif,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = oif,
+ .nl_u = { .ip4_u = { .daddr = dst,
+ .saddr = src,
+ .tos = tos } },
+@@ -199,6 +200,7 @@
+ struct flowi fl;
+
+ memcpy(&fl, &(*rp)->fl, sizeof(fl));
++ fl.fl_net = sk->sk_net;
+ fl.fl_ip_sport = sport;
+ fl.fl_ip_dport = dport;
+ fl.proto = protocol;
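Both helpers above now stamp the namespace into the flow key before the lookup, so the same pattern applies to open-coded routing calls. A sketch, assuming ip_route_output_key() keeps its 2.6.22 signature:

static struct rtable *foo_route_to(struct sock *sk, __be32 daddr)
{
        struct rtable *rt;
        struct flowi fl = {
                .fl_net = sk->sk_net,   /* namespace travels with the key */
                .nl_u = { .ip4_u = { .daddr = daddr } },
        };

        if (ip_route_output_key(&rt, &fl))
                return NULL;
        return rt;
}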
+diff -Nurb linux-2.6.22-570/include/net/rtnetlink.h linux-2.6.22-591/include/net/rtnetlink.h
+--- linux-2.6.22-570/include/net/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/rtnetlink.h 2007-12-21 15:36:12.000000000 -0500
+@@ -22,4 +22,62 @@
+ return AF_UNSPEC;
+ }
+
++/**
++ * struct rtnl_link_ops - rtnetlink link operations
++ *
++ * @list: Used internally
++ * @kind: Identifier
++ * @maxtype: Highest device specific netlink attribute number
++ * @policy: Netlink policy for device specific attribute validation
++ * @validate: Optional validation function for netlink/changelink parameters
++ * @priv_size: sizeof net_device private space
++ * @setup: net_device setup function
++ * @newlink: Function for configuring and registering a new device
++ * @changelink: Function for changing parameters of an existing device
++ * @dellink: Function to remove a device
++ * @get_size: Function to calculate required room for dumping device
++ * specific netlink attributes
++ * @fill_info: Function to dump device specific netlink attributes
++ * @get_xstats_size: Function to calculate required room for dumping device
++ * specific statistics
++ * @fill_xstats: Function to dump device specific statistics
++ */
++struct rtnl_link_ops {
++ struct list_head list;
++
++ const char *kind;
++
++ size_t priv_size;
++ void (*setup)(struct net_device *dev);
++
++ int maxtype;
++ const struct nla_policy *policy;
++ int (*validate)(struct nlattr *tb[],
++ struct nlattr *data[]);
++
++ int (*newlink)(struct net_device *dev,
++ struct nlattr *tb[],
++ struct nlattr *data[]);
++ int (*changelink)(struct net_device *dev,
++ struct nlattr *tb[],
++ struct nlattr *data[]);
++ void (*dellink)(struct net_device *dev);
++
++ size_t (*get_size)(const struct net_device *dev);
++ int (*fill_info)(struct sk_buff *skb,
++ const struct net_device *dev);
++
++ size_t (*get_xstats_size)(const struct net_device *dev);
++ int (*fill_xstats)(struct sk_buff *skb,
++ const struct net_device *dev);
++};
++
++extern int __rtnl_link_register(struct rtnl_link_ops *ops);
++extern void __rtnl_link_unregister(struct rtnl_link_ops *ops);
++
++extern int rtnl_link_register(struct rtnl_link_ops *ops);
++extern void rtnl_link_unregister(struct rtnl_link_ops *ops);
++
++#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
++
+ #endif
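A minimal registration sketch for the ops structure introduced above; struct foo_priv and foo_setup() stand in for driver code:

static struct rtnl_link_ops foo_link_ops = {
        .kind      = "foo",
        .priv_size = sizeof(struct foo_priv),
        .setup     = foo_setup,
};

static int __init foo_init(void)
{
        return rtnl_link_register(&foo_link_ops);   /* takes RTNL */
}

static void __exit foo_exit(void)
{
        rtnl_link_unregister(&foo_link_ops);
}

module_init(foo_init);
module_exit(foo_exit);
/* resolves the kind to this module via the "rtnl-link-foo" alias */
MODULE_ALIAS_RTNL_LINK("foo");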
+diff -Nurb linux-2.6.22-570/include/net/sock.h linux-2.6.22-591/include/net/sock.h
+--- linux-2.6.22-570/include/net/sock.h 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/include/net/sock.h 2007-12-21 15:36:14.000000000 -0500
+@@ -55,6 +55,7 @@
+ #include <asm/atomic.h>
+ #include <net/dst.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+
+ /*
+ * This structure really needs to be cleaned up.
+@@ -105,6 +106,7 @@
+ * @skc_refcnt: reference count
+ * @skc_hash: hash value used with various protocol lookup tables
+ * @skc_prot: protocol handlers inside a network family
++ * @skc_net: reference to the network namespace of this socket
+ *
+ * This is the minimal network layer representation of sockets, the header
+ * for struct sock and struct inet_timewait_sock.
+@@ -119,6 +121,7 @@
+ atomic_t skc_refcnt;
+ unsigned int skc_hash;
+ struct proto *skc_prot;
++ struct net *skc_net;
+ xid_t skc_xid;
+ struct vx_info *skc_vx_info;
+ nid_t skc_nid;
+@@ -199,6 +202,7 @@
+ #define sk_refcnt __sk_common.skc_refcnt
+ #define sk_hash __sk_common.skc_hash
+ #define sk_prot __sk_common.skc_prot
++#define sk_net __sk_common.skc_net
+ #define sk_xid __sk_common.skc_xid
+ #define sk_vx_info __sk_common.skc_vx_info
+ #define sk_nid __sk_common.skc_nid
+@@ -781,7 +785,7 @@
+ SINGLE_DEPTH_NESTING)
+ #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock))
+
+-extern struct sock *sk_alloc(int family,
++extern struct sock *sk_alloc(struct net *net, int family,
+ gfp_t priority,
+ struct proto *prot, int zero_it);
+ extern void sk_free(struct sock *sk);
+@@ -1010,6 +1014,7 @@
+ #endif
+
+ memcpy(nsk, osk, osk->sk_prot->obj_size);
++ get_net(nsk->sk_net);
+ #ifdef CONFIG_SECURITY_NETWORK
+ nsk->sk_security = sptr;
+ security_sk_clone(osk, nsk);
+@@ -1373,6 +1378,7 @@
+
+ #ifdef CONFIG_SYSCTL
+ extern struct ctl_table core_table[];
++extern struct ctl_table multi_core_table[];
+ #endif
+
+ extern int sysctl_optmem_max;
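The new first argument threads the namespace through socket creation, and the sk_clone() hunk above takes a matching reference for the child socket. A sketch of a protocol create hook under the new signature (PF_FOO and foo_proto are placeholders):

static int foo_create(struct net *net, struct socket *sock, int protocol)
{
        struct sock *sk;

        /* sk_alloc() records net in sk->sk_net for later lookups */
        sk = sk_alloc(net, PF_FOO, GFP_KERNEL, &foo_proto, 1);
        if (sk == NULL)
                return -ENOBUFS;

        sock_init_data(sock, sk);
        return 0;
}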
+diff -Nurb linux-2.6.22-570/include/net/tcp.h linux-2.6.22-591/include/net/tcp.h
+--- linux-2.6.22-570/include/net/tcp.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/net/tcp.h 2007-12-21 15:36:14.000000000 -0500
+@@ -191,8 +191,6 @@
+ extern struct inet_timewait_death_row tcp_death_row;
+
+ /* sysctl variables for tcp */
+-extern int sysctl_tcp_timestamps;
+-extern int sysctl_tcp_window_scaling;
+ extern int sysctl_tcp_sack;
+ extern int sysctl_tcp_fin_timeout;
+ extern int sysctl_tcp_keepalive_time;
+@@ -1293,6 +1291,7 @@
+ };
+
+ struct tcp_iter_state {
++ struct net *net;
+ sa_family_t family;
+ enum tcp_seq_states state;
+ struct sock *syn_wait_sk;
+@@ -1300,8 +1299,8 @@
+ struct seq_operations seq_ops;
+ };
+
+-extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
+-extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
++extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
++extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
+
+ extern struct request_sock_ops tcp_request_sock_ops;
+
+diff -Nurb linux-2.6.22-570/include/net/tipc/tipc_port.h linux-2.6.22-591/include/net/tipc/tipc_port.h
+--- linux-2.6.22-570/include/net/tipc/tipc_port.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/tipc/tipc_port.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
+ *
+- * Copyright (c) 1994-2006, Ericsson AB
+- * Copyright (c) 2005, Wind River Systems
++ * Copyright (c) 1994-2007, Ericsson AB
++ * Copyright (c) 2005-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -55,6 +55,7 @@
+ * @conn_unacked: number of unacknowledged messages received from peer port
+ * @published: non-zero if port has one or more associated names
+ * @congested: non-zero if cannot send because of link or port congestion
++ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @ref: unique reference to port in TIPC object registry
+ * @phdr: preformatted message header used when sending messages
+ */
+@@ -68,6 +69,7 @@
+ u32 conn_unacked;
+ int published;
+ u32 congested;
++ u32 max_pkt;
+ u32 ref;
+ struct tipc_msg phdr;
+ };
+diff -Nurb linux-2.6.22-570/include/net/udp.h linux-2.6.22-591/include/net/udp.h
+--- linux-2.6.22-570/include/net/udp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/udp.h 2007-12-21 15:36:14.000000000 -0500
+@@ -160,6 +160,7 @@
+ };
+
+ struct udp_iter_state {
++ struct net *net;
+ sa_family_t family;
+ struct hlist_head *hashtable;
+ int bucket;
+@@ -167,8 +168,8 @@
+ };
+
+ #ifdef CONFIG_PROC_FS
+-extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
+-extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
++extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
++extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
+
+ extern int udp4_proc_init(void);
+ extern void udp4_proc_exit(void);
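Registration becomes per-namespace, so each namespace gets its own /proc/net/udp view. A sketch of an init path, assuming an afinfo descriptor shaped like the in-tree udp4_seq_afinfo:

static int foo_udp4_proc_init(struct net *net)
{
        return udp_proc_register(net, &udp4_seq_afinfo);
}

static void foo_udp4_proc_exit(struct net *net)
{
        udp_proc_unregister(net, &udp4_seq_afinfo);
}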
+diff -Nurb linux-2.6.22-570/include/net/wext.h linux-2.6.22-591/include/net/wext.h
+--- linux-2.6.22-570/include/net/wext.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/net/wext.h 2007-12-21 15:36:14.000000000 -0500
+@@ -5,16 +5,23 @@
+ * wireless extensions interface to the core code
+ */
+
++struct net;
++
+ #ifdef CONFIG_WIRELESS_EXT
+-extern int wext_proc_init(void);
+-extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++extern int wext_proc_init(struct net *net);
++extern void wext_proc_exit(struct net *net);
++extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ void __user *arg);
+ #else
+-static inline int wext_proc_init(void)
++static inline int wext_proc_init(struct net *net)
+ {
+ return 0;
+ }
+-static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++static inline void wext_proc_exit(struct net *net)
++{
++ return;
++}
++static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ void __user *arg)
+ {
+ return -EINVAL;
+diff -Nurb linux-2.6.22-570/include/net/xfrm.h linux-2.6.22-591/include/net/xfrm.h
+--- linux-2.6.22-570/include/net/xfrm.h 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/include/net/xfrm.h 2007-12-21 15:36:14.000000000 -0500
+@@ -19,13 +19,21 @@
+ #include <net/ipv6.h>
+ #include <net/ip6_fib.h>
+
++#define XFRM_PROTO_ESP 50
++#define XFRM_PROTO_AH 51
++#define XFRM_PROTO_COMP 108
++#define XFRM_PROTO_IPIP 4
++#define XFRM_PROTO_IPV6 41
++#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
++#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
++
+ #define XFRM_ALIGN8(len) (((len) + 7) & ~7)
+ #define MODULE_ALIAS_XFRM_MODE(family, encap) \
+ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
++#define MODULE_ALIAS_XFRM_TYPE(family, proto) \
++ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
+
+ extern struct sock *xfrm_nl;
+-extern u32 sysctl_xfrm_aevent_etime;
+-extern u32 sysctl_xfrm_aevent_rseqth;
+
+ extern struct mutex xfrm_cfg_mutex;
+
+@@ -509,11 +517,9 @@
+ case IPPROTO_ICMPV6:
+ port = htons(fl->fl_icmp_type);
+ break;
+-#ifdef CONFIG_IPV6_MIP6
+ case IPPROTO_MH:
+ port = htons(fl->fl_mh_type);
+ break;
+-#endif
+ default:
+ port = 0; /*XXX*/
+ }
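The new alias macro gives every transform type a loadable module name, mirroring MODULE_ALIAS_XFRM_MODE. An ESP implementation, for example, would declare:

/* the nested __stringify() expands the numeric values, so for
 * AF_INET (2) and XFRM_PROTO_ESP (50) this emits "xfrm-type-2-50" */
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);

which lets the xfrm state lookup request a missing type module on demand instead of requiring it to be built in or preloaded.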
+diff -Nurb linux-2.6.22-570/include/scsi/iscsi_if.h linux-2.6.22-591/include/scsi/iscsi_if.h
+--- linux-2.6.22-570/include/scsi/iscsi_if.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/iscsi_if.h 2007-12-21 15:36:12.000000000 -0500
+@@ -48,6 +48,7 @@
+ ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT = UEVENT_BASE + 14,
+
+ ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15,
++ ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16,
+
+ /* up events */
+ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1,
+@@ -71,6 +72,8 @@
+ /* messages u -> k */
+ struct msg_create_session {
+ uint32_t initial_cmdsn;
++ uint16_t cmds_max;
++ uint16_t queue_depth;
+ } c_session;
+ struct msg_destroy_session {
+ uint32_t sid;
+@@ -136,6 +139,11 @@
+ */
+ uint32_t enable;
+ } tgt_dscvr;
++ struct msg_set_host_param {
++ uint32_t host_no;
++ uint32_t param; /* enum iscsi_host_param */
++ uint32_t len;
++ } set_host_param;
+ } u;
+ union {
+ /* messages k -> u */
+@@ -223,6 +231,11 @@
+ ISCSI_PARAM_CONN_PORT,
+ ISCSI_PARAM_CONN_ADDRESS,
+
++ ISCSI_PARAM_USERNAME,
++ ISCSI_PARAM_USERNAME_IN,
++ ISCSI_PARAM_PASSWORD,
++ ISCSI_PARAM_PASSWORD_IN,
++
+ /* must always be last */
+ ISCSI_PARAM_MAX,
+ };
+@@ -249,6 +262,24 @@
+ #define ISCSI_SESS_RECOVERY_TMO (1 << ISCSI_PARAM_SESS_RECOVERY_TMO)
+ #define ISCSI_CONN_PORT (1 << ISCSI_PARAM_CONN_PORT)
+ #define ISCSI_CONN_ADDRESS (1 << ISCSI_PARAM_CONN_ADDRESS)
++#define ISCSI_USERNAME (1 << ISCSI_PARAM_USERNAME)
++#define ISCSI_USERNAME_IN (1 << ISCSI_PARAM_USERNAME_IN)
++#define ISCSI_PASSWORD (1 << ISCSI_PARAM_PASSWORD)
++#define ISCSI_PASSWORD_IN (1 << ISCSI_PARAM_PASSWORD_IN)
++
++/* iSCSI HBA params */
++enum iscsi_host_param {
++ ISCSI_HOST_PARAM_HWADDRESS,
++ ISCSI_HOST_PARAM_INITIATOR_NAME,
++ ISCSI_HOST_PARAM_NETDEV_NAME,
++ ISCSI_HOST_PARAM_IPADDRESS,
++ ISCSI_HOST_PARAM_MAX,
++};
++
++#define ISCSI_HOST_HWADDRESS (1 << ISCSI_HOST_PARAM_HWADDRESS)
++#define ISCSI_HOST_INITIATOR_NAME (1 << ISCSI_HOST_PARAM_INITIATOR_NAME)
++#define ISCSI_HOST_NETDEV_NAME (1 << ISCSI_HOST_PARAM_NETDEV_NAME)
++#define ISCSI_HOST_IPADDRESS (1 << ISCSI_HOST_PARAM_IPADDRESS)
+
+ #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle)
+ #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr)
+@@ -272,6 +303,9 @@
+ #define CAP_MULTI_CONN 0x40
+ #define CAP_TEXT_NEGO 0x80
+ #define CAP_MARKERS 0x100
++#define CAP_FW_DB 0x200
++#define CAP_SENDTARGETS_OFFLOAD 0x400
++#define CAP_DATA_PATH_OFFLOAD 0x800
+
+ /*
+ * These flags describes reason of stop_conn() call
+diff -Nurb linux-2.6.22-570/include/scsi/libiscsi.h linux-2.6.22-591/include/scsi/libiscsi.h
+--- linux-2.6.22-570/include/scsi/libiscsi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/libiscsi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -48,9 +48,8 @@
+ #define debug_scsi(fmt...)
+ #endif
+
+-#define ISCSI_XMIT_CMDS_MAX 128 /* must be power of 2 */
+-#define ISCSI_MGMT_CMDS_MAX 32 /* must be power of 2 */
+-#define ISCSI_CONN_MAX 1
++#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* must be power of 2 */
++#define ISCSI_MGMT_CMDS_MAX 16 /* must be power of 2 */
+
+ #define ISCSI_MGMT_ITT_OFFSET 0xa00
+
+@@ -73,6 +72,8 @@
+ #define ISCSI_AGE_SHIFT 28
+ #define ISCSI_AGE_MASK (0xf << ISCSI_AGE_SHIFT)
+
++#define ISCSI_ADDRESS_BUF_LEN 64
++
+ struct iscsi_mgmt_task {
+ /*
+ * Because LLDs allocate their hdr differently, this is a pointer to
+@@ -80,7 +81,7 @@
+ */
+ struct iscsi_hdr *hdr;
+ char *data; /* mgmt payload */
+- int data_count; /* counts data to be sent */
++ unsigned data_count; /* counts data to be sent */
+ uint32_t itt; /* this ITT */
+ void *dd_data; /* driver/transport data */
+ struct list_head running;
+@@ -90,6 +91,7 @@
+ ISCSI_TASK_COMPLETED,
+ ISCSI_TASK_PENDING,
+ ISCSI_TASK_RUNNING,
++ ISCSI_TASK_ABORTING,
+ };
+
+ struct iscsi_cmd_task {
+@@ -99,16 +101,14 @@
+ */
+ struct iscsi_cmd *hdr;
+ int itt; /* this ITT */
+- int datasn; /* DataSN */
+
+ uint32_t unsol_datasn;
+- int imm_count; /* imm-data (bytes) */
+- int unsol_count; /* unsolicited (bytes)*/
++ unsigned imm_count; /* imm-data (bytes) */
++ unsigned unsol_count; /* unsolicited (bytes)*/
+ /* offset in unsolicited stream (bytes); */
+- int unsol_offset;
+- int data_count; /* remaining Data-Out */
++ unsigned unsol_offset;
++ unsigned data_count; /* remaining Data-Out */
+ struct scsi_cmnd *sc; /* associated SCSI cmd*/
+- int total_length;
+ struct iscsi_conn *conn; /* used connection */
+ struct iscsi_mgmt_task *mtask; /* tmf mtask in progr */
+
+@@ -152,18 +152,11 @@
+ struct iscsi_cmd_task *ctask; /* xmit ctask in progress */
+
+ /* xmit */
+- struct kfifo *immqueue; /* immediate xmit queue */
+ struct kfifo *mgmtqueue; /* mgmt (control) xmit queue */
+ struct list_head mgmt_run_list; /* list of control tasks */
+ struct list_head xmitqueue; /* data-path cmd queue */
+ struct list_head run_list; /* list of cmds in progress */
+ struct work_struct xmitwork; /* per-conn. xmit workqueue */
+- /*
+- * serializes connection xmit, access to kfifos:
+- * xmitqueue, immqueue, mgmtqueue
+- */
+- struct mutex xmitmutex;
+-
+ unsigned long suspend_tx; /* suspend Tx */
+ unsigned long suspend_rx; /* suspend Rx */
+
+@@ -174,8 +167,8 @@
+ int tmabort_state; /* see TMABORT_INITIAL, etc.*/
+
+ /* negotiated params */
+- int max_recv_dlength; /* initiator_max_recv_dsl*/
+- int max_xmit_dlength; /* target_max_recv_dsl */
++ unsigned max_recv_dlength; /* initiator_max_recv_dsl*/
++ unsigned max_xmit_dlength; /* target_max_recv_dsl */
+ int hdrdgst_en;
+ int datadgst_en;
+ int ifmarker_en;
+@@ -183,6 +176,12 @@
+ /* values userspace uses to id a conn */
+ int persistent_port;
+ char *persistent_address;
++ /* remote portal currently connected to */
++ int portal_port;
++ char portal_address[ISCSI_ADDRESS_BUF_LEN];
++ /* local address */
++ int local_port;
++ char local_address[ISCSI_ADDRESS_BUF_LEN];
+
+ /* MIB-statistics */
+ uint64_t txdata_octets;
+@@ -213,18 +212,25 @@
+
+ /* configuration */
+ int initial_r2t_en;
+- int max_r2t;
++ unsigned max_r2t;
+ int imm_data_en;
+- int first_burst;
+- int max_burst;
++ unsigned first_burst;
++ unsigned max_burst;
+ int time2wait;
+ int time2retain;
+ int pdu_inorder_en;
+ int dataseq_inorder_en;
+ int erl;
+ int tpgt;
++ char *username;
++ char *username_in;
++ char *password;
++ char *password_in;
+ char *targetname;
+-
++ char *initiatorname;
++ /* hw address or netdev iscsi connection is bound to */
++ char *hwaddress;
++ char *netdev;
+ /* control data */
+ struct iscsi_transport *tt;
+ struct Scsi_Host *host;
+@@ -255,12 +261,22 @@
+ extern int iscsi_queuecommand(struct scsi_cmnd *sc,
+ void (*done)(struct scsi_cmnd *));
+
++
++/*
++ * iSCSI host helpers.
++ */
++extern int iscsi_host_set_param(struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf,
++ int buflen);
++extern int iscsi_host_get_param(struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf);
++
+ /*
+ * session management
+ */
+ extern struct iscsi_cls_session *
+ iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *,
+- int, int, uint32_t, uint32_t *);
++ uint16_t, uint16_t, int, int, uint32_t, uint32_t *);
+ extern void iscsi_session_teardown(struct iscsi_cls_session *);
+ extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
+ extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
+@@ -289,8 +305,7 @@
+ /*
+ * pdu and task processing
+ */
+-extern int iscsi_check_assign_cmdsn(struct iscsi_session *,
+- struct iscsi_nopin *);
++extern void iscsi_update_cmdsn(struct iscsi_session *, struct iscsi_nopin *);
+ extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *,
+ struct iscsi_data *hdr);
+ extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
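A sketch of an LLD wiring the new host-parameter helpers into its iscsi_transport callbacks; the generic helpers cover the common string parameters, so thin wrappers usually suffice (the foo_ names are placeholders):

static int foo_host_get_param(struct Scsi_Host *shost,
                              enum iscsi_host_param param, char *buf)
{
        return iscsi_host_get_param(shost, param, buf);
}

static int foo_host_set_param(struct Scsi_Host *shost,
                              enum iscsi_host_param param,
                              char *buf, int buflen)
{
        return iscsi_host_set_param(shost, param, buf, buflen);
}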
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_cmnd.h linux-2.6.22-591/include/scsi/scsi_cmnd.h
+--- linux-2.6.22-570/include/scsi/scsi_cmnd.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/scsi_cmnd.h 2007-12-21 15:36:12.000000000 -0500
+@@ -135,4 +135,24 @@
+ extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t);
+ extern void scsi_free_sgtable(struct scatterlist *, int);
+
++extern int scsi_dma_map(struct scsi_cmnd *cmd);
++extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
++
++#define scsi_sg_count(cmd) ((cmd)->use_sg)
++#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer)
++#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
++
++static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid)
++{
++ cmd->resid = resid;
++}
++
++static inline int scsi_get_resid(struct scsi_cmnd *cmd)
++{
++ return cmd->resid;
++}
++
++#define scsi_for_each_sg(cmd, sg, nseg, __i) \
++ for (__i = 0, sg = scsi_sglist(cmd); __i < (nseg); __i++, (sg)++)
++
+ #endif /* _SCSI_SCSI_CMND_H */
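These accessors decouple LLDs from the request_buffer/use_sg fields they wrap. A sketch of the intended idiom in a driver's command setup path, with foo_hw_add_sg() standing in for hardware-specific code:

static int foo_map_command(struct scsi_cmnd *cmd)
{
        struct scatterlist *sg;
        int i, nseg;

        nseg = scsi_dma_map(cmd);       /* 0 means no data, <0 is failure */
        if (nseg < 0)
                return -ENOMEM;

        scsi_for_each_sg(cmd, sg, nseg, i)
                foo_hw_add_sg(sg_dma_address(sg), sg_dma_len(sg));

        return 0;
}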
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_device.h linux-2.6.22-591/include/scsi/scsi_device.h
+--- linux-2.6.22-570/include/scsi/scsi_device.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/scsi_device.h 2007-12-21 15:36:12.000000000 -0500
+@@ -287,6 +287,7 @@
+ extern void scsi_target_unblock(struct device *);
+ extern void scsi_remove_target(struct device *);
+ extern void int_to_scsilun(unsigned int, struct scsi_lun *);
++extern int scsilun_to_int(struct scsi_lun *);
+ extern const char *scsi_device_state_name(enum scsi_device_state);
+ extern int scsi_is_sdev_device(const struct device *);
+ extern int scsi_is_target_device(const struct device *);
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_host.h linux-2.6.22-591/include/scsi/scsi_host.h
+--- linux-2.6.22-570/include/scsi/scsi_host.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/scsi_host.h 2007-12-21 15:36:12.000000000 -0500
+@@ -339,12 +339,6 @@
+ enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+
+ /*
+- * suspend support
+- */
+- int (*resume)(struct scsi_device *);
+- int (*suspend)(struct scsi_device *, pm_message_t state);
+-
+- /*
+ * Name of proc directory
+ */
+ char *proc_name;
+@@ -677,6 +671,10 @@
+ #define shost_printk(prefix, shost, fmt, a...) \
+ dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a)
+
++static inline void *shost_priv(struct Scsi_Host *shost)
++{
++ return (void *)shost->hostdata;
++}
+
+ int scsi_is_host_device(const struct device *);
+
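shost_priv() simply casts the hostdata[] region that scsi_host_alloc() already sizes for the driver; a sketch with struct foo_hba as the assumed private type:

static int foo_init_hba(struct Scsi_Host *shost)
{
        struct foo_hba *hba = shost_priv(shost);

        /* hostdata[] was allocated together with the host itself,
         * so no separate allocation or lookup table is needed */
        hba->shost = shost;     /* assumed back-pointer field */
        return 0;
}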
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_fc.h linux-2.6.22-591/include/scsi/scsi_transport_fc.h
+--- linux-2.6.22-570/include/scsi/scsi_transport_fc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/scsi_transport_fc.h 2007-12-21 15:36:12.000000000 -0500
+@@ -19,7 +19,7 @@
+ *
+ * ========
+ *
+- * Copyright (C) 2004-2005 James Smart, Emulex Corporation
++ * Copyright (C) 2004-2007 James Smart, Emulex Corporation
+ * Rewrite for host, target, device, and remote port attributes,
+ * statistics, and service functions...
+ *
+@@ -62,8 +62,10 @@
+ FC_PORTTYPE_NLPORT, /* (Public) Loop w/ FLPort */
+ FC_PORTTYPE_LPORT, /* (Private) Loop w/o FLPort */
+ FC_PORTTYPE_PTP, /* Point to Point w/ another NPort */
++ FC_PORTTYPE_NPIV, /* VPORT based on NPIV */
+ };
+
++
+ /*
+ * fc_port_state: If you alter this, you also need to alter scsi_transport_fc.c
+ * (for the ascii descriptions).
+@@ -84,6 +86,25 @@
+
+
+ /*
++ * fc_vport_state: If you alter this, you also need to alter
++ * scsi_transport_fc.c (for the ascii descriptions).
++ */
++enum fc_vport_state {
++ FC_VPORT_UNKNOWN,
++ FC_VPORT_ACTIVE,
++ FC_VPORT_DISABLED,
++ FC_VPORT_LINKDOWN,
++ FC_VPORT_INITIALIZING,
++ FC_VPORT_NO_FABRIC_SUPP,
++ FC_VPORT_NO_FABRIC_RSCS,
++ FC_VPORT_FABRIC_LOGOUT,
++ FC_VPORT_FABRIC_REJ_WWN,
++ FC_VPORT_FAILED,
++};
++
++
++
++/*
+ * FC Classes of Service
+ * Note: values are not enumerated, as they can be "or'd" together
+ * for reporting (e.g. report supported_classes). If you alter this list,
+@@ -124,18 +145,116 @@
+ };
+
+ /*
+- * FC Remote Port Roles
++ * FC Port Roles
+ * Note: values are not enumerated, as they can be "or'd" together
+ * for reporting (e.g. report roles). If you alter this list,
+ * you also need to alter scsi_transport_fc.c (for the ascii descriptions).
+ */
+-#define FC_RPORT_ROLE_UNKNOWN 0x00
+-#define FC_RPORT_ROLE_FCP_TARGET 0x01
+-#define FC_RPORT_ROLE_FCP_INITIATOR 0x02
+-#define FC_RPORT_ROLE_IP_PORT 0x04
++#define FC_PORT_ROLE_UNKNOWN 0x00
++#define FC_PORT_ROLE_FCP_TARGET 0x01
++#define FC_PORT_ROLE_FCP_INITIATOR 0x02
++#define FC_PORT_ROLE_IP_PORT 0x04
++
++/* The following are for compatibility */
++#define FC_RPORT_ROLE_UNKNOWN FC_PORT_ROLE_UNKNOWN
++#define FC_RPORT_ROLE_FCP_TARGET FC_PORT_ROLE_FCP_TARGET
++#define FC_RPORT_ROLE_FCP_INITIATOR FC_PORT_ROLE_FCP_INITIATOR
++#define FC_RPORT_ROLE_IP_PORT FC_PORT_ROLE_IP_PORT
++
++
++/* Macro for use in defining Virtual Port attributes */
++#define FC_VPORT_ATTR(_name,_mode,_show,_store) \
++struct class_device_attribute class_device_attr_vport_##_name = \
++ __ATTR(_name,_mode,_show,_store)
+
+
+ /*
++ * FC Virtual Port Attributes
++ *
++ * This structure exists for each FC port that is a virtual FC port.
++ * Virtual ports share the physical link with the physical port. Each
++ * virtual port has a unique presence on the SAN, and may be
++ * instantiated via NPIV, Virtual Fabrics, or via additional ALPAs. As
++ * the vport is a unique presence, each vport has its own view of the
++ * fabric, authentication privilege, and priorities.
++ *
++ * A virtual port may support one or more FC4 roles. Typically it is an
++ * FCP Initiator. It could be an FCP Target, or exist solely for an
++ * IP-over-FC role. FC port attributes for the vport will be reported on any
++ * fc_host class object allocated for an FCP Initiator.
++ *
++ * --
++ *
++ * Fixed attributes are not expected to change. The driver is
++ * expected to set these values after receiving the fc_vport structure
++ * via the vport_create() call from the transport.
++ * The transport fully manages all get functions w/o driver interaction.
++ *
++ * Dynamic attributes are expected to change. The driver participates
++ * in all get/set operations via functions provided by the driver.
++ *
++ * Private attributes are transport-managed values. They are fully
++ * managed by the transport w/o driver interaction.
++ */
++
++#define FC_VPORT_SYMBOLIC_NAMELEN 64
++struct fc_vport {
++ /* Fixed Attributes */
++
++ /* Dynamic Attributes */
++
++ /* Private (Transport-managed) Attributes */
++ enum fc_vport_state vport_state;
++ enum fc_vport_state vport_last_state;
++ u64 node_name;
++ u64 port_name;
++ u32 roles;
++ u32 vport_id; /* Admin Identifier for the vport */
++ enum fc_port_type vport_type;
++ char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
++
++ /* exported data */
++ void *dd_data; /* Used for driver-specific storage */
++
++ /* internal data */
++ struct Scsi_Host *shost; /* Physical Port Parent */
++ unsigned int channel;
++ u32 number;
++ u8 flags;
++ struct list_head peers;
++ struct device dev;
++ struct work_struct vport_delete_work;
++} __attribute__((aligned(sizeof(unsigned long))));
++
++/* bit field values for struct fc_vport "flags" field: */
++#define FC_VPORT_CREATING 0x01
++#define FC_VPORT_DELETING 0x02
++#define FC_VPORT_DELETED 0x04
++#define FC_VPORT_DEL 0x06 /* Any DELETE state */
++
++#define dev_to_vport(d) \
++ container_of(d, struct fc_vport, dev)
++#define transport_class_to_vport(classdev) \
++ dev_to_vport(classdev->dev)
++#define vport_to_shost(v) \
++ (v->shost)
++#define vport_to_shost_channel(v) \
++ (v->channel)
++#define vport_to_parent(v) \
++ (v->dev.parent)
++
++
++/* Error return codes for vport_create() callback */
++#define VPCERR_UNSUPPORTED -ENOSYS /* no driver/adapter
++ support */
++#define VPCERR_BAD_WWN -ENOTUNIQ /* driver validation
++ of WWNs failed */
++#define VPCERR_NO_FABRIC_SUPP -EOPNOTSUPP /* Fabric connection
++ is loop or the
++ Fabric Port does
++ not support NPIV */
++
++/*
+ * fc_rport_identifiers: This set of data contains all elements
+ * to uniquely identify a remote FC port. The driver uses this data
+ * to report the existence of a remote FC port in the topology. Internally,
+@@ -149,6 +268,7 @@
+ u32 roles;
+ };
+
++
+ /* Macro for use in defining Remote Port attributes */
+ #define FC_RPORT_ATTR(_name,_mode,_show,_store) \
+ struct class_device_attribute class_device_attr_rport_##_name = \
+@@ -343,6 +463,7 @@
+ u8 supported_fc4s[FC_FC4_LIST_SIZE];
+ u32 supported_speeds;
+ u32 maxframe_size;
++ u16 max_npiv_vports;
+ char serial_number[FC_SERIAL_NUMBER_SIZE];
+
+ /* Dynamic Attributes */
+@@ -361,8 +482,11 @@
+ /* internal data */
+ struct list_head rports;
+ struct list_head rport_bindings;
++ struct list_head vports;
+ u32 next_rport_number;
+ u32 next_target_id;
++ u32 next_vport_number;
++ u16 npiv_vports_inuse;
+
+ /* work queues for rport state manipulation */
+ char work_q_name[KOBJ_NAME_LEN];
+@@ -388,6 +512,8 @@
+ (((struct fc_host_attrs *)(x)->shost_data)->supported_speeds)
+ #define fc_host_maxframe_size(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->maxframe_size)
++#define fc_host_max_npiv_vports(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->max_npiv_vports)
+ #define fc_host_serial_number(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->serial_number)
+ #define fc_host_port_id(x) \
+@@ -412,10 +538,16 @@
+ (((struct fc_host_attrs *)(x)->shost_data)->rports)
+ #define fc_host_rport_bindings(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->rport_bindings)
++#define fc_host_vports(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->vports)
+ #define fc_host_next_rport_number(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number)
+ #define fc_host_next_target_id(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->next_target_id)
++#define fc_host_next_vport_number(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->next_vport_number)
++#define fc_host_npiv_vports_inuse(x) \
++ (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse)
+ #define fc_host_work_q_name(x) \
+ (((struct fc_host_attrs *)(x)->shost_data)->work_q_name)
+ #define fc_host_work_q(x) \
+@@ -452,8 +584,14 @@
+ void (*dev_loss_tmo_callbk)(struct fc_rport *);
+ void (*terminate_rport_io)(struct fc_rport *);
+
++ void (*set_vport_symbolic_name)(struct fc_vport *);
++ int (*vport_create)(struct fc_vport *, bool);
++ int (*vport_disable)(struct fc_vport *, bool);
++ int (*vport_delete)(struct fc_vport *);
++
+ /* allocation lengths for host-specific data */
+ u32 dd_fcrport_size;
++ u32 dd_fcvport_size;
+
+ /*
+ * The driver sets these to tell the transport class it
+@@ -512,7 +650,7 @@
+
+ switch (rport->port_state) {
+ case FC_PORTSTATE_ONLINE:
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
+ result = 0;
+ else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
+ result = DID_IMM_RETRY << 16;
+@@ -549,6 +687,27 @@
+ wwn[7] = inm & 0xff;
+ }
+
++/**
++ * fc_vport_set_state() - called to set a vport's state. Saves the old state,
++ * excepting the transitory states of initializing and sending the ELS
++ * traffic to instantiate the vport on the link.
++ *
++ * Assumes the driver has surrounded this with the proper locking to ensure
++ * a coherent state change.
++ *
++ * @vport: virtual port whose state is changing
++ * @new_state: new state
++ **/
++static inline void
++fc_vport_set_state(struct fc_vport *vport, enum fc_vport_state new_state)
++{
++ if ((new_state != FC_VPORT_UNKNOWN) &&
++ (new_state != FC_VPORT_INITIALIZING))
++ vport->vport_last_state = vport->vport_state;
++ vport->vport_state = new_state;
++}
++
++
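A sketch of the intended call site: the LLD reports the outcome of vport instantiation, and the preserved last-state lets the transport show where a failed vport came from (the foo_ name and the chosen failure state are illustrative):

static void foo_vport_fabric_done(struct fc_vport *vport, int status)
{
        if (status == 0)
                fc_vport_set_state(vport, FC_VPORT_ACTIVE);
        else
                fc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP);
}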
+ struct scsi_transport_template *fc_attach_transport(
+ struct fc_function_template *);
+ void fc_release_transport(struct scsi_transport_template *);
+@@ -567,5 +726,6 @@
+ * be sure to read the Vendor Type and ID formatting requirements
+ * specified in scsi_netlink.h
+ */
++int fc_vport_terminate(struct fc_vport *vport);
+
+ #endif /* SCSI_TRANSPORT_FC_H */
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h
+--- linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h 2007-12-21 15:36:12.000000000 -0500
+@@ -79,7 +79,8 @@
+ char *name;
+ unsigned int caps;
+ /* LLD sets this to indicate what values it can export to sysfs */
+- unsigned int param_mask;
++ uint64_t param_mask;
++ uint64_t host_param_mask;
+ struct scsi_host_template *host_template;
+ /* LLD connection data size */
+ int conndata_size;
+@@ -89,7 +90,8 @@
+ unsigned int max_conn;
+ unsigned int max_cmd_len;
+ struct iscsi_cls_session *(*create_session) (struct iscsi_transport *it,
+- struct scsi_transport_template *t, uint32_t sn, uint32_t *hn);
++ struct scsi_transport_template *t, uint16_t, uint16_t,
++ uint32_t sn, uint32_t *hn);
+ void (*destroy_session) (struct iscsi_cls_session *session);
+ struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess,
+ uint32_t cid);
+@@ -105,14 +107,18 @@
+ enum iscsi_param param, char *buf);
+ int (*get_session_param) (struct iscsi_cls_session *session,
+ enum iscsi_param param, char *buf);
++ int (*get_host_param) (struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf);
++ int (*set_host_param) (struct Scsi_Host *shost,
++ enum iscsi_host_param param, char *buf,
++ int buflen);
+ int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
+ char *data, uint32_t data_size);
+ void (*get_stats) (struct iscsi_cls_conn *conn,
+ struct iscsi_stats *stats);
+ void (*init_cmd_task) (struct iscsi_cmd_task *ctask);
+ void (*init_mgmt_task) (struct iscsi_conn *conn,
+- struct iscsi_mgmt_task *mtask,
+- char *data, uint32_t data_size);
++ struct iscsi_mgmt_task *mtask);
+ int (*xmit_cmd_task) (struct iscsi_conn *conn,
+ struct iscsi_cmd_task *ctask);
+ void (*cleanup_cmd_task) (struct iscsi_conn *conn,
+@@ -124,7 +130,7 @@
+ uint64_t *ep_handle);
+ int (*ep_poll) (uint64_t ep_handle, int timeout_ms);
+ void (*ep_disconnect) (uint64_t ep_handle);
+- int (*tgt_dscvr) (enum iscsi_tgt_dscvr type, uint32_t host_no,
++ int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
+ uint32_t enable, struct sockaddr *dst_addr);
+ };
+
+diff -Nurb linux-2.6.22-570/init/Kconfig linux-2.6.22-591/init/Kconfig
+--- linux-2.6.22-570/init/Kconfig 2007-12-21 15:36:05.000000000 -0500
++++ linux-2.6.22-591/init/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -120,15 +120,6 @@
+ section 6.4 of the Linux Programmer's Guide, available from
+ <http://www.tldp.org/guides.html>.
+
+-config IPC_NS
+- bool "IPC Namespaces"
+- depends on SYSVIPC
+- default n
+- help
+- Support ipc namespaces. This allows containers, i.e. virtual
+- environments, to use ipc namespaces to provide different ipc
+- objects for different servers. If unsure, say N.
+-
+ config SYSVIPC_SYSCTL
+ bool
+ depends on SYSVIPC
+@@ -218,13 +209,14 @@
+
+ Say N if unsure.
+
+-config UTS_NS
+- bool "UTS Namespaces"
++config USER_NS
++ bool "User Namespaces (EXPERIMENTAL)"
+ default n
++ depends on EXPERIMENTAL
+ help
+- Support uts namespaces. This allows containers, i.e.
+- vservers, to use uts namespaces to provide different
+- uts info for different servers. If unsure, say N.
++ Support user namespaces. This allows containers, i.e.
++ vservers, to use user namespaces to provide different
++ user info for different servers. If unsure, say N.
+
+ config AUDIT
+ bool "Auditing support"
+@@ -298,9 +290,23 @@
+ depends on !OOM_PANIC
+ default y
+
++config CONTAINERS
++ bool
++
++config CONTAINER_DEBUG
++ bool "Example debug container subsystem"
++ select CONTAINERS
++ help
++ This option enables a simple container subsystem that
++ exports useful debugging information about the containers
++ framework.
++
++ Say N if unsure.
++
+ config CPUSETS
+ bool "Cpuset support"
+ depends on SMP
++ select CONTAINERS
+ help
+ This option will let you create and manage CPUSETs which
+ allow dynamically partitioning a system into sets of CPUs and
+@@ -329,6 +335,27 @@
+ If you are using a distro that was released in 2006 or later,
+ it should be safe to say N here.
+
++config CONTAINER_CPUACCT
++ bool "Simple CPU accounting container subsystem"
++ select CONTAINERS
++ help
++ Provides a simple Resource Controller for monitoring the
++ total CPU consumed by the tasks in a container.
++
++config CONTAINER_NS
++ bool "Namespace container subsystem"
++ select CONTAINERS
++ help
++ Provides a simple namespace container subsystem to
++ provide hierarchical naming of sets of namespaces,
++ for instance virtual servers and checkpoint/restart
++ jobs.
++
++config PROC_PID_CPUSET
++ bool "Include legacy /proc/<pid>/cpuset file"
++ depends on CPUSETS
++ default y
++
+ config RELAY
+ bool "Kernel->user space relay support (formerly relayfs)"
+ help
+@@ -605,6 +632,33 @@
+
+ endchoice
+
++config PROC_SMAPS
++ default y
++ bool "Enable /proc/pid/smaps support" if EMBEDDED && PROC_FS && MMU
++ help
++ The /proc/pid/smaps interface reports a process's private and
++ shared memory per mapping. Disabling this interface will reduce
++ the size of the kernel for small machines.
++
++config PROC_CLEAR_REFS
++ default y
++ bool "Enable /proc/pid/clear_refs support" if EMBEDDED && PROC_FS && MMU
++ help
++ The /proc/pid/clear_refs interface allows clearing the
++ referenced bits on a process's memory maps to allow monitoring
++ working set size. Disabling this interface will reduce
++ the size of the kernel for small machines.
++
++config PROC_PAGEMAP
++ default y
++ bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
++ help
++ The /proc/pid/pagemap interface allows reading the
++ kernel's virtual memory to page frame mapping to determine which
++ individual pages a process has mapped and which pages it shares
++ with other processes. Disabling this interface will reduce the
++ size of the kernel for small machines.
++
+ endmenu # General setup
+
+ config RT_MUTEXES
+@@ -620,6 +674,19 @@
+ default 0 if BASE_FULL
+ default 1 if !BASE_FULL
+
++config PAGE_GROUP_BY_MOBILITY
++ bool "Group pages based on their mobility in the page allocator"
++ def_bool y
++ help
++ The standard allocator will fragment memory over time, which means
++ that high-order allocations will fail even if kswapd is running. If
++ this option is set, the allocator will try to group page types
++ based on their ability to migrate or be reclaimed. This is a
++ best-effort attempt at lowering fragmentation, which a few
++ workloads care about. The cost is a more complex allocator that
++ may perform more slowly. If you are interested in working with
++ large pages, say Y and set /proc/sys/vm/min_free_kbytes to 16374.
++ Otherwise say N.
++
+ menu "Loadable module support"
+
+ config MODULES
+diff -Nurb linux-2.6.22-570/init/do_mounts_initrd.c linux-2.6.22-591/init/do_mounts_initrd.c
+--- linux-2.6.22-570/init/do_mounts_initrd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/init/do_mounts_initrd.c 2007-12-21 15:36:12.000000000 -0500
+@@ -56,12 +56,9 @@
+ sys_chroot(".");
+
+ pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
+- if (pid > 0) {
+- while (pid != sys_wait4(-1, NULL, 0, NULL)) {
+- try_to_freeze();
++ if (pid > 0)
++ while (pid != sys_wait4(-1, NULL, 0, NULL))
+ yield();
+- }
+- }
+
+ /* move initrd to rootfs' /old */
+ sys_fchdir(old_fd);
+diff -Nurb linux-2.6.22-570/init/main.c linux-2.6.22-591/init/main.c
+--- linux-2.6.22-570/init/main.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/init/main.c 2007-12-21 15:36:12.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <linux/writeback.h>
+ #include <linux/cpu.h>
+ #include <linux/cpuset.h>
++#include <linux/container.h>
+ #include <linux/efi.h>
+ #include <linux/tick.h>
+ #include <linux/interrupt.h>
+@@ -502,6 +503,7 @@
+ char * command_line;
+ extern struct kernel_param __start___param[], __stop___param[];
+
++ container_init_early();
+ smp_setup_processor_id();
+
+ /*
+@@ -627,6 +629,7 @@
+ #ifdef CONFIG_PROC_FS
+ proc_root_init();
+ #endif
++ container_init();
+ cpuset_init();
+ taskstats_init_early();
+ delayacct_init();
+diff -Nurb linux-2.6.22-570/ipc/msg.c linux-2.6.22-591/ipc/msg.c
+--- linux-2.6.22-570/ipc/msg.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/ipc/msg.c 2007-12-21 15:36:12.000000000 -0500
+@@ -88,7 +88,7 @@
+ static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
+ #endif
+
+-static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ ns->ids[IPC_MSG_IDS] = ids;
+ ns->msg_ctlmax = MSGMAX;
+@@ -97,7 +97,6 @@
+ ipc_init_ids(ids, ns->msg_ctlmni);
+ }
+
+-#ifdef CONFIG_IPC_NS
+ int msg_init_ns(struct ipc_namespace *ns)
+ {
+ struct ipc_ids *ids;
+@@ -129,7 +128,6 @@
+ kfree(ns->ids[IPC_MSG_IDS]);
+ ns->ids[IPC_MSG_IDS] = NULL;
+ }
+-#endif
+
+ void __init msg_init(void)
+ {
+diff -Nurb linux-2.6.22-570/ipc/sem.c linux-2.6.22-591/ipc/sem.c
+--- linux-2.6.22-570/ipc/sem.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/ipc/sem.c 2007-12-21 15:36:12.000000000 -0500
+@@ -123,7 +123,7 @@
+ #define sc_semopm sem_ctls[2]
+ #define sc_semmni sem_ctls[3]
+
+-static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ ns->ids[IPC_SEM_IDS] = ids;
+ ns->sc_semmsl = SEMMSL;
+@@ -134,7 +134,6 @@
+ ipc_init_ids(ids, ns->sc_semmni);
+ }
+
+-#ifdef CONFIG_IPC_NS
+ int sem_init_ns(struct ipc_namespace *ns)
+ {
+ struct ipc_ids *ids;
+@@ -166,7 +165,6 @@
+ kfree(ns->ids[IPC_SEM_IDS]);
+ ns->ids[IPC_SEM_IDS] = NULL;
+ }
+-#endif
+
+ void __init sem_init (void)
+ {
+diff -Nurb linux-2.6.22-570/ipc/shm.c linux-2.6.22-591/ipc/shm.c
+--- linux-2.6.22-570/ipc/shm.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/ipc/shm.c 2007-12-21 15:36:12.000000000 -0500
+@@ -79,7 +79,7 @@
+ static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
+ #endif
+
+-static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ ns->ids[IPC_SHM_IDS] = ids;
+ ns->shm_ctlmax = SHMMAX;
+@@ -100,7 +100,6 @@
+ shm_destroy(ns, shp);
+ }
+
+-#ifdef CONFIG_IPC_NS
+ int shm_init_ns(struct ipc_namespace *ns)
+ {
+ struct ipc_ids *ids;
+@@ -132,7 +131,6 @@
+ kfree(ns->ids[IPC_SHM_IDS]);
+ ns->ids[IPC_SHM_IDS] = NULL;
+ }
+-#endif
+
+ void __init shm_init (void)
+ {
+@@ -234,13 +232,13 @@
+ mutex_unlock(&shm_ids(ns).mutex);
+ }
+
+-static struct page *shm_nopage(struct vm_area_struct *vma,
+- unsigned long address, int *type)
++static struct page *shm_fault(struct vm_area_struct *vma,
++ struct fault_data *fdata)
+ {
+ struct file *file = vma->vm_file;
+ struct shm_file_data *sfd = shm_file_data(file);
+
+- return sfd->vm_ops->nopage(vma, address, type);
++ return sfd->vm_ops->fault(vma, fdata);
+ }
+
+ #ifdef CONFIG_NUMA
+@@ -279,6 +277,7 @@
+ if (ret != 0)
+ return ret;
+ sfd->vm_ops = vma->vm_ops;
++ BUG_ON(!sfd->vm_ops->fault);
+ vma->vm_ops = &shm_vm_ops;
+ shm_open(vma);
+
+@@ -337,7 +336,7 @@
+ static struct vm_operations_struct shm_vm_ops = {
+ .open = shm_open, /* callback for a new vm-area open */
+ .close = shm_close, /* callback for when the vm-area is released */
+- .nopage = shm_nopage,
++ .fault = shm_fault,
+ #if defined(CONFIG_NUMA)
+ .set_policy = shm_set_policy,
+ .get_policy = shm_get_policy,
+diff -Nurb linux-2.6.22-570/ipc/util.c linux-2.6.22-591/ipc/util.c
+--- linux-2.6.22-570/ipc/util.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/ipc/util.c 2007-12-21 15:36:12.000000000 -0500
+@@ -52,7 +52,6 @@
+ },
+ };
+
+-#ifdef CONFIG_IPC_NS
+ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+ {
+ int err;
+@@ -114,14 +113,6 @@
+ atomic_dec(&vs_global_ipc_ns);
+ kfree(ns);
+ }
+-#else
+-struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
+-{
+- if (flags & CLONE_NEWIPC)
+- return ERR_PTR(-EINVAL);
+- return ns;
+-}
+-#endif
+
+ /**
+ * ipc_init - initialise IPC subsystem
+@@ -149,7 +140,7 @@
+ * array itself.
+ */
+
+-void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
++void ipc_init_ids(struct ipc_ids* ids, int size)
+ {
+ int i;
+
+diff -Nurb linux-2.6.22-570/ipc/util.h linux-2.6.22-591/ipc/util.h
+--- linux-2.6.22-570/ipc/util.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/ipc/util.h 2007-12-21 15:36:12.000000000 -0500
+@@ -41,12 +41,8 @@
+ };
+
+ struct seq_file;
+-#ifdef CONFIG_IPC_NS
+-#define __ipc_init
+-#else
+-#define __ipc_init __init
+-#endif
+-void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size);
++
++void ipc_init_ids(struct ipc_ids *ids, int size);
+ #ifdef CONFIG_PROC_FS
+ void __init ipc_init_proc_interface(const char *path, const char *header,
+ int ids, int (*show)(struct seq_file *, void *));
+diff -Nurb linux-2.6.22-570/kernel/Makefile linux-2.6.22-591/kernel/Makefile
+--- linux-2.6.22-570/kernel/Makefile 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -4,11 +4,12 @@
+
+ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+ exit.o itimer.o time.o softirq.o resource.o \
+- sysctl.o capability.o ptrace.o timer.o user.o \
++ sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
+ signal.o sys.o kmod.o workqueue.o pid.o \
+ rcupdate.o extable.o params.o posix-timers.o \
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
+- hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
++ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
++ utsname.o
+
+ obj-y += vserver/
+
+@@ -33,16 +34,22 @@
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_KALLSYMS) += kallsyms.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+ obj-$(CONFIG_PM) += power/
+ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+ obj-$(CONFIG_KEXEC) += kexec.o
+ obj-$(CONFIG_COMPAT) += compat.o
++obj-$(CONFIG_CONTAINERS) += container.o
++obj-$(CONFIG_CONTAINER_DEBUG) += container_debug.o
+ obj-$(CONFIG_CPUSETS) += cpuset.o
++obj-$(CONFIG_CONTAINER_CPUACCT) += cpu_acct.o
++obj-$(CONFIG_CONTAINER_NS) += ns_container.o
+ obj-$(CONFIG_IKCONFIG) += configs.o
+ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
+ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_KGDB) += kgdb.o
+ obj-$(CONFIG_SYSFS) += ksysfs.o
+ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
+@@ -50,7 +57,6 @@
+ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+ obj-$(CONFIG_RELAY) += relay.o
+ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
+-obj-$(CONFIG_UTS_NS) += utsname.o
+ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+
+diff -Nurb linux-2.6.22-570/kernel/audit.c linux-2.6.22-591/kernel/audit.c
+--- linux-2.6.22-570/kernel/audit.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/audit.c 2007-12-21 15:36:15.000000000 -0500
+@@ -391,6 +391,7 @@
+ {
+ struct sk_buff *skb;
+
++ set_freezable();
+ while (!kthread_should_stop()) {
+ skb = skb_dequeue(&audit_skb_queue);
+ wake_up(&audit_backlog_wait);
+@@ -794,8 +795,8 @@
+
+ printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
+ audit_default ? "enabled" : "disabled");
+- audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
+- NULL, THIS_MODULE);
++ audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0,
++ audit_receive, NULL, THIS_MODULE);
+ if (!audit_sock)
+ audit_panic("cannot initialize netlink socket");
+ else
+diff -Nurb linux-2.6.22-570/kernel/auditsc.c linux-2.6.22-591/kernel/auditsc.c
+--- linux-2.6.22-570/kernel/auditsc.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/kernel/auditsc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1500,6 +1500,7 @@
+ context->names[idx].ino = (unsigned long)-1;
+ }
+ }
++EXPORT_SYMBOL(__audit_inode_child);
+
+ /**
+ * auditsc_get_stamp - get local copies of audit_context values
+diff -Nurb linux-2.6.22-570/kernel/container.c linux-2.6.22-591/kernel/container.c
+--- linux-2.6.22-570/kernel/container.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/container.c 2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,2545 @@
++/*
++ * kernel/container.c
++ *
++ * Generic process-grouping system.
++ *
++ * Based originally on the cpuset system, extracted by Paul Menage
++ * Copyright (C) 2006 Google, Inc
++ *
++ * Copyright notices from the original cpuset code:
++ * --------------------------------------------------
++ * Copyright (C) 2003 BULL SA.
++ * Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ *
++ * Portions derived from Patrick Mochel's sysfs code.
++ * sysfs is Copyright (c) 2001-3 Patrick Mochel
++ *
++ * 2003-10-10 Written by Simon Derr.
++ * 2003-10-22 Updates by Stephen Hemminger.
++ * 2004 May-July Rework by Paul Jackson.
++ * ---------------------------------------------------
++ *
++ * This file is subject to the terms and conditions of the GNU General Public
++ * License. See the file COPYING in the main directory of the Linux
++ * distribution for more details.
++ */
++
++#include <linux/cpu.h>
++#include <linux/cpumask.h>
++#include <linux/container.h>
++#include <linux/err.h>
++#include <linux/errno.h>
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/kernel.h>
++#include <linux/kmod.h>
++#include <linux/list.h>
++#include <linux/mempolicy.h>
++#include <linux/mm.h>
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/pagemap.h>
++#include <linux/proc_fs.h>
++#include <linux/rcupdate.h>
++#include <linux/uaccess.h>
++#include <linux/sched.h>
++#include <linux/seq_file.h>
++#include <linux/security.h>
++#include <linux/slab.h>
++#include <linux/magic.h>
++#include <linux/smp_lock.h>
++#include <linux/spinlock.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/time.h>
++#include <linux/backing-dev.h>
++#include <linux/sort.h>
++
++#include <asm/atomic.h>
++
++static DEFINE_MUTEX(container_mutex);
++
++/* Generate an array of container subsystem pointers */
++#define SUBSYS(_x) &_x ## _subsys,
++
++static struct container_subsys *subsys[] = {
++#include <linux/container_subsys.h>
++};
++
++/* A containerfs_root represents the root of a container hierarchy,
++ * and may be associated with a superblock to form an active
++ * hierarchy */
++struct containerfs_root {
++ struct super_block *sb;
++
++ /* The bitmask of subsystems attached to this hierarchy */
++ unsigned long subsys_bits;
++
++ /* A list running through the attached subsystems */
++ struct list_head subsys_list;
++
++ /* The root container for this hierarchy */
++ struct container top_container;
++
++ /* Tracks how many containers are currently defined in hierarchy.*/
++ int number_of_containers;
++
++ /* A list running through the mounted hierarchies */
++ struct list_head root_list;
++
++ /* The path to use for release notifications. No locking
++ * between setting and use - so if userspace updates this
++ * while subcontainers exist, you could miss a
++ * notification. We ensure that it's always a valid
++ * NUL-terminated string */
++ char release_agent_path[PATH_MAX];
++};
++
++
++/* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
++ * subsystems that are otherwise unattached - it never has more than a
++ * single container, and all tasks are part of that container. */
++
++static struct containerfs_root rootnode;
++
++/* The list of hierarchy roots */
++
++static LIST_HEAD(roots);
++static int root_count;
++
++/* dummytop is a shorthand for the dummy hierarchy's top container */
++#define dummytop (&rootnode.top_container)
++
++/* This flag indicates whether tasks in the fork and exit paths should
++ * take callback_mutex and check for fork/exit handlers to call. This
++ * avoids us having to do extra work in the fork/exit path if none of the
++ * subsystems need to be called.
++ */
++static int need_forkexit_callback;
++
++/* bits in struct container flags field */
++enum {
++ /* Container is dead */
++ CONT_REMOVED,
++ /* Container has previously had a child container or a task,
++ * but no longer (only if CONT_NOTIFY_ON_RELEASE is set) */
++ CONT_RELEASABLE,
++ /* Container requires release notifications to userspace */
++ CONT_NOTIFY_ON_RELEASE,
++};
++
++/* convenient tests for these bits */
++inline int container_is_removed(const struct container *cont)
++{
++ return test_bit(CONT_REMOVED, &cont->flags);
++}
++
++inline int container_is_releasable(const struct container *cont)
++{
++ const int bits =
++ (1 << CONT_RELEASABLE) |
++ (1 << CONT_NOTIFY_ON_RELEASE);
++ return (cont->flags & bits) == bits;
++}
++
++inline int notify_on_release(const struct container *cont)
++{
++ return test_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++}
++
++/* for_each_subsys() allows you to iterate on each subsystem attached to
++ * an active hierarchy */
++#define for_each_subsys(_root, _ss) \
++list_for_each_entry(_ss, &_root->subsys_list, sibling)
++
++/* for_each_root() allows you to iterate across the active hierarchies */
++#define for_each_root(_root) \
++list_for_each_entry(_root, &roots, root_list)
++
++/* the list of containers eligible for automatic release */
++static LIST_HEAD(release_list);
++static void container_release_agent(struct work_struct *work);
++static DECLARE_WORK(release_agent_work, container_release_agent);
++static void check_for_release(struct container *cont);
++
++/* Link structure for associating css_group objects with containers */
++struct cg_container_link {
++ /*
++ * List running through cg_container_links associated with a
++ * container, anchored on container->css_groups
++ */
++ struct list_head cont_link_list;
++ /*
++ * List running through cg_container_links pointing at a
++ * single css_group object, anchored on css_group->cg_links
++ */
++ struct list_head cg_link_list;
++ struct css_group *cg;
++};
++
++/* The default css_group - used by init and its children prior to any
++ * hierarchies being mounted. It contains a pointer to the root state
++ * for each subsystem. Also used to anchor the list of css_groups. Not
++ * reference-counted, to improve performance when child containers
++ * haven't been created.
++ */
++
++static struct css_group init_css_group;
++static struct cg_container_link init_css_group_link;
++
++/* css_group_lock protects the list of css_group objects, and the
++ * chain of tasks off each css_group. Nests inside task->alloc_lock */
++static DEFINE_RWLOCK(css_group_lock);
++static int css_group_count;
++
++
++/* When we create or destroy a css_group, the operation simply
++ * takes/releases a reference count on all the containers referenced
++ * by subsystems in this css_group. This can end up multiple-counting
++ * some containers, but that's OK - the ref-count is just a
++ * busy/not-busy indicator; ensuring that we only count each container
++ * once would require taking a global lock to ensure that no
++ * subsystems moved between hierarchies while we were doing so.
++ *
++ * Possible TODO: decide at boot time based on the number of
++ * registered subsystems and the number of CPUs or NUMA nodes whether
++ * it's better for performance to ref-count every subsystem, or to
++ * take a global lock and only add one ref count to each hierarchy.
++ */
++
++/*
++ * unlink a css_group from the list and free it
++ */
++static void unlink_css_group(struct css_group *cg)
++{
++ write_lock(&css_group_lock);
++ list_del(&cg->list);
++ css_group_count--;
++ while (!list_empty(&cg->cg_links)) {
++ struct cg_container_link *link;
++ link = list_entry(cg->cg_links.next,
++ struct cg_container_link, cg_link_list);
++ list_del(&link->cg_link_list);
++ list_del(&link->cont_link_list);
++ kfree(link);
++ }
++ write_unlock(&css_group_lock);
++}
++
++static void release_css_group(struct kref *k)
++{
++ int i;
++ struct css_group *cg = container_of(k, struct css_group, ref);
++
++ BUG_ON(!mutex_is_locked(&container_mutex));
++ unlink_css_group(cg);
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container *cont = cg->subsys[i]->container;
++ if (atomic_dec_and_test(&cont->count) &&
++ container_is_releasable(cont)) {
++ check_for_release(cont);
++ }
++ }
++ kfree(cg);
++}
++
++/*
++ * In the task exit path we want to avoid taking container_mutex
++ * unless absolutely necessary, so the release process is slightly
++ * different.
++ */
++static void release_css_group_taskexit(struct kref *k)
++{
++ int i;
++ struct css_group *cg = container_of(k, struct css_group, ref);
++
++ unlink_css_group(cg);
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container *cont = cg->subsys[i]->container;
++ if (notify_on_release(cont)) {
++ mutex_lock(&container_mutex);
++ set_bit(CONT_RELEASABLE, &cont->flags);
++ if (atomic_dec_and_test(&cont->count))
++ check_for_release(cont);
++ mutex_unlock(&container_mutex);
++ } else {
++ atomic_dec(&cont->count);
++ }
++ }
++ kfree(cg);
++}
++
++/*
++ * refcounted get/put for css_group objects
++ */
++static inline void get_css_group(struct css_group *cg)
++{
++ kref_get(&cg->ref);
++}
++
++static inline void put_css_group(struct css_group *cg)
++{
++ kref_put(&cg->ref, release_css_group);
++}
++
++static inline void put_css_group_taskexit(struct css_group *cg)
++{
++ kref_put(&cg->ref, release_css_group_taskexit);
++}
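++
++/*
++ * Example usage (illustrative sketch): pinning the current task's
++ * css_group across a lock drop. Note that release_css_group() asserts
++ * container_mutex, so the final put must happen under that mutex:
++ *
++ *	struct css_group *cg;
++ *
++ *	task_lock(current);
++ *	cg = current->containers;
++ *	get_css_group(cg);
++ *	task_unlock(current);
++ *	...
++ *	mutex_lock(&container_mutex);
++ *	put_css_group(cg);
++ *	mutex_unlock(&container_mutex);
++ */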
++
++/*
++ * find_existing_css_group() is a helper for
++ * find_css_group(), and checks to see whether an existing
++ * css_group is suitable. This currently walks a linked-list for
++ * simplicity; a later patch will use a hash table for better
++ * performance
++ *
++ * oldcg: the container group that we're using before the container
++ * transition
++ *
++ * cont: the container that we're moving into
++ *
++ * template: location in which to build the desired set of subsystem
++ * state objects for the new container group
++ */
++
++static struct css_group *find_existing_css_group(
++ struct css_group *oldcg,
++ struct container *cont,
++ struct container_subsys_state *template[])
++{
++ int i;
++ struct containerfs_root *root = cont->root;
++ struct list_head *l = &init_css_group.list;
++
++ /* Build the set of subsystem state objects that we want to
++ * see in the new css_group */
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ if (root->subsys_bits & (1ull << i)) {
++ /* Subsystem is in this hierarchy. So we want
++ * the subsystem state from the new
++ * container */
++ template[i] = cont->subsys[i];
++ } else {
++ /* Subsystem is not in this hierarchy, so we
++ * don't want to change the subsystem state */
++ template[i] = oldcg->subsys[i];
++ }
++ }
++
++ /* Look through existing container groups to find one to reuse */
++ do {
++ struct css_group *cg =
++ list_entry(l, struct css_group, list);
++
++ if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
++ /* All subsystems matched */
++ return cg;
++ }
++ /* Try the next container group */
++ l = l->next;
++ } while (l != &init_css_group.list);
++
++ /* No existing container group matched */
++ return NULL;
++}
++
++/*
++ * allocate_cg_links() allocates "count" cg_container_link structures
++ * and chains them on tmp through their cont_link_list fields. Returns 0 on
++ * success or a negative error
++ */
++
++static int allocate_cg_links(int count, struct list_head *tmp)
++{
++ struct cg_container_link *link;
++ int i;
++ INIT_LIST_HEAD(tmp);
++ for (i = 0; i < count; i++) {
++ link = kmalloc(sizeof(*link), GFP_KERNEL);
++ if (!link) {
++ while (!list_empty(tmp)) {
++ link = list_entry(tmp->next,
++ struct cg_container_link,
++ cont_link_list);
++ list_del(&link->cont_link_list);
++ kfree(link);
++ }
++ return -ENOMEM;
++ }
++ list_add(&link->cont_link_list, tmp);
++ }
++ return 0;
++}
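++
++/*
++ * Example usage (illustrative sketch): a caller detaches links from
++ * the temporary list as it consumes them, then frees any leftovers;
++ * the same pattern is used by find_css_group() and container_get_sb():
++ *
++ *	struct list_head tmp;
++ *	struct cg_container_link *link;
++ *
++ *	if (allocate_cg_links(n, &tmp) < 0)
++ *		return -ENOMEM;	// tmp is already empty on failure
++ *	...
++ *	link = list_entry(tmp.next, struct cg_container_link,
++ *			  cont_link_list);
++ *	list_del(&link->cont_link_list);	// consume one link
++ *	...
++ *	while (!list_empty(&tmp)) {	// free the unused remainder
++ *		link = list_entry(tmp.next, struct cg_container_link,
++ *				  cont_link_list);
++ *		list_del(&link->cont_link_list);
++ *		kfree(link);
++ *	}
++ */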
++
++/*
++ * find_css_group() takes an existing container group and a
++ * container object, and returns a css_group object that's
++ * equivalent to the old group, but with the given container
++ * substituted into the appropriate hierarchy. Must be called with
++ * container_mutex held
++ */
++
++static struct css_group *find_css_group(
++ struct css_group *oldcg, struct container *cont)
++{
++ struct css_group *res;
++ struct container_subsys_state *template[CONTAINER_SUBSYS_COUNT];
++ int i;
++
++ struct list_head tmp_cg_links;
++ struct cg_container_link *link;
++
++ /* First see if we already have a container group that matches
++ * the desired set */
++ write_lock(&css_group_lock);
++ res = find_existing_css_group(oldcg, cont, template);
++ if (res)
++ get_css_group(res);
++ write_unlock(&css_group_lock);
++
++ if (res)
++ return res;
++
++ res = kmalloc(sizeof(*res), GFP_KERNEL);
++ if (!res)
++ return NULL;
++
++ /* Allocate all the cg_container_link objects that we'll need */
++ if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
++ kfree(res);
++ return NULL;
++ }
++
++ kref_init(&res->ref);
++ INIT_LIST_HEAD(&res->cg_links);
++ INIT_LIST_HEAD(&res->tasks);
++
++ /* Copy the set of subsystem state objects generated in
++ * find_existing_css_group() */
++ memcpy(res->subsys, template, sizeof(res->subsys));
++
++ write_lock(&css_group_lock);
++ /* Add reference counts and links from the new css_group. */
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container *cont = res->subsys[i]->container;
++ struct container_subsys *ss = subsys[i];
++ atomic_inc(&cont->count);
++ /*
++ * We want to add a link once per container, so we
++ * only do it for the first subsystem in each
++ * hierarchy
++ */
++ if (ss->root->subsys_list.next == &ss->sibling) {
++ BUG_ON(list_empty(&tmp_cg_links));
++ link = list_entry(tmp_cg_links.next,
++ struct cg_container_link,
++ cont_link_list);
++ list_del(&link->cont_link_list);
++ list_add(&link->cont_link_list, &cont->css_groups);
++ link->cg = res;
++ list_add(&link->cg_link_list, &res->cg_links);
++ }
++ }
++ if (list_empty(&rootnode.subsys_list)) {
++ link = list_entry(tmp_cg_links.next,
++ struct cg_container_link,
++ cont_link_list);
++ list_del(&link->cont_link_list);
++ list_add(&link->cont_link_list, &dummytop->css_groups);
++ link->cg = res;
++ list_add(&link->cg_link_list, &res->cg_links);
++ }
++
++ BUG_ON(!list_empty(&tmp_cg_links));
++
++ /* Link this container group into the list */
++ list_add(&res->list, &init_css_group.list);
++ css_group_count++;
++ write_unlock(&css_group_lock);
++
++ return res;
++}
++
++/*
++ * There is one global container mutex. We also require taking
++ * task_lock() when dereferencing a task's container subsys pointers.
++ * See "The task_lock() exception", at the end of this comment.
++ *
++ * A task must hold container_mutex to modify containers.
++ *
++ * Any task can increment and decrement the count field without lock.
++ * So in general, code holding container_mutex can't rely on the count
++ * field not changing. However, if the count goes to zero, then only
++ * attach_task() can increment it again. Because a count of zero
++ * means that no tasks are currently attached, therefore there is no
++ * way a task attached to that container can fork (the other way to
++ * increment the count). So code holding container_mutex can safely
++ * assume that if the count is zero, it will stay zero. Similarly, if
++ * a task holds container_mutex on a container with zero count, it
++ * knows that the container won't be removed, as container_rmdir()
++ * needs that mutex.
++ *
++ * The container_common_file_write handler for operations that modify
++ * the container hierarchy holds container_mutex across the entire operation,
++ * single threading all such container modifications across the system.
++ *
++ * The fork and exit callbacks container_fork() and container_exit(), don't
++ * (usually) take container_mutex. These are the two most performance
++ * critical pieces of code here. The exception occurs on container_exit(),
++ * when a task in a notify_on_release container exits. Then container_mutex
++ * is taken, and if the container count is zero, a usermode call made
++ * to /sbin/container_release_agent with the name of the container (path
++ * relative to the root of container file system) as the argument.
++ *
++ * A container can only be deleted if both its 'count' of using tasks
++ * is zero and its list of 'children' containers is empty. Since all
++ * tasks in the system use _some_ container, and since there is always at
++ * least one task in the system (init, pid == 1), top_container always
++ * has either child containers or attached tasks (or both). So we don't
++ * need a special hack to ensure that top_container cannot be deleted.
++ *
++ * The task_lock() exception
++ *
++ * The need for this exception arises from the action of
++ * attach_task(), which overwrites one task's container pointer with
++ * another. It does so using container_mutex, however there are
++ * several performance-critical places that need to reference
++ * task->containers without the expense of grabbing a system-global
++ * mutex. Therefore except as noted below, when dereferencing or, as
++ * in attach_task(), modifying a task's container pointer we use
++ * task_lock(), which acts on a spinlock (task->alloc_lock) already in
++ * the task_struct routinely used for such matters.
++ *
++ * P.S. One more locking exception. RCU is used to guard the
++ * update of a task's container pointer by attach_task()
++ */
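++
++/*
++ * Illustrative sketch of the two safe ways to read a task's container
++ * pointer, per the rules above:
++ *
++ *	// 1. Under task_lock(), which excludes attach_task()
++ *	task_lock(tsk);
++ *	cg = tsk->containers;
++ *	...use cg...
++ *	task_unlock(tsk);
++ *
++ *	// 2. Under rcu_read_lock(), matching the rcu_assign_pointer()
++ *	//    in attach_task(); cg may be stale but won't be freed
++ *	rcu_read_lock();
++ *	cg = rcu_dereference(tsk->containers);
++ *	...use cg...
++ *	rcu_read_unlock();
++ */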
++
++/**
++ * container_lock - lock out any changes to container structures
++ *
++ */
++
++void container_lock(void)
++{
++ mutex_lock(&container_mutex);
++}
++
++/**
++ * container_unlock - release lock on container changes
++ *
++ * Undo the lock taken in a previous container_lock() call.
++ */
++
++void container_unlock(void)
++{
++ mutex_unlock(&container_mutex);
++}
++
++/*
++ * A couple of forward declarations required, due to cyclic reference loop:
++ * container_mkdir -> container_create -> container_populate_dir ->
++ * container_add_file -> container_create_file -> container_dir_inode_operations
++ * -> container_mkdir.
++ */
++
++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode);
++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry);
++static int container_populate_dir(struct container *cont);
++static struct inode_operations container_dir_inode_operations;
++static struct file_operations proc_containerstats_operations;
++
++static struct inode *container_new_inode(mode_t mode, struct super_block *sb)
++{
++ struct inode *inode = new_inode(sb);
++ static struct backing_dev_info container_backing_dev_info = {
++ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
++ };
++
++ if (inode) {
++ inode->i_mode = mode;
++ inode->i_uid = current->fsuid;
++ inode->i_gid = current->fsgid;
++ inode->i_blocks = 0;
++ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++ inode->i_mapping->backing_dev_info = &container_backing_dev_info;
++ }
++ return inode;
++}
++
++static void container_diput(struct dentry *dentry, struct inode *inode)
++{
++ /* is dentry a directory ? if so, kfree() associated container */
++ if (S_ISDIR(inode->i_mode)) {
++ struct container *cont = dentry->d_fsdata;
++ BUG_ON(!(container_is_removed(cont)));
++ kfree(cont);
++ }
++ iput(inode);
++}
++
++static struct dentry *container_get_dentry(struct dentry *parent,
++ const char *name)
++{
++ struct dentry *d = lookup_one_len(name, parent, strlen(name));
++ static struct dentry_operations container_dops = {
++ .d_iput = container_diput,
++ };
++
++ if (!IS_ERR(d))
++ d->d_op = &container_dops;
++ return d;
++}
++
++static void remove_dir(struct dentry *d)
++{
++ struct dentry *parent = dget(d->d_parent);
++
++ d_delete(d);
++ simple_rmdir(parent->d_inode, d);
++ dput(parent);
++}
++
++static void container_clear_directory(struct dentry *dentry)
++{
++ struct list_head *node;
++
++ BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
++ spin_lock(&dcache_lock);
++ node = dentry->d_subdirs.next;
++ while (node != &dentry->d_subdirs) {
++ struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
++ list_del_init(node);
++ if (d->d_inode) {
++ /* This should never be called on a container
++ * directory with child containers */
++ BUG_ON(d->d_inode->i_mode & S_IFDIR);
++ d = dget_locked(d);
++ spin_unlock(&dcache_lock);
++ d_delete(d);
++ simple_unlink(dentry->d_inode, d);
++ dput(d);
++ spin_lock(&dcache_lock);
++ }
++ node = dentry->d_subdirs.next;
++ }
++ spin_unlock(&dcache_lock);
++}
++
++/*
++ * NOTE : the dentry must have been dget()'ed
++ */
++static void container_d_remove_dir(struct dentry *dentry)
++{
++ container_clear_directory(dentry);
++
++ spin_lock(&dcache_lock);
++ list_del_init(&dentry->d_u.d_child);
++ spin_unlock(&dcache_lock);
++ remove_dir(dentry);
++}
++
++static int rebind_subsystems(struct containerfs_root *root,
++ unsigned long final_bits)
++{
++ unsigned long added_bits, removed_bits;
++ struct container *cont = &root->top_container;
++ int i;
++
++ removed_bits = root->subsys_bits & ~final_bits;
++ added_bits = final_bits & ~root->subsys_bits;
++ /* Check that any added subsystems are currently free */
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ unsigned long long bit = 1ull << i;
++ struct container_subsys *ss = subsys[i];
++ if (!(bit & added_bits))
++ continue;
++ if (ss->root != &rootnode) {
++ /* Subsystem isn't free */
++ return -EBUSY;
++ }
++ }
++
++ /* Currently we don't handle adding/removing subsystems when
++ * any subcontainers exist. This is theoretically supportable
++ * but involves complex error handling, so it's being left until
++ * later */
++ if (!list_empty(&cont->children))
++ return -EBUSY;
++
++ /* Process each subsystem */
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++ unsigned long bit = 1UL << i;
++ if (bit & added_bits) {
++ /* We're binding this subsystem to this hierarchy */
++ BUG_ON(cont->subsys[i]);
++ BUG_ON(!dummytop->subsys[i]);
++ BUG_ON(dummytop->subsys[i]->container != dummytop);
++ cont->subsys[i] = dummytop->subsys[i];
++ cont->subsys[i]->container = cont;
++ list_add(&ss->sibling, &root->subsys_list);
++ rcu_assign_pointer(ss->root, root);
++ if (ss->bind)
++ ss->bind(ss, cont);
++
++ } else if (bit & removed_bits) {
++ /* We're removing this subsystem */
++ BUG_ON(cont->subsys[i] != dummytop->subsys[i]);
++ BUG_ON(cont->subsys[i]->container != cont);
++ if (ss->bind)
++ ss->bind(ss, dummytop);
++ dummytop->subsys[i]->container = dummytop;
++ cont->subsys[i] = NULL;
++ rcu_assign_pointer(subsys[i]->root, &rootnode);
++ list_del(&ss->sibling);
++ } else if (bit & final_bits) {
++ /* Subsystem state should already exist */
++ BUG_ON(!cont->subsys[i]);
++ } else {
++ /* Subsystem state shouldn't exist */
++ BUG_ON(cont->subsys[i]);
++ }
++ }
++ root->subsys_bits = final_bits;
++ synchronize_rcu();
++
++ return 0;
++}
++
++/*
++ * Release the last use of a hierarchy. Will never be called when
++ * there are active subcontainers since each subcontainer bumps the
++ * value of sb->s_active.
++ */
++static void container_put_super(struct super_block *sb)
++{
++ struct containerfs_root *root = sb->s_fs_info;
++ struct container *cont = &root->top_container;
++ int ret;
++
++ root->sb = NULL;
++ sb->s_fs_info = NULL;
++
++ mutex_lock(&container_mutex);
++
++ BUG_ON(root->number_of_containers != 1);
++ BUG_ON(!list_empty(&cont->children));
++ BUG_ON(!list_empty(&cont->sibling));
++ BUG_ON(!root->subsys_bits);
++
++ /* Rebind all subsystems back to the default hierarchy */
++ ret = rebind_subsystems(root, 0);
++ BUG_ON(ret);
++
++ write_lock(&css_group_lock);
++ while (!list_empty(&cont->css_groups)) {
++ struct cg_container_link *link;
++ link = list_entry(cont->css_groups.next,
++ struct cg_container_link, cont_link_list);
++ list_del(&link->cg_link_list);
++ list_del(&link->cont_link_list);
++ kfree(link);
++ }
++ write_unlock(&css_group_lock);
++
++ list_del(&root->root_list);
++ root_count--;
++ kfree(root);
++ mutex_unlock(&container_mutex);
++}
++
++static int container_show_options(struct seq_file *seq, struct vfsmount *vfs)
++{
++ struct containerfs_root *root = vfs->mnt_sb->s_fs_info;
++ struct container_subsys *ss;
++
++ for_each_subsys(root, ss)
++ seq_printf(seq, ",%s", ss->name);
++ return 0;
++}
++
++/* Convert a hierarchy specifier into a bitmask. Locking: container_mutex held */
++static int parse_containerfs_options(char *opts, unsigned long *bits)
++{
++ char *token, *o = opts ?: "all";
++
++ *bits = 0;
++
++ while ((token = strsep(&o, ",")) != NULL) {
++ if (!*token)
++ return -EINVAL;
++ if (!strcmp(token, "all")) {
++ *bits = (1 << CONTAINER_SUBSYS_COUNT) - 1;
++ } else {
++ struct container_subsys *ss;
++ int i;
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ ss = subsys[i];
++ if (!strcmp(token, ss->name)) {
++ *bits |= 1 << i;
++ break;
++ }
++ }
++ if (i == CONTAINER_SUBSYS_COUNT)
++ return -ENOENT;
++ }
++ }
++
++ /* We can't have an empty hierarchy */
++ if (!*bits)
++ return -EINVAL;
++
++ return 0;
++}
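++
++/*
++ * Example (illustrative; subsystem names hypothetical): with "cpuset"
++ * registered as bit 0 and "cpuacct" as bit 1,
++ *
++ *	unsigned long bits;
++ *	char opts[] = "cpuset,cpuacct";	// writable: strsep() edits it
++ *
++ *	parse_containerfs_options(opts, &bits);	// bits == 0x3
++ *
++ * "all" (and a NULL string, which defaults to "all") selects every
++ * subsystem; an unknown name yields -ENOENT, an empty token -EINVAL.
++ */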
++
++static int container_remount(struct super_block *sb, int *flags, char *data)
++{
++ int ret = 0;
++ unsigned long subsys_bits;
++ struct containerfs_root *root = sb->s_fs_info;
++ struct container *cont = &root->top_container;
++
++ mutex_lock(&cont->dentry->d_inode->i_mutex);
++ mutex_lock(&container_mutex);
++
++ /* See what subsystems are wanted */
++ ret = parse_containerfs_options(data, &subsys_bits);
++ if (ret)
++ goto out_unlock;
++
++ ret = rebind_subsystems(root, subsys_bits);
++
++ /* (re)populate subsystem files */
++ if (!ret)
++ container_populate_dir(cont);
++
++ out_unlock:
++ mutex_unlock(&container_mutex);
++ mutex_unlock(&cont->dentry->d_inode->i_mutex);
++ return ret;
++}
++
++static struct super_operations container_ops = {
++ .statfs = simple_statfs,
++ .drop_inode = generic_delete_inode,
++ .put_super = container_put_super,
++ .show_options = container_show_options,
++ .remount_fs = container_remount,
++};
++
++static int container_fill_super(struct super_block *sb, void *options,
++ int unused_silent)
++{
++ struct inode *inode;
++ struct dentry *root;
++ struct containerfs_root *hroot = options;
++
++ sb->s_blocksize = PAGE_CACHE_SIZE;
++ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
++ sb->s_magic = CONTAINER_SUPER_MAGIC;
++ sb->s_op = &container_ops;
++
++ inode = container_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
++ if (!inode)
++ return -ENOMEM;
++
++ inode->i_fop = &simple_dir_operations;
++ inode->i_op = &container_dir_inode_operations;
++ /* directories start off with i_nlink == 2 (for "." entry) */
++ inc_nlink(inode);
++
++ root = d_alloc_root(inode);
++ if (!root) {
++ iput(inode);
++ return -ENOMEM;
++ }
++ sb->s_root = root;
++ root->d_fsdata = &hroot->top_container;
++ hroot->top_container.dentry = root;
++
++ strcpy(hroot->release_agent_path, "");
++ sb->s_fs_info = hroot;
++ hroot->sb = sb;
++
++ return 0;
++}
++
++static void init_container_root(struct containerfs_root *root)
++{
++ struct container *cont = &root->top_container;
++ INIT_LIST_HEAD(&root->subsys_list);
++ root->number_of_containers = 1;
++ cont->root = root;
++ cont->top_container = cont;
++ INIT_LIST_HEAD(&cont->sibling);
++ INIT_LIST_HEAD(&cont->children);
++ INIT_LIST_HEAD(&cont->css_groups);
++ INIT_LIST_HEAD(&cont->release_list);
++ list_add(&root->root_list, &roots);
++ root_count++;
++}
++
++static int container_get_sb(struct file_system_type *fs_type,
++ int flags, const char *unused_dev_name,
++ void *data, struct vfsmount *mnt)
++{
++ unsigned long subsys_bits = 0;
++ int ret = 0;
++ struct containerfs_root *root = NULL;
++ int use_existing = 0;
++
++ mutex_lock(&container_mutex);
++
++ /* First find the desired set of resource controllers */
++ ret = parse_containerfs_options(data, &subsys_bits);
++ if (ret)
++ goto out_unlock;
++
++ /* See if we already have a hierarchy containing this set */
++
++ for_each_root(root) {
++ /* We match - use this hierarchy */
++ if (root->subsys_bits == subsys_bits) {
++ use_existing = 1;
++ break;
++ }
++ /* We clash - fail */
++ if (root->subsys_bits & subsys_bits) {
++ ret = -EBUSY;
++ goto out_unlock;
++ }
++ }
++
++ if (!use_existing) {
++ /* We need a new root */
++ struct list_head tmp_cg_links, *l;
++ root = kzalloc(sizeof(*root), GFP_KERNEL);
++ if (!root) {
++ ret = -ENOMEM;
++ goto out_unlock;
++ }
++ /* We're accessing css_group_count without locking
++ * here, but that's OK - it can only be increased by
++ * someone holding container_lock, and that's us. The
++ * worst that can happen is that we have some link
++ * structures left over */
++ ret = allocate_cg_links(css_group_count, &tmp_cg_links);
++ if (ret < 0) {
++ kfree(root);
++ goto out_unlock;
++ }
++ init_container_root(root);
++
++ /* Link the top container in this hierarchy into all
++ * the css_group objects */
++ write_lock(&css_group_lock);
++ l = &init_css_group.list;
++ do {
++ struct css_group *cg;
++ struct cg_container_link *link;
++ cg = list_entry(l, struct css_group, list);
++ BUG_ON(list_empty(&tmp_cg_links));
++ link = list_entry(tmp_cg_links.next,
++ struct cg_container_link,
++ cont_link_list);
++ list_del(&link->cont_link_list);
++ link->cg = cg;
++ list_add(&link->cont_link_list,
++ &root->top_container.css_groups);
++ list_add(&link->cg_link_list, &cg->cg_links);
++ l = l->next;
++ } while (l != &init_css_group.list);
++ write_unlock(&css_group_lock);
++
++ while (!list_empty(&tmp_cg_links)) {
++ /* Probably shouldn't happen */
++ struct cg_container_link *link;
++ printk(KERN_INFO "Freeing unused cg_container_link\n");
++ link = list_entry(tmp_cg_links.next,
++ struct cg_container_link,
++ cont_link_list);
++ list_del(&link->cont_link_list);
++ kfree(link);
++ }
++ }
++
++ if (!root->sb) {
++ /* We need a new superblock for this container combination */
++ struct container *cont = &root->top_container;
++
++ BUG_ON(root->subsys_bits);
++ ret = get_sb_nodev(fs_type, flags, root,
++ container_fill_super, mnt);
++ if (ret)
++ goto out_unlock;
++
++ BUG_ON(!list_empty(&cont->sibling));
++ BUG_ON(!list_empty(&cont->children));
++ BUG_ON(root->number_of_containers != 1);
++
++ ret = rebind_subsystems(root, subsys_bits);
++
++ /* It's safe to nest i_mutex inside container_mutex in
++ * this case, since no-one else can be accessing this
++ * directory yet */
++ mutex_lock(&cont->dentry->d_inode->i_mutex);
++ container_populate_dir(cont);
++ mutex_unlock(&cont->dentry->d_inode->i_mutex);
++ BUG_ON(ret);
++ } else {
++ /* Reuse the existing superblock */
++ down_write(&(root->sb->s_umount));
++ ret = simple_set_mnt(mnt, root->sb);
++ if (!ret)
++ atomic_inc(&root->sb->s_active);
++ }
++
++ out_unlock:
++ mutex_unlock(&container_mutex);
++ return ret;
++}
++
++static struct file_system_type container_fs_type = {
++ .name = "container",
++ .get_sb = container_get_sb,
++ .kill_sb = kill_litter_super,
++};
++
++static inline struct container *__d_cont(struct dentry *dentry)
++{
++ return dentry->d_fsdata;
++}
++
++static inline struct cftype *__d_cft(struct dentry *dentry)
++{
++ return dentry->d_fsdata;
++}
++
++/*
++ * Called with container_mutex held. Writes path of container into buf.
++ * Returns 0 on success, -errno on error.
++ */
++int container_path(const struct container *cont, char *buf, int buflen)
++{
++ char *start;
++
++ start = buf + buflen;
++
++ *--start = '\0';
++ for (;;) {
++ int len = cont->dentry->d_name.len;
++ if ((start -= len) < buf)
++ return -ENAMETOOLONG;
++ memcpy(start, cont->dentry->d_name.name, len);
++ cont = cont->parent;
++ if (!cont)
++ break;
++ if (!cont->parent)
++ continue;
++ if (--start < buf)
++ return -ENAMETOOLONG;
++ *start = '/';
++ }
++ memmove(buf, start, buf + buflen - start);
++ return 0;
++}
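++
++/*
++ * Example usage (illustrative sketch), as in proc_container_show()
++ * further down: a page-sized buffer filled under container_mutex:
++ *
++ *	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ *
++ *	if (buf) {
++ *		mutex_lock(&container_mutex);
++ *		if (!container_path(cont, buf, PAGE_SIZE))
++ *			printk(KERN_DEBUG "container path: %s\n", buf);
++ *		mutex_unlock(&container_mutex);
++ *		kfree(buf);
++ *	}
++ */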
++
++static void get_first_subsys(const struct container *cont,
++ struct container_subsys_state **css, int *subsys_id)
++{
++ const struct containerfs_root *root = cont->root;
++ const struct container_subsys *test_ss;
++ BUG_ON(list_empty(&root->subsys_list));
++ test_ss = list_entry(root->subsys_list.next,
++ struct container_subsys, sibling);
++ if (css) {
++ *css = cont->subsys[test_ss->subsys_id];
++ BUG_ON(!*css);
++ }
++ if (subsys_id)
++ *subsys_id = test_ss->subsys_id;
++}
++
++/*
++ * Attach task 'tsk' to container 'cont'
++ *
++ * Call holding container_mutex. May take task_lock of
++ * the task 'tsk' during call.
++ */
++static int attach_task(struct container *cont, struct task_struct *tsk)
++{
++ int retval = 0;
++ struct container_subsys *ss;
++ struct container *oldcont;
++ struct css_group *cg = tsk->containers;
++ struct css_group *newcg;
++ struct containerfs_root *root = cont->root;
++ int subsys_id;
++
++ get_first_subsys(cont, NULL, &subsys_id);
++
++ /* Nothing to do if the task is already in that container */
++ oldcont = task_container(tsk, subsys_id);
++ if (cont == oldcont)
++ return 0;
++
++ for_each_subsys(root, ss) {
++ if (ss->can_attach) {
++ retval = ss->can_attach(ss, cont, tsk);
++ if (retval) {
++ return retval;
++ }
++ }
++ }
++
++ /* Locate or allocate a new css_group for this task,
++ * based on its final set of containers */
++ newcg = find_css_group(cg, cont);
++ if (!newcg) {
++ return -ENOMEM;
++ }
++
++ task_lock(tsk);
++ if (tsk->flags & PF_EXITING) {
++ task_unlock(tsk);
++ put_css_group(newcg);
++ return -ESRCH;
++ }
++ rcu_assign_pointer(tsk->containers, newcg);
++ if (!list_empty(&tsk->cg_list)) {
++ write_lock(&css_group_lock);
++ list_del(&tsk->cg_list);
++ list_add(&tsk->cg_list, &newcg->tasks);
++ write_unlock(&css_group_lock);
++ }
++ task_unlock(tsk);
++
++ for_each_subsys(root, ss) {
++ if (ss->attach) {
++ ss->attach(ss, cont, oldcont, tsk);
++ }
++ }
++ set_bit(CONT_RELEASABLE, &oldcont->flags);
++ synchronize_rcu();
++ put_css_group(cg);
++ return 0;
++}
++
++/*
++ * Attach the task with pid 'pid' to container 'cont'. Call with
++ * container_mutex held; may take task_lock of the task
++ */
++static int attach_task_by_pid(struct container *cont, char *pidbuf)
++{
++ pid_t pid;
++ struct task_struct *tsk;
++ int ret;
++
++ if (sscanf(pidbuf, "%d", &pid) != 1)
++ return -EIO;
++
++ if (pid) {
++ rcu_read_lock();
++ tsk = find_task_by_pid(pid);
++ if (!tsk || tsk->flags & PF_EXITING) {
++ rcu_read_unlock();
++ return -ESRCH;
++ }
++ get_task_struct(tsk);
++ rcu_read_unlock();
++
++ if ((current->euid) && (current->euid != tsk->uid)
++ && (current->euid != tsk->suid)) {
++ put_task_struct(tsk);
++ return -EACCES;
++ }
++ } else {
++ tsk = current;
++ get_task_struct(tsk);
++ }
++
++ ret = attach_task(cont, tsk);
++ put_task_struct(tsk);
++ return ret;
++}
++
++/* The various types of files and directories in a container file system */
++
++enum container_filetype {
++ FILE_ROOT,
++ FILE_DIR,
++ FILE_TASKLIST,
++ FILE_NOTIFY_ON_RELEASE,
++ FILE_RELEASABLE,
++ FILE_RELEASE_AGENT,
++};
++
++static ssize_t container_common_file_write(struct container *cont,
++ struct cftype *cft,
++ struct file *file,
++ const char __user *userbuf,
++ size_t nbytes, loff_t *unused_ppos)
++{
++ enum container_filetype type = cft->private;
++ char *buffer;
++ int retval = 0;
++
++ if (nbytes >= PATH_MAX)
++ return -E2BIG;
++
++ /* +1 for nul-terminator */
++ buffer = kmalloc(nbytes + 1, GFP_KERNEL);
++ if (buffer == NULL)
++ return -ENOMEM;
++
++ if (copy_from_user(buffer, userbuf, nbytes)) {
++ retval = -EFAULT;
++ goto out1;
++ }
++ buffer[nbytes] = 0; /* nul-terminate */
++
++ mutex_lock(&container_mutex);
++
++ if (container_is_removed(cont)) {
++ retval = -ENODEV;
++ goto out2;
++ }
++
++ switch (type) {
++ case FILE_TASKLIST:
++ retval = attach_task_by_pid(cont, buffer);
++ break;
++ case FILE_NOTIFY_ON_RELEASE:
++ clear_bit(CONT_RELEASABLE, &cont->flags);
++ if (simple_strtoul(buffer, NULL, 10) != 0)
++ set_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++ else
++ clear_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++ break;
++ case FILE_RELEASE_AGENT:
++ {
++ struct containerfs_root *root = cont->root;
++ if (nbytes < sizeof(root->release_agent_path)) {
++ /* We never write anything other than '\0'
++ * into the last char of release_agent_path,
++ * so it always remains a NUL-terminated
++ * string */
++ strncpy(root->release_agent_path, buffer, nbytes);
++ root->release_agent_path[nbytes] = 0;
++ } else {
++ retval = -ENOSPC;
++ }
++ break;
++ }
++ default:
++ retval = -EINVAL;
++ goto out2;
++ }
++
++ if (retval == 0)
++ retval = nbytes;
++out2:
++ mutex_unlock(&container_mutex);
++out1:
++ kfree(buffer);
++ return retval;
++}
++
++static ssize_t container_file_write(struct file *file, const char __user *buf,
++ size_t nbytes, loff_t *ppos)
++{
++ struct cftype *cft = __d_cft(file->f_dentry);
++ struct container *cont = __d_cont(file->f_dentry->d_parent);
++
++ if (!cft)
++ return -ENODEV;
++ if (!cft->write)
++ return -EINVAL;
++
++ return cft->write(cont, cft, file, buf, nbytes, ppos);
++}
++
++static ssize_t container_read_uint(struct container *cont, struct cftype *cft,
++ struct file *file,
++ char __user *buf, size_t nbytes,
++ loff_t *ppos)
++{
++ char tmp[64];
++ u64 val = cft->read_uint(cont, cft);
++ int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
++
++ return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
++}
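++
++/*
++ * Example (illustrative; 'example_*' names are hypothetical): a
++ * subsystem exposes a numeric control file by supplying only
++ * ->read_uint in its cftype; container_read_uint() above does the
++ * formatting:
++ *
++ *	static u64 example_read_limit(struct container *cont,
++ *				      struct cftype *cft)
++ *	{
++ *		return 42;	// would normally report subsystem state
++ *	}
++ *
++ *	static struct cftype example_cft = {
++ *		.name = "example.limit",
++ *		.read_uint = example_read_limit,
++ *	};
++ */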
++
++static ssize_t container_common_file_read(struct container *cont,
++ struct cftype *cft,
++ struct file *file,
++ char __user *buf,
++ size_t nbytes, loff_t *ppos)
++{
++ enum container_filetype type = cft->private;
++ char *page;
++ ssize_t retval = 0;
++ char *s;
++
++ if (!(page = (char *)__get_free_page(GFP_KERNEL)))
++ return -ENOMEM;
++
++ s = page;
++
++ switch (type) {
++ case FILE_RELEASE_AGENT:
++ {
++ struct containerfs_root *root;
++ size_t n;
++ mutex_lock(&container_mutex);
++ root = cont->root;
++ n = strnlen(root->release_agent_path,
++ sizeof(root->release_agent_path));
++ n = min(n, (size_t) PAGE_SIZE);
++ strncpy(s, root->release_agent_path, n);
++ mutex_unlock(&container_mutex);
++ s += n;
++ break;
++ }
++ default:
++ retval = -EINVAL;
++ goto out;
++ }
++ *s++ = '\n';
++
++ retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
++out:
++ free_page((unsigned long)page);
++ return retval;
++}
++
++static ssize_t container_file_read(struct file *file, char __user *buf,
++ size_t nbytes, loff_t *ppos)
++{
++ struct cftype *cft = __d_cft(file->f_dentry);
++ struct container *cont = __d_cont(file->f_dentry->d_parent);
++
++ if (!cft)
++ return -ENODEV;
++
++ if (cft->read)
++ return cft->read(cont, cft, file, buf, nbytes, ppos);
++ if (cft->read_uint)
++ return container_read_uint(cont, cft, file, buf, nbytes, ppos);
++ return -EINVAL;
++}
++
++static int container_file_open(struct inode *inode, struct file *file)
++{
++ int err;
++ struct cftype *cft;
++
++ err = generic_file_open(inode, file);
++ if (err)
++ return err;
++
++ cft = __d_cft(file->f_dentry);
++ if (!cft)
++ return -ENODEV;
++ if (cft->open)
++ err = cft->open(inode, file);
++ else
++ err = 0;
++
++ return err;
++}
++
++static int container_file_release(struct inode *inode, struct file *file)
++{
++ struct cftype *cft = __d_cft(file->f_dentry);
++ if (cft->release)
++ return cft->release(inode, file);
++ return 0;
++}
++
++/*
++ * container_rename - Only allow simple rename of directories in place.
++ */
++static int container_rename(struct inode *old_dir, struct dentry *old_dentry,
++ struct inode *new_dir, struct dentry *new_dentry)
++{
++ if (!S_ISDIR(old_dentry->d_inode->i_mode))
++ return -ENOTDIR;
++ if (new_dentry->d_inode)
++ return -EEXIST;
++ if (old_dir != new_dir)
++ return -EIO;
++ return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
++}
++
++static struct file_operations container_file_operations = {
++ .read = container_file_read,
++ .write = container_file_write,
++ .llseek = generic_file_llseek,
++ .open = container_file_open,
++ .release = container_file_release,
++};
++
++static struct inode_operations container_dir_inode_operations = {
++ .lookup = simple_lookup,
++ .mkdir = container_mkdir,
++ .rmdir = container_rmdir,
++ .rename = container_rename,
++};
++
++static int container_create_file(struct dentry *dentry, int mode,
++ struct super_block *sb)
++{
++ struct inode *inode;
++
++ if (!dentry)
++ return -ENOENT;
++ if (dentry->d_inode)
++ return -EEXIST;
++
++ inode = container_new_inode(mode, sb);
++ if (!inode)
++ return -ENOMEM;
++
++ if (S_ISDIR(mode)) {
++ inode->i_op = &container_dir_inode_operations;
++ inode->i_fop = &simple_dir_operations;
++
++ /* start off with i_nlink == 2 (for "." entry) */
++ inc_nlink(inode);
++
++ /* start with the directory inode held, so that we can
++ * populate it without racing with another mkdir */
++ mutex_lock(&inode->i_mutex);
++ } else if (S_ISREG(mode)) {
++ inode->i_size = 0;
++ inode->i_fop = &container_file_operations;
++ }
++
++ d_instantiate(dentry, inode);
++ dget(dentry); /* Extra count - pin the dentry in core */
++ return 0;
++}
++
++/*
++ * container_create_dir - create a directory for an object.
++ * cont: the container we create the directory for.
++ * It must have a valid ->parent field
++ * And we are going to fill its ->dentry field.
++ * name: The name to give to the container directory. Will be copied.
++ * mode: mode to set on new directory.
++ */
++static int container_create_dir(struct container *cont, struct dentry *dentry,
++ int mode)
++{
++ struct dentry *parent;
++ int error = 0;
++
++ parent = cont->parent->dentry;
++ if (IS_ERR(dentry))
++ return PTR_ERR(dentry);
++ error = container_create_file(dentry, S_IFDIR | mode, cont->root->sb);
++ if (!error) {
++ dentry->d_fsdata = cont;
++ inc_nlink(parent->d_inode);
++ cont->dentry = dentry;
++ }
++ dput(dentry);
++
++ return error;
++}
++
++int container_add_file(struct container *cont, const struct cftype *cft)
++{
++ struct dentry *dir = cont->dentry;
++ struct dentry *dentry;
++ int error;
++
++ BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
++ dentry = container_get_dentry(dir, cft->name);
++ if (!IS_ERR(dentry)) {
++ error = container_create_file(dentry, 0644 | S_IFREG,
++ cont->root->sb);
++ if (!error)
++ dentry->d_fsdata = (void *)cft;
++ dput(dentry);
++ } else
++ error = PTR_ERR(dentry);
++ return error;
++}
++
++int container_add_files(struct container *cont, const struct cftype cft[],
++ int count)
++{
++ int i, err;
++ for (i = 0; i < count; i++) {
++ err = container_add_file(cont, &cft[i]);
++ if (err)
++ return err;
++ }
++ return 0;
++}
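++
++/*
++ * Example (illustrative; 'example_*' names are hypothetical): a
++ * subsystem's ->populate() callback typically just hands its cftype
++ * array to container_add_files():
++ *
++ *	static int example_populate(struct container_subsys *ss,
++ *				    struct container *cont)
++ *	{
++ *		return container_add_files(cont, example_cfts,
++ *					   ARRAY_SIZE(example_cfts));
++ *	}
++ */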
++
++/* Count the number of tasks in a container. */
++
++int container_task_count(const struct container *cont)
++{
++ int count = 0;
++ struct list_head *l;
++
++ read_lock(&css_group_lock);
++ l = cont->css_groups.next;
++ while (l != &cont->css_groups) {
++ struct cg_container_link *link =
++ list_entry(l, struct cg_container_link, cont_link_list);
++ count += atomic_read(&link->cg->ref.refcount);
++ l = l->next;
++ }
++ read_unlock(&css_group_lock);
++ return count;
++}
++
++/* Advance a container_iter to the next css_group that has attached
++ * tasks, walking the container's list of cg_container_links */
++static void container_advance_iter(struct container *cont,
++ struct container_iter *it)
++{
++ struct list_head *l = it->cg_link;
++ struct cg_container_link *link;
++ struct css_group *cg;
++
++ /* Advance to the next non-empty css_group */
++ do {
++ l = l->next;
++ if (l == &cont->css_groups) {
++ it->cg_link = NULL;
++ return;
++ }
++ link = list_entry(l, struct cg_container_link, cont_link_list);
++ cg = link->cg;
++ } while (list_empty(&cg->tasks));
++ it->cg_link = l;
++ it->task = cg->tasks.next;
++}
++
++void container_iter_start(struct container *cont, struct container_iter *it)
++{
++ read_lock(&css_group_lock);
++ it->cg_link = &cont->css_groups;
++ container_advance_iter(cont, it);
++}
++
++struct task_struct *container_iter_next(struct container *cont,
++ struct container_iter *it)
++{
++ struct task_struct *res;
++ struct list_head *l = it->task;
++
++ /* If the iterator cg is NULL, we have no tasks */
++ if (!it->cg_link)
++ return NULL;
++ res = list_entry(l, struct task_struct, cg_list);
++ /* Advance iterator to find next entry */
++ l = l->next;
++ if (l == &res->containers->tasks) {
++ /* We reached the end of this task list - move on to
++ * the next cg_container_link */
++ container_advance_iter(cont, it);
++ } else {
++ it->task = l;
++ }
++ return res;
++}
++
++void container_iter_end(struct container *cont, struct container_iter *it)
++{
++ read_unlock(&css_group_lock);
++}
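++
++/*
++ * Example of the iterator protocol (pid_array_load() below is a real
++ * user): start/next/end must bracket the walk, and css_group_lock is
++ * read-held for its whole duration, so the loop body must not sleep:
++ *
++ *	struct container_iter it;
++ *	struct task_struct *p;
++ *
++ *	container_iter_start(cont, &it);
++ *	while ((p = container_iter_next(cont, &it))) {
++ *		// inspect p; no blocking here
++ *	}
++ *	container_iter_end(cont, &it);
++ */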
++
++/*
++ * Stuff for reading the 'tasks' file.
++ *
++ * Reading this file can return large amounts of data if a container has
++ * *lots* of attached tasks. So it may need several calls to read(),
++ * but we cannot guarantee that the information we produce is correct
++ * unless we produce it entirely atomically.
++ *
++ * Upon tasks file open(), a struct ctr_struct is allocated, that
++ * will have a pointer to an array (also allocated here). The struct
++ * ctr_struct * is stored in file->private_data. Its resources will
++ * be freed by release() when the file is closed. The array is used
++ * to sprintf the PIDs and then used by read().
++ */
++struct ctr_struct {
++ char *buf;
++ int bufsz;
++};
++
++/*
++ * Load into 'pidarray' up to 'npids' of the tasks using container
++ * 'cont'. Return actual number of pids loaded. No need to
++ * task_lock(p) when reading out p->containers, since we're in an RCU
++ * read section, so the css_group can't go away, and is
++ * immutable after creation.
++ */
++static int pid_array_load(pid_t *pidarray, int npids, struct container *cont)
++{
++ int n = 0;
++ struct container_iter it;
++ struct task_struct *tsk;
++ container_iter_start(cont, &it);
++ while ((tsk = container_iter_next(cont, &it))) {
++ if (unlikely(n == npids))
++ break;
++ pidarray[n++] = pid_nr(task_pid(tsk));
++ }
++ container_iter_end(cont, &it);
++ return n;
++}
++
++static int cmppid(const void *a, const void *b)
++{
++ return *(pid_t *)a - *(pid_t *)b;
++}
++
++/*
++ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
++ * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
++ * count 'cnt' of how many chars would be written if buf were large enough.
++ */
++static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
++{
++ int cnt = 0;
++ int i;
++
++ for (i = 0; i < npids; i++)
++ cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
++ return cnt;
++}
++
++/*
++ * Handle an open on 'tasks' file. Prepare a buffer listing the
++ * process id's of tasks currently attached to the container being opened.
++ *
++ * Does not require any specific container mutexes, and does not take any.
++ */
++static int container_tasks_open(struct inode *unused, struct file *file)
++{
++ struct container *cont = __d_cont(file->f_dentry->d_parent);
++ struct ctr_struct *ctr;
++ pid_t *pidarray;
++ int npids;
++ char c;
++
++ if (!(file->f_mode & FMODE_READ))
++ return 0;
++
++ ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
++ if (!ctr)
++ goto err0;
++
++ /*
++ * If container gets more users after we read count, we won't have
++ * enough space - tough. This race is indistinguishable to the
++ * caller from the case that the additional container users didn't
++ * show up until sometime later on.
++ */
++ npids = container_task_count(cont);
++ if (npids) {
++ pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
++ if (!pidarray)
++ goto err1;
++
++ npids = pid_array_load(pidarray, npids, cont);
++ sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
++
++ /* Call pid_array_to_buf() twice, first just to get bufsz */
++ ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
++ ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
++ if (!ctr->buf)
++ goto err2;
++ ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
++
++ kfree(pidarray);
++ } else {
++ ctr->buf = NULL;
++ ctr->bufsz = 0;
++ }
++ file->private_data = ctr;
++ return 0;
++
++err2:
++ kfree(pidarray);
++err1:
++ kfree(ctr);
++err0:
++ return -ENOMEM;
++}
++
++static ssize_t container_tasks_read(struct container *cont,
++ struct cftype *cft,
++ struct file *file, char __user *buf,
++ size_t nbytes, loff_t *ppos)
++{
++ struct ctr_struct *ctr = file->private_data;
++
++ return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
++}
++
++static int container_tasks_release(struct inode *unused_inode,
++ struct file *file)
++{
++ struct ctr_struct *ctr;
++
++ if (file->f_mode & FMODE_READ) {
++ ctr = file->private_data;
++ kfree(ctr->buf);
++ kfree(ctr);
++ }
++ return 0;
++}
++
++static u64 container_read_notify_on_release(struct container *cont,
++ struct cftype *cft)
++{
++ return notify_on_release(cont);
++}
++
++static u64 container_read_releasable(struct container *cont, struct cftype *cft)
++{
++ return test_bit(CONT_RELEASABLE, &cont->flags);
++}
++
++/*
++ * for the common functions, 'private' gives the type of file
++ */
++static struct cftype files[] = {
++ {
++ .name = "tasks",
++ .open = container_tasks_open,
++ .read = container_tasks_read,
++ .write = container_common_file_write,
++ .release = container_tasks_release,
++ .private = FILE_TASKLIST,
++ },
++
++ {
++ .name = "notify_on_release",
++ .read_uint = container_read_notify_on_release,
++ .write = container_common_file_write,
++ .private = FILE_NOTIFY_ON_RELEASE,
++ },
++
++ {
++ .name = "releasable",
++ .read_uint = container_read_releasable,
++ .private = FILE_RELEASABLE,
++ }
++};
++
++static struct cftype cft_release_agent = {
++ .name = "release_agent",
++ .read = container_common_file_read,
++ .write = container_common_file_write,
++ .private = FILE_RELEASE_AGENT,
++};
++
++static int container_populate_dir(struct container *cont)
++{
++ int err;
++ struct container_subsys *ss;
++
++ /* First clear out any existing files */
++ container_clear_directory(cont->dentry);
++
++ err = container_add_files(cont, files, ARRAY_SIZE(files));
++ if (err < 0)
++ return err;
++
++ if (cont == cont->top_container) {
++ if ((err = container_add_file(cont, &cft_release_agent)) < 0)
++ return err;
++ }
++
++ for_each_subsys(cont->root, ss) {
++ if (ss->populate && (err = ss->populate(ss, cont)) < 0)
++ return err;
++ }
++
++ return 0;
++}
++
++static void init_container_css(struct container_subsys *ss,
++ struct container *cont)
++{
++ struct container_subsys_state *css = cont->subsys[ss->subsys_id];
++ css->container = cont;
++ atomic_set(&css->refcnt, 0);
++}
++
++/*
++ * container_create - create a container
++ * parent: container that will be parent of the new container.
++ * name: name of the new container. Will be strcpy'ed.
++ * mode: mode to set on new inode
++ *
++ * Must be called with the mutex on the parent inode held
++ */
++
++static long container_create(struct container *parent, struct dentry *dentry,
++ int mode)
++{
++ struct container *cont;
++ struct containerfs_root *root = parent->root;
++ int err = 0;
++ struct container_subsys *ss;
++ struct super_block *sb = root->sb;
++
++ cont = kzalloc(sizeof(*cont), GFP_KERNEL);
++ if (!cont)
++ return -ENOMEM;
++
++ /* Grab a reference on the superblock so the hierarchy doesn't
++ * get deleted on unmount if there are child containers. This
++ * can be done outside container_mutex, since the sb can't
++ * disappear while someone has an open control file on the
++ * fs */
++ atomic_inc(&sb->s_active);
++
++ mutex_lock(&container_mutex);
++
++ cont->flags = 0;
++ INIT_LIST_HEAD(&cont->sibling);
++ INIT_LIST_HEAD(&cont->children);
++ INIT_LIST_HEAD(&cont->css_groups);
++ INIT_LIST_HEAD(&cont->release_list);
++
++ cont->parent = parent;
++ cont->root = parent->root;
++ cont->top_container = parent->top_container;
++
++ for_each_subsys(root, ss) {
++ err = ss->create(ss, cont);
++ if (err)
++ goto err_destroy;
++ init_container_css(ss, cont);
++ }
++
++ list_add(&cont->sibling, &cont->parent->children);
++ root->number_of_containers++;
++
++ err = container_create_dir(cont, dentry, mode);
++ if (err < 0)
++ goto err_remove;
++
++ /* The container directory was pre-locked for us */
++ BUG_ON(!mutex_is_locked(&cont->dentry->d_inode->i_mutex));
++
++ err = container_populate_dir(cont);
++ /* If err < 0, we have a half-filled directory - oh well ;) */
++
++ mutex_unlock(&container_mutex);
++ mutex_unlock(&cont->dentry->d_inode->i_mutex);
++
++ return 0;
++
++ err_remove:
++
++ list_del(&cont->sibling);
++ root->number_of_containers--;
++
++ err_destroy:
++
++ for_each_subsys(root, ss) {
++ if (cont->subsys[ss->subsys_id])
++ ss->destroy(ss, cont);
++ }
++
++ mutex_unlock(&container_mutex);
++
++ /* Release the reference count that we took on the superblock */
++ deactivate_super(sb);
++
++ kfree(cont);
++ return err;
++}
++
++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode)
++{
++ struct container *c_parent = dentry->d_parent->d_fsdata;
++
++ /* the vfs holds inode->i_mutex already */
++ return container_create(c_parent, dentry, mode | S_IFDIR);
++}
++
++static inline int container_has_css_refs(struct container *cont)
++{
++ /* Check the reference count on each subsystem. Since we
++ * already established that there are no tasks in the
++ * container, if the css refcount is also 0, then there should
++ * be no outstanding references, so the subsystem is safe to
++ * destroy */
++ struct container_subsys *ss;
++ for_each_subsys(cont->root, ss) {
++ struct container_subsys_state *css;
++ css = cont->subsys[ss->subsys_id];
++ if (atomic_read(&css->refcnt)) {
++ return 1;
++ }
++ }
++ return 0;
++}
++
++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry)
++{
++ struct container *cont = dentry->d_fsdata;
++ struct dentry *d;
++ struct container *parent;
++ struct container_subsys *ss;
++ struct super_block *sb;
++ struct containerfs_root *root;
++
++ /* the vfs holds both inode->i_mutex already */
++
++ mutex_lock(&container_mutex);
++ if (atomic_read(&cont->count) != 0) {
++ mutex_unlock(&container_mutex);
++ return -EBUSY;
++ }
++ if (!list_empty(&cont->children)) {
++ mutex_unlock(&container_mutex);
++ return -EBUSY;
++ }
++
++ parent = cont->parent;
++ root = cont->root;
++ sb = root->sb;
++
++ if (container_has_css_refs(cont)) {
++ mutex_unlock(&container_mutex);
++ return -EBUSY;
++ }
++
++ for_each_subsys(root, ss) {
++ if (cont->subsys[ss->subsys_id])
++ ss->destroy(ss, cont);
++ }
++
++ set_bit(CONT_REMOVED, &cont->flags);
++ /* delete my sibling from parent->children */
++ list_del(&cont->sibling);
++ spin_lock(&cont->dentry->d_lock);
++ d = dget(cont->dentry);
++ cont->dentry = NULL;
++ spin_unlock(&d->d_lock);
++
++ container_d_remove_dir(d);
++ dput(d);
++ root->number_of_containers--;
++
++ if (!list_empty(&cont->release_list))
++ list_del(&cont->release_list);
++ set_bit(CONT_RELEASABLE, &parent->flags);
++ check_for_release(parent);
++
++ mutex_unlock(&container_mutex);
++ /* Drop the active superblock reference that we took when we
++ * created the container */
++ deactivate_super(sb);
++ return 0;
++}
++
++static void container_init_subsys(struct container_subsys *ss)
++{
++ int retval;
++ struct container_subsys_state *css;
++ struct list_head *l;
++ printk(KERN_INFO "Initializing container subsys %s\n", ss->name);
++
++ /* Create the top container state for this subsystem */
++ ss->root = &rootnode;
++ retval = ss->create(ss, dummytop);
++ BUG_ON(retval);
++ BUG_ON(!dummytop->subsys[ss->subsys_id]);
++ init_container_css(ss, dummytop);
++ css = dummytop->subsys[ss->subsys_id];
++
++ /* Update all container groups to contain a subsys
++ * pointer to this state - since the subsystem is
++ * newly registered, all tasks and hence all container
++ * groups are in the subsystem's top container. */
++ write_lock(&css_group_lock);
++ l = &init_css_group.list;
++ do {
++ struct css_group *cg =
++ list_entry(l, struct css_group, list);
++ cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
++ l = l->next;
++ } while (l != &init_css_group.list);
++ write_unlock(&css_group_lock);
++
++ /* If this subsystem requested that it be notified with fork
++ * events, we should send it one now for every process in the
++ * system */
++ if (ss->fork) {
++ struct task_struct *g, *p;
++
++ read_lock(&tasklist_lock);
++ do_each_thread(g, p) {
++ ss->fork(ss, p);
++ } while_each_thread(g, p);
++ read_unlock(&tasklist_lock);
++ }
++
++ need_forkexit_callback |= ss->fork || ss->exit;
++
++ ss->active = 1;
++}
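++
++/*
++ * Illustrative sketch ('example_*' names are hypothetical): the
++ * minimum a subsystem must supply to pass the checks in
++ * container_init_early() and be initialized here is a name, a
++ * subsys_id matching its slot in subsys[], and create/destroy
++ * callbacks:
++ *
++ *	static struct container_subsys example_subsys = {
++ *		.name = "example",
++ *		.subsys_id = example_subsys_id,
++ *		.create = example_create,
++ *		.destroy = example_destroy,
++ *	};
++ */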
++
++/**
++ * container_init_early - initialize containers at system boot, and
++ * initialize any subsystems that request early init.
++ */
++int __init container_init_early(void)
++{
++ int i;
++ kref_init(&init_css_group.ref);
++ kref_get(&init_css_group.ref);
++ INIT_LIST_HEAD(&init_css_group.list);
++ INIT_LIST_HEAD(&init_css_group.cg_links);
++ INIT_LIST_HEAD(&init_css_group.tasks);
++ css_group_count = 1;
++ init_container_root(&rootnode);
++ init_task.containers = &init_css_group;
++
++ init_css_group_link.cg = &init_css_group;
++ list_add(&init_css_group_link.cont_link_list,
++ &rootnode.top_container.css_groups);
++ list_add(&init_css_group_link.cg_link_list,
++ &init_css_group.cg_links);
++
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++
++ BUG_ON(!ss->name);
++ BUG_ON(strlen(ss->name) > MAX_CONTAINER_TYPE_NAMELEN);
++ BUG_ON(!ss->create);
++ BUG_ON(!ss->destroy);
++ if (ss->subsys_id != i) {
++ printk(KERN_ERR "Subsys %s id == %d\n",
++ ss->name, ss->subsys_id);
++ BUG();
++ }
++
++ if (ss->early_init)
++ container_init_subsys(ss);
++ }
++ return 0;
++}
++
++/**
++ * container_init - register container filesystem and /proc file, and
++ * initialize any subsystems that didn't request early init.
++ */
++int __init container_init(void)
++{
++ int err;
++ int i;
++ struct proc_dir_entry *entry;
++
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++ if (!ss->early_init)
++ container_init_subsys(ss);
++ }
++
++ err = register_filesystem(&container_fs_type);
++ if (err < 0)
++ goto out;
++
++ entry = create_proc_entry("containers", 0, NULL);
++ if (entry)
++ entry->proc_fops = &proc_containerstats_operations;
++
++out:
++ return err;
++}
++
++/*
++ * proc_container_show()
++ * - Print task's container paths into seq_file, one line for each hierarchy
++ * - Used for /proc/<pid>/container.
++ * - No need to task_lock(tsk) on this tsk->containers reference, as it
++ * doesn't really matter if tsk->containers changes after we read it,
++ * and we take container_mutex, keeping attach_task() from changing it
++ * anyway. No need to check that tsk->containers != NULL, thanks to
++ * the_top_container_hack in container_exit(), which sets an exiting task's
++ * container to top_container.
++ */
++
++/* TODO: Use a proper seq_file iterator */
++static int proc_container_show(struct seq_file *m, void *v)
++{
++ struct pid *pid;
++ struct task_struct *tsk;
++ char *buf;
++ int retval;
++ struct containerfs_root *root;
++
++ retval = -ENOMEM;
++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!buf)
++ goto out;
++
++ retval = -ESRCH;
++ pid = m->private;
++ tsk = get_pid_task(pid, PIDTYPE_PID);
++ if (!tsk)
++ goto out_free;
++
++ retval = 0;
++
++ mutex_lock(&container_mutex);
++
++ for_each_root(root) {
++ struct container_subsys *ss;
++ struct container *cont;
++ int subsys_id;
++ int count = 0;
++
++ /* Skip this hierarchy if it has no active subsystems */
++ if (!root->subsys_bits)
++ continue;
++ for_each_subsys(root, ss)
++ seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
++ seq_putc(m, ':');
++ get_first_subsys(&root->top_container, NULL, &subsys_id);
++ cont = task_container(tsk, subsys_id);
++ retval = container_path(cont, buf, PAGE_SIZE);
++ if (retval < 0)
++ goto out_unlock;
++ seq_puts(m, buf);
++ seq_putc(m, '\n');
++ }
++
++out_unlock:
++ mutex_unlock(&container_mutex);
++ put_task_struct(tsk);
++out_free:
++ kfree(buf);
++out:
++ return retval;
++}
++
++static int container_open(struct inode *inode, struct file *file)
++{
++ struct pid *pid = PROC_I(inode)->pid;
++ return single_open(file, proc_container_show, pid);
++}
++
++struct file_operations proc_container_operations = {
++ .open = container_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
++/* Display information about each subsystem and each hierarchy */
++static int proc_containerstats_show(struct seq_file *m, void *v)
++{
++ int i;
++ struct containerfs_root *root;
++
++ mutex_lock(&container_mutex);
++ seq_puts(m, "Hierarchies:\n");
++ for_each_root(root) {
++ struct container_subsys *ss;
++ int first = 1;
++ seq_printf(m, "%p: bits=%lx containers=%d (", root,
++ root->subsys_bits, root->number_of_containers);
++ for_each_subsys(root, ss) {
++ seq_printf(m, "%s%s", first ? "" : ", ", ss->name);
++ first = 0;
++ }
++ seq_putc(m, ')');
++ if (root->sb) {
++ seq_printf(m, " s_active=%d",
++ atomic_read(&root->sb->s_active));
++ }
++ seq_putc(m, '\n');
++ }
++ seq_puts(m, "Subsystems:\n");
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++ seq_printf(m, "%d: name=%s hierarchy=%p\n",
++ i, ss->name, ss->root);
++ }
++ seq_printf(m, "Container groups: %d\n", css_group_count);
++ mutex_unlock(&container_mutex);
++ return 0;
++}
++
++static int containerstats_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, proc_containerstats_show, 0);
++}
++
++static struct file_operations proc_containerstats_operations = {
++ .open = containerstats_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
++/**
++ * container_fork - attach a newly forked task to its parent's container.
++ * @child: pointer to task_struct of the newly forked child process.
++ *
++ * Description: A task inherits its parent's container at fork().
++ *
++ * A pointer to the shared css_group was automatically copied in
++ * fork.c by dup_task_struct(). However, we ignore that copy, since
++ * it was not made under the protection of RCU or container_mutex, so
++ * might no longer be a valid container pointer. attach_task() might
++ * have already changed current->containers, allowing the previously
++ * referenced container group to be removed and freed.
++ *
++ * At the point that container_fork() is called, 'current' is the parent
++ * task, and the passed argument 'child' points to the child task.
++ */
++void container_fork(struct task_struct *child)
++{
++ write_lock(&css_group_lock);
++ child->containers = current->containers;
++ get_css_group(child->containers);
++ list_add(&child->cg_list, &child->containers->tasks);
++ write_unlock(&css_group_lock);
++}
++
++/**
++ * container_fork_callbacks - called on a new task very soon before
++ * adding it to the tasklist. No need to take any locks since no-one
++ * can be operating on this task
++ */
++void container_fork_callbacks(struct task_struct *child)
++{
++ if (need_forkexit_callback) {
++ int i;
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++ if (ss->fork)
++ ss->fork(ss, child);
++ }
++ }
++}
++
++/**
++ * container_exit - detach container from exiting task
++ * @tsk: pointer to task_struct of exiting process
++ *
++ * Description: Detach container from @tsk and release it.
++ *
++ * Note that containers marked notify_on_release force every task in
++ * them to take the global container_mutex mutex when exiting.
++ * This could impact scaling on very large systems. Be reluctant to
++ * use notify_on_release containers where very high task exit scaling
++ * is required on large systems.
++ *
++ * the_top_container_hack:
++ *
++ * Set the exiting task's container to the root container (top_container).
++ *
++ * We call container_exit() while the task is still competent to
++ * handle notify_on_release(), then leave the task attached to the
++ * root container in each hierarchy for the remainder of its exit.
++ *
++ * To do this properly, we would increment the reference count on
++ * top_container, and near the very end of the kernel/exit.c do_exit()
++ * code we would add a second container function call, to drop that
++ * reference. This would just create an unnecessary hot spot on
++ * the top_container reference count, to no avail.
++ *
++ * Normally, holding a reference to a container without bumping its
++ * count is unsafe. The container could go away, or someone could
++ * attach us to a different container, decrementing the count on
++ * the first container that we never incremented. But in this case,
++ * top_container isn't going away, and either task has PF_EXITING set,
++ * which wards off any attach_task() attempts, or task is a failed
++ * fork, never visible to attach_task.
++ *
++ */
++void container_exit(struct task_struct *tsk, int run_callbacks)
++{
++ int i;
++ struct css_group *cg = NULL;
++
++ if (run_callbacks && need_forkexit_callback) {
++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++ struct container_subsys *ss = subsys[i];
++ if (ss->exit)
++ ss->exit(ss, tsk);
++ }
++ }
++
++ /* Reassign the task to the init_css_group. */
++ task_lock(tsk);
++ write_lock(&css_group_lock);
++ list_del(&tsk->cg_list);
++ write_unlock(&css_group_lock);
++
++ cg = tsk->containers;
++ tsk->containers = &init_css_group;
++ task_unlock(tsk);
++ if (cg)
++ put_css_group_taskexit(cg);
++}
++
++/**
++ * container_clone - duplicate the current container in the hierarchy
++ * that the given subsystem is attached to, and move this task into
++ * the new child
++ */
++int container_clone(struct task_struct *tsk, struct container_subsys *subsys)
++{
++ struct dentry *dentry;
++ int ret = 0;
++ char nodename[MAX_CONTAINER_TYPE_NAMELEN];
++ struct container *parent, *child;
++ struct inode *inode;
++ struct css_group *cg;
++ struct containerfs_root *root;
++ struct container_subsys *ss;
++
++ /* We shouldn't be called by an unregistered subsystem */
++ BUG_ON(!subsys->active);
++
++ /* First figure out what hierarchy and container we're dealing
++ * with, and pin them so we can drop container_mutex */
++ mutex_lock(&container_mutex);
++ again:
++ root = subsys->root;
++ if (root == &rootnode) {
++ printk(KERN_INFO
++ "Not cloning container for unused subsystem %s\n",
++ subsys->name);
++ mutex_unlock(&container_mutex);
++ return 0;
++ }
++ cg = tsk->containers;
++ parent = task_container(tsk, subsys->subsys_id);
++
++ snprintf(nodename, MAX_CONTAINER_TYPE_NAMELEN, "node_%d", tsk->pid);
++
++ /* Pin the hierarchy */
++ atomic_inc(&parent->root->sb->s_active);
++
++ /* Keep the container alive */
++ get_css_group(cg);
++ mutex_unlock(&container_mutex);
++
++ /* Now do the VFS work to create a container */
++ inode = parent->dentry->d_inode;
++
++ /* Hold the parent directory mutex across this operation to
++ * stop anyone else deleting the new container */
++ mutex_lock(&inode->i_mutex);
++ dentry = container_get_dentry(parent->dentry, nodename);
++ if (IS_ERR(dentry)) {
++ printk(KERN_INFO
++ "Couldn't allocate dentry for %s: %ld\n", nodename,
++ PTR_ERR(dentry));
++ ret = PTR_ERR(dentry);
++ goto out_release;
++ }
++
++ /* Create the container directory, which also creates the container */
++ ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755, NULL);
++ child = __d_cont(dentry);
++ dput(dentry);
++ if (ret) {
++ printk(KERN_INFO
++ "Failed to create container %s: %d\n", nodename,
++ ret);
++ goto out_release;
++ }
++
++ if (!child) {
++ printk(KERN_INFO
++ "Couldn't find new container %s\n", nodename);
++ ret = -ENOMEM;
++ goto out_release;
++ }
++
++ /* The container now exists. Retake container_mutex and check
++ * that we're still in the same state that we thought we
++ * were. */
++ mutex_lock(&container_mutex);
++ if ((root != subsys->root) ||
++ (parent != task_container(tsk, subsys->subsys_id))) {
++ /* Aargh, we raced ... */
++ mutex_unlock(&inode->i_mutex);
++ put_css_group(cg);
++
++ deactivate_super(parent->root->sb);
++ /* The container is still accessible in the VFS, but
++ * we're not going to try to rmdir() it at this
++ * point. */
++ printk(KERN_INFO
++ "Race in container_clone() - leaking container %s\n",
++ nodename);
++ goto again;
++ }
++
++ /* do any required auto-setup */
++ for_each_subsys(root, ss) {
++ if (ss->post_clone)
++ ss->post_clone(ss, child);
++ }
++
++ /* All seems fine. Finish by moving the task into the new container */
++ ret = attach_task(child, tsk);
++ mutex_unlock(&container_mutex);
++
++ out_release:
++ mutex_unlock(&inode->i_mutex);
++
++ mutex_lock(&container_mutex);
++ put_css_group(cg);
++ mutex_unlock(&container_mutex);
++ deactivate_super(parent->root->sb);
++ return ret;
++}
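++
++/*
++ * A subsystem that wants each new task grouping to get its own child
++ * container can call this from its own code paths, e.g. (hypothetical
++ * caller, with my_subsys standing in for a real subsystem):
++ *
++ *     err = container_clone(current, &my_subsys);
++ *     if (err)
++ *         return err;
++ *
++ * On success the task is attached to a fresh "node_<pid>" child of
++ * its previous container in my_subsys's hierarchy.
++ */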
++
++/* See if "cont" is a descendant of the current task's container in
++ * the appropriate hierarchy */
++
++int container_is_descendant(const struct container *cont)
++{
++ int ret;
++ struct container *target;
++ int subsys_id;
++
++ if (cont == dummytop)
++ return 1;
++ get_first_subsys(cont, NULL, &subsys_id);
++ target = task_container(current, subsys_id);
++ while (cont != target && cont != cont->top_container) {
++ cont = cont->parent;
++ }
++ ret = (cont == target);
++ return ret;
++}
++
++static void check_for_release(struct container *cont)
++{
++ BUG_ON(!mutex_is_locked(&container_mutex));
++ if (container_is_releasable(cont) && !atomic_read(&cont->count)
++ && list_empty(&cont->children) && !container_has_css_refs(cont)) {
++ /* Container is currently removable. If it's not
++ * already queued for a userspace notification, queue
++ * it now */
++ if (list_empty(&cont->release_list)) {
++ list_add(&cont->release_list, &release_list);
++ schedule_work(&release_agent_work);
++ }
++ }
++}
++
++void css_put(struct container_subsys_state *css)
++{
++ struct container *cont = css->container;
++ if (notify_on_release(cont)) {
++ mutex_lock(&container_mutex);
++ set_bit(CONT_RELEASABLE, &cont->flags);
++ if (atomic_dec_and_test(&css->refcnt)) {
++ check_for_release(cont);
++ }
++ mutex_unlock(&container_mutex);
++ } else {
++ atomic_dec(&css->refcnt);
++ }
++}
++
++void container_set_release_agent_path(struct container_subsys *ss,
++ const char *path)
++{
++ mutex_lock(&container_mutex);
++ strcpy(ss->root->release_agent_path, path);
++ mutex_unlock(&container_mutex);
++}
++
++/*
++ * Notify userspace when a container is released, by running the
++ * configured release agent with the name of the container (path
++ * relative to the root of container file system) as the argument.
++ *
++ * Most likely, this user command will try to rmdir this container.
++ *
++ * This races with the possibility that some other task will be
++ * attached to this container before it is removed, or that some other
++ * user task will 'mkdir' a child container of this container. That's ok.
++ * The presumed 'rmdir' will fail quietly if this container is no longer
++ * unused, and this container will be reprieved from its death sentence,
++ * to continue to serve a useful existence. Next time it's released,
++ * we will get notified again, if it still has 'notify_on_release' set.
++ *
++ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
++ * means only wait until the task is successfully execve()'d. The
++ * separate release agent task is forked by call_usermodehelper(),
++ * then control in this thread returns here, without waiting for the
++ * release agent task. We don't bother to wait because the caller of
++ * this routine has no use for the exit status of the release agent
++ * task, so no sense holding our caller up for that.
++ *
++ */
++
++static void container_release_agent(struct work_struct *work)
++{
++ BUG_ON(work != &release_agent_work);
++ mutex_lock(&container_mutex);
++ while (!list_empty(&release_list)) {
++ char *argv[3], *envp[3];
++ int i;
++ char *pathbuf;
++ struct container *cont = list_entry(release_list.next,
++ struct container,
++ release_list);
++ list_del_init(&cont->release_list);
++
++ pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if (!pathbuf)
++ continue;
++
++ if (container_path(cont, pathbuf, PAGE_SIZE) < 0) {
++ kfree(pathbuf);
++ continue;
++ }
++
++ i = 0;
++ argv[i++] = cont->root->release_agent_path;
++ argv[i++] = (char *)pathbuf;
++ argv[i] = NULL;
++
++ i = 0;
++ /* minimal command environment */
++ envp[i++] = "HOME=/";
++ envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
++ envp[i] = NULL;
++
++ /* Drop the lock while we invoke the usermode helper,
++ * since the exec could involve hitting disk and hence
++ * be a slow process */
++ mutex_unlock(&container_mutex);
++ call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
++ kfree(pathbuf);
++ mutex_lock(&container_mutex);
++ }
++ mutex_unlock(&container_mutex);
++}
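++
++/*
++ * A release agent is an ordinary userspace program. A minimal one
++ * (illustrative only; the mount point /containers is an assumption,
++ * chosen by whoever mounted the hierarchy) could be:
++ *
++ *     int main(int argc, char **argv)
++ *     {
++ *         char buf[4096];
++ *         if (argc < 2)
++ *             return 1;
++ *         snprintf(buf, sizeof(buf), "/containers%s", argv[1]);
++ *         return rmdir(buf) ? 1 : 0;
++ *     }
++ *
++ * argv[1] is the container's path relative to the hierarchy root, as
++ * built by container_path() above.
++ */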
+diff -Nurb linux-2.6.22-570/kernel/container_debug.c linux-2.6.22-591/kernel/container_debug.c
+--- linux-2.6.22-570/kernel/container_debug.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/container_debug.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,89 @@
++/*
++ * kernel/container_debug.c - Example container subsystem that
++ * exposes debug info
++ *
++ * Copyright (C) Google Inc, 2007
++ *
++ * Developed by Paul Menage (menage@google.com)
++ *
++ */
++
++#include <linux/container.h>
++#include <linux/fs.h>
++
++static int debug_create(struct container_subsys *ss, struct container *cont)
++{
++ struct container_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
++ if (!css)
++ return -ENOMEM;
++ cont->subsys[debug_subsys_id] = css;
++ return 0;
++}
++
++static void debug_destroy(struct container_subsys *ss, struct container *cont)
++{
++ kfree(cont->subsys[debug_subsys_id]);
++}
++
++static u64 container_refcount_read(struct container *cont, struct cftype *cft)
++{
++ return atomic_read(&cont->count);
++}
++
++static u64 taskcount_read(struct container *cont, struct cftype *cft)
++{
++ u64 count;
++ container_lock();
++ count = container_task_count(cont);
++ container_unlock();
++ return count;
++}
++
++static u64 current_css_group_read(struct container *cont, struct cftype *cft)
++{
++ return (u64) current->containers;
++}
++
++static u64 current_css_group_refcount_read(struct container *cont,
++ struct cftype *cft)
++{
++ u64 count;
++ rcu_read_lock();
++ count = atomic_read(&current->containers->ref.refcount);
++ rcu_read_unlock();
++ return count;
++}
++
++static struct cftype files[] = {
++ {
++ .name = "debug.container_refcount",
++ .read_uint = container_refcount_read,
++ },
++ {
++ .name = "debug.taskcount",
++ .read_uint = taskcount_read,
++ },
++ {
++ .name = "debug.current_css_group",
++ .read_uint = current_css_group_read,
++ },
++ {
++ .name = "debug.current_css_group_refcount",
++ .read_uint = current_css_group_refcount_read,
++ },
++};
++
++static int debug_populate(struct container_subsys *ss, struct container *cont)
++{
++ return container_add_files(cont, files, ARRAY_SIZE(files));
++}
++
++struct container_subsys debug_subsys = {
++ .name = "debug",
++ .create = debug_create,
++ .destroy = debug_destroy,
++ .populate = debug_populate,
++ .subsys_id = debug_subsys_id,
++};
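++
++/*
++ * Example userspace usage (a sketch: it assumes subsystems are
++ * selected via mount options and that /containers is the chosen
++ * mount point):
++ *
++ *     # mount -t container -o debug none /containers
++ *     # cat /containers/debug.taskcount
++ *
++ * Each file read maps onto one entry in files[] above.
++ */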
+diff -Nurb linux-2.6.22-570/kernel/cpu_acct.c linux-2.6.22-591/kernel/cpu_acct.c
+--- linux-2.6.22-570/kernel/cpu_acct.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/cpu_acct.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,185 @@
++/*
++ * kernel/cpu_acct.c - CPU accounting container subsystem
++ *
++ * Copyright (C) Google Inc, 2006
++ *
++ * Developed by Paul Menage (menage@google.com) and Balbir Singh
++ * (balbir@in.ibm.com)
++ *
++ */
++
++/*
++ * Example container subsystem for reporting total CPU usage of tasks in a
++ * container, along with percentage load over a time interval
++ */
++
++#include <linux/module.h>
++#include <linux/container.h>
++#include <linux/fs.h>
++#include <asm/div64.h>
++
++struct cpuacct {
++ struct container_subsys_state css;
++ spinlock_t lock;
++ /* total time used by this class */
++ cputime64_t time;
++
++ /* time when next load calculation occurs */
++ u64 next_interval_check;
++
++ /* time used in current period */
++ cputime64_t current_interval_time;
++
++ /* time used in last period */
++ cputime64_t last_interval_time;
++};
++
++struct container_subsys cpuacct_subsys;
++
++static inline struct cpuacct *container_ca(struct container *cont)
++{
++ return container_of(container_subsys_state(cont, cpuacct_subsys_id),
++ struct cpuacct, css);
++}
++
++static inline struct cpuacct *task_ca(struct task_struct *task)
++{
++ return container_of(task_subsys_state(task, cpuacct_subsys_id),
++ struct cpuacct, css);
++}
++
++#define INTERVAL (HZ * 10)
++
++static inline u64 next_interval_boundary(u64 now)
++{
++ /* calculate the next interval boundary beyond the
++ * current time */
++ do_div(now, INTERVAL);
++ return (now + 1) * INTERVAL;
++}
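++
++/*
++ * Worked example: with HZ == 1000, INTERVAL is 10000 jiffies. For
++ * now == 12345, do_div() leaves now == 1, so the function returns
++ * (1 + 1) * 10000 == 20000, the first boundary after 12345.
++ */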
++
++static int cpuacct_create(struct container_subsys *ss, struct container *cont)
++{
++ struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
++ if (!ca)
++ return -ENOMEM;
++ spin_lock_init(&ca->lock);
++ ca->next_interval_check = next_interval_boundary(get_jiffies_64());
++ cont->subsys[cpuacct_subsys_id] = &ca->css;
++ return 0;
++}
++
++static void cpuacct_destroy(struct container_subsys *ss,
++ struct container *cont)
++{
++ kfree(container_ca(cont));
++}
++
++/* Lazily update the load calculation if necessary. Called with ca locked */
++static void cpuusage_update(struct cpuacct *ca)
++{
++ u64 now = get_jiffies_64();
++ /* If we're not due for an update, return */
++ if (ca->next_interval_check > now)
++ return;
++
++ if (ca->next_interval_check <= (now - INTERVAL)) {
++ /* If it's been more than an interval since the last
++ * check, then catch up - the last interval must have
++ * been zero load */
++ ca->last_interval_time = 0;
++ ca->next_interval_check = next_interval_boundary(now);
++ } else {
++ /* If a steal takes the last interval time negative,
++ * then we just ignore it */
++ if ((s64)ca->current_interval_time > 0) {
++ ca->last_interval_time = ca->current_interval_time;
++ } else {
++ ca->last_interval_time = 0;
++ }
++ ca->next_interval_check += INTERVAL;
++ }
++ ca->current_interval_time = 0;
++}
++
++static u64 cpuusage_read(struct container *cont,
++ struct cftype *cft)
++{
++ struct cpuacct *ca = container_ca(cont);
++ u64 time;
++
++ spin_lock_irq(&ca->lock);
++ cpuusage_update(ca);
++ time = cputime64_to_jiffies64(ca->time);
++ spin_unlock_irq(&ca->lock);
++
++ /* Convert 64-bit jiffies to milliseconds */
++ time *= 1000;
++ do_div(time, HZ);
++ return time;
++}
++
++static u64 load_read(struct container *cont,
++ struct cftype *cft)
++{
++ struct cpuacct *ca = container_ca(cont);
++ u64 time;
++
++ /* Find the time used in the previous interval */
++ spin_lock_irq(&ca->lock);
++ cpuusage_update(ca);
++ time = cputime64_to_jiffies64(ca->last_interval_time);
++ spin_unlock_irq(&ca->lock);
++
++ /* Convert time to a percentage, to give the load in the
++ * previous period */
++ time *= 100;
++ do_div(time, INTERVAL);
++
++ return time;
++}
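++
++/*
++ * Worked example: with HZ == 1000 (so INTERVAL == 10000 jiffies), if
++ * the container's tasks used 5000 jiffies of CPU during the last
++ * interval, load_read() returns 5000 * 100 / 10000 == 50. On SMP,
++ * tasks on several CPUs can accumulate more than INTERVAL jiffies
++ * per interval, so values above 100 are possible.
++ */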
++
++static struct cftype files[] = {
++ {
++ .name = "cpuacct.usage",
++ .read_uint = cpuusage_read,
++ },
++ {
++ .name = "cpuacct.load",
++ .read_uint = load_read,
++ }
++};
++
++static int cpuacct_populate(struct container_subsys *ss,
++ struct container *cont)
++{
++ return container_add_files(cont, files, ARRAY_SIZE(files));
++}
++
++void cpuacct_charge(struct task_struct *task, cputime_t cputime)
++{
++ struct cpuacct *ca;
++ unsigned long flags;
++
++ if (!cpuacct_subsys.active)
++ return;
++ rcu_read_lock();
++ ca = task_ca(task);
++ if (ca) {
++ spin_lock_irqsave(&ca->lock, flags);
++ cpuusage_update(ca);
++ ca->time = cputime64_add(ca->time, cputime);
++ ca->current_interval_time =
++ cputime64_add(ca->current_interval_time, cputime);
++ spin_unlock_irqrestore(&ca->lock, flags);
++ }
++ rcu_read_unlock();
++}
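++
++/*
++ * cpuacct_charge() is meant to be driven from the CPU time accounting
++ * path; the hook itself lives elsewhere in this patch series. A
++ * caller would look roughly like (sketch only):
++ *
++ *     cpuacct_charge(p, jiffies_to_cputime(1));
++ */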
++
++struct container_subsys cpuacct_subsys = {
++ .name = "cpuacct",
++ .create = cpuacct_create,
++ .destroy = cpuacct_destroy,
++ .populate = cpuacct_populate,
++ .subsys_id = cpuacct_subsys_id,
++};
+diff -Nurb linux-2.6.22-570/kernel/cpuset.c linux-2.6.22-591/kernel/cpuset.c
+--- linux-2.6.22-570/kernel/cpuset.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/cpuset.c 2007-12-21 15:36:12.000000000 -0500
+@@ -5,6 +5,7 @@
+ *
+ * Copyright (C) 2003 BULL SA.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ * Copyright (C) 2006 Google, Inc
+ *
+ * Portions derived from Patrick Mochel's sysfs code.
+ * sysfs is Copyright (c) 2001-3 Patrick Mochel
+@@ -12,6 +13,7 @@
+ * 2003-10-10 Written by Simon Derr.
+ * 2003-10-22 Updates by Stephen Hemminger.
+ * 2004 May-July Rework by Paul Jackson.
++ * 2006 Rework by Paul Menage to use generic containers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+@@ -53,8 +55,6 @@
+ #include <asm/atomic.h>
+ #include <linux/mutex.h>
+
+-#define CPUSET_SUPER_MAGIC 0x27e0eb
+-
+ /*
+ * Tracks how many cpusets are currently defined in system.
+ * When there is only one cpuset (the root cpuset) we can
+@@ -62,6 +62,10 @@
+ */
+ int number_of_cpusets __read_mostly;
+
++/* Forward declarations */
++struct container_subsys cpuset_subsys;
++struct cpuset;
++
+ /* See "Frequency meter" comments, below. */
+
+ struct fmeter {
+@@ -72,24 +76,13 @@
+ };
+
+ struct cpuset {
++ struct container_subsys_state css;
++
+ unsigned long flags; /* "unsigned long" so bitops work */
+ cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
+ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
+
+- /*
+- * Count is atomic so can incr (fork) or decr (exit) without a lock.
+- */
+- atomic_t count; /* count tasks using this cpuset */
+-
+- /*
+- * We link our 'sibling' struct into our parents 'children'.
+- * Our children link their 'sibling' into our 'children'.
+- */
+- struct list_head sibling; /* my parents children */
+- struct list_head children; /* my children */
+-
+ struct cpuset *parent; /* my parent */
+- struct dentry *dentry; /* cpuset fs entry */
+
+ /*
+ * Copy of global cpuset_mems_generation as of the most
+@@ -100,13 +93,32 @@
+ struct fmeter fmeter; /* memory_pressure filter */
+ };
+
++/* Update the cpuset for a container */
++static inline void set_container_cs(struct container *cont, struct cpuset *cs)
++{
++ cont->subsys[cpuset_subsys_id] = &cs->css;
++}
++
++/* Retrieve the cpuset for a container */
++static inline struct cpuset *container_cs(struct container *cont)
++{
++ return container_of(container_subsys_state(cont, cpuset_subsys_id),
++ struct cpuset, css);
++}
++
++/* Retrieve the cpuset for a task */
++static inline struct cpuset *task_cs(struct task_struct *task)
++{
++ return container_of(task_subsys_state(task, cpuset_subsys_id),
++ struct cpuset, css);
++}
++
+ /* bits in struct cpuset flags field */
+ typedef enum {
+ CS_CPU_EXCLUSIVE,
+ CS_MEM_EXCLUSIVE,
+ CS_MEMORY_MIGRATE,
+- CS_REMOVED,
+- CS_NOTIFY_ON_RELEASE,
+ CS_SPREAD_PAGE,
+ CS_SPREAD_SLAB,
+ } cpuset_flagbits_t;
+@@ -122,16 +134,6 @@
+ return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
+ }
+
+-static inline int is_removed(const struct cpuset *cs)
+-{
+- return test_bit(CS_REMOVED, &cs->flags);
+-}
+-
+-static inline int notify_on_release(const struct cpuset *cs)
+-{
+- return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+-}
+-
+ static inline int is_memory_migrate(const struct cpuset *cs)
+ {
+ return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+@@ -172,14 +174,8 @@
+ .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
+ .cpus_allowed = CPU_MASK_ALL,
+ .mems_allowed = NODE_MASK_ALL,
+- .count = ATOMIC_INIT(0),
+- .sibling = LIST_HEAD_INIT(top_cpuset.sibling),
+- .children = LIST_HEAD_INIT(top_cpuset.children),
+ };
+
+-static struct vfsmount *cpuset_mount;
+-static struct super_block *cpuset_sb;
+-
+ /*
+ * We have two global cpuset mutexes below. They can nest.
+ * It is ok to first take manage_mutex, then nest callback_mutex. We also
+@@ -263,297 +259,36 @@
+ * the routine cpuset_update_task_memory_state().
+ */
+
+-static DEFINE_MUTEX(manage_mutex);
+ static DEFINE_MUTEX(callback_mutex);
+
+-/*
+- * A couple of forward declarations required, due to cyclic reference loop:
+- * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
+- * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
+- */
+-
+-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
+-
+-static struct backing_dev_info cpuset_backing_dev_info = {
+- .ra_pages = 0, /* No readahead */
+- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+-};
+-
+-static struct inode *cpuset_new_inode(mode_t mode)
+-{
+- struct inode *inode = new_inode(cpuset_sb);
+-
+- if (inode) {
+- inode->i_mode = mode;
+- inode->i_uid = current->fsuid;
+- inode->i_gid = current->fsgid;
+- inode->i_blocks = 0;
+- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+- inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
+- }
+- return inode;
+-}
+-
+-static void cpuset_diput(struct dentry *dentry, struct inode *inode)
+-{
+- /* is dentry a directory ? if so, kfree() associated cpuset */
+- if (S_ISDIR(inode->i_mode)) {
+- struct cpuset *cs = dentry->d_fsdata;
+- BUG_ON(!(is_removed(cs)));
+- kfree(cs);
+- }
+- iput(inode);
+-}
+-
+-static struct dentry_operations cpuset_dops = {
+- .d_iput = cpuset_diput,
+-};
+-
+-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
+-{
+- struct dentry *d = lookup_one_len(name, parent, strlen(name));
+- if (!IS_ERR(d))
+- d->d_op = &cpuset_dops;
+- return d;
+-}
+-
+-static void remove_dir(struct dentry *d)
+-{
+- struct dentry *parent = dget(d->d_parent);
+-
+- d_delete(d);
+- simple_rmdir(parent->d_inode, d);
+- dput(parent);
+-}
+-
+-/*
+- * NOTE : the dentry must have been dget()'ed
+- */
+-static void cpuset_d_remove_dir(struct dentry *dentry)
+-{
+- struct list_head *node;
+-
+- spin_lock(&dcache_lock);
+- node = dentry->d_subdirs.next;
+- while (node != &dentry->d_subdirs) {
+- struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+- list_del_init(node);
+- if (d->d_inode) {
+- d = dget_locked(d);
+- spin_unlock(&dcache_lock);
+- d_delete(d);
+- simple_unlink(dentry->d_inode, d);
+- dput(d);
+- spin_lock(&dcache_lock);
+- }
+- node = dentry->d_subdirs.next;
+- }
+- list_del_init(&dentry->d_u.d_child);
+- spin_unlock(&dcache_lock);
+- remove_dir(dentry);
+-}
+-
+-static struct super_operations cpuset_ops = {
+- .statfs = simple_statfs,
+- .drop_inode = generic_delete_inode,
+-};
+-
+-static int cpuset_fill_super(struct super_block *sb, void *unused_data,
+- int unused_silent)
+-{
+- struct inode *inode;
+- struct dentry *root;
+-
+- sb->s_blocksize = PAGE_CACHE_SIZE;
+- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+- sb->s_magic = CPUSET_SUPER_MAGIC;
+- sb->s_op = &cpuset_ops;
+- cpuset_sb = sb;
+-
+- inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
+- if (inode) {
+- inode->i_op = &simple_dir_inode_operations;
+- inode->i_fop = &simple_dir_operations;
+- /* directories start off with i_nlink == 2 (for "." entry) */
+- inc_nlink(inode);
+- } else {
+- return -ENOMEM;
+- }
+-
+- root = d_alloc_root(inode);
+- if (!root) {
+- iput(inode);
+- return -ENOMEM;
+- }
+- sb->s_root = root;
+- return 0;
+-}
+-
++/* This is ugly, but preserves the userspace API for existing cpuset
++ * users. If someone tries to mount the "cpuset" filesystem, we
++ * silently switch it to mount "container" instead */
+ static int cpuset_get_sb(struct file_system_type *fs_type,
+ int flags, const char *unused_dev_name,
+ void *data, struct vfsmount *mnt)
+ {
+- return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
++ struct file_system_type *container_fs = get_fs_type("container");
++ int ret = -ENODEV;
++ if (container_fs) {
++ ret = container_fs->get_sb(container_fs, flags,
++ unused_dev_name,
++ "cpuset", mnt);
++ put_filesystem(container_fs);
++ if (!ret) {
++ container_set_release_agent_path(
++ &cpuset_subsys,
++ "/sbin/cpuset_release_agent");
++ }
++ }
++ return ret;
+ }
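++
++/*
++ * So a legacy "mount -t cpuset none /dev/cpuset" keeps working: it is
++ * rewritten into roughly the equivalent of (a sketch of the intended
++ * userspace view; the option syntax is an assumption):
++ *
++ *     mount -t container -o cpuset none /dev/cpuset
++ */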
+
+ static struct file_system_type cpuset_fs_type = {
+ .name = "cpuset",
+ .get_sb = cpuset_get_sb,
+- .kill_sb = kill_litter_super,
+-};
+-
+-/* struct cftype:
+- *
+- * The files in the cpuset filesystem mostly have a very simple read/write
+- * handling, some common function will take care of it. Nevertheless some cases
+- * (read tasks) are special and therefore I define this structure for every
+- * kind of file.
+- *
+- *
+- * When reading/writing to a file:
+- * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+- * - the 'cftype' of the file is file->f_path.dentry->d_fsdata
+- */
+-
+-struct cftype {
+- char *name;
+- int private;
+- int (*open) (struct inode *inode, struct file *file);
+- ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
+- loff_t *ppos);
+- int (*write) (struct file *file, const char __user *buf, size_t nbytes,
+- loff_t *ppos);
+- int (*release) (struct inode *inode, struct file *file);
+ };
+
+-static inline struct cpuset *__d_cs(struct dentry *dentry)
+-{
+- return dentry->d_fsdata;
+-}
+-
+-static inline struct cftype *__d_cft(struct dentry *dentry)
+-{
+- return dentry->d_fsdata;
+-}
+-
+-/*
+- * Call with manage_mutex held. Writes path of cpuset into buf.
+- * Returns 0 on success, -errno on error.
+- */
+-
+-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
+-{
+- char *start;
+-
+- start = buf + buflen;
+-
+- *--start = '\0';
+- for (;;) {
+- int len = cs->dentry->d_name.len;
+- if ((start -= len) < buf)
+- return -ENAMETOOLONG;
+- memcpy(start, cs->dentry->d_name.name, len);
+- cs = cs->parent;
+- if (!cs)
+- break;
+- if (!cs->parent)
+- continue;
+- if (--start < buf)
+- return -ENAMETOOLONG;
+- *start = '/';
+- }
+- memmove(buf, start, buf + buflen - start);
+- return 0;
+-}
+-
+-/*
+- * Notify userspace when a cpuset is released, by running
+- * /sbin/cpuset_release_agent with the name of the cpuset (path
+- * relative to the root of cpuset file system) as the argument.
+- *
+- * Most likely, this user command will try to rmdir this cpuset.
+- *
+- * This races with the possibility that some other task will be
+- * attached to this cpuset before it is removed, or that some other
+- * user task will 'mkdir' a child cpuset of this cpuset. That's ok.
+- * The presumed 'rmdir' will fail quietly if this cpuset is no longer
+- * unused, and this cpuset will be reprieved from its death sentence,
+- * to continue to serve a useful existence. Next time it's released,
+- * we will get notified again, if it still has 'notify_on_release' set.
+- *
+- * The final arg to call_usermodehelper() is 0, which means don't
+- * wait. The separate /sbin/cpuset_release_agent task is forked by
+- * call_usermodehelper(), then control in this thread returns here,
+- * without waiting for the release agent task. We don't bother to
+- * wait because the caller of this routine has no use for the exit
+- * status of the /sbin/cpuset_release_agent task, so no sense holding
+- * our caller up for that.
+- *
+- * When we had only one cpuset mutex, we had to call this
+- * without holding it, to avoid deadlock when call_usermodehelper()
+- * allocated memory. With two locks, we could now call this while
+- * holding manage_mutex, but we still don't, so as to minimize
+- * the time manage_mutex is held.
+- */
+-
+-static void cpuset_release_agent(const char *pathbuf)
+-{
+- char *argv[3], *envp[3];
+- int i;
+-
+- if (!pathbuf)
+- return;
+-
+- i = 0;
+- argv[i++] = "/sbin/cpuset_release_agent";
+- argv[i++] = (char *)pathbuf;
+- argv[i] = NULL;
+-
+- i = 0;
+- /* minimal command environment */
+- envp[i++] = "HOME=/";
+- envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+- envp[i] = NULL;
+-
+- call_usermodehelper(argv[0], argv, envp, 0);
+- kfree(pathbuf);
+-}
+-
+-/*
+- * Either cs->count of using tasks transitioned to zero, or the
+- * cs->children list of child cpusets just became empty. If this
+- * cs is notify_on_release() and now both the user count is zero and
+- * the list of children is empty, prepare cpuset path in a kmalloc'd
+- * buffer, to be returned via ppathbuf, so that the caller can invoke
+- * cpuset_release_agent() with it later on, once manage_mutex is dropped.
+- * Call here with manage_mutex held.
+- *
+- * This check_for_release() routine is responsible for kmalloc'ing
+- * pathbuf. The above cpuset_release_agent() is responsible for
+- * kfree'ing pathbuf. The caller of these routines is responsible
+- * for providing a pathbuf pointer, initialized to NULL, then
+- * calling check_for_release() with manage_mutex held and the address
+- * of the pathbuf pointer, then dropping manage_mutex, then calling
+- * cpuset_release_agent() with pathbuf, as set by check_for_release().
+- */
+-
+-static void check_for_release(struct cpuset *cs, char **ppathbuf)
+-{
+- if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
+- list_empty(&cs->children)) {
+- char *buf;
+-
+- buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+- if (!buf)
+- return;
+- if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
+- kfree(buf);
+- else
+- *ppathbuf = buf;
+- }
+-}
+-
+ /*
+ * Return in *pmask the portion of a cpusets's cpus_allowed that
+ * are online. If none are online, walk up the cpuset hierarchy
+@@ -651,20 +386,19 @@
+ struct task_struct *tsk = current;
+ struct cpuset *cs;
+
+- if (tsk->cpuset == &top_cpuset) {
++ if (task_cs(tsk) == &top_cpuset) {
+ /* Don't need rcu for top_cpuset. It's never freed. */
+ my_cpusets_mem_gen = top_cpuset.mems_generation;
+ } else {
+ rcu_read_lock();
+- cs = rcu_dereference(tsk->cpuset);
+- my_cpusets_mem_gen = cs->mems_generation;
++ my_cpusets_mem_gen = task_cs(current)->mems_generation;
+ rcu_read_unlock();
+ }
+
+ if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
+ mutex_lock(&callback_mutex);
+ task_lock(tsk);
+- cs = tsk->cpuset; /* Maybe changed when task not locked */
++ cs = task_cs(tsk); /* Maybe changed when task not locked */
+ guarantee_online_mems(cs, &tsk->mems_allowed);
+ tsk->cpuset_mems_generation = cs->mems_generation;
+ if (is_spread_page(cs))
+@@ -719,11 +453,12 @@
+
+ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
+ {
++ struct container *cont;
+ struct cpuset *c, *par;
+
+ /* Each of our child cpusets must be a subset of us */
+- list_for_each_entry(c, &cur->children, sibling) {
+- if (!is_cpuset_subset(c, trial))
++ list_for_each_entry(cont, &cur->css.container->children, sibling) {
++ if (!is_cpuset_subset(container_cs(cont), trial))
+ return -EBUSY;
+ }
+
+@@ -738,7 +473,8 @@
+ return -EACCES;
+
+ /* If either I or some sibling (!= me) is exclusive, we can't overlap */
+- list_for_each_entry(c, &par->children, sibling) {
++ list_for_each_entry(cont, &par->css.container->children, sibling) {
++ c = container_cs(cont);
+ if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
+ c != cur &&
+ cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+@@ -753,68 +489,13 @@
+ }
+
+ /*
+- * For a given cpuset cur, partition the system as follows
+- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
+- * exclusive child cpusets
+- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
+- * exclusive child cpusets
+- * Build these two partitions by calling partition_sched_domains
+- *
+- * Call with manage_mutex held. May nest a call to the
+- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+- * Must not be called holding callback_mutex, because we must
+- * not call lock_cpu_hotplug() while holding callback_mutex.
+- */
+-
+-static void update_cpu_domains(struct cpuset *cur)
+-{
+- struct cpuset *c, *par = cur->parent;
+- cpumask_t pspan, cspan;
+-
+- if (par == NULL || cpus_empty(cur->cpus_allowed))
+- return;
+-
+- /*
+- * Get all cpus from parent's cpus_allowed not part of exclusive
+- * children
+- */
+- pspan = par->cpus_allowed;
+- list_for_each_entry(c, &par->children, sibling) {
+- if (is_cpu_exclusive(c))
+- cpus_andnot(pspan, pspan, c->cpus_allowed);
+- }
+- if (!is_cpu_exclusive(cur)) {
+- cpus_or(pspan, pspan, cur->cpus_allowed);
+- if (cpus_equal(pspan, cur->cpus_allowed))
+- return;
+- cspan = CPU_MASK_NONE;
+- } else {
+- if (cpus_empty(pspan))
+- return;
+- cspan = cur->cpus_allowed;
+- /*
+- * Get all cpus from current cpuset's cpus_allowed not part
+- * of exclusive children
+- */
+- list_for_each_entry(c, &cur->children, sibling) {
+- if (is_cpu_exclusive(c))
+- cpus_andnot(cspan, cspan, c->cpus_allowed);
+- }
+- }
+-
+- lock_cpu_hotplug();
+- partition_sched_domains(&pspan, &cspan);
+- unlock_cpu_hotplug();
+-}
+-
+-/*
+ * Call with manage_mutex held. May take callback_mutex during call.
+ */
+
+ static int update_cpumask(struct cpuset *cs, char *buf)
+ {
+ struct cpuset trialcs;
+- int retval, cpus_unchanged;
++ int retval;
+
+ /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+ if (cs == &top_cpuset)
+@@ -836,17 +517,15 @@
+ }
+ cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
+ /* cpus_allowed cannot be empty for a cpuset with attached tasks. */
+- if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
++ if (container_task_count(cs->css.container) &&
++ cpus_empty(trialcs.cpus_allowed))
+ return -ENOSPC;
+ retval = validate_change(cs, &trialcs);
+ if (retval < 0)
+ return retval;
+- cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
+ mutex_lock(&callback_mutex);
+ cs->cpus_allowed = trialcs.cpus_allowed;
+ mutex_unlock(&callback_mutex);
+- if (is_cpu_exclusive(cs) && !cpus_unchanged)
+- update_cpu_domains(cs);
+ return 0;
+ }
+
+@@ -895,7 +574,7 @@
+ do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
+ mutex_lock(&callback_mutex);
+- guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
++ guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
+ mutex_unlock(&callback_mutex);
+ }
+
+@@ -913,16 +592,19 @@
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+
++static void *cpuset_being_rebound;
++
+ static int update_nodemask(struct cpuset *cs, char *buf)
+ {
+ struct cpuset trialcs;
+ nodemask_t oldmem;
+- struct task_struct *g, *p;
++ struct task_struct *p;
+ struct mm_struct **mmarray;
+ int i, n, ntasks;
+ int migrate;
+ int fudge;
+ int retval;
++ struct container_iter it;
+
+ /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+ if (cs == &top_cpuset)
+@@ -949,7 +631,8 @@
+ goto done;
+ }
+ /* mems_allowed cannot be empty for a cpuset with attached tasks. */
+- if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
++ if (container_task_count(cs->css.container) &&
++ nodes_empty(trialcs.mems_allowed)) {
+ retval = -ENOSPC;
+ goto done;
+ }
+@@ -962,7 +645,7 @@
+ cs->mems_generation = cpuset_mems_generation++;
+ mutex_unlock(&callback_mutex);
+
+- set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */
++ cpuset_being_rebound = cs; /* causes mpol_copy() rebind */
+
+ fudge = 10; /* spare mmarray[] slots */
+ fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+@@ -976,37 +659,37 @@
+ * enough mmarray[] w/o using GFP_ATOMIC.
+ */
+ while (1) {
+- ntasks = atomic_read(&cs->count); /* guess */
++ ntasks = container_task_count(cs->css.container); /* guess */
+ ntasks += fudge;
+ mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
+ if (!mmarray)
+ goto done;
+- write_lock_irq(&tasklist_lock); /* block fork */
+- if (atomic_read(&cs->count) <= ntasks)
++ read_lock(&tasklist_lock); /* block fork */
++ if (container_task_count(cs->css.container) <= ntasks)
+ break; /* got enough */
+- write_unlock_irq(&tasklist_lock); /* try again */
++ read_unlock(&tasklist_lock); /* try again */
+ kfree(mmarray);
+ }
+
+ n = 0;
+
+ /* Load up mmarray[] with mm reference for each task in cpuset. */
+- do_each_thread(g, p) {
++ container_iter_start(cs->css.container, &it);
++ while ((p = container_iter_next(cs->css.container, &it))) {
+ struct mm_struct *mm;
+
+ if (n >= ntasks) {
+ printk(KERN_WARNING
+ "Cpuset mempolicy rebind incomplete.\n");
+- continue;
++ break;
+ }
+- if (p->cpuset != cs)
+- continue;
+ mm = get_task_mm(p);
+ if (!mm)
+ continue;
+ mmarray[n++] = mm;
+- } while_each_thread(g, p);
+- write_unlock_irq(&tasklist_lock);
++ }
++ container_iter_end(cs->css.container, &it);
++ read_unlock(&tasklist_lock);
+
+ /*
+ * Now that we've dropped the tasklist spinlock, we can
+@@ -1033,12 +716,17 @@
+
+ /* We're done rebinding vma's to this cpusets new mems_allowed. */
+ kfree(mmarray);
+- set_cpuset_being_rebound(NULL);
++ cpuset_being_rebound = NULL;
+ retval = 0;
+ done:
+ return retval;
+ }
+
++int current_cpuset_is_being_rebound(void)
++{
++ return task_cs(current) == cpuset_being_rebound;
++}
++
+ /*
+ * Call with manage_mutex held.
+ */
+@@ -1067,7 +755,7 @@
+ {
+ int turning_on;
+ struct cpuset trialcs;
+- int err, cpu_exclusive_changed;
++ int err;
+
+ turning_on = (simple_strtoul(buf, NULL, 10) != 0);
+
+@@ -1080,14 +768,10 @@
+ err = validate_change(cs, &trialcs);
+ if (err < 0)
+ return err;
+- cpu_exclusive_changed =
+- (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
+ mutex_lock(&callback_mutex);
+ cs->flags = trialcs.flags;
+ mutex_unlock(&callback_mutex);
+
+- if (cpu_exclusive_changed)
+- update_cpu_domains(cs);
+ return 0;
+ }
+
+@@ -1189,85 +873,34 @@
+ return val;
+ }
+
+-/*
+- * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly
+- * writing the path of the old cpuset in 'ppathbuf' if it needs to be
+- * notified on release.
+- *
+- * Call holding manage_mutex. May take callback_mutex and task_lock of
+- * the task 'pid' during call.
+- */
+-
+-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
++int cpuset_can_attach(struct container_subsys *ss,
++ struct container *cont, struct task_struct *tsk)
+ {
+- pid_t pid;
+- struct task_struct *tsk;
+- struct cpuset *oldcs;
+- cpumask_t cpus;
+- nodemask_t from, to;
+- struct mm_struct *mm;
+- int retval;
++ struct cpuset *cs = container_cs(cont);
+
+- if (sscanf(pidbuf, "%d", &pid) != 1)
+- return -EIO;
+ if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+ return -ENOSPC;
+
+- if (pid) {
+- read_lock(&tasklist_lock);
+-
+- tsk = find_task_by_pid(pid);
+- if (!tsk || tsk->flags & PF_EXITING) {
+- read_unlock(&tasklist_lock);
+- return -ESRCH;
+- }
+-
+- get_task_struct(tsk);
+- read_unlock(&tasklist_lock);
+-
+- if ((current->euid) && (current->euid != tsk->uid)
+- && (current->euid != tsk->suid)) {
+- put_task_struct(tsk);
+- return -EACCES;
+- }
+- } else {
+- tsk = current;
+- get_task_struct(tsk);
+- }
++ return security_task_setscheduler(tsk, 0, NULL);
++}
+
+- retval = security_task_setscheduler(tsk, 0, NULL);
+- if (retval) {
+- put_task_struct(tsk);
+- return retval;
+- }
++void cpuset_attach(struct container_subsys *ss,
++ struct container *cont, struct container *oldcont,
++ struct task_struct *tsk)
++{
++ cpumask_t cpus;
++ nodemask_t from, to;
++ struct mm_struct *mm;
++ struct cpuset *cs = container_cs(cont);
++ struct cpuset *oldcs = container_cs(oldcont);
+
+ mutex_lock(&callback_mutex);
+-
+- task_lock(tsk);
+- oldcs = tsk->cpuset;
+- /*
+- * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+- * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+- * then fail this attach_task(), to avoid breaking top_cpuset.count.
+- */
+- if (tsk->flags & PF_EXITING) {
+- task_unlock(tsk);
+- mutex_unlock(&callback_mutex);
+- put_task_struct(tsk);
+- return -ESRCH;
+- }
+- atomic_inc(&cs->count);
+- rcu_assign_pointer(tsk->cpuset, cs);
+- task_unlock(tsk);
+-
+ guarantee_online_cpus(cs, &cpus);
+ set_cpus_allowed(tsk, cpus);
++ mutex_unlock(&callback_mutex);
+
+ from = oldcs->mems_allowed;
+ to = cs->mems_allowed;
+-
+- mutex_unlock(&callback_mutex);
+-
+ mm = get_task_mm(tsk);
+ if (mm) {
+ mpol_rebind_mm(mm, &to);
+@@ -1276,40 +909,31 @@
+ mmput(mm);
+ }
+
+- put_task_struct(tsk);
+- synchronize_rcu();
+- if (atomic_dec_and_test(&oldcs->count))
+- check_for_release(oldcs, ppathbuf);
+- return 0;
+ }
+
+ /* The various types of files and directories in a cpuset file system */
+
+ typedef enum {
+- FILE_ROOT,
+- FILE_DIR,
+ FILE_MEMORY_MIGRATE,
+ FILE_CPULIST,
+ FILE_MEMLIST,
+ FILE_CPU_EXCLUSIVE,
+ FILE_MEM_EXCLUSIVE,
+- FILE_NOTIFY_ON_RELEASE,
+ FILE_MEMORY_PRESSURE_ENABLED,
+ FILE_MEMORY_PRESSURE,
+ FILE_SPREAD_PAGE,
+ FILE_SPREAD_SLAB,
+- FILE_TASKLIST,
+ } cpuset_filetype_t;
+
+-static ssize_t cpuset_common_file_write(struct file *file,
++static ssize_t cpuset_common_file_write(struct container *cont,
++ struct cftype *cft,
++ struct file *file,
+ const char __user *userbuf,
+ size_t nbytes, loff_t *unused_ppos)
+ {
+- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+- struct cftype *cft = __d_cft(file->f_path.dentry);
++ struct cpuset *cs = container_cs(cont);
+ cpuset_filetype_t type = cft->private;
+ char *buffer;
+- char *pathbuf = NULL;
+ int retval = 0;
+
+ /* Crude upper limit on largest legitimate cpulist user might write. */
+@@ -1326,9 +950,9 @@
+ }
+ buffer[nbytes] = 0; /* nul-terminate */
+
+- mutex_lock(&manage_mutex);
++ container_lock();
+
+- if (is_removed(cs)) {
++ if (container_is_removed(cont)) {
+ retval = -ENODEV;
+ goto out2;
+ }
+@@ -1346,9 +970,6 @@
+ case FILE_MEM_EXCLUSIVE:
+ retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
+ break;
+- case FILE_NOTIFY_ON_RELEASE:
+- retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
+- break;
+ case FILE_MEMORY_MIGRATE:
+ retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+ break;
+@@ -1366,9 +987,6 @@
+ retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
+ cs->mems_generation = cpuset_mems_generation++;
+ break;
+- case FILE_TASKLIST:
+- retval = attach_task(cs, buffer, &pathbuf);
+- break;
+ default:
+ retval = -EINVAL;
+ goto out2;
+@@ -1377,30 +995,12 @@
+ if (retval == 0)
+ retval = nbytes;
+ out2:
+- mutex_unlock(&manage_mutex);
+- cpuset_release_agent(pathbuf);
++ container_unlock();
+ out1:
+ kfree(buffer);
+ return retval;
+ }
+
+-static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
+- size_t nbytes, loff_t *ppos)
+-{
+- ssize_t retval = 0;
+- struct cftype *cft = __d_cft(file->f_path.dentry);
+- if (!cft)
+- return -ENODEV;
+-
+- /* special function ? */
+- if (cft->write)
+- retval = cft->write(file, buf, nbytes, ppos);
+- else
+- retval = cpuset_common_file_write(file, buf, nbytes, ppos);
+-
+- return retval;
+-}
+-
+ /*
+ * These ascii lists should be read in a single call, by using a user
+ * buffer large enough to hold the entire map. If read in smaller
+@@ -1435,17 +1035,19 @@
+ return nodelist_scnprintf(page, PAGE_SIZE, mask);
+ }
+
+-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
++static ssize_t cpuset_common_file_read(struct container *cont,
++ struct cftype *cft,
++ struct file *file,
++ char __user *buf,
+ size_t nbytes, loff_t *ppos)
+ {
+- struct cftype *cft = __d_cft(file->f_path.dentry);
+- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
++ struct cpuset *cs = container_cs(cont);
+ cpuset_filetype_t type = cft->private;
+ char *page;
+ ssize_t retval = 0;
+ char *s;
+
+- if (!(page = (char *)__get_free_page(GFP_KERNEL)))
++ if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
+ return -ENOMEM;
+
+ s = page;
+@@ -1463,9 +1065,6 @@
+ case FILE_MEM_EXCLUSIVE:
+ *s++ = is_mem_exclusive(cs) ? '1' : '0';
+ break;
+- case FILE_NOTIFY_ON_RELEASE:
+- *s++ = notify_on_release(cs) ? '1' : '0';
+- break;
+ case FILE_MEMORY_MIGRATE:
+ *s++ = is_memory_migrate(cs) ? '1' : '0';
+ break;
+@@ -1493,390 +1092,140 @@
+ return retval;
+ }
+
+-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
+- loff_t *ppos)
+-{
+- ssize_t retval = 0;
+- struct cftype *cft = __d_cft(file->f_path.dentry);
+- if (!cft)
+- return -ENODEV;
+-
+- /* special function ? */
+- if (cft->read)
+- retval = cft->read(file, buf, nbytes, ppos);
+- else
+- retval = cpuset_common_file_read(file, buf, nbytes, ppos);
+-
+- return retval;
+-}
+-
+-static int cpuset_file_open(struct inode *inode, struct file *file)
+-{
+- int err;
+- struct cftype *cft;
+-
+- err = generic_file_open(inode, file);
+- if (err)
+- return err;
+-
+- cft = __d_cft(file->f_path.dentry);
+- if (!cft)
+- return -ENODEV;
+- if (cft->open)
+- err = cft->open(inode, file);
+- else
+- err = 0;
+-
+- return err;
+-}
+-
+-static int cpuset_file_release(struct inode *inode, struct file *file)
+-{
+- struct cftype *cft = __d_cft(file->f_path.dentry);
+- if (cft->release)
+- return cft->release(inode, file);
+- return 0;
+-}
+-
+-/*
+- * cpuset_rename - Only allow simple rename of directories in place.
+- */
+-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
+-{
+- if (!S_ISDIR(old_dentry->d_inode->i_mode))
+- return -ENOTDIR;
+- if (new_dentry->d_inode)
+- return -EEXIST;
+- if (old_dir != new_dir)
+- return -EIO;
+- return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+-}
+-
+-static const struct file_operations cpuset_file_operations = {
+- .read = cpuset_file_read,
+- .write = cpuset_file_write,
+- .llseek = generic_file_llseek,
+- .open = cpuset_file_open,
+- .release = cpuset_file_release,
+-};
+-
+-static const struct inode_operations cpuset_dir_inode_operations = {
+- .lookup = simple_lookup,
+- .mkdir = cpuset_mkdir,
+- .rmdir = cpuset_rmdir,
+- .rename = cpuset_rename,
+-};
+-
+-static int cpuset_create_file(struct dentry *dentry, int mode)
+-{
+- struct inode *inode;
+-
+- if (!dentry)
+- return -ENOENT;
+- if (dentry->d_inode)
+- return -EEXIST;
+-
+- inode = cpuset_new_inode(mode);
+- if (!inode)
+- return -ENOMEM;
+-
+- if (S_ISDIR(mode)) {
+- inode->i_op = &cpuset_dir_inode_operations;
+- inode->i_fop = &simple_dir_operations;
+-
+- /* start off with i_nlink == 2 (for "." entry) */
+- inc_nlink(inode);
+- } else if (S_ISREG(mode)) {
+- inode->i_size = 0;
+- inode->i_fop = &cpuset_file_operations;
+- }
+-
+- d_instantiate(dentry, inode);
+- dget(dentry); /* Extra count - pin the dentry in core */
+- return 0;
+-}
+-
+-/*
+- * cpuset_create_dir - create a directory for an object.
+- * cs: the cpuset we create the directory for.
+- * It must have a valid ->parent field
+- * And we are going to fill its ->dentry field.
+- * name: The name to give to the cpuset directory. Will be copied.
+- * mode: mode to set on new directory.
+- */
+-
+-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
+-{
+- struct dentry *dentry = NULL;
+- struct dentry *parent;
+- int error = 0;
+-
+- parent = cs->parent->dentry;
+- dentry = cpuset_get_dentry(parent, name);
+- if (IS_ERR(dentry))
+- return PTR_ERR(dentry);
+- error = cpuset_create_file(dentry, S_IFDIR | mode);
+- if (!error) {
+- dentry->d_fsdata = cs;
+- inc_nlink(parent->d_inode);
+- cs->dentry = dentry;
+- }
+- dput(dentry);
+-
+- return error;
+-}
+-
+-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
+-{
+- struct dentry *dentry;
+- int error;
+-
+- mutex_lock(&dir->d_inode->i_mutex);
+- dentry = cpuset_get_dentry(dir, cft->name);
+- if (!IS_ERR(dentry)) {
+- error = cpuset_create_file(dentry, 0644 | S_IFREG);
+- if (!error)
+- dentry->d_fsdata = (void *)cft;
+- dput(dentry);
+- } else
+- error = PTR_ERR(dentry);
+- mutex_unlock(&dir->d_inode->i_mutex);
+- return error;
+-}
+-
+-/*
+- * Stuff for reading the 'tasks' file.
+- *
+- * Reading this file can return large amounts of data if a cpuset has
+- * *lots* of attached tasks. So it may need several calls to read(),
+- * but we cannot guarantee that the information we produce is correct
+- * unless we produce it entirely atomically.
+- *
+- * Upon tasks file open(), a struct ctr_struct is allocated, that
+- * will have a pointer to an array (also allocated here). The struct
+- * ctr_struct * is stored in file->private_data. Its resources will
+- * be freed by release() when the file is closed. The array is used
+- * to sprintf the PIDs and then used by read().
+- */
+-
+-/* cpusets_tasks_read array */
+-
+-struct ctr_struct {
+- char *buf;
+- int bufsz;
+-};
+-
+-/*
+- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
+- * Return actual number of pids loaded. No need to task_lock(p)
+- * when reading out p->cpuset, as we don't really care if it changes
+- * on the next cycle, and we are not going to try to dereference it.
+- */
+-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
+-{
+- int n = 0;
+- struct task_struct *g, *p;
+-
+- read_lock(&tasklist_lock);
+-
+- do_each_thread(g, p) {
+- if (p->cpuset == cs) {
+- if (unlikely(n == npids))
+- goto array_full;
+- pidarray[n++] = p->pid;
+- }
+- } while_each_thread(g, p);
+-
+-array_full:
+- read_unlock(&tasklist_lock);
+- return n;
+-}
+-
+-static int cmppid(const void *a, const void *b)
+-{
+- return *(pid_t *)a - *(pid_t *)b;
+-}
+-
+-/*
+- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+- * decimal pids in 'buf'. Don't write more than 'sz' chars, but return
+- * count 'cnt' of how many chars would be written if buf were large enough.
+- */
+-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+-{
+- int cnt = 0;
+- int i;
+-
+- for (i = 0; i < npids; i++)
+- cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+- return cnt;
+-}
+-
+-/*
+- * Handle an open on 'tasks' file. Prepare a buffer listing the
+- * process id's of tasks currently attached to the cpuset being opened.
+- *
+- * Does not require any specific cpuset mutexes, and does not take any.
+- */
+-static int cpuset_tasks_open(struct inode *unused, struct file *file)
+-{
+- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+- struct ctr_struct *ctr;
+- pid_t *pidarray;
+- int npids;
+- char c;
+-
+- if (!(file->f_mode & FMODE_READ))
+- return 0;
+-
+- ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+- if (!ctr)
+- goto err0;
+-
+- /*
+- * If cpuset gets more users after we read count, we won't have
+- * enough space - tough. This race is indistinguishable to the
+- * caller from the case that the additional cpuset users didn't
+- * show up until sometime later on.
+- */
+- npids = atomic_read(&cs->count);
+- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+- if (!pidarray)
+- goto err1;
+-
+- npids = pid_array_load(pidarray, npids, cs);
+- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+-
+- /* Call pid_array_to_buf() twice, first just to get bufsz */
+- ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+- ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+- if (!ctr->buf)
+- goto err2;
+- ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+-
+- kfree(pidarray);
+- file->private_data = ctr;
+- return 0;
+-
+-err2:
+- kfree(pidarray);
+-err1:
+- kfree(ctr);
+-err0:
+- return -ENOMEM;
+-}
+-
+-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
+- size_t nbytes, loff_t *ppos)
+-{
+- struct ctr_struct *ctr = file->private_data;
+
+- return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+-}
+
+-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
+-{
+- struct ctr_struct *ctr;
+
+- if (file->f_mode & FMODE_READ) {
+- ctr = file->private_data;
+- kfree(ctr->buf);
+- kfree(ctr);
+- }
+- return 0;
+-}
+
+ /*
+ * for the common functions, 'private' gives the type of file
+ */
+
+-static struct cftype cft_tasks = {
+- .name = "tasks",
+- .open = cpuset_tasks_open,
+- .read = cpuset_tasks_read,
+- .release = cpuset_tasks_release,
+- .private = FILE_TASKLIST,
+-};
+-
+ static struct cftype cft_cpus = {
+ .name = "cpus",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_CPULIST,
+ };
+
+ static struct cftype cft_mems = {
+ .name = "mems",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_MEMLIST,
+ };
+
+ static struct cftype cft_cpu_exclusive = {
+ .name = "cpu_exclusive",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_CPU_EXCLUSIVE,
+ };
+
+ static struct cftype cft_mem_exclusive = {
+ .name = "mem_exclusive",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_MEM_EXCLUSIVE,
+ };
+
+-static struct cftype cft_notify_on_release = {
+- .name = "notify_on_release",
+- .private = FILE_NOTIFY_ON_RELEASE,
+-};
+-
+ static struct cftype cft_memory_migrate = {
+ .name = "memory_migrate",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_MEMORY_MIGRATE,
+ };
+
+ static struct cftype cft_memory_pressure_enabled = {
+ .name = "memory_pressure_enabled",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_MEMORY_PRESSURE_ENABLED,
+ };
+
+ static struct cftype cft_memory_pressure = {
+ .name = "memory_pressure",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_MEMORY_PRESSURE,
+ };
+
+ static struct cftype cft_spread_page = {
+ .name = "memory_spread_page",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_SPREAD_PAGE,
+ };
+
+ static struct cftype cft_spread_slab = {
+ .name = "memory_spread_slab",
++ .read = cpuset_common_file_read,
++ .write = cpuset_common_file_write,
+ .private = FILE_SPREAD_SLAB,
+ };
+
+-static int cpuset_populate_dir(struct dentry *cs_dentry)
++int cpuset_populate(struct container_subsys *ss, struct container *cont)
+ {
+ int err;
+
+- if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
++ if ((err = container_add_file(cont, &cft_cpus)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
++ if ((err = container_add_file(cont, &cft_mems)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
++ if ((err = container_add_file(cont, &cft_cpu_exclusive)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
++ if ((err = container_add_file(cont, &cft_mem_exclusive)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
++ if ((err = container_add_file(cont, &cft_memory_migrate)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
++ if ((err = container_add_file(cont, &cft_memory_pressure)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
++ if ((err = container_add_file(cont, &cft_spread_page)) < 0)
+ return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+- return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
+- return err;
+- if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
++ if ((err = container_add_file(cont, &cft_spread_slab)) < 0)
+ return err;
++ /* memory_pressure_enabled is in root cpuset only */
++ if (err == 0 && !cont->parent)
++ err = container_add_file(cont, &cft_memory_pressure_enabled);
+ return 0;
+ }
+
+ /*
++ * post_clone() is called at the end of container_clone().
++ * 'container' was just created automatically as a result of
++ * a container_clone(), and the current task is about to
++ * be moved into 'container'.
++ *
++ * Currently we refuse to set up the container - thereby
++ * refusing the task to be entered, and as a result refusing
++ * the sys_unshare() or clone() which initiated it - if any
++ * sibling cpusets have exclusive cpus or mem.
++ *
++ * If this becomes a problem for some users who wish to
++ * allow that scenario, then cpuset_post_clone() could be
++ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
++ * (and likewise for mems) to the new container.
++ */
++void cpuset_post_clone(struct container_subsys *ss,
++ struct container *container)
++{
++ struct container *parent, *child;
++ struct cpuset *cs, *parent_cs;
++
++ parent = container->parent;
++ list_for_each_entry(child, &parent->children, sibling) {
++ cs = container_cs(child);
++ if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
++ return;
++ }
++ cs = container_cs(container);
++ parent_cs = container_cs(parent);
++
++ cs->mems_allowed = parent_cs->mems_allowed;
++ cs->cpus_allowed = parent_cs->cpus_allowed;
++ return;
++}
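++
++/*
++ * For example: if the parent cpuset allows cpus 0-3 and mem node 0,
++ * and no sibling is cpu_exclusive or mem_exclusive, the freshly
++ * cloned child starts out with cpus 0-3 and mem node 0 as well, so
++ * the task entering it keeps its old placement.
++ */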
++
++/*
+ * cpuset_create - create a cpuset
+ * parent: cpuset that will be parent of the new cpuset.
+ * name: name of the new cpuset. Will be strcpy'ed.
+@@ -1885,124 +1234,62 @@
+ * Must be called with the mutex on the parent inode held
+ */
+
+-static long cpuset_create(struct cpuset *parent, const char *name, int mode)
++int cpuset_create(struct container_subsys *ss, struct container *cont)
+ {
+ struct cpuset *cs;
+- int err;
++ struct cpuset *parent;
+
++ if (!cont->parent) {
++ /* This is early initialization for the top container */
++ set_container_cs(cont, &top_cpuset);
++ top_cpuset.css.container = cont;
++ top_cpuset.mems_generation = cpuset_mems_generation++;
++ return 0;
++ }
++ parent = container_cs(cont->parent);
+ cs = kmalloc(sizeof(*cs), GFP_KERNEL);
+ if (!cs)
+ return -ENOMEM;
+
+- mutex_lock(&manage_mutex);
+ cpuset_update_task_memory_state();
+ cs->flags = 0;
+- if (notify_on_release(parent))
+- set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+ if (is_spread_page(parent))
+ set_bit(CS_SPREAD_PAGE, &cs->flags);
+ if (is_spread_slab(parent))
+ set_bit(CS_SPREAD_SLAB, &cs->flags);
+ cs->cpus_allowed = CPU_MASK_NONE;
+ cs->mems_allowed = NODE_MASK_NONE;
+- atomic_set(&cs->count, 0);
+- INIT_LIST_HEAD(&cs->sibling);
+- INIT_LIST_HEAD(&cs->children);
+ cs->mems_generation = cpuset_mems_generation++;
+ fmeter_init(&cs->fmeter);
+
+ cs->parent = parent;
+-
+- mutex_lock(&callback_mutex);
+- list_add(&cs->sibling, &cs->parent->children);
++ set_container_cs(cont, cs);
++ cs->css.container = cont;
+ number_of_cpusets++;
+- mutex_unlock(&callback_mutex);
+-
+- err = cpuset_create_dir(cs, name, mode);
+- if (err < 0)
+- goto err;
+-
+- /*
+- * Release manage_mutex before cpuset_populate_dir() because it
+- * will down() this new directory's i_mutex and if we race with
+- * another mkdir, we might deadlock.
+- */
+- mutex_unlock(&manage_mutex);
+-
+- err = cpuset_populate_dir(cs->dentry);
+- /* If err < 0, we have a half-filled directory - oh well ;) */
+ return 0;
+-err:
+- list_del(&cs->sibling);
+- mutex_unlock(&manage_mutex);
+- kfree(cs);
+- return err;
+-}
+-
+-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+-{
+- struct cpuset *c_parent = dentry->d_parent->d_fsdata;
+-
+- /* the vfs holds inode->i_mutex already */
+- return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
+ }
+
+-/*
+- * Locking note on the strange update_flag() call below:
+- *
+- * If the cpuset being removed is marked cpu_exclusive, then simulate
+- * turning cpu_exclusive off, which will call update_cpu_domains().
+- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
+- * made while holding callback_mutex. Elsewhere the kernel nests
+- * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
+- * nesting would risk an ABBA deadlock.
+- */
+-
+-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
++void cpuset_destroy(struct container_subsys *ss, struct container *cont)
+ {
+- struct cpuset *cs = dentry->d_fsdata;
+- struct dentry *d;
+- struct cpuset *parent;
+- char *pathbuf = NULL;
+-
+- /* the vfs holds both inode->i_mutex already */
++ struct cpuset *cs = container_cs(cont);
+
+- mutex_lock(&manage_mutex);
+ cpuset_update_task_memory_state();
+- if (atomic_read(&cs->count) > 0) {
+- mutex_unlock(&manage_mutex);
+- return -EBUSY;
+- }
+- if (!list_empty(&cs->children)) {
+- mutex_unlock(&manage_mutex);
+- return -EBUSY;
+- }
+- if (is_cpu_exclusive(cs)) {
+- int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
+- if (retval < 0) {
+- mutex_unlock(&manage_mutex);
+- return retval;
+- }
+- }
+- parent = cs->parent;
+- mutex_lock(&callback_mutex);
+- set_bit(CS_REMOVED, &cs->flags);
+- list_del(&cs->sibling); /* delete my sibling from parent->children */
+- spin_lock(&cs->dentry->d_lock);
+- d = dget(cs->dentry);
+- cs->dentry = NULL;
+- spin_unlock(&d->d_lock);
+- cpuset_d_remove_dir(d);
+- dput(d);
+ number_of_cpusets--;
+- mutex_unlock(&callback_mutex);
+- if (list_empty(&parent->children))
+- check_for_release(parent, &pathbuf);
+- mutex_unlock(&manage_mutex);
+- cpuset_release_agent(pathbuf);
+- return 0;
++ kfree(cs);
+ }
+
++struct container_subsys cpuset_subsys = {
++ .name = "cpuset",
++ .create = cpuset_create,
++ .destroy = cpuset_destroy,
++ .can_attach = cpuset_can_attach,
++ .attach = cpuset_attach,
++ .populate = cpuset_populate,
++ .post_clone = cpuset_post_clone,
++ .subsys_id = cpuset_subsys_id,
++ .early_init = 1,
++};
++
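++/*
++ * Editor's illustrative sketch -- not part of this patch. The ops
++ * table above is the whole registration surface for a container
++ * subsystem. A minimal hypothetical subsystem only needs create()
++ * and destroy(); the noop_* names and noop_subsys_id below are
++ * invented for illustration only.
++ */
++#if 0 /* example only */
++static int noop_create(struct container_subsys *ss, struct container *cont)
++{
++ return 0; /* per-container state would be allocated here */
++}
++
++static void noop_destroy(struct container_subsys *ss, struct container *cont)
++{
++ /* per-container state would be freed here */
++}
++
++struct container_subsys noop_subsys = {
++ .name = "noop",
++ .create = noop_create,
++ .destroy = noop_destroy,
++ .subsys_id = noop_subsys_id, /* hypothetical id */
++};
++#endif
++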
+ /*
+ * cpuset_init_early - just enough so that the calls to
+ * cpuset_update_task_memory_state() in early init code
+@@ -2011,13 +1298,11 @@
+
+ int __init cpuset_init_early(void)
+ {
+- struct task_struct *tsk = current;
+-
+- tsk->cpuset = &top_cpuset;
+- tsk->cpuset->mems_generation = cpuset_mems_generation++;
++ top_cpuset.mems_generation = cpuset_mems_generation++;
+ return 0;
+ }
+
++
+ /**
+ * cpuset_init - initialize cpusets at system boot
+ *
+@@ -2026,8 +1311,7 @@
+
+ int __init cpuset_init(void)
+ {
+- struct dentry *root;
+- int err;
++ int err = 0;
+
+ top_cpuset.cpus_allowed = CPU_MASK_ALL;
+ top_cpuset.mems_allowed = NODE_MASK_ALL;
+@@ -2035,30 +1319,12 @@
+ fmeter_init(&top_cpuset.fmeter);
+ top_cpuset.mems_generation = cpuset_mems_generation++;
+
+- init_task.cpuset = &top_cpuset;
+-
+ err = register_filesystem(&cpuset_fs_type);
+ if (err < 0)
+- goto out;
+- cpuset_mount = kern_mount(&cpuset_fs_type);
+- if (IS_ERR(cpuset_mount)) {
+- printk(KERN_ERR "cpuset: could not mount!\n");
+- err = PTR_ERR(cpuset_mount);
+- cpuset_mount = NULL;
+- goto out;
+- }
+- root = cpuset_mount->mnt_sb->s_root;
+- root->d_fsdata = &top_cpuset;
+- inc_nlink(root->d_inode);
+- top_cpuset.dentry = root;
+- root->d_inode->i_op = &cpuset_dir_inode_operations;
+- number_of_cpusets = 1;
+- err = cpuset_populate_dir(root);
+- /* memory_pressure_enabled is in root cpuset only */
+- if (err == 0)
+- err = cpuset_add_file(root, &cft_memory_pressure_enabled);
+-out:
+ return err;
++
++ number_of_cpusets = 1;
++ return 0;
+ }
+
+ /*
+@@ -2084,10 +1350,12 @@
+
+ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+ {
++ struct container *cont;
+ struct cpuset *c;
+
+ /* Each of our child cpusets mems must be online */
+- list_for_each_entry(c, &cur->children, sibling) {
++ list_for_each_entry(cont, &cur->css.container->children, sibling) {
++ c = container_cs(cont);
+ guarantee_online_cpus_mems_in_subtree(c);
+ if (!cpus_empty(c->cpus_allowed))
+ guarantee_online_cpus(c, &c->cpus_allowed);
+@@ -2114,7 +1382,7 @@
+
+ static void common_cpu_mem_hotplug_unplug(void)
+ {
+- mutex_lock(&manage_mutex);
++ container_lock();
+ mutex_lock(&callback_mutex);
+
+ guarantee_online_cpus_mems_in_subtree(&top_cpuset);
+@@ -2122,7 +1390,7 @@
+ top_cpuset.mems_allowed = node_online_map;
+
+ mutex_unlock(&callback_mutex);
+- mutex_unlock(&manage_mutex);
++ container_unlock();
+ }
+
+ /*
+@@ -2170,109 +1438,7 @@
+ }
+
+ /**
+- * cpuset_fork - attach newly forked task to its parents cpuset.
+- * @tsk: pointer to task_struct of forking parent process.
+- *
+- * Description: A task inherits its parent's cpuset at fork().
+- *
+- * A pointer to the shared cpuset was automatically copied in fork.c
+- * by dup_task_struct(). However, we ignore that copy, since it was
+- * not made under the protection of task_lock(), so might no longer be
+- * a valid cpuset pointer. attach_task() might have already changed
+- * current->cpuset, allowing the previously referenced cpuset to
+- * be removed and freed. Instead, we task_lock(current) and copy
+- * its present value of current->cpuset for our freshly forked child.
+- *
+- * At the point that cpuset_fork() is called, 'current' is the parent
+- * task, and the passed argument 'child' points to the child task.
+- **/
+
+-void cpuset_fork(struct task_struct *child)
+-{
+- task_lock(current);
+- child->cpuset = current->cpuset;
+- atomic_inc(&child->cpuset->count);
+- task_unlock(current);
+-}
+-
+-/**
+- * cpuset_exit - detach cpuset from exiting task
+- * @tsk: pointer to task_struct of exiting process
+- *
+- * Description: Detach cpuset from @tsk and release it.
+- *
+- * Note that cpusets marked notify_on_release force every task in
+- * them to take the global manage_mutex mutex when exiting.
+- * This could impact scaling on very large systems. Be reluctant to
+- * use notify_on_release cpusets where very high task exit scaling
+- * is required on large systems.
+- *
+- * Don't even think about derefencing 'cs' after the cpuset use count
+- * goes to zero, except inside a critical section guarded by manage_mutex
+- * or callback_mutex. Otherwise a zero cpuset use count is a license to
+- * any other task to nuke the cpuset immediately, via cpuset_rmdir().
+- *
+- * This routine has to take manage_mutex, not callback_mutex, because
+- * it is holding that mutex while calling check_for_release(),
+- * which calls kmalloc(), so can't be called holding callback_mutex().
+- *
+- * the_top_cpuset_hack:
+- *
+- * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+- *
+- * Don't leave a task unable to allocate memory, as that is an
+- * accident waiting to happen should someone add a callout in
+- * do_exit() after the cpuset_exit() call that might allocate.
+- * If a task tries to allocate memory with an invalid cpuset,
+- * it will oops in cpuset_update_task_memory_state().
+- *
+- * We call cpuset_exit() while the task is still competent to
+- * handle notify_on_release(), then leave the task attached to
+- * the root cpuset (top_cpuset) for the remainder of its exit.
+- *
+- * To do this properly, we would increment the reference count on
+- * top_cpuset, and near the very end of the kernel/exit.c do_exit()
+- * code we would add a second cpuset function call, to drop that
+- * reference. This would just create an unnecessary hot spot on
+- * the top_cpuset reference count, to no avail.
+- *
+- * Normally, holding a reference to a cpuset without bumping its
+- * count is unsafe. The cpuset could go away, or someone could
+- * attach us to a different cpuset, decrementing the count on
+- * the first cpuset that we never incremented. But in this case,
+- * top_cpuset isn't going away, and either task has PF_EXITING set,
+- * which wards off any attach_task() attempts, or task is a failed
+- * fork, never visible to attach_task.
+- *
+- * Another way to do this would be to set the cpuset pointer
+- * to NULL here, and check in cpuset_update_task_memory_state()
+- * for a NULL pointer. This hack avoids that NULL check, for no
+- * cost (other than this way too long comment ;).
+- **/
+-
+-void cpuset_exit(struct task_struct *tsk)
+-{
+- struct cpuset *cs;
+-
+- task_lock(current);
+- cs = tsk->cpuset;
+- tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */
+- task_unlock(current);
+-
+- if (notify_on_release(cs)) {
+- char *pathbuf = NULL;
+-
+- mutex_lock(&manage_mutex);
+- if (atomic_dec_and_test(&cs->count))
+- check_for_release(cs, &pathbuf);
+- mutex_unlock(&manage_mutex);
+- cpuset_release_agent(pathbuf);
+- } else {
+- atomic_dec(&cs->count);
+- }
+-}
+-
+-/**
+ * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+ *
+@@ -2288,7 +1454,7 @@
+
+ mutex_lock(&callback_mutex);
+ task_lock(tsk);
+- guarantee_online_cpus(tsk->cpuset, &mask);
++ guarantee_online_cpus(task_cs(tsk), &mask);
+ task_unlock(tsk);
+ mutex_unlock(&callback_mutex);
+
+@@ -2316,7 +1482,7 @@
+
+ mutex_lock(&callback_mutex);
+ task_lock(tsk);
+- guarantee_online_mems(tsk->cpuset, &mask);
++ guarantee_online_mems(task_cs(tsk), &mask);
+ task_unlock(tsk);
+ mutex_unlock(&callback_mutex);
+
+@@ -2447,7 +1613,7 @@
+ mutex_lock(&callback_mutex);
+
+ task_lock(current);
+- cs = nearest_exclusive_ancestor(current->cpuset);
++ cs = nearest_exclusive_ancestor(task_cs(current));
+ task_unlock(current);
+
+ allowed = node_isset(node, cs->mems_allowed);
+@@ -2584,7 +1750,7 @@
+ task_unlock(current);
+ goto done;
+ }
+- cs1 = nearest_exclusive_ancestor(current->cpuset);
++ cs1 = nearest_exclusive_ancestor(task_cs(current));
+ task_unlock(current);
+
+ task_lock((struct task_struct *)p);
+@@ -2592,7 +1758,7 @@
+ task_unlock((struct task_struct *)p);
+ goto done;
+ }
+- cs2 = nearest_exclusive_ancestor(p->cpuset);
++ cs2 = nearest_exclusive_ancestor(task_cs((struct task_struct *)p));
+ task_unlock((struct task_struct *)p);
+
+ overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+@@ -2628,14 +1794,12 @@
+
+ void __cpuset_memory_pressure_bump(void)
+ {
+- struct cpuset *cs;
+-
+ task_lock(current);
+- cs = current->cpuset;
+- fmeter_markevent(&cs->fmeter);
++ fmeter_markevent(&task_cs(current)->fmeter);
+ task_unlock(current);
+ }
+
++#ifdef CONFIG_PROC_PID_CPUSET
+ /*
+ * proc_cpuset_show()
+ * - Print tasks cpuset path into seq_file.
+@@ -2652,6 +1816,7 @@
+ struct pid *pid;
+ struct task_struct *tsk;
+ char *buf;
++ struct container_subsys_state *css;
+ int retval;
+
+ retval = -ENOMEM;
+@@ -2666,15 +1831,15 @@
+ goto out_free;
+
+ retval = -EINVAL;
+- mutex_lock(&manage_mutex);
+-
+- retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
++ container_lock();
++ css = task_subsys_state(tsk, cpuset_subsys_id);
++ retval = container_path(css->container, buf, PAGE_SIZE);
+ if (retval < 0)
+ goto out_unlock;
+ seq_puts(m, buf);
+ seq_putc(m, '\n');
+ out_unlock:
+- mutex_unlock(&manage_mutex);
++ container_unlock();
+ put_task_struct(tsk);
+ out_free:
+ kfree(buf);
+@@ -2694,6 +1859,7 @@
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
++#endif /* CONFIG_PROC_PID_CPUSET */
+
+ /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
+ char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
+diff -Nurb linux-2.6.22-570/kernel/exit.c linux-2.6.22-591/kernel/exit.c
+--- linux-2.6.22-570/kernel/exit.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/exit.c 2007-12-21 15:36:12.000000000 -0500
+@@ -31,7 +31,8 @@
+ #include <linux/mempolicy.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/delayacct.h>
+-#include <linux/cpuset.h>
++#include <linux/freezer.h>
++#include <linux/container.h>
+ #include <linux/syscalls.h>
+ #include <linux/signal.h>
+ #include <linux/posix-timers.h>
+@@ -393,6 +394,11 @@
+ * they would be locked into memory.
+ */
+ exit_mm(current);
++ /*
++ * We don't want to have TIF_FREEZE set if the system-wide hibernation
++ * or suspend transition begins right now.
++ */
++ current->flags |= PF_NOFREEZE;
+
+ set_special_pids(1, 1);
+ proc_clear_tty(current);
+@@ -875,6 +881,34 @@
+ release_task(tsk);
+ }
+
++#ifdef CONFIG_DEBUG_STACK_USAGE
++static void check_stack_usage(void)
++{
++ static DEFINE_SPINLOCK(low_water_lock);
++ static int lowest_to_date = THREAD_SIZE;
++ unsigned long *n = end_of_stack(current);
++ unsigned long free;
++
++ while (*n == 0)
++ n++;
++ free = (unsigned long)n - (unsigned long)end_of_stack(current);
++
++ if (free >= lowest_to_date)
++ return;
++
++ spin_lock(&low_water_lock);
++ if (free < lowest_to_date) {
++ printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
++ "left\n",
++ current->comm, free);
++ lowest_to_date = free;
++ }
++ spin_unlock(&low_water_lock);
++}
++#else
++static inline void check_stack_usage(void) {}
++#endif
++
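++/*
++ * Editor's illustrative sketch -- not part of this patch: the same
++ * low-water-mark technique as check_stack_usage() above, in a
++ * standalone userspace harness. Thread stacks start out zero-filled,
++ * so scanning from the far end for the first nonzero word reveals how
++ * deep the stack ever grew.
++ */
++#if 0 /* standalone example, compile separately */
++#include <stdio.h>
++#include <string.h>
++
++static unsigned char fake_stack[4096]; /* stands in for a thread stack */
++
++static void worker(void)
++{
++ memset(fake_stack + sizeof(fake_stack) - 512, 0xaa, 512);
++}
++
++int main(void)
++{
++ size_t i;
++
++ memset(fake_stack, 0, sizeof(fake_stack)); /* fresh "stack" */
++ worker(); /* "run" the task */
++ for (i = 0; i < sizeof(fake_stack) && fake_stack[i] == 0; i++)
++ ;
++ printf("%zu bytes were never touched\n", i); /* 3584 here */
++ return 0;
++}
++#endif
++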
+ fastcall NORET_TYPE void do_exit(long code)
+ {
+ struct task_struct *tsk = current;
+@@ -966,8 +1000,9 @@
+ exit_sem(tsk);
+ __exit_files(tsk);
+ __exit_fs(tsk);
++ check_stack_usage();
+ exit_thread();
+- cpuset_exit(tsk);
++ container_exit(tsk, 1);
+ exit_keys(tsk);
+
+ if (group_dead && tsk->signal->leader)
+diff -Nurb linux-2.6.22-570/kernel/fork.c linux-2.6.22-591/kernel/fork.c
+--- linux-2.6.22-570/kernel/fork.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/fork.c 2007-12-21 15:36:15.000000000 -0500
+@@ -29,7 +29,7 @@
+ #include <linux/nsproxy.h>
+ #include <linux/capability.h>
+ #include <linux/cpu.h>
+-#include <linux/cpuset.h>
++#include <linux/container.h>
+ #include <linux/security.h>
+ #include <linux/swap.h>
+ #include <linux/syscalls.h>
+@@ -342,6 +342,8 @@
+ atomic_set(&mm->mm_count, 1);
+ init_rwsem(&mm->mmap_sem);
+ INIT_LIST_HEAD(&mm->mmlist);
++ mm->flags = (current->mm) ? current->mm->flags
++ : MMF_DUMP_FILTER_DEFAULT;
+ mm->core_waiters = 0;
+ mm->nr_ptes = 0;
+ __set_mm_counter(mm, file_rss, 0);
+@@ -936,7 +938,7 @@
+ {
+ unsigned long new_flags = p->flags;
+
+- new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
++ new_flags &= ~PF_SUPERPRIV;
+ new_flags |= PF_FORKNOEXEC;
+ if (!(clone_flags & CLONE_PTRACE))
+ p->ptrace = 0;
+@@ -977,6 +979,7 @@
+ {
+ int retval;
+ struct task_struct *p = NULL;
++ int container_callbacks_done = 0;
+ struct vx_info *vxi;
+ struct nx_info *nxi;
+
+@@ -1061,11 +1064,6 @@
+ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
+ copy_flags(clone_flags, p);
+ p->pid = pid_nr(pid);
+- retval = -EFAULT;
+- if (clone_flags & CLONE_PARENT_SETTID)
+- if (put_user(p->pid, parent_tidptr))
+- goto bad_fork_cleanup_delays_binfmt;
+-
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
+ p->vfork_done = NULL;
+@@ -1095,17 +1093,19 @@
+
+ p->lock_depth = -1; /* -1 = no lock */
+ do_posix_clock_monotonic_gettime(&p->start_time);
++ p->real_start_time = p->start_time;
++ monotonic_to_bootbased(&p->real_start_time);
+ p->security = NULL;
+ p->io_context = NULL;
+ p->io_wait = NULL;
+ p->audit_context = NULL;
+- cpuset_fork(p);
++ container_fork(p);
+ #ifdef CONFIG_NUMA
+ p->mempolicy = mpol_copy(p->mempolicy);
+ if (IS_ERR(p->mempolicy)) {
+ retval = PTR_ERR(p->mempolicy);
+ p->mempolicy = NULL;
+- goto bad_fork_cleanup_cpuset;
++ goto bad_fork_cleanup_container;
+ }
+ mpol_fix_fork_child_flag(p);
+ #endif
+@@ -1215,6 +1215,12 @@
+ /* Perform scheduler related setup. Assign this task to a CPU. */
+ sched_fork(p, clone_flags);
+
++ /* Now that the task is set up, run container callbacks if
++ * necessary. We need to run them before the task is visible
++ * on the tasklist. */
++ container_fork_callbacks(p);
++ container_callbacks_done = 1;
++
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+@@ -1314,6 +1320,14 @@
+ if (nxi)
+ claim_nx_info(nxi, p);
+ write_unlock_irq(&tasklist_lock);
++
++ /*
++ * Now that we know the fork has succeeded, record the new
++ * TID. It's too late to back out if this fails.
++ */
++ if (clone_flags & CLONE_PARENT_SETTID)
++ put_user(p->pid, parent_tidptr);
++
+ proc_fork_connector(p);
+ return p;
+
+@@ -1341,10 +1355,9 @@
+ bad_fork_cleanup_policy:
+ #ifdef CONFIG_NUMA
+ mpol_free(p->mempolicy);
+-bad_fork_cleanup_cpuset:
++bad_fork_cleanup_container:
+ #endif
+- cpuset_exit(p);
+-bad_fork_cleanup_delays_binfmt:
++ container_exit(p, container_callbacks_done);
+ delayacct_tsk_free(p);
+ if (p->binfmt)
+ module_put(p->binfmt->module);
+@@ -1661,7 +1674,7 @@
+ err = -EINVAL;
+ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+- CLONE_NEWUTS|CLONE_NEWIPC))
++ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
+ goto bad_unshare_out;
+
+ if ((err = unshare_thread(unshare_flags)))
+diff -Nurb linux-2.6.22-570/kernel/kgdb.c linux-2.6.22-591/kernel/kgdb.c
+--- linux-2.6.22-570/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/kgdb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,1866 @@
++/*
++ * kernel/kgdb.c
++ *
++ * Maintainer: Jason Wessel <jason.wessel@windriver.com>
++ *
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2002-2004 Timesys Corporation
++ * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
++ * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
++ * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
++ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
++ * Copyright (C) 2005-2007 Wind River Systems, Inc.
++ *
++ * Contributors at various stages not listed above:
++ * Jason Wessel ( jason.wessel@windriver.com )
++ * George Anzinger <george@mvista.com>
++ * Anurekh Saxena (anurekh.saxena@timesys.com)
++ * Lake Stevens Instrument Division (Glenn Engel)
++ * Jim Kingdon, Cygnus Support.
++ *
++ * Original KGDB stub: David Grothe <dave@gcom.com>,
++ * Tigran Aivazian <tigran@sco.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <linux/mm.h>
++#include <linux/threads.h>
++#include <linux/reboot.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <linux/kgdb.h>
++#include <asm/atomic.h>
++#include <linux/notifier.h>
++#include <linux/module.h>
++#include <asm/cacheflush.h>
++#include <linux/init.h>
++#include <linux/sysrq.h>
++#include <linux/console.h>
++#include <linux/sched.h>
++#include <linux/pid_namespace.h>
++#include <asm/byteorder.h>
++
++extern int pid_max;
++/* How many times to count all of the waiting CPUs */
++#define ROUNDUP_WAIT 640000 /* Arbitrary, increase if needed. */
++#define BUF_THREAD_ID_SIZE 16
++
++/*
++ * kgdb_initialized with a value of 1 indicates that kgdb is setup and is
++ * all ready to serve breakpoints and other kernel exceptions. A value of
++ * -1 indicates that we have tried to initialize early, and need to try
++ * again later.
++ */
++int kgdb_initialized;
++/* Is a host GDB connected to us? */
++int kgdb_connected;
++/* Could we be about to try to access a bad memory location? If so, we
++ * also need to flag that this has happened. */
++int kgdb_may_fault;
++#ifdef CONFIG_PREEMPT
++static int kgdb_fault_preempt_count;
++#endif
++
++/* All the KGDB handlers are installed */
++int kgdb_from_module_registered = 0;
++/* Guard for recursive entry */
++static int exception_level = 0;
++
++/* We provide a kgdb_io_ops structure that may be overridden. */
++struct kgdb_io __attribute__ ((weak)) kgdb_io_ops;
++
++static struct kgdb_io kgdb_io_ops_prev[MAX_KGDB_IO_HANDLERS];
++static int kgdb_io_handler_cnt = 0;
++
++/* Export the following symbols for use with kernel modules */
++EXPORT_SYMBOL(kgdb_io_ops);
++EXPORT_SYMBOL(kgdb_tasklet_breakpoint);
++EXPORT_SYMBOL(kgdb_connected);
++EXPORT_SYMBOL(kgdb_register_io_module);
++EXPORT_SYMBOL(kgdb_unregister_io_module);
++EXPORT_SYMBOL(debugger_active);
++
++/*
++ * Holds information about breakpoints in a kernel. These breakpoints are
++ * added and removed by gdb.
++ */
++struct kgdb_bkpt kgdb_break[MAX_BREAKPOINTS];
++
++struct kgdb_arch *kgdb_ops = &arch_kgdb_ops;
++
++static const char hexchars[] = "0123456789abcdef";
++
++static spinlock_t slavecpulocks[NR_CPUS];
++static atomic_t procindebug[NR_CPUS];
++atomic_t kgdb_setting_breakpoint;
++EXPORT_SYMBOL(kgdb_setting_breakpoint);
++struct task_struct *kgdb_usethread, *kgdb_contthread;
++
++int debugger_step;
++atomic_t debugger_active;
++
++/* Our I/O buffers. */
++static char remcom_in_buffer[BUFMAX];
++static char remcom_out_buffer[BUFMAX];
++/* Storage for the registers, in GDB format. */
++static unsigned long gdb_regs[(NUMREGBYTES + sizeof(unsigned long) - 1) /
++ sizeof(unsigned long)];
++/* Storage of registers for handling a fault. */
++unsigned long kgdb_fault_jmp_regs[NUMCRITREGBYTES / sizeof(unsigned long)]
++ JMP_REGS_ALIGNMENT;
++static int kgdb_notify_reboot(struct notifier_block *this,
++ unsigned long code ,void *x);
++struct debuggerinfo_struct {
++ void *debuggerinfo;
++ struct task_struct *task;
++} kgdb_info[NR_CPUS];
++
++/* To keep track of the CPU which is doing the single stepping */
++atomic_t cpu_doing_single_step = ATOMIC_INIT(-1);
++
++atomic_t kgdb_sync_softlockup[NR_CPUS] = {ATOMIC_INIT(0)};
++
++/* reboot notifier block */
++static struct notifier_block kgdb_reboot_notifier = {
++ .notifier_call = kgdb_notify_reboot,
++ .next = NULL,
++ .priority = INT_MAX,
++};
++
++int __attribute__ ((weak))
++ kgdb_validate_break_address(unsigned long addr)
++{
++ int error = 0;
++ char tmp_variable[BREAK_INSTR_SIZE];
++ error = kgdb_get_mem((char *)addr, tmp_variable, BREAK_INSTR_SIZE);
++ return error;
++}
++
++int __attribute__ ((weak))
++ kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
++{
++ int error = 0;
++ if ((error = kgdb_get_mem((char *)addr,
++ saved_instr, BREAK_INSTR_SIZE)) < 0)
++ return error;
++
++ if ((error = kgdb_set_mem((char *)addr, kgdb_ops->gdb_bpt_instr,
++ BREAK_INSTR_SIZE)) < 0)
++ return error;
++ return 0;
++}
++
++int __attribute__ ((weak))
++ kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
++{
++ int error = 0;
++
++ if ((error = kgdb_set_mem((char *)addr, (char *)bundle,
++ BREAK_INSTR_SIZE)) < 0)
++ return error;
++ return 0;
++}
++
++unsigned long __attribute__ ((weak))
++ kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++ return instruction_pointer(regs);
++}
++
++static int hex(char ch)
++{
++ if ((ch >= 'a') && (ch <= 'f'))
++ return (ch - 'a' + 10);
++ if ((ch >= '0') && (ch <= '9'))
++ return (ch - '0');
++ if ((ch >= 'A') && (ch <= 'F'))
++ return (ch - 'A' + 10);
++ return (-1);
++}
++
++/* scan for the sequence $<data>#<checksum> */
++static void get_packet(char *buffer)
++{
++ unsigned char checksum;
++ unsigned char xmitcsum;
++ int count;
++ char ch;
++ if (!kgdb_io_ops.read_char)
++ return;
++ do {
++ /* Spin and wait around for the start character, ignore all
++ * other characters */
++ while ((ch = (kgdb_io_ops.read_char())) != '$') ;
++ kgdb_connected = 1;
++ checksum = 0;
++ xmitcsum = -1;
++
++ count = 0;
++
++ /* now, read until a # or end of buffer is found */
++ while (count < (BUFMAX - 1)) {
++ ch = kgdb_io_ops.read_char();
++ if (ch == '#')
++ break;
++ checksum = checksum + ch;
++ buffer[count] = ch;
++ count = count + 1;
++ }
++ buffer[count] = 0;
++
++ if (ch == '#') {
++ xmitcsum = hex(kgdb_io_ops.read_char()) << 4;
++ xmitcsum += hex(kgdb_io_ops.read_char());
++
++ if (checksum != xmitcsum)
++ /* failed checksum */
++ kgdb_io_ops.write_char('-');
++ else
++ /* successful transfer */
++ kgdb_io_ops.write_char('+');
++ if (kgdb_io_ops.flush)
++ kgdb_io_ops.flush();
++ }
++ } while (checksum != xmitcsum);
++}
++
++static void kgdb_set_may_fault(void)
++{
++ kgdb_may_fault = 1;
++#ifdef CONFIG_PREEMPT
++ kgdb_fault_preempt_count = preempt_count();
++#endif
++}
++
++static void kgdb_unset_may_fault(void)
++{
++ kgdb_may_fault = 0;
++#ifdef CONFIG_PREEMPT
++ preempt_count() = kgdb_fault_preempt_count;
++#endif
++}
++
++/*
++ * Send the packet in buffer.
++ * Check for gdb connection if asked for.
++ */
++static void put_packet(char *buffer)
++{
++ unsigned char checksum;
++ int count;
++ char ch;
++
++ if (!kgdb_io_ops.write_char)
++ return;
++ /* $<packet info>#<checksum>. */
++ while (1) {
++ kgdb_io_ops.write_char('$');
++ checksum = 0;
++ count = 0;
++
++ while ((ch = buffer[count])) {
++ kgdb_io_ops.write_char(ch);
++ checksum += ch;
++ count++;
++ }
++
++ kgdb_io_ops.write_char('#');
++ kgdb_io_ops.write_char(hexchars[checksum >> 4]);
++ kgdb_io_ops.write_char(hexchars[checksum % 16]);
++ if (kgdb_io_ops.flush)
++ kgdb_io_ops.flush();
++
++ /* Now see what we get in reply. */
++ ch = kgdb_io_ops.read_char();
++
++ if (ch == 3)
++ ch = kgdb_io_ops.read_char();
++
++ /* If we get an ACK, we are done. */
++ if (ch == '+')
++ return;
++
++ /* If we get the start of another packet, this means
++ * that GDB is attempting to reconnect. We will NAK
++ * the packet being sent, and stop trying to send this
++ * packet. */
++ if (ch == '$') {
++ kgdb_io_ops.write_char('-');
++ if (kgdb_io_ops.flush)
++ kgdb_io_ops.flush();
++ return;
++ }
++ }
++}
++
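++/*
++ * Editor's illustrative sketch -- not part of this patch: the framing
++ * that get_packet()/put_packet() above implement. A packet travels as
++ * "$<payload>#<checksum>", where the checksum is the sum of the
++ * payload bytes modulo 256, sent as two lowercase hex digits. For the
++ * single-byte payload "g" (0x67) the wire form is "$g#67".
++ */
++static void frame_packet_example(const char *payload, char *out)
++{
++ unsigned char sum = 0;
++ const char *p;
++
++ for (p = payload; *p; p++)
++ sum += (unsigned char)*p;
++ *out++ = '$';
++ while (*payload)
++ *out++ = *payload++;
++ *out++ = '#';
++ *out++ = hexchars[sum >> 4];
++ *out++ = hexchars[sum & 0xf];
++ *out = '\0';
++}
++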
++/*
++ * convert the memory pointed to by mem into hex, placing result in buf
++ * return a pointer to the last char put in buf (null). May return an error.
++ */
++char *kgdb_mem2hex(char *mem, char *buf, int count)
++{
++ kgdb_set_may_fault();
++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++ kgdb_unset_may_fault();
++ return ERR_PTR(-EINVAL);
++ }
++ /* Accessing some registers in a single load instruction is
++ * required to avoid bad side effects for some I/O registers.
++ */
++ if ((count == 2) && (((long)mem & 1) == 0)) {
++ unsigned short tmp_s = *(unsigned short *)mem;
++ mem += 2;
++#ifdef __BIG_ENDIAN
++ *buf++ = hexchars[(tmp_s >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_s >> 8) & 0xf];
++ *buf++ = hexchars[(tmp_s >> 4) & 0xf];
++ *buf++ = hexchars[tmp_s & 0xf];
++#else
++ *buf++ = hexchars[(tmp_s >> 4) & 0xf];
++ *buf++ = hexchars[tmp_s & 0xf];
++ *buf++ = hexchars[(tmp_s >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_s >> 8) & 0xf];
++#endif
++ } else if ((count == 4) && (((long)mem & 3) == 0)) {
++ unsigned long tmp_l = *(unsigned int *)mem;
++ mem += 4;
++#ifdef __BIG_ENDIAN
++ *buf++ = hexchars[(tmp_l >> 28) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 24) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 20) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 16) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 8) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 4) & 0xf];
++ *buf++ = hexchars[tmp_l & 0xf];
++#else
++ *buf++ = hexchars[(tmp_l >> 4) & 0xf];
++ *buf++ = hexchars[tmp_l & 0xf];
++ *buf++ = hexchars[(tmp_l >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 8) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 20) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 16) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 28) & 0xf];
++ *buf++ = hexchars[(tmp_l >> 24) & 0xf];
++#endif
++#ifdef CONFIG_64BIT
++ } else if ((count == 8) && (((long)mem & 7) == 0)) {
++ unsigned long long tmp_ll = *(unsigned long long *)mem;
++ mem += 8;
++#ifdef __BIG_ENDIAN
++ *buf++ = hexchars[(tmp_ll >> 60) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 56) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 52) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 48) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 44) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 40) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 36) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 32) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 28) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 24) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 20) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 16) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 8) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 4) & 0xf];
++ *buf++ = hexchars[tmp_ll & 0xf];
++#else
++ *buf++ = hexchars[(tmp_ll >> 4) & 0xf];
++ *buf++ = hexchars[tmp_ll & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 12) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 8) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 20) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 16) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 28) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 24) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 36) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 32) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 44) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 40) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 52) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 48) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 60) & 0xf];
++ *buf++ = hexchars[(tmp_ll >> 56) & 0xf];
++#endif
++#endif
++ } else {
++ while (count-- > 0) {
++ unsigned char ch = *mem++;
++ *buf++ = hexchars[ch >> 4];
++ *buf++ = hexchars[ch & 0xf];
++ }
++ }
++ kgdb_unset_may_fault();
++ *buf = 0;
++ return (buf);
++}
++
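++/*
++ * Editor's worked example -- not part of this patch. GDB expects the
++ * bytes of target memory in address order. On a little-endian machine
++ * the 32-bit word 0x12345678 is stored as 78 56 34 12, so both the
++ * 4-byte fast path above and the generic byte loop yield the string
++ * "78563412"; the separate __BIG_ENDIAN nibble ordering is what keeps
++ * the two paths consistent.
++ */
++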
++/*
++ * Copy the binary array pointed to by buf into mem. Fix $, #, and
++ * 0x7d escaped with 0x7d. Return a pointer to the character after
++ * the last byte written.
++ */
++static char *kgdb_ebin2mem(char *buf, char *mem, int count)
++{
++ kgdb_set_may_fault();
++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++ kgdb_unset_may_fault();
++ return ERR_PTR(-EINVAL);
++ }
++ for (; count > 0; count--, buf++) {
++ if (*buf == 0x7d)
++ *mem++ = *(++buf) ^ 0x20;
++ else
++ *mem++ = *buf;
++ }
++ kgdb_unset_may_fault();
++ return mem;
++}
++
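++/*
++ * Editor's illustrative sketch -- not part of this patch: the encoder
++ * side of the escaping that kgdb_ebin2mem() above undoes. In binary
++ * ('X') packets the bytes '$' (0x24), '#' (0x23) and '}' (0x7d) would
++ * confuse the framing, so a sender emits 0x7d followed by the byte
++ * XORed with 0x20; decoding is the mirror image.
++ */
++static int ebin_encode_example(const unsigned char *src, int len,
++ unsigned char *dst)
++{
++ int out = 0, i;
++
++ for (i = 0; i < len; i++) {
++ unsigned char c = src[i];
++
++ if (c == 0x23 || c == 0x24 || c == 0x7d) {
++ dst[out++] = 0x7d;
++ dst[out++] = c ^ 0x20;
++ } else
++ dst[out++] = c;
++ }
++ return out;
++}
++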
++/*
++ * convert the hex array pointed to by buf into binary to be placed in mem
++ * return a pointer to the character AFTER the last byte written
++ * May return an error.
++ */
++char *kgdb_hex2mem(char *buf, char *mem, int count)
++{
++ kgdb_set_may_fault();
++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++ kgdb_unset_may_fault();
++ return ERR_PTR(-EINVAL);
++ }
++ if ((count == 2) && (((long)mem & 1) == 0)) {
++ unsigned short tmp_s = 0;
++#ifdef __BIG_ENDIAN
++ tmp_s |= hex(*buf++) << 12;
++ tmp_s |= hex(*buf++) << 8;
++ tmp_s |= hex(*buf++) << 4;
++ tmp_s |= hex(*buf++);
++#else
++ tmp_s |= hex(*buf++) << 4;
++ tmp_s |= hex(*buf++);
++ tmp_s |= hex(*buf++) << 12;
++ tmp_s |= hex(*buf++) << 8;
++#endif
++ *(unsigned short *)mem = tmp_s;
++ mem += 2;
++ } else if ((count == 4) && (((long)mem & 3) == 0)) {
++ unsigned long tmp_l = 0;
++#ifdef __BIG_ENDIAN
++ tmp_l |= hex(*buf++) << 28;
++ tmp_l |= hex(*buf++) << 24;
++ tmp_l |= hex(*buf++) << 20;
++ tmp_l |= hex(*buf++) << 16;
++ tmp_l |= hex(*buf++) << 12;
++ tmp_l |= hex(*buf++) << 8;
++ tmp_l |= hex(*buf++) << 4;
++ tmp_l |= hex(*buf++);
++#else
++ tmp_l |= hex(*buf++) << 4;
++ tmp_l |= hex(*buf++);
++ tmp_l |= hex(*buf++) << 12;
++ tmp_l |= hex(*buf++) << 8;
++ tmp_l |= hex(*buf++) << 20;
++ tmp_l |= hex(*buf++) << 16;
++ tmp_l |= hex(*buf++) << 28;
++ tmp_l |= hex(*buf++) << 24;
++#endif
++ *(unsigned long *)mem = tmp_l;
++ mem += 4;
++ } else {
++ int i;
++ for (i = 0; i < count; i++) {
++ unsigned char ch = hex(*buf++) << 4;
++ ch |= hex(*buf++);
++ *mem++ = ch;
++ }
++ }
++ kgdb_unset_may_fault();
++ return (mem);
++}
++
++/*
++ * While we find nice hex chars, build a long_val.
++ * Return number of chars processed.
++ */
++int kgdb_hex2long(char **ptr, long *long_val)
++{
++ int hex_val, num = 0;
++
++ *long_val = 0;
++
++ while (**ptr) {
++ hex_val = hex(**ptr);
++ if (hex_val >= 0) {
++ *long_val = (*long_val << 4) | hex_val;
++ num++;
++ } else
++ break;
++
++ (*ptr)++;
++ }
++
++ return (num);
++}
++
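++/*
++ * Editor's illustrative sketch -- not part of this patch: how the
++ * packet handlers below use kgdb_hex2long(). The pointer is advanced
++ * past the digits so the caller can check the delimiter that follows,
++ * e.g. the "addr,length" pair of an 'm' packet.
++ */
++static void kgdb_hex2long_example(void)
++{
++ char in[] = "1f00,28";
++ char *ptr = in;
++ long addr = 0, length = 0;
++
++ if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
++ kgdb_hex2long(&ptr, &length) > 0) {
++ /* addr == 0x1f00, length == 0x28, ptr rests on the NUL */
++ }
++}
++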
++/* Write memory due to an 'M' or 'X' packet. */
++static char *write_mem_msg(int binary)
++{
++ char *ptr = &remcom_in_buffer[1];
++ unsigned long addr, length;
++
++ if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
++ kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
++ if (binary)
++ ptr = kgdb_ebin2mem(ptr, (char *)addr, length);
++ else
++ ptr = kgdb_hex2mem(ptr, (char *)addr, length);
++ if (CACHE_FLUSH_IS_SAFE)
++ flush_icache_range(addr, addr + length + 1);
++ if (IS_ERR(ptr))
++ return ptr;
++ return NULL;
++ }
++
++ return ERR_PTR(-EINVAL);
++}
++
++static inline char *pack_hex_byte(char *pkt, int byte)
++{
++ *pkt++ = hexchars[(byte >> 4) & 0xf];
++ *pkt++ = hexchars[(byte & 0xf)];
++ return pkt;
++}
++
++static inline void error_packet(char *pkt, int error)
++{
++ error = -error;
++ pkt[0] = 'E';
++ pkt[1] = hexchars[(error / 10)];
++ pkt[2] = hexchars[(error % 10)];
++ pkt[3] = '\0';
++}
++
++static char *pack_threadid(char *pkt, threadref * id)
++{
++ char *limit;
++ unsigned char *altid;
++
++ altid = (unsigned char *)id;
++ limit = pkt + BUF_THREAD_ID_SIZE;
++ while (pkt < limit)
++ pkt = pack_hex_byte(pkt, *altid++);
++
++ return pkt;
++}
++
++void int_to_threadref(threadref * id, int value)
++{
++ unsigned char *scan;
++ int i = 4;
++
++ scan = (unsigned char *)id;
++ while (i--)
++ *scan++ = 0;
++ *scan++ = (value >> 24) & 0xff;
++ *scan++ = (value >> 16) & 0xff;
++ *scan++ = (value >> 8) & 0xff;
++ *scan++ = (value & 0xff);
++}
++
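++/*
++ * Editor's worked example -- not part of this patch: composing the two
++ * helpers above. int_to_threadref() stores the pid big-endian in an
++ * 8-byte threadref and pack_threadid() hexifies all 8 bytes, so pid 1
++ * becomes the 16-character id "0000000000000001" seen in the 'T'
++ * stop reply and the 'qC' response below.
++ */
++static void threadid_example(char *out /* at least 17 bytes */)
++{
++ threadref ref;
++
++ int_to_threadref(&ref, 1);
++ *pack_threadid(out, &ref) = '\0';
++}
++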
++static struct task_struct *getthread(struct pt_regs *regs, int tid)
++{
++ if (init_pid_ns.last_pid == 0)
++ return current;
++
++ if (num_online_cpus() &&
++ (tid >= pid_max + num_online_cpus() + kgdb_ops->shadowth))
++ return NULL;
++
++ if (kgdb_ops->shadowth && (tid >= pid_max + num_online_cpus()))
++ return kgdb_get_shadow_thread(regs, tid - pid_max -
++ num_online_cpus());
++
++ if (tid >= pid_max)
++ return idle_task(tid - pid_max);
++
++ if (!tid)
++ return NULL;
++
++ return find_task_by_pid(tid);
++}
++
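++/*
++ * Editor's illustrative sketch -- not part of this patch: the
++ * thread-id bands that getthread() above decodes. shadow_pid()
++ * further down maps pid 0 (the current cpu's idle task) into the
++ * idle band so GDB never sees a zero thread id.
++ */
++static const char *tid_band_example(int tid, int ncpus, int shadowth)
++{
++ if (tid <= 0)
++ return "invalid";
++ if (tid < pid_max)
++ return "ordinary task (find_task_by_pid)";
++ if (tid < pid_max + ncpus)
++ return "per-cpu idle task (idle_task)";
++ if (tid < pid_max + ncpus + shadowth)
++ return "arch shadow thread";
++ return "out of range";
++}
++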
++#ifdef CONFIG_SMP
++static void kgdb_wait(struct pt_regs *regs)
++{
++ unsigned long flags;
++ int processor;
++
++ local_irq_save(flags);
++ processor = raw_smp_processor_id();
++ kgdb_info[processor].debuggerinfo = regs;
++ kgdb_info[processor].task = current;
++ atomic_set(&procindebug[processor], 1);
++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1);
++
++ /* Wait till master processor goes completely into the debugger.
++ * FIXME: this looks racy */
++ while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])) {
++ int i = 10; /* an arbitrary number */
++
++ while (--i)
++ cpu_relax();
++ }
++
++ /* Wait till master processor is done with debugging */
++ spin_lock(&slavecpulocks[processor]);
++
++ kgdb_info[processor].debuggerinfo = NULL;
++ kgdb_info[processor].task = NULL;
++
++ /* fix up hardware debug registers on local cpu */
++ if (kgdb_ops->correct_hw_break)
++ kgdb_ops->correct_hw_break();
++ /* Signal the master processor that we are done */
++ atomic_set(&procindebug[processor], 0);
++ spin_unlock(&slavecpulocks[processor]);
++ local_irq_restore(flags);
++}
++#endif
++
++int kgdb_get_mem(char *addr, unsigned char *buf, int count)
++{
++ kgdb_set_may_fault();
++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++ kgdb_unset_may_fault();
++ return -EINVAL;
++ }
++ while (count) {
++ if ((unsigned long)addr < TASK_SIZE) {
++ kgdb_unset_may_fault();
++ return -EINVAL;
++ }
++ *buf++ = *addr++;
++ count--;
++ }
++ kgdb_unset_may_fault();
++ return 0;
++}
++
++int kgdb_set_mem(char *addr, unsigned char *buf, int count)
++{
++ kgdb_set_may_fault();
++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++ kgdb_unset_may_fault();
++ return -EINVAL;
++ }
++ while (count) {
++ if ((unsigned long)addr < TASK_SIZE) {
++ kgdb_unset_may_fault();
++ return -EINVAL;
++ }
++ *addr++ = *buf++;
++ count--;
++ }
++ kgdb_unset_may_fault();
++ return 0;
++}
++
++int kgdb_activate_sw_breakpoints(void)
++{
++ int i;
++ int error = 0;
++ unsigned long addr;
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if (kgdb_break[i].state != bp_set)
++ continue;
++ addr = kgdb_break[i].bpt_addr;
++ if ((error = kgdb_arch_set_breakpoint(addr,
++ kgdb_break[i].saved_instr)))
++ return error;
++
++ if (CACHE_FLUSH_IS_SAFE) {
++ if (current->mm && addr < TASK_SIZE)
++ flush_cache_range(current->mm->mmap_cache,
++ addr, addr + BREAK_INSTR_SIZE);
++ else
++ flush_icache_range(addr, addr +
++ BREAK_INSTR_SIZE);
++ }
++
++ kgdb_break[i].state = bp_active;
++ }
++ return 0;
++}
++
++static int kgdb_set_sw_break(unsigned long addr)
++{
++ int i, breakno = -1;
++ int error = 0;
++ if ((error = kgdb_validate_break_address(addr)) < 0)
++ return error;
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if ((kgdb_break[i].state == bp_set) &&
++ (kgdb_break[i].bpt_addr == addr))
++ return -EEXIST;
++ }
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if (kgdb_break[i].state == bp_removed &&
++ kgdb_break[i].bpt_addr == addr) {
++ breakno = i;
++ break;
++ }
++ }
++
++ if (breakno == -1) {
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if (kgdb_break[i].state == bp_none) {
++ breakno = i;
++ break;
++ }
++ }
++ }
++ if (breakno == -1)
++ return -E2BIG;
++
++ kgdb_break[breakno].state = bp_set;
++ kgdb_break[breakno].type = bp_breakpoint;
++ kgdb_break[breakno].bpt_addr = addr;
++
++ return 0;
++}
++
++int kgdb_deactivate_sw_breakpoints(void)
++{
++ int i;
++ int error = 0;
++ unsigned long addr;
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if (kgdb_break[i].state != bp_active)
++ continue;
++ addr = kgdb_break[i].bpt_addr;
++ if ((error = kgdb_arch_remove_breakpoint(addr,
++ kgdb_break[i].saved_instr)))
++ return error;
++
++ if (CACHE_FLUSH_IS_SAFE && current->mm &&
++ addr < TASK_SIZE)
++ flush_cache_range(current->mm->mmap_cache,
++ addr, addr + BREAK_INSTR_SIZE);
++ else if (CACHE_FLUSH_IS_SAFE)
++ flush_icache_range(addr,
++ addr + BREAK_INSTR_SIZE);
++ kgdb_break[i].state = bp_set;
++ }
++ return 0;
++}
++
++static int kgdb_remove_sw_break(unsigned long addr)
++{
++ int i;
++
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if ((kgdb_break[i].state == bp_set) &&
++ (kgdb_break[i].bpt_addr == addr)) {
++ kgdb_break[i].state = bp_removed;
++ return 0;
++ }
++ }
++ return -ENOENT;
++}
++
++int kgdb_isremovedbreak(unsigned long addr)
++{
++ int i;
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if ((kgdb_break[i].state == bp_removed) &&
++ (kgdb_break[i].bpt_addr == addr)) {
++ return 1;
++ }
++ }
++ return 0;
++}
++
++int remove_all_break(void)
++{
++ int i;
++ int error;
++ unsigned long addr;
++
++ /* Clear memory breakpoints. */
++ for (i = 0; i < MAX_BREAKPOINTS; i++) {
++ if (kgdb_break[i].state != bp_set)
++ continue;
++ addr = kgdb_break[i].bpt_addr;
++ if ((error = kgdb_arch_remove_breakpoint(addr,
++ kgdb_break[i].saved_instr)))
++ return error;
++ kgdb_break[i].state = bp_removed;
++ }
++
++ /* Clear hardware breakpoints. */
++ if (kgdb_ops->remove_all_hw_break)
++ kgdb_ops->remove_all_hw_break();
++
++ return 0;
++}
++
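++/*
++ * Editor's note -- not part of this patch: the lifecycle the helpers
++ * above drive each kgdb_break[] slot through. bp_set and bp_removed
++ * record GDB's intent ('Z0'/'z0' packets); bp_active means the trap
++ * instruction is currently patched into the kernel text. Breakpoints
++ * are activated just before resuming ('c'/'s') and deactivated on
++ * re-entry so KGDB itself never trips over them:
++ *
++ * bp_none -> bp_set <-> bp_active, and bp_set -> bp_removed
++ */
++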
++static inline int shadow_pid(int realpid)
++{
++ if (realpid)
++ return realpid;
++ return pid_max + raw_smp_processor_id();
++}
++
++static char gdbmsgbuf[BUFMAX + 1];
++static void kgdb_msg_write(const char *s, int len)
++{
++ int i;
++ int wcount;
++ char *bufptr;
++
++ /* 'O'utput */
++ gdbmsgbuf[0] = 'O';
++
++ /* Fill and send buffers... */
++ while (len > 0) {
++ bufptr = gdbmsgbuf + 1;
++
++ /* Calculate how many this time */
++ if ((len << 1) > (BUFMAX - 2))
++ wcount = (BUFMAX - 2) >> 1;
++ else
++ wcount = len;
++
++ /* Pack in hex chars */
++ for (i = 0; i < wcount; i++)
++ bufptr = pack_hex_byte(bufptr, s[i]);
++ *bufptr = '\0';
++
++ /* Move up */
++ s += wcount;
++ len -= wcount;
++
++ /* Write packet */
++ put_packet(gdbmsgbuf);
++ }
++}
++
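++/*
++ * Editor's worked example -- not part of this patch: kgdb_msg_write()
++ * forwards console text as 'O' (output) packets with every byte
++ * hex-encoded. The three bytes "Hi\n" (0x48 0x69 0x0a) become the
++ * payload "O48690a", which put_packet() frames as "$O48690a#bb".
++ */
++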
++/*
++ * This function does all command processing for interfacing to gdb.
++ *
++ * Locking hierarchy:
++ * interface locks, if any (begin_session)
++ * kgdb lock (debugger_active)
++ *
++ * Note that since we can be in here prior to our cpumask being filled
++ * out, we err on the side of caution and loop over NR_CPUS instead
++ * of using for_each_online_cpu().
++ *
++ */
++int kgdb_handle_exception(int ex_vector, int signo, int err_code,
++ struct pt_regs *linux_regs)
++{
++ unsigned long length, addr;
++ char *ptr;
++ unsigned long flags;
++ unsigned i;
++ long threadid;
++ threadref thref;
++ struct task_struct *thread = NULL;
++ unsigned procid;
++ int numshadowth = num_online_cpus() + kgdb_ops->shadowth;
++ long kgdb_usethreadid = 0;
++ int error = 0, all_cpus_synced = 0;
++ struct pt_regs *shadowregs;
++ int processor = raw_smp_processor_id();
++ void *local_debuggerinfo;
++
++ /* Handle recursive entry to the debugger; panic if recovery fails. */
++ if (atomic_read(&debugger_active) == raw_smp_processor_id() + 1) {
++ exception_level++;
++ addr = kgdb_arch_pc(ex_vector, linux_regs);
++ kgdb_deactivate_sw_breakpoints();
++ if (kgdb_remove_sw_break(addr) == 0) {
++ /* If the breakpoint was removed successfully at the place
++ * where the exception occurred, try to recover and print a
++ * warning to the end user, because the user planted a
++ * breakpoint in a place that KGDB needs in order to function.
++ */
++ exception_level = 0;
++ kgdb_skipexception(ex_vector, linux_regs);
++ kgdb_activate_sw_breakpoints();
++ printk(KERN_CRIT "KGDB: re-enter exception: breakpoint removed\n");
++ WARN_ON(1);
++ return 0;
++ }
++ remove_all_break();
++ kgdb_skipexception(ex_vector, linux_regs);
++ if (exception_level > 1)
++ panic("Recursive entry to debugger");
++
++ printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints removed\n");
++ panic("Recursive entry to debugger");
++ return 0;
++ }
++
++ acquirelock:
++
++ /*
++ * Interrupts will be restored by the 'trap return' code, except when
++ * single stepping.
++ */
++ local_irq_save(flags);
++
++ /* Hold debugger_active */
++ procid = raw_smp_processor_id();
++
++ while (cmpxchg(&atomic_read(&debugger_active), 0, (procid + 1)) != 0) {
++ int i = 25; /* an arbitrary number */
++
++ while (--i)
++ cpu_relax();
++
++ if (atomic_read(&cpu_doing_single_step) != -1 &&
++ atomic_read(&cpu_doing_single_step) != procid)
++ udelay(1);
++ }
++
++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1);
++
++ /*
++ * Don't enter if the last instance of the exception handler wanted to
++ * come into the debugger again.
++ */
++ if (atomic_read(&cpu_doing_single_step) != -1 &&
++ atomic_read(&cpu_doing_single_step) != procid) {
++ atomic_set(&debugger_active, 0);
++ local_irq_restore(flags);
++ goto acquirelock;
++ }
++
++ /*
++ * Don't enter if we have hit a removed breakpoint.
++ */
++ if (kgdb_skipexception(ex_vector, linux_regs))
++ goto kgdb_restore;
++
++ /*
++ * Call the I/O drivers pre_exception routine
++ * if the I/O driver defined one
++ */
++ if (kgdb_io_ops.pre_exception)
++ kgdb_io_ops.pre_exception();
++
++ kgdb_info[processor].debuggerinfo = linux_regs;
++ kgdb_info[processor].task = current;
++
++ kgdb_disable_hw_debug(linux_regs);
++
++ if (!debugger_step || !kgdb_contthread)
++ for (i = 0; i < NR_CPUS; i++)
++ spin_lock(&slavecpulocks[i]);
++
++#ifdef CONFIG_SMP
++ /* Make sure we get the other CPUs */
++ if (!debugger_step || !kgdb_contthread)
++ kgdb_roundup_cpus(flags);
++#endif
++
++ /* spin_lock code is good enough as a barrier so we don't
++ * need one here */
++ atomic_set(&procindebug[processor], 1);
++
++ /* Wait a reasonable time for the other CPUs to be notified and
++ * be waiting for us. Very early on this could be imperfect
++ * as num_online_cpus() could be 0.*/
++ for (i = 0; i < ROUNDUP_WAIT; i++) {
++ int cpu, num = 0;
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ if (atomic_read(&procindebug[cpu]))
++ num++;
++ }
++ if (num >= num_online_cpus()) {
++ all_cpus_synced = 1;
++ break;
++ }
++ }
++
++ /* Clear the out buffer. */
++ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
++
++ /* Master processor is completely in the debugger */
++ kgdb_post_master_code(linux_regs, ex_vector, err_code);
++ kgdb_deactivate_sw_breakpoints();
++ debugger_step = 0;
++ kgdb_contthread = NULL;
++ exception_level = 0;
++
++ if (kgdb_connected) {
++ /* If we're still unable to round up all of the CPUs,
++ * send an 'O' packet informing the user again. */
++ if (!all_cpus_synced)
++ kgdb_msg_write("Not all CPUs have been synced for "
++ "KGDB\n", 39);
++ /* Reply to host that an exception has occurred */
++ ptr = remcom_out_buffer;
++ *ptr++ = 'T';
++ *ptr++ = hexchars[(signo >> 4) % 16];
++ *ptr++ = hexchars[signo % 16];
++ ptr += strlen(strcpy(ptr, "thread:"));
++ int_to_threadref(&thref, shadow_pid(current->pid));
++ ptr = pack_threadid(ptr, &thref);
++ *ptr++ = ';';
++
++ put_packet(remcom_out_buffer);
++ }
++
++ kgdb_usethread = kgdb_info[processor].task;
++ kgdb_usethreadid = shadow_pid(kgdb_info[processor].task->pid);
++
++ while (kgdb_io_ops.read_char) {
++ char *bpt_type;
++ error = 0;
++
++ /* Clear the out buffer. */
++ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
++
++ get_packet(remcom_in_buffer);
++
++ switch (remcom_in_buffer[0]) {
++ case '?':
++ /* We know that this packet is only sent
++ * during initial connect. So to be safe,
++ * we clear out our breakpoints now in case
++ * GDB is reconnecting. */
++ remove_all_break();
++ /* Also, if we haven't been able to round up all
++ * CPUs, send an 'O' packet informing the user
++ * as much. Only need to do this once. */
++ if (!all_cpus_synced)
++ kgdb_msg_write("Not all CPUs have been "
++ "synced for KGDB\n", 39);
++ remcom_out_buffer[0] = 'S';
++ remcom_out_buffer[1] = hexchars[signo >> 4];
++ remcom_out_buffer[2] = hexchars[signo % 16];
++ break;
++
++ case 'g': /* return the value of the CPU registers */
++ thread = kgdb_usethread;
++
++ if (!thread) {
++ thread = kgdb_info[processor].task;
++ local_debuggerinfo =
++ kgdb_info[processor].debuggerinfo;
++ } else {
++ local_debuggerinfo = NULL;
++ for (i = 0; i < NR_CPUS; i++) {
++ /* Try to find the task on some other or
++ * possibly this node. If we do not find
++ * the matching task then we try to
++ * approximate the results.
++ */
++ if (thread == kgdb_info[i].task)
++ local_debuggerinfo =
++ kgdb_info[i].debuggerinfo;
++ }
++ }
++
++ /* All threads that don't have debuggerinfo should be
++ * in __schedule() sleeping, since all other CPUs
++ * are in kgdb_wait, and thus have debuggerinfo. */
++ if (kgdb_ops->shadowth &&
++ kgdb_usethreadid >= pid_max + num_online_cpus()) {
++ shadowregs = kgdb_shadow_regs(linux_regs,
++ kgdb_usethreadid -
++ pid_max -
++ num_online_cpus
++ ());
++ if (!shadowregs) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ regs_to_gdb_regs(gdb_regs, shadowregs);
++ } else if (local_debuggerinfo)
++ regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
++ else {
++ /* Pull stuff saved during
++ * switch_to; nothing else is
++ * accessible (or even particularly relevant).
++ * This should be enough for a stack trace. */
++ sleeping_thread_to_gdb_regs(gdb_regs, thread);
++ }
++ kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer,
++ NUMREGBYTES);
++ break;
++
++ /* set the value of the CPU registers - return OK */
++ case 'G':
++ kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs,
++ NUMREGBYTES);
++
++ if (kgdb_usethread && kgdb_usethread != current)
++ error_packet(remcom_out_buffer, -EINVAL);
++ else {
++ gdb_regs_to_regs(gdb_regs, linux_regs);
++ strcpy(remcom_out_buffer, "OK");
++ }
++ break;
++
++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
++ case 'm':
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
++ kgdb_hex2long(&ptr, &length) > 0) {
++ if (IS_ERR(ptr = kgdb_mem2hex((char *)addr,
++ remcom_out_buffer,
++ length)))
++ error_packet(remcom_out_buffer,
++ PTR_ERR(ptr));
++ } else
++ error_packet(remcom_out_buffer, -EINVAL);
++ break;
++
++ /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
++ case 'M':
++ if (IS_ERR(ptr = write_mem_msg(0)))
++ error_packet(remcom_out_buffer, PTR_ERR(ptr));
++ else
++ strcpy(remcom_out_buffer, "OK");
++ break;
++ /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
++ case 'X':
++ if (IS_ERR(ptr = write_mem_msg(1)))
++ error_packet(remcom_out_buffer, PTR_ERR(ptr));
++ else
++ strcpy(remcom_out_buffer, "OK");
++ break;
++
++ /* kill or detach. KGDB should treat this like a
++ * continue.
++ */
++ case 'D':
++ if ((error = remove_all_break()) < 0) {
++ error_packet(remcom_out_buffer, error);
++ } else {
++ strcpy(remcom_out_buffer, "OK");
++ kgdb_connected = 0;
++ }
++ put_packet(remcom_out_buffer);
++ goto default_handle;
++
++ case 'k':
++ /* Don't care about error from remove_all_break */
++ remove_all_break();
++ kgdb_connected = 0;
++ goto default_handle;
++
++ /* Reboot */
++ case 'R':
++ /* For now, only honor R0 */
++ if (strcmp(remcom_in_buffer, "R0") == 0) {
++ printk(KERN_CRIT "Executing reboot\n");
++ strcpy(remcom_out_buffer, "OK");
++ put_packet(remcom_out_buffer);
++ emergency_sync();
++ /* Execution should not return from
++ * machine_restart()
++ */
++ machine_restart(NULL);
++ kgdb_connected = 0;
++ goto default_handle;
++ }
++
++ /* query */
++ case 'q':
++ switch (remcom_in_buffer[1]) {
++ case 's':
++ case 'f':
++ if (memcmp(remcom_in_buffer + 2, "ThreadInfo",
++ 10)) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++
++ /*
++ * If pidhash_init() has not yet
++ * completed, there isn't much we
++ * can give back.
++ */
++ if (init_pid_ns.last_pid == 0) {
++ if (remcom_in_buffer[1] == 'f')
++ strcpy(remcom_out_buffer,
++ "m0000000000000001");
++ break;
++ }
++
++ if (remcom_in_buffer[1] == 'f') {
++ threadid = 1;
++ }
++ remcom_out_buffer[0] = 'm';
++ ptr = remcom_out_buffer + 1;
++ for (i = 0; i < 17 && threadid < pid_max +
++ numshadowth; threadid++) {
++ thread = getthread(linux_regs,
++ threadid);
++ if (thread) {
++ int_to_threadref(&thref,
++ threadid);
++ pack_threadid(ptr, &thref);
++ ptr += 16;
++ *(ptr++) = ',';
++ i++;
++ }
++ }
++ *(--ptr) = '\0';
++ break;
++
++ case 'C':
++ /* Current thread id */
++ strcpy(remcom_out_buffer, "QC");
++
++ threadid = shadow_pid(current->pid);
++
++ int_to_threadref(&thref, threadid);
++ pack_threadid(remcom_out_buffer + 2, &thref);
++ break;
++ case 'T':
++ if (memcmp(remcom_in_buffer + 1,
++ "ThreadExtraInfo,", 16)) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ threadid = 0;
++ ptr = remcom_in_buffer + 17;
++ kgdb_hex2long(&ptr, &threadid);
++ if (!getthread(linux_regs, threadid)) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ if (threadid < pid_max) {
++ kgdb_mem2hex(getthread(linux_regs,
++ threadid)->comm,
++ remcom_out_buffer, 16);
++ } else if (threadid >= pid_max +
++ num_online_cpus()) {
++ kgdb_shadowinfo(linux_regs,
++ remcom_out_buffer,
++ threadid - pid_max -
++ num_online_cpus());
++ } else {
++ static char tmpstr[23 +
++ BUF_THREAD_ID_SIZE];
++ sprintf(tmpstr, "Shadow task %d"
++ " for pid 0",
++ (int)(threadid - pid_max));
++ kgdb_mem2hex(tmpstr, remcom_out_buffer,
++ strlen(tmpstr));
++ }
++ break;
++ }
++ break;
++
++ /* task related */
++ case 'H':
++ switch (remcom_in_buffer[1]) {
++ case 'g':
++ ptr = &remcom_in_buffer[2];
++ kgdb_hex2long(&ptr, &threadid);
++ thread = getthread(linux_regs, threadid);
++ if (!thread && threadid > 0) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ kgdb_usethread = thread;
++ kgdb_usethreadid = threadid;
++ strcpy(remcom_out_buffer, "OK");
++ break;
++
++ case 'c':
++ ptr = &remcom_in_buffer[2];
++ kgdb_hex2long(&ptr, &threadid);
++ if (!threadid) {
++ kgdb_contthread = NULL;
++ } else {
++ thread = getthread(linux_regs,
++ threadid);
++ if (!thread && threadid > 0) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ kgdb_contthread = thread;
++ }
++ strcpy(remcom_out_buffer, "OK");
++ break;
++ }
++ break;
++
++ /* Query thread status */
++ case 'T':
++ ptr = &remcom_in_buffer[1];
++ kgdb_hex2long(&ptr, &threadid);
++ thread = getthread(linux_regs, threadid);
++ if (thread)
++ strcpy(remcom_out_buffer, "OK");
++ else
++ error_packet(remcom_out_buffer, -EINVAL);
++ break;
++ /* Since GDB-5.3, it's been drafted that '0' is a software
++ * breakpoint, '1' is a hardware breakpoint, so let's do
++ * that.
++ */
++ case 'z':
++ case 'Z':
++ bpt_type = &remcom_in_buffer[1];
++ ptr = &remcom_in_buffer[2];
++
++ if (kgdb_ops->set_hw_breakpoint && *bpt_type >= '1') {
++ /* Unsupported */
++ if (*bpt_type > '4')
++ break;
++ } else if (*bpt_type != '0' && *bpt_type != '1')
++ /* Unsupported. */
++ break;
++ /* Test if this is a hardware breakpoint, and
++ * if we support it. */
++ if (*bpt_type == '1' &&
++ !(kgdb_ops->flags & KGDB_HW_BREAKPOINT))
++ /* Unsupported. */
++ break;
++
++ if (*(ptr++) != ',') {
++ error_packet(remcom_out_buffer, -EINVAL);
++ break;
++ } else if (kgdb_hex2long(&ptr, &addr)) {
++ if (*(ptr++) != ',' ||
++ !kgdb_hex2long(&ptr, &length)) {
++ error_packet(remcom_out_buffer,
++ -EINVAL);
++ break;
++ }
++ } else {
++ error_packet(remcom_out_buffer, -EINVAL);
++ break;
++ }
++
++ if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
++ error = kgdb_set_sw_break(addr);
++ else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
++ error = kgdb_remove_sw_break(addr);
++ else if (remcom_in_buffer[0] == 'Z')
++ error = kgdb_ops->set_hw_breakpoint(addr,
++ (int)length,
++ *bpt_type);
++ else if (remcom_in_buffer[0] == 'z')
++ error = kgdb_ops->remove_hw_breakpoint(addr,
++ (int)
++ length,
++ *bpt_type);
++
++ if (error == 0)
++ strcpy(remcom_out_buffer, "OK");
++ else
++ error_packet(remcom_out_buffer, error);
++
++ break;
++ case 'c':
++ case 's':
++ if (kgdb_contthread && kgdb_contthread != current) {
++ /* Can't switch threads in kgdb */
++ error_packet(remcom_out_buffer, -EINVAL);
++ break;
++ }
++ kgdb_activate_sw_breakpoints();
++ /* Fall through to default processing */
++ default:
++ default_handle:
++ error = kgdb_arch_handle_exception(ex_vector, signo,
++ err_code,
++ remcom_in_buffer,
++ remcom_out_buffer,
++ linux_regs);
++
++ if (error >= 0 || remcom_in_buffer[0] == 'D' ||
++ remcom_in_buffer[0] == 'k')
++ goto kgdb_exit;
++
++ } /* switch */
++
++ /* reply to the request */
++ put_packet(remcom_out_buffer);
++ }
++
++ kgdb_exit:
++ /*
++ * Call the I/O driver's post_exception routine
++ * if the I/O driver defined one.
++ */
++ if (kgdb_io_ops.post_exception)
++ kgdb_io_ops.post_exception();
++
++ kgdb_info[processor].debuggerinfo = NULL;
++ kgdb_info[processor].task = NULL;
++ atomic_set(&procindebug[processor], 0);
++
++ if (!debugger_step || !kgdb_contthread) {
++ for (i = 0; i < NR_CPUS; i++)
++ spin_unlock(&slavecpulocks[i]);
++ /* Wait till all the processors have quit
++ * from the debugger. */
++ for (i = 0; i < NR_CPUS; i++) {
++ while (atomic_read(&procindebug[i])) {
++ int j = 10; /* an arbitrary number */
++
++ while (--j)
++ cpu_relax();
++ }
++ }
++ }
++
++#ifdef CONFIG_SMP
++ /* This delay has a real purpose. The problem is that if you
++ * are single-stepping, you are sending an NMI to all the
++ * other processors to stop them. Interrupts come in, but
++ * don't get handled. Then you let them go just long enough
++ * to get into their interrupt routines and use up some stack.
++ * You stop them again, and then do the same thing. After a
++ * while you blow the stack on the other processors. This
++ * delay gives some time for interrupts to be cleared out on
++ * the other processors.
++ */
++ if (debugger_step)
++ mdelay(2);
++#endif
++ kgdb_restore:
++ /* Free debugger_active */
++ atomic_set(&debugger_active, 0);
++ local_irq_restore(flags);
++
++ return error;
++}
++
++/*
++ * GDB places a breakpoint at this function to learn about dynamically
++ * loaded objects. It's not defined static so that only one instance with this
++ * name exists in the kernel.
++ */
++
++int module_event(struct notifier_block *self, unsigned long val, void *data)
++{
++ return 0;
++}
++
++static struct notifier_block kgdb_module_load_nb = {
++ .notifier_call = module_event,
++};
++
++void kgdb_nmihook(int cpu, void *regs)
++{
++#ifdef CONFIG_SMP
++ if (!atomic_read(&procindebug[cpu]) && atomic_read(&debugger_active) != (cpu + 1))
++ kgdb_wait((struct pt_regs *)regs);
++#endif
++}
++
++/*
++ * This is called when a panic happens. All we need to do is
++ * breakpoint().
++ */
++static int kgdb_panic_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ breakpoint();
++
++ return 0;
++}
++
++static struct notifier_block kgdb_panic_notifier = {
++ .notifier_call = kgdb_panic_notify,
++};
++
++/*
++ * Initialization that needs to be done in either of our entry points.
++ */
++static void __init kgdb_internal_init(void)
++{
++ int i;
++
++ /* Initialize our spinlocks. */
++ for (i = 0; i < NR_CPUS; i++)
++ spin_lock_init(&slavecpulocks[i]);
++
++ for (i = 0; i < MAX_BREAKPOINTS; i++)
++ kgdb_break[i].state = bp_none;
++
++ /* Initialize the I/O handles */
++ memset(&kgdb_io_ops_prev, 0, sizeof(kgdb_io_ops_prev));
++
++ /* We can't do much if this fails */
++ register_module_notifier(&kgdb_module_load_nb);
++
++ kgdb_initialized = 1;
++}
++
++static void kgdb_register_for_panic(void)
++{
++ /* Register for panics(). */
++ /* The registration is done in the kgdb_register_for_panic
++ * routine because KGDB should not try to handle a panic when
++ * there are no kgdb_io_ops setup. It is assumed that the
++ * kgdb_io_ops are setup at the time this method is called.
++ */
++ if (!kgdb_from_module_registered) {
++ atomic_notifier_chain_register(&panic_notifier_list,
++ &kgdb_panic_notifier);
++ kgdb_from_module_registered = 1;
++ }
++}
++
++static void kgdb_unregister_for_panic(void)
++{
++ /* When this routine is called KGDB should unregister from the
++ * panic handler and clean up, making sure it is not handling any
++ * break exceptions at the time.
++ */
++ if (kgdb_from_module_registered) {
++ kgdb_from_module_registered = 0;
++ atomic_notifier_chain_unregister(&panic_notifier_list,
++ &kgdb_panic_notifier);
++ }
++}
++
++int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops)
++{
++
++ if (kgdb_connected) {
++ printk(KERN_ERR "kgdb: Cannot load I/O module while KGDB "
++ "connected.\n");
++ return -EINVAL;
++ }
++
++ /* Save the old values so they can be restored */
++ if (kgdb_io_handler_cnt >= MAX_KGDB_IO_HANDLERS) {
++ printk(KERN_ERR "kgdb: No more I/O handles available.\n");
++ return -EINVAL;
++ }
++
++	/* Check to see if there is an existing driver and, if so, save
++	 * its values. Also make sure the same driver is not trying
++	 * to re-register.
++	 */
++ if (kgdb_io_ops.read_char != NULL &&
++ kgdb_io_ops.read_char != local_kgdb_io_ops->read_char) {
++ memcpy(&kgdb_io_ops_prev[kgdb_io_handler_cnt],
++ &kgdb_io_ops, sizeof(struct kgdb_io));
++ kgdb_io_handler_cnt++;
++ }
++
++ /* Initialize the io values for this module */
++ memcpy(&kgdb_io_ops, local_kgdb_io_ops, sizeof(struct kgdb_io));
++
++	/* Make the call to register kgdb if it is not initialized */
++ kgdb_register_for_panic();
++
++ return 0;
++}
++
++void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops)
++{
++ int i;
++
++ /* Unregister KGDB if there were no other prior io hooks, else
++ * restore the io hooks.
++ */
++ if (kgdb_io_handler_cnt > 0 && kgdb_io_ops_prev[0].read_char != NULL) {
++ /* First check if the hook that is in use is the one being
++ * removed */
++ if (kgdb_io_ops.read_char == local_kgdb_io_ops->read_char) {
++			/* Set 'i' to the index at which the list should
++			 * be shifted */
++ i = kgdb_io_handler_cnt - 1;
++ memcpy(&kgdb_io_ops, &kgdb_io_ops_prev[i],
++ sizeof(struct kgdb_io));
++ } else {
++ /* Simple case to remove an entry for an I/O handler
++ * that is not in use */
++ for (i = 0; i < kgdb_io_handler_cnt; i++) {
++ if (kgdb_io_ops_prev[i].read_char ==
++ local_kgdb_io_ops->read_char)
++ break;
++ }
++ }
++
++ /* Shift all the entries in the handler array so it is
++ * ordered from oldest to newest.
++ */
++ kgdb_io_handler_cnt--;
++ for (; i < kgdb_io_handler_cnt; i++) {
++ memcpy(&kgdb_io_ops_prev[i], &kgdb_io_ops_prev[i + 1],
++ sizeof(struct kgdb_io));
++ }
++		/* Zero the now-unused last element so the array stays
++		 * NULL terminated. */
++ memset(&kgdb_io_ops_prev[kgdb_io_handler_cnt], 0,
++ sizeof(struct kgdb_io));
++
++ if (kgdb_connected)
++ printk(KERN_ERR "kgdb: WARNING: I/O method changed "
++			       "while kgdb was connected.\n");
++ } else {
++ /* KGDB is no longer able to communicate out, so
++ * unregister our hooks and reset state. */
++ kgdb_unregister_for_panic();
++ if (kgdb_connected) {
++ printk(KERN_CRIT "kgdb: I/O module was unloaded while "
++ "a debugging session was running. "
++ "KGDB will be reset.\n");
++ if (remove_all_break() < 0)
++ printk(KERN_CRIT "kgdb: Reset failed.\n");
++ kgdb_connected = 0;
++ }
++ memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io));
++ }
++}
++
++/*
++ * There are times we need to schedule a tasklet to cause a breakpoint,
++ * as calling breakpoint() directly at that point might be fatal. We
++ * have to check that the exception stack is set up, since tasklets may
++ * be scheduled before it is. When that happens, it is up to the
++ * architecture to schedule this tasklet once it is safe to run.
++ */
++static void kgdb_tasklet_bpt(unsigned long unused)
++{
++ if (CHECK_EXCEPTION_STACK())
++ breakpoint();
++}
++
++DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0);
++
++/*
++ * This function can be called very early, either via early_param() or
++ * an explicit breakpoint() early on.
++ */
++static void __init kgdb_early_entry(void)
++{
++ /* Let the architecture do any setup that it needs to. */
++ kgdb_arch_init();
++
++	/*
++	 * Don't try to do anything until the architecture is able to
++	 * set up the exception stack. Until then, it is up to the
++	 * architecture to hook in and re-enter us when it is ready.
++	 */
++
++ if (!CHECK_EXCEPTION_STACK()) {
++ kgdb_initialized = -1;
++		/* any kind of breakpoint is deferred to late init */
++ return;
++ }
++
++ /* Now try the I/O. */
++ /* For early entry kgdb_io_ops.init must be defined */
++ if (!kgdb_io_ops.init || kgdb_io_ops.init()) {
++ /* Try again later. */
++ kgdb_initialized = -1;
++ return;
++ }
++
++ /* Finish up. */
++ kgdb_internal_init();
++
++	/* KGDB can assume that if kgdb_io_ops.init is defined, the
++	 * panic registration should be performed at this time. This
++	 * means kgdb_io_ops.init did not come from a kernel module and
++	 * was set up statically by a built-in driver.
++	 */
++ if (kgdb_io_ops.init)
++ kgdb_register_for_panic();
++}
++
++/*
++ * This function will always be invoked to make sure that KGDB will grab
++ * what it needs to so that if something happens while the system is
++ * running, KGDB will get involved. If kgdb_early_entry() has already
++ * been invoked, there is little we need to do.
++ */
++static int __init kgdb_late_entry(void)
++{
++ int need_break = 0;
++
++ /* If kgdb_initialized is -1 then we were passed kgdbwait. */
++ if (kgdb_initialized == -1)
++ need_break = 1;
++
++ /*
++ * If we haven't tried to initialize KGDB yet, we need to call
++ * kgdb_arch_init before moving onto the I/O.
++ */
++ if (!kgdb_initialized)
++ kgdb_arch_init();
++
++ if (kgdb_initialized != 1) {
++ if (kgdb_io_ops.init && kgdb_io_ops.init()) {
++			/* When KGDB allows I/O via modules and the core
++			 * I/O init fails, KGDB must default to deferring
++			 * the I/O setup, and print an appropriate error
++			 * about it.
++			 */
++ printk(KERN_ERR "kgdb: Could not setup core I/O "
++ "for KGDB.\n");
++			printk(KERN_INFO "kgdb: Deferring I/O setup to kernel "
++ "module.\n");
++ memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io));
++ }
++
++ kgdb_internal_init();
++
++		/* KGDB can assume that if kgdb_io_ops.init is defined,
++		 * panic registration should be performed at this time.
++		 * This means kgdb_io_ops.init did not come from a kernel
++		 * module and was set up statically by a built-in driver.
++		 */
++ if (kgdb_io_ops.init)
++ kgdb_register_for_panic();
++ }
++
++	/* Register on the reboot notifier list. */
++ register_reboot_notifier(&kgdb_reboot_notifier);
++
++ /* Now do any late init of the I/O. */
++ if (kgdb_io_ops.late_init)
++ kgdb_io_ops.late_init();
++
++ if (need_break) {
++ printk(KERN_CRIT "kgdb: Waiting for connection from remote"
++ " gdb...\n");
++ breakpoint();
++ }
++
++ return 0;
++}
++
++late_initcall(kgdb_late_entry);
++
++/*
++ * This function will generate a breakpoint exception. It is used at the
++ * beginning of a program to sync up with a debugger and can be used
++ * otherwise as a quick means to stop program execution and "break" into
++ * the debugger.
++ */
++void breakpoint(void)
++{
++ atomic_set(&kgdb_setting_breakpoint, 1);
++ wmb();
++ BREAKPOINT();
++ wmb();
++ atomic_set(&kgdb_setting_breakpoint, 0);
++}
++
++EXPORT_SYMBOL(breakpoint);
++
++#ifdef CONFIG_MAGIC_SYSRQ
++static void sysrq_handle_gdb(int key, struct tty_struct *tty)
++{
++ printk("Entering GDB stub\n");
++ breakpoint();
++}
++static struct sysrq_key_op sysrq_gdb_op = {
++ .handler = sysrq_handle_gdb,
++ .help_msg = "Gdb",
++ .action_msg = "GDB",
++};
++
++static int gdb_register_sysrq(void)
++{
++ printk("Registering GDB sysrq handler\n");
++ register_sysrq_key('g', &sysrq_gdb_op);
++ return 0;
++}
++
++module_init(gdb_register_sysrq);
++#endif
++
++static int kgdb_notify_reboot(struct notifier_block *this,
++ unsigned long code, void *x)
++{
++
++ unsigned long flags;
++
++	/* If we are mid-debug, or KGDB has not connected, don't try
++	 * to send the exit packet. */
++ if (!kgdb_connected || atomic_read(&debugger_active) != 0)
++ return 0;
++	if ((code == SYS_RESTART) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
++ local_irq_save(flags);
++ put_packet("X00");
++ local_irq_restore(flags);
++ }
++ return NOTIFY_DONE;
++}
++
++#ifdef CONFIG_KGDB_CONSOLE
++void kgdb_console_write(struct console *co, const char *s, unsigned count)
++{
++ unsigned long flags;
++
++	/* If we are mid-debug, or KGDB has not connected, don't try
++	 * to print. */
++ if (!kgdb_connected || atomic_read(&debugger_active) != 0)
++ return;
++
++ local_irq_save(flags);
++ kgdb_msg_write(s, count);
++ local_irq_restore(flags);
++}
++
++struct console kgdbcons = {
++ .name = "kgdb",
++ .write = kgdb_console_write,
++ .flags = CON_PRINTBUFFER | CON_ENABLED,
++};
++static int __init kgdb_console_init(void)
++{
++ register_console(&kgdbcons);
++ return 0;
++}
++
++console_initcall(kgdb_console_init);
++#endif
++
++static int __init opt_kgdb_enter(char *str)
++{
++ /* We've already done this by an explicit breakpoint() call. */
++ if (kgdb_initialized)
++ return 0;
++
++ kgdb_early_entry();
++ if (kgdb_initialized == 1)
++ printk(KERN_CRIT "Waiting for connection from remote "
++ "gdb...\n");
++ else {
++ printk(KERN_CRIT "KGDB cannot initialize I/O yet.\n");
++ return 0;
++ }
++
++ breakpoint();
++
++ return 0;
++}
++
++early_param("kgdbwait", opt_kgdb_enter);
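The core above talks to the world through a single struct kgdb_io instance:
an architecture or serial driver supplies polled read/write hooks and
registers them with kgdb_register_io_module(). The following is a minimal
sketch of such a driver, assuming the conventional hook signatures (int
read_char(void), void write_char(u8), int init(void)) and using stub
my_uart_* helpers in place of real polled UART accessors:

#include <linux/kgdb.h>
#include <linux/module.h>

/* Stand-ins for real polled-UART accessors; an actual driver would
 * busy-wait on the hardware in these. */
static int my_uart_poll_get(void)
{
	return 0;			/* return one received byte */
}

static void my_uart_poll_put(u8 c)
{
	/* spin until the transmitter is empty, then send 'c' */
}

static int my_kgdb_init(void)
{
	/* put the UART into a known polled state; returning nonzero
	 * makes the core defer I/O setup, as kgdb_early_entry() and
	 * kgdb_late_entry() above show */
	return 0;
}

static int my_kgdb_read_char(void)
{
	return my_uart_poll_get();
}

static void my_kgdb_write_char(u8 c)
{
	my_uart_poll_put(c);
}

static struct kgdb_io my_kgdb_io = {
	.read_char	= my_kgdb_read_char,
	.write_char	= my_kgdb_write_char,
	.init		= my_kgdb_init,
};

static int __init my_kgdb_module_init(void)
{
	/* refused with -EINVAL while a debugger is connected */
	return kgdb_register_io_module(&my_kgdb_io);
}

static void __exit my_kgdb_module_exit(void)
{
	kgdb_unregister_io_module(&my_kgdb_io);
}

module_init(my_kgdb_module_init);
module_exit(my_kgdb_module_exit);
MODULE_LICENSE("GPL");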
+diff -Nurb linux-2.6.22-570/kernel/kmod.c linux-2.6.22-591/kernel/kmod.c
+--- linux-2.6.22-570/kernel/kmod.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/kmod.c 2007-12-21 15:36:12.000000000 -0500
+@@ -119,9 +119,10 @@
+ char **argv;
+ char **envp;
+ struct key *ring;
+- int wait;
++ enum umh_wait wait;
+ int retval;
+ struct file *stdin;
++ void (*cleanup)(char **argv, char **envp);
+ };
+
+ /*
+@@ -180,6 +181,14 @@
+ do_exit(0);
+ }
+
++void call_usermodehelper_freeinfo(struct subprocess_info *info)
++{
++ if (info->cleanup)
++ (*info->cleanup)(info->argv, info->envp);
++ kfree(info);
++}
++EXPORT_SYMBOL(call_usermodehelper_freeinfo);
++
+ /* Keventd can't block, but this (a child) can. */
+ static int wait_for_helper(void *data)
+ {
+@@ -216,8 +225,8 @@
+ sub_info->retval = ret;
+ }
+
+- if (sub_info->wait < 0)
+- kfree(sub_info);
++ if (sub_info->wait == UMH_NO_WAIT)
++ call_usermodehelper_freeinfo(sub_info);
+ else
+ complete(sub_info->complete);
+ return 0;
+@@ -229,101 +238,102 @@
+ struct subprocess_info *sub_info =
+ container_of(work, struct subprocess_info, work);
+ pid_t pid;
+- int wait = sub_info->wait;
++ enum umh_wait wait = sub_info->wait;
+
+ /* CLONE_VFORK: wait until the usermode helper has execve'd
+ * successfully We need the data structures to stay around
+ * until that is done. */
+- if (wait)
++ if (wait == UMH_WAIT_PROC)
+ pid = kernel_thread(wait_for_helper, sub_info,
+ CLONE_FS | CLONE_FILES | SIGCHLD);
+ else
+ pid = kernel_thread(____call_usermodehelper, sub_info,
+ CLONE_VFORK | SIGCHLD);
+
+- if (wait < 0)
+- return;
+-
+- if (pid < 0) {
++ switch(wait) {
++ case UMH_NO_WAIT:
++ break;
++
++ case UMH_WAIT_PROC:
++ if (pid > 0)
++ break;
+ sub_info->retval = pid;
++ /* FALLTHROUGH */
++
++ case UMH_WAIT_EXEC:
+ complete(sub_info->complete);
+- } else if (!wait)
+- complete(sub_info->complete);
++ }
+ }
+
+ /**
+- * call_usermodehelper_keys - start a usermode application
+- * @path: pathname for the application
+- * @argv: null-terminated argument list
+- * @envp: null-terminated environment list
+- * @session_keyring: session keyring for process (NULL for an empty keyring)
+- * @wait: wait for the application to finish and return status.
+- * when -1 don't wait at all, but you get no useful error back when
+- * the program couldn't be exec'ed. This makes it safe to call
+- * from interrupt context.
++ * call_usermodehelper_setup - prepare to call a usermode helper
++ * @path: path to usermode executable
++ * @argv: arg vector for process
++ * @envp: environment for process
+ *
+- * Runs a user-space application. The application is started
+- * asynchronously if wait is not set, and runs as a child of keventd.
+- * (ie. it runs with full root capabilities).
+- *
+- * Must be called from process context. Returns a negative error code
+- * if program was not execed successfully, or 0.
++ * Returns either NULL on allocation failure, or a subprocess_info
++ * structure. This should be passed to call_usermodehelper_exec to
++ * exec the process and free the structure.
+ */
+-int call_usermodehelper_keys(char *path, char **argv, char **envp,
+- struct key *session_keyring, int wait)
++struct subprocess_info *call_usermodehelper_setup(char *path,
++ char **argv, char **envp)
+ {
+- DECLARE_COMPLETION_ONSTACK(done);
+ struct subprocess_info *sub_info;
+- int retval;
+-
+- if (!khelper_wq)
+- return -EBUSY;
+-
+- if (path[0] == '\0')
+- return 0;
+-
+ sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
+ if (!sub_info)
+- return -ENOMEM;
++ goto out;
+
+ INIT_WORK(&sub_info->work, __call_usermodehelper);
+- sub_info->complete = &done;
+ sub_info->path = path;
+ sub_info->argv = argv;
+ sub_info->envp = envp;
+- sub_info->ring = session_keyring;
+- sub_info->wait = wait;
+
+- queue_work(khelper_wq, &sub_info->work);
+- if (wait < 0) /* task has freed sub_info */
+- return 0;
+- wait_for_completion(&done);
+- retval = sub_info->retval;
+- kfree(sub_info);
+- return retval;
++ out:
++ return sub_info;
+ }
+-EXPORT_SYMBOL(call_usermodehelper_keys);
++EXPORT_SYMBOL(call_usermodehelper_setup);
+
+-int call_usermodehelper_pipe(char *path, char **argv, char **envp,
+- struct file **filp)
++/**
++ * call_usermodehelper_setkeys - set the session keys for usermode helper
++ * @info: a subprocess_info returned by call_usermodehelper_setup
++ * @session_keyring: the session keyring for the process
++ */
++void call_usermodehelper_setkeys(struct subprocess_info *info,
++ struct key *session_keyring)
+ {
+- DECLARE_COMPLETION(done);
+- struct subprocess_info sub_info = {
+- .work = __WORK_INITIALIZER(sub_info.work,
+- __call_usermodehelper),
+- .complete = &done,
+- .path = path,
+- .argv = argv,
+- .envp = envp,
+- .retval = 0,
+- };
+- struct file *f;
++ info->ring = session_keyring;
++}
++EXPORT_SYMBOL(call_usermodehelper_setkeys);
+
+- if (!khelper_wq)
+- return -EBUSY;
++/**
++ * call_usermodehelper_setcleanup - set a cleanup function
++ * @info: a subprocess_info returned by call_usermodehelper_setup
++ * @cleanup: a cleanup function
++ *
++ * The cleanup function is called just before the subprocess_info is
++ * freed. It can be used to free the argv and envp. The function
++ * must be runnable in either a process context or the context in
++ * which call_usermodehelper_exec is called.
++ */
++void call_usermodehelper_setcleanup(struct subprocess_info *info,
++ void (*cleanup)(char **argv, char **envp))
++{
++ info->cleanup = cleanup;
++}
++EXPORT_SYMBOL(call_usermodehelper_setcleanup);
+
+- if (path[0] == '\0')
+- return 0;
++/**
++ * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
++ * @sub_info: a subprocess_info returned by call_usermodehelper_setup
++ * @filp: set to the write-end of a pipe
++ *
++ * This constructs a pipe, and sets the read end to be the stdin of the
++ * subprocess, and returns the write-end in *@filp.
++ */
++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
++ struct file **filp)
++{
++ struct file *f;
+
+ f = create_write_pipe();
+ if (IS_ERR(f))
+@@ -335,11 +345,85 @@
+ free_write_pipe(*filp);
+ return PTR_ERR(f);
+ }
+- sub_info.stdin = f;
++ sub_info->stdin = f;
++
++ return 0;
++}
++EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
+
+- queue_work(khelper_wq, &sub_info.work);
++/**
++ * call_usermodehelper_exec - start a usermode application
++ * @sub_info: information about the subprocess
++ * @wait: wait for the application to finish and return status.
++ * when UMH_NO_WAIT don't wait at all, but you get no useful error
++ * back when the program couldn't be exec'ed. This makes it safe to
++ * call from interrupt context.
++ *
++ * Runs a user-space application. The application is started
++ * asynchronously if @wait is UMH_NO_WAIT, and runs as a child of
++ * keventd (i.e. it runs with full root capabilities).
++ */
++int call_usermodehelper_exec(struct subprocess_info *sub_info,
++ enum umh_wait wait)
++{
++ DECLARE_COMPLETION_ONSTACK(done);
++ int retval;
++
++ if (sub_info->path[0] == '\0') {
++ retval = 0;
++ goto out;
++ }
++
++ if (!khelper_wq) {
++ retval = -EBUSY;
++ goto out;
++ }
++
++ sub_info->complete = &done;
++ sub_info->wait = wait;
++
++ queue_work(khelper_wq, &sub_info->work);
++ if (wait == UMH_NO_WAIT) /* task has freed sub_info */
++ return 0;
+ wait_for_completion(&done);
+- return sub_info.retval;
++ retval = sub_info->retval;
++
++ out:
++ call_usermodehelper_freeinfo(sub_info);
++ return retval;
++}
++EXPORT_SYMBOL(call_usermodehelper_exec);
++
++/**
++ * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
++ * @path: path to usermode executable
++ * @argv: arg vector for process
++ * @envp: environment for process
++ * @filp: set to the write-end of a pipe
++ *
++ * This is a simple wrapper which executes a usermode-helper function
++ * with a pipe as stdin. It is implemented entirely in terms of
++ * lower-level call_usermodehelper_* functions.
++ */
++int call_usermodehelper_pipe(char *path, char **argv, char **envp,
++ struct file **filp)
++{
++ struct subprocess_info *sub_info;
++ int ret;
++
++ sub_info = call_usermodehelper_setup(path, argv, envp);
++ if (sub_info == NULL)
++ return -ENOMEM;
++
++ ret = call_usermodehelper_stdinpipe(sub_info, filp);
++ if (ret < 0)
++ goto out;
++
++ return call_usermodehelper_exec(sub_info, 1);
++
++ out:
++ call_usermodehelper_freeinfo(sub_info);
++ return ret;
+ }
+ EXPORT_SYMBOL(call_usermodehelper_pipe);
+
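The reworked helper API above splits the old monolithic entry point into
setup/exec stages: build a subprocess_info, optionally attach session keys,
a cleanup handler or a stdin pipe, then hand it to
call_usermodehelper_exec(), which frees the structure on every path. A
sketch of the common sequence, assuming the declarations land in
<linux/kmod.h>; the helper path and arguments are illustrative:

#include <linux/kmod.h>

static int run_my_helper(char *dev_name)
{
	struct subprocess_info *info;
	char *argv[] = { "/sbin/my-helper", dev_name, NULL };
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};

	info = call_usermodehelper_setup(argv[0], argv, envp);
	if (info == NULL)
		return -ENOMEM;

	/* UMH_WAIT_PROC: sleep until the helper exits and return its
	 * status; UMH_NO_WAIT would be safe from atomic context but
	 * reports no useful error. Either way, info is freed inside
	 * call_usermodehelper_exec(). */
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}

The orderly_poweroff() addition in kernel/sys.c further down follows
exactly this pattern, with call_usermodehelper_setcleanup() used to free
an argv_split() result.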
+diff -Nurb linux-2.6.22-570/kernel/module.c linux-2.6.22-591/kernel/module.c
+--- linux-2.6.22-570/kernel/module.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/module.c 2007-12-21 15:36:12.000000000 -0500
+@@ -67,6 +67,7 @@
+ /* List of modules, protected by module_mutex AND modlist_lock */
+ static DEFINE_MUTEX(module_mutex);
+ static LIST_HEAD(modules);
++static DECLARE_MUTEX(notify_mutex);
+
+ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
+
+@@ -488,8 +489,7 @@
+ mod->field = NULL; \
+ } \
+ static struct module_attribute modinfo_##field = { \
+- .attr = { .name = __stringify(field), .mode = 0444, \
+- .owner = THIS_MODULE }, \
++ .attr = { .name = __stringify(field), .mode = 0444 }, \
+ .show = show_modinfo_##field, \
+ .setup = setup_modinfo_##field, \
+ .test = modinfo_##field##_exists, \
+@@ -713,6 +713,12 @@
+ if (ret != 0)
+ goto out;
+
++	down(&notify_mutex);
++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING,
++ mod);
++	up(&notify_mutex);
++
++
+ /* Never wait if forced. */
+ if (!forced && module_refcount(mod) != 0)
+ wait_for_zero_refcount(mod);
+@@ -725,6 +731,11 @@
+ }
+ free_module(mod);
+
++	down(&notify_mutex);
++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GONE,
++ NULL);
++	up(&notify_mutex);
++
+ out:
+ mutex_unlock(&module_mutex);
+ return ret;
+@@ -793,7 +804,7 @@
+ }
+
+ static struct module_attribute refcnt = {
+- .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE },
++ .attr = { .name = "refcnt", .mode = 0444 },
+ .show = show_refcnt,
+ };
+
+@@ -846,12 +857,15 @@
+ case MODULE_STATE_GOING:
+ state = "going";
+ break;
++ case MODULE_STATE_GONE:
++ state = "gone";
++ break;
+ }
+ return sprintf(buffer, "%s\n", state);
+ }
+
+ static struct module_attribute initstate = {
+- .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE },
++ .attr = { .name = "initstate", .mode = 0444 },
+ .show = show_initstate,
+ };
+
+@@ -1032,7 +1046,6 @@
+ sattr->mattr.show = module_sect_show;
+ sattr->mattr.store = NULL;
+ sattr->mattr.attr.name = sattr->name;
+- sattr->mattr.attr.owner = mod;
+ sattr->mattr.attr.mode = S_IRUGO;
+ *(gattr++) = &(sattr++)->mattr.attr;
+ }
+@@ -1090,7 +1103,6 @@
+ if (!attr->test ||
+ (attr->test && attr->test(mod))) {
+ memcpy(temp_attr, attr, sizeof(*temp_attr));
+- temp_attr->attr.owner = mod;
+ error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+ ++temp_attr;
+ }
+@@ -1212,6 +1224,11 @@
+ /* Arch-specific cleanup. */
+ module_arch_cleanup(mod);
+
++#ifdef CONFIG_KGDB
++ /* kgdb info */
++ vfree(mod->mod_sections);
++#endif
++
+ /* Module unload stuff */
+ module_unload_free(mod);
+
+@@ -1471,6 +1488,31 @@
+ }
+ }
+
++#ifdef CONFIG_KGDB
++int add_modsects (struct module *mod, Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const
++ char *secstrings)
++{
++ int i;
++
++ mod->num_sections = hdr->e_shnum - 1;
++ mod->mod_sections = vmalloc((hdr->e_shnum - 1)*
++ sizeof (struct mod_section));
++
++ if (mod->mod_sections == NULL) {
++ return -ENOMEM;
++ }
++
++ for (i = 1; i < hdr->e_shnum; i++) {
++ mod->mod_sections[i - 1].address = (void *)sechdrs[i].sh_addr;
++ strncpy(mod->mod_sections[i - 1].name, secstrings +
++ sechdrs[i].sh_name, MAX_SECTNAME);
++ mod->mod_sections[i - 1].name[MAX_SECTNAME] = '\0';
++ }
++
++ return 0;
++}
++#endif
++
+ #ifdef CONFIG_KALLSYMS
+ static int is_exported(const char *name, const struct module *mod)
+ {
+@@ -1886,6 +1928,12 @@
+
+ add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+
++#ifdef CONFIG_KGDB
++ if ((err = add_modsects(mod, hdr, sechdrs, secstrings)) < 0) {
++ goto nomodsectinfo;
++ }
++#endif
++
+ err = module_finalize(hdr, sechdrs, mod);
+ if (err < 0)
+ goto cleanup;
+@@ -1946,6 +1994,11 @@
+ arch_cleanup:
+ module_arch_cleanup(mod);
+ cleanup:
++
++#ifdef CONFIG_KGDB
++nomodsectinfo:
++ vfree(mod->mod_sections);
++#endif
+ module_unload_free(mod);
+ module_free(mod, mod->module_init);
+ free_core:
+@@ -2017,6 +2070,10 @@
+ /* Init routine failed: abort. Try to protect us from
+ buggy refcounters. */
+ mod->state = MODULE_STATE_GOING;
++		down(&notify_mutex);
++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING,
++ mod);
++		up(&notify_mutex);
+ synchronize_sched();
+ if (mod->unsafe)
+ printk(KERN_ERR "%s: module is now stuck!\n",
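The two blocking_notifier_call_chain() calls added above give interested
code a hook at module teardown: MODULE_STATE_GOING fires before the module
is freed, and MODULE_STATE_GONE fires afterwards with a NULL module
pointer. A sketch of a consumer (the message text is illustrative; the
kgdb module-load notifier earlier in this patch is a real one):

#include <linux/module.h>
#include <linux/notifier.h>

static int my_module_event(struct notifier_block *nb,
			   unsigned long state, void *data)
{
	struct module *mod = data;

	if (state == MODULE_STATE_GOING)
		printk(KERN_INFO "module %s is unloading\n", mod->name);
	else if (state == MODULE_STATE_GONE)
		printk(KERN_INFO "a module has been freed\n");
	return NOTIFY_DONE;
}

static struct notifier_block my_module_nb = {
	.notifier_call = my_module_event,
};

static int __init my_notifier_init(void)
{
	return register_module_notifier(&my_module_nb);
}
module_init(my_notifier_init);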
+diff -Nurb linux-2.6.22-570/kernel/ns_container.c linux-2.6.22-591/kernel/ns_container.c
+--- linux-2.6.22-570/kernel/ns_container.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/ns_container.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,99 @@
++/*
++ * ns_container.c - namespace container subsystem
++ *
++ * Copyright 2006, 2007 IBM Corp
++ */
++
++#include <linux/module.h>
++#include <linux/container.h>
++#include <linux/fs.h>
++
++struct ns_container {
++ struct container_subsys_state css;
++ spinlock_t lock;
++};
++
++struct container_subsys ns_subsys;
++
++static inline struct ns_container *container_to_ns(
++ struct container *container)
++{
++ return container_of(container_subsys_state(container, ns_subsys_id),
++ struct ns_container, css);
++}
++
++int ns_container_clone(struct task_struct *task)
++{
++ return container_clone(task, &ns_subsys);
++}
++
++/*
++ * Rules:
++ * 1. you can only enter a container which is a child of your current
++ * container
++ * 2. you can only place another process into a container if
++ * a. you have CAP_SYS_ADMIN
++ * b. your container is an ancestor of task's destination container
++ * (hence either you are in the same container as task, or in an
++ * ancestor container thereof)
++ */
++static int ns_can_attach(struct container_subsys *ss,
++ struct container *new_container, struct task_struct *task)
++{
++ struct container *orig;
++
++ if (current != task) {
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ if (!container_is_descendant(new_container))
++ return -EPERM;
++ }
++
++ if (atomic_read(&new_container->count) != 0)
++ return -EPERM;
++
++ orig = task_container(task, ns_subsys_id);
++ if (orig && orig != new_container->parent)
++ return -EPERM;
++
++ return 0;
++}
++
++/*
++ * Rules: you can only create a container if
++ * 1. you are capable(CAP_SYS_ADMIN)
++ * 2. the target container is a descendant of your own container
++ */
++static int ns_create(struct container_subsys *ss, struct container *container)
++{
++ struct ns_container *ns_container;
++
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++ if (!container_is_descendant(container))
++ return -EPERM;
++
++ ns_container = kzalloc(sizeof(*ns_container), GFP_KERNEL);
++ if (!ns_container) return -ENOMEM;
++ spin_lock_init(&ns_container->lock);
++ container->subsys[ns_subsys.subsys_id] = &ns_container->css;
++ return 0;
++}
++
++static void ns_destroy(struct container_subsys *ss,
++ struct container *container)
++{
++ struct ns_container *ns_container;
++
++ ns_container = container_to_ns(container);
++ kfree(ns_container);
++}
++
++struct container_subsys ns_subsys = {
++ .name = "ns",
++ .can_attach = ns_can_attach,
++ .create = ns_create,
++ .destroy = ns_destroy,
++ .subsys_id = ns_subsys_id,
++};
+diff -Nurb linux-2.6.22-570/kernel/nsproxy.c linux-2.6.22-591/kernel/nsproxy.c
+--- linux-2.6.22-570/kernel/nsproxy.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/nsproxy.c 2007-12-21 15:36:15.000000000 -0500
+@@ -19,10 +19,13 @@
+ #include <linux/init_task.h>
+ #include <linux/mnt_namespace.h>
+ #include <linux/utsname.h>
++#include <net/net_namespace.h>
+ #include <linux/pid_namespace.h>
+ #include <linux/vserver/global.h>
+ #include <linux/vserver/debug.h>
+
++static struct kmem_cache *nsproxy_cachep;
++
+ struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+
+ void get_task_namespaces(struct task_struct *tsk)
+@@ -58,6 +61,7 @@
+ struct fs_struct *new_fs)
+ {
+ struct nsproxy *new_nsp;
++ int err = -ENOMEM;
+
+ vxdprintk(VXD_CBIT(space, 4),
+ "unshare_namespaces(0x%08x,%p,%p)",
+@@ -83,8 +87,24 @@
+ if (IS_ERR(new_nsp->pid_ns))
+ goto out_pid;
+
++ new_nsp->user_ns = copy_user_ns(flags, orig->user_ns);
++ if (IS_ERR(new_nsp->user_ns))
++ goto out_user;
++
++ new_nsp->net_ns = copy_net_ns(flags, orig->net_ns);
++ if (IS_ERR(new_nsp->net_ns))
++ goto out_net;
++
+ return new_nsp;
+
++out_net:
++ if (new_nsp->user_ns)
++ put_user_ns(new_nsp->user_ns);
++	if (new_nsp->net_ns && !IS_ERR(new_nsp->net_ns))
++ put_net(new_nsp->net_ns);
++out_user:
++ if (new_nsp->pid_ns)
++ put_pid_ns(new_nsp->pid_ns);
+ out_pid:
+ if (new_nsp->ipc_ns)
+ put_ipc_ns(new_nsp->ipc_ns);
+@@ -95,11 +115,11 @@
+ if (new_nsp->mnt_ns)
+ put_mnt_ns(new_nsp->mnt_ns);
+ out_ns:
+- kfree(new_nsp);
+- return ERR_PTR(-ENOMEM);
++ kmem_cache_free(nsproxy_cachep, new_nsp);
++ return ERR_PTR(err);
+ }
+
+-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk,
++static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk,
+ struct fs_struct *new_fs)
+ {
+ return unshare_namespaces(flags, tsk->nsproxy, new_fs);
+@@ -130,7 +150,7 @@
+ * called from clone. This now handles copy for nsproxy and all
+ * namespaces therein.
+ */
+-int copy_namespaces(int flags, struct task_struct *tsk)
++int copy_namespaces(unsigned long flags, struct task_struct *tsk)
+ {
+ struct nsproxy *old_ns = tsk->nsproxy;
+ struct nsproxy *new_ns = NULL;
+@@ -143,10 +163,17 @@
+ return 0;
+
+ get_nsproxy(old_ns);
++ return 0;
+
+- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
++ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
+ return 0;
+
++ #ifndef CONFIG_NET_NS
++	if (flags & CLONE_NEWNET)
++ return -EINVAL;
++ #endif
++
++
+ if (!capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+@@ -158,7 +185,14 @@
+ goto out;
+ }
+
++ err = ns_container_clone(tsk);
++ if (err) {
++ put_nsproxy(new_ns);
++ goto out;
++ }
++
+ tsk->nsproxy = new_ns;
++
+ out:
+ put_nsproxy(old_ns);
+ vxdprintk(VXD_CBIT(space, 3),
+@@ -194,25 +228,37 @@
+ "unshare_nsproxy_namespaces(0x%08lx,[%p])",
+ unshare_flags, current->nsproxy);
+
+- if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
++ if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
++ CLONE_NEWUSER | CLONE_NEWNET)))
+ return 0;
+
+-#ifndef CONFIG_IPC_NS
+- if (unshare_flags & CLONE_NEWIPC)
++#ifndef CONFIG_NET_NS
++ if (unshare_flags & CLONE_NEWNET)
+ return -EINVAL;
+ #endif
+-
+-#ifndef CONFIG_UTS_NS
+- if (unshare_flags & CLONE_NEWUTS)
+- return -EINVAL;
+-#endif
+-
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *new_nsp = create_new_namespaces(unshare_flags, current,
+ new_fs ? new_fs : current->fs);
+- if (IS_ERR(*new_nsp))
++ if (IS_ERR(*new_nsp)) {
+ err = PTR_ERR(*new_nsp);
++ goto out;
++ }
++
++ err = ns_container_clone(current);
++ if (err)
++ put_nsproxy(*new_nsp);
++
++out:
+ return err;
+ }
++
++static int __init nsproxy_cache_init(void)
++{
++ nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
++ 0, SLAB_PANIC, NULL, NULL);
++ return 0;
++}
++
++module_init(nsproxy_cache_init);
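The widened flag checks above are what unshare(2) and clone(2) feed into:
userspace passes the same CLONE_NEW* flags straight through to
unshare_nsproxy_namespaces()/copy_namespaces(). A userspace sketch of the
UTS case (requires CAP_SYS_ADMIN, matching the capable() check above):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* a private UTS namespace: sethostname() below no longer
	 * affects the parent's view of the hostname */
	if (unshare(CLONE_NEWUTS) < 0) {
		perror("unshare(CLONE_NEWUTS)");
		return 1;
	}
	if (sethostname("sandbox", 7) < 0) {
		perror("sethostname");
		return 1;
	}
	printf("hostname changed only inside this namespace\n");
	return 0;
}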
+diff -Nurb linux-2.6.22-570/kernel/params.c linux-2.6.22-591/kernel/params.c
+--- linux-2.6.22-570/kernel/params.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/kernel/params.c 2007-12-21 15:36:12.000000000 -0500
+@@ -491,7 +491,6 @@
+ pattr->mattr.show = param_attr_show;
+ pattr->mattr.store = param_attr_store;
+ pattr->mattr.attr.name = (char *)&kp->name[name_skip];
+- pattr->mattr.attr.owner = mk->mod;
+ pattr->mattr.attr.mode = kp->perm;
+ *(gattr++) = &(pattr++)->mattr.attr;
+ }
+diff -Nurb linux-2.6.22-570/kernel/pid.c linux-2.6.22-591/kernel/pid.c
+--- linux-2.6.22-570/kernel/pid.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/pid.c 2007-12-21 15:36:12.000000000 -0500
+@@ -379,7 +379,7 @@
+ }
+ EXPORT_SYMBOL_GPL(find_get_pid);
+
+-struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns)
++struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+ {
+ BUG_ON(!old_ns);
+ get_pid_ns(old_ns);
+diff -Nurb linux-2.6.22-570/kernel/ptrace.c linux-2.6.22-591/kernel/ptrace.c
+--- linux-2.6.22-570/kernel/ptrace.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/ptrace.c 2007-12-21 15:36:12.000000000 -0500
+@@ -143,7 +143,7 @@
+ return -EPERM;
+ smp_rmb();
+ if (task->mm)
+- dumpable = task->mm->dumpable;
++ dumpable = get_dumpable(task->mm);
+ if (!dumpable && !capable(CAP_SYS_PTRACE))
+ return -EPERM;
+ if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT))
+diff -Nurb linux-2.6.22-570/kernel/rcutorture.c linux-2.6.22-591/kernel/rcutorture.c
+--- linux-2.6.22-570/kernel/rcutorture.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/rcutorture.c 2007-12-21 15:36:12.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/percpu.h>
+ #include <linux/notifier.h>
++#include <linux/freezer.h>
+ #include <linux/cpu.h>
+ #include <linux/random.h>
+ #include <linux/delay.h>
+@@ -518,7 +519,6 @@
+
+ VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
+ set_user_nice(current, 19);
+- current->flags |= PF_NOFREEZE;
+
+ do {
+ schedule_timeout_uninterruptible(1);
+@@ -558,7 +558,6 @@
+
+ VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
+ set_user_nice(current, 19);
+- current->flags |= PF_NOFREEZE;
+
+ do {
+ schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
+@@ -589,7 +588,6 @@
+
+ VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
+ set_user_nice(current, 19);
+- current->flags |= PF_NOFREEZE;
+
+ do {
+ idx = cur_ops->readlock();
+diff -Nurb linux-2.6.22-570/kernel/rtmutex-tester.c linux-2.6.22-591/kernel/rtmutex-tester.c
+--- linux-2.6.22-570/kernel/rtmutex-tester.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/rtmutex-tester.c 2007-12-21 15:36:12.000000000 -0500
+@@ -260,6 +260,7 @@
+ int ret;
+
+ current->flags |= PF_MUTEX_TESTER;
++ set_freezable();
+ allow_signal(SIGHUP);
+
+ for(;;) {
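The PF_NOFREEZE deletions here and in the surrounding files track the new
freezer convention: kernel threads are no longer freezable by default, so
instead of opting out with PF_NOFREEZE, a thread that wants to take part
in suspend opts in with set_freezable(), as rtmutex-tester now does. A
sketch of the pattern for a freezable kthread:

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int my_worker(void *unused)
{
	set_freezable();	/* opt in; the default is now non-freezable */

	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here across suspend/resume */
		/* ... do one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}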
+diff -Nurb linux-2.6.22-570/kernel/sched.c linux-2.6.22-591/kernel/sched.c
+--- linux-2.6.22-570/kernel/sched.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/sched.c 2007-12-21 15:36:12.000000000 -0500
+@@ -51,8 +51,10 @@
+ #include <linux/times.h>
+ #include <linux/tsacct_kern.h>
+ #include <linux/kprobes.h>
++#include <linux/kgdb.h>
+ #include <linux/delayacct.h>
+ #include <linux/reciprocal_div.h>
++#include <linux/cpu_acct.h>
+
+ #include <asm/tlb.h>
+ #include <asm/unistd.h>
+@@ -3399,9 +3401,16 @@
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
+ cputime64_t tmp;
++ struct rq *rq = this_rq();
+ int nice = (TASK_NICE(p) > 0);
+
+ p->utime = cputime_add(p->utime, cputime);
++
++
++ if (p != rq->idle)
++ cpuacct_charge(p, cputime);
++
++
+ vx_account_user(vxi, cputime, nice);
+
+ /* Add user time to cpustat. */
+@@ -3435,9 +3444,10 @@
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ else if (softirq_count())
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+- else if (p != rq->idle)
++ else if (p != rq->idle) {
+ cpustat->system = cputime64_add(cpustat->system, tmp);
+- else if (atomic_read(&rq->nr_iowait) > 0)
++ cpuacct_charge(p, cputime);
++ } else if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, tmp);
+@@ -3462,8 +3472,10 @@
+ cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, tmp);
+- } else
++ } else {
+ cpustat->steal = cputime64_add(cpustat->steal, tmp);
++ cpuacct_charge(p, -tmp);
++ }
+ }
+
+ static void task_running_tick(struct rq *rq, struct task_struct *p, int cpu)
+@@ -5287,8 +5299,6 @@
+ struct migration_req *req;
+ struct list_head *head;
+
+- try_to_freeze();
+-
+ spin_lock_irq(&rq->lock);
+
+ if (cpu_is_offline(cpu)) {
+@@ -5522,7 +5532,6 @@
+ p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+ if (IS_ERR(p))
+ return NOTIFY_BAD;
+- p->flags |= PF_NOFREEZE;
+ kthread_bind(p, cpu);
+ /* Must be high prio: stop_machine expects to yield to it. */
+ rq = task_rq_lock(p, &flags);
+@@ -6926,33 +6935,6 @@
+ arch_destroy_sched_domains(cpu_map);
+ }
+
+-/*
+- * Partition sched domains as specified by the cpumasks below.
+- * This attaches all cpus from the cpumasks to the NULL domain,
+- * waits for a RCU quiescent period, recalculates sched
+- * domain information and then attaches them back to the
+- * correct sched domains
+- * Call with hotplug lock held
+- */
+-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
+-{
+- cpumask_t change_map;
+- int err = 0;
+-
+- cpus_and(*partition1, *partition1, cpu_online_map);
+- cpus_and(*partition2, *partition2, cpu_online_map);
+- cpus_or(change_map, *partition1, *partition2);
+-
+- /* Detach sched domains from all of the affected cpus */
+- detach_destroy_domains(&change_map);
+- if (!cpus_empty(*partition1))
+- err = build_sched_domains(partition1);
+- if (!err && !cpus_empty(*partition2))
+- err = build_sched_domains(partition2);
+-
+- return err;
+-}
+-
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ int arch_reinit_sched_domains(void)
+ {
+@@ -7177,6 +7159,9 @@
+ #ifdef in_atomic
+ static unsigned long prev_jiffy; /* ratelimiting */
+
++ if (atomic_read(&debugger_active))
++ return;
++
+ if ((in_atomic() || irqs_disabled()) &&
+ system_state == SYSTEM_RUNNING && !oops_in_progress) {
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+diff -Nurb linux-2.6.22-570/kernel/seccomp.c linux-2.6.22-591/kernel/seccomp.c
+--- linux-2.6.22-570/kernel/seccomp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/seccomp.c 2007-12-21 15:36:12.000000000 -0500
+@@ -10,6 +10,7 @@
+ #include <linux/sched.h>
+
+ /* #define SECCOMP_DEBUG 1 */
++#define NR_SECCOMP_MODES 1
+
+ /*
+ * Secure computing mode 1 allows only read/write/exit/sigreturn.
+@@ -54,3 +55,28 @@
+ #endif
+ do_exit(SIGKILL);
+ }
++
++long prctl_get_seccomp(void)
++{
++ return current->seccomp.mode;
++}
++
++long prctl_set_seccomp(unsigned long seccomp_mode)
++{
++ long ret;
++
++ /* can set it only once to be even more secure */
++ ret = -EPERM;
++ if (unlikely(current->seccomp.mode))
++ goto out;
++
++ ret = -EINVAL;
++ if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
++ current->seccomp.mode = seccomp_mode;
++ set_thread_flag(TIF_SECCOMP);
++ ret = 0;
++ }
++
++ out:
++ return ret;
++}
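prctl_get_seccomp()/prctl_set_seccomp() are wired to the PR_GET_SECCOMP
and PR_SET_SECCOMP cases added to sys_prctl() in kernel/sys.c below. A
userspace sketch; the prctl numbers (21/22) match the values this series
uses, but are defined defensively here in case the libc headers predate
them:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_SECCOMP
#define PR_GET_SECCOMP	21
#define PR_SET_SECCOMP	22
#endif

int main(void)
{
	/* one-shot: per the EPERM check above, mode 1 cannot be unset */
	if (prctl(PR_SET_SECCOMP, 1) < 0) {
		perror("prctl(PR_SET_SECCOMP)");
		return 1;
	}
	/* only read/write/exit/sigreturn are allowed from here on */
	write(1, "sealed\n", 7);
	_exit(0);
}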
+diff -Nurb linux-2.6.22-570/kernel/signal.c linux-2.6.22-591/kernel/signal.c
+--- linux-2.6.22-570/kernel/signal.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/signal.c 2007-12-21 15:36:12.000000000 -0500
+@@ -257,6 +257,16 @@
+ }
+ }
+
++int unhandled_signal(struct task_struct *tsk, int sig)
++{
++ if (is_init(tsk))
++ return 1;
++ if (tsk->ptrace & PT_PTRACED)
++ return 0;
++ return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
++ (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
++}
++
+
+ /* Notify the system that a driver wants to block all signals for this
+ * process, and wants to be notified if any signals at all were to be
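unhandled_signal() exists for the arch fault paths: they want to log a
fault only when the task has no handler of its own and is not being
ptraced, gated by the debug.show-unhandled-signals sysctl added to
kernel/sysctl.c below. A sketch of such a caller, assuming the
declaration lands in <linux/sched.h>; report_bad_access() and the message
format are illustrative, and show_unhandled_signals is normally supplied
by the architecture:

#include <linux/kernel.h>
#include <linux/sched.h>

extern int show_unhandled_signals;

static void report_bad_access(struct task_struct *tsk,
			      unsigned long address)
{
	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))
		printk(KERN_INFO "%s[%d]: segfault at %08lx\n",
		       tsk->comm, tsk->pid, address);
}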
+diff -Nurb linux-2.6.22-570/kernel/softirq.c linux-2.6.22-591/kernel/softirq.c
+--- linux-2.6.22-570/kernel/softirq.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/softirq.c 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/notifier.h>
+ #include <linux/percpu.h>
+ #include <linux/cpu.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/rcupdate.h>
+ #include <linux/smp.h>
+@@ -304,11 +305,6 @@
+ if (!in_interrupt() && local_softirq_pending())
+ invoke_softirq();
+
+-#ifdef CONFIG_NO_HZ
+- /* Make sure that timer wheel updates are propagated */
+- if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+- tick_nohz_stop_sched_tick();
+-#endif
+ preempt_enable_no_resched();
+ }
+
+@@ -490,7 +486,6 @@
+ static int ksoftirqd(void * __bind_cpu)
+ {
+ set_user_nice(current, 19);
+- current->flags |= PF_NOFREEZE;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+diff -Nurb linux-2.6.22-570/kernel/softlockup.c linux-2.6.22-591/kernel/softlockup.c
+--- linux-2.6.22-570/kernel/softlockup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/softlockup.c 2007-12-21 15:36:12.000000000 -0500
+@@ -10,9 +10,11 @@
+ #include <linux/cpu.h>
+ #include <linux/init.h>
+ #include <linux/delay.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/notifier.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+
+ static DEFINE_SPINLOCK(print_lock);
+
+@@ -47,6 +49,9 @@
+ void touch_softlockup_watchdog(void)
+ {
+ __raw_get_cpu_var(touch_timestamp) = get_timestamp();
++#ifdef CONFIG_KGDB
++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 0);
++#endif
+ }
+ EXPORT_SYMBOL(touch_softlockup_watchdog);
+
+@@ -116,7 +121,6 @@
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ 	sched_setscheduler(current, SCHED_FIFO, &param);
+- current->flags |= PF_NOFREEZE;
+
+ /* initialize timestamp */
+ touch_softlockup_watchdog();
+diff -Nurb linux-2.6.22-570/kernel/sys.c linux-2.6.22-591/kernel/sys.c
+--- linux-2.6.22-570/kernel/sys.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/sys.c 2007-12-21 15:36:12.000000000 -0500
+@@ -31,6 +31,7 @@
+ #include <linux/cn_proc.h>
+ #include <linux/getcpu.h>
+ #include <linux/task_io_accounting_ops.h>
++#include <linux/seccomp.h>
+ #include <linux/cpu.h>
+
+ #include <linux/compat.h>
+@@ -1043,7 +1044,7 @@
+ return -EPERM;
+ }
+ if (new_egid != old_egid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ if (rgid != (gid_t) -1 ||
+@@ -1073,13 +1074,13 @@
+
+ if (capable(CAP_SETGID)) {
+ if (old_egid != gid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->gid = current->egid = current->sgid = current->fsgid = gid;
+ } else if ((gid == current->gid) || (gid == current->sgid)) {
+ if (old_egid != gid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->egid = current->fsgid = gid;
+@@ -1110,7 +1111,7 @@
+ switch_uid(new_user);
+
+ if (dumpclear) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->uid = new_ruid;
+@@ -1166,7 +1167,7 @@
+ return -EAGAIN;
+
+ if (new_euid != old_euid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->fsuid = current->euid = new_euid;
+@@ -1216,7 +1217,7 @@
+ return -EPERM;
+
+ if (old_euid != uid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->fsuid = current->euid = uid;
+@@ -1261,7 +1262,7 @@
+ }
+ if (euid != (uid_t) -1) {
+ if (euid != current->euid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->euid = euid;
+@@ -1311,7 +1312,7 @@
+ }
+ if (egid != (gid_t) -1) {
+ if (egid != current->egid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->egid = egid;
+@@ -1357,7 +1358,7 @@
+ uid == current->suid || uid == current->fsuid ||
+ capable(CAP_SETUID)) {
+ if (uid != old_fsuid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->fsuid = uid;
+@@ -1386,7 +1387,7 @@
+ gid == current->sgid || gid == current->fsgid ||
+ capable(CAP_SETGID)) {
+ if (gid != old_fsgid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+ smp_wmb();
+ }
+ current->fsgid = gid;
+@@ -2185,14 +2186,14 @@
+ error = put_user(current->pdeath_signal, (int __user *)arg2);
+ break;
+ case PR_GET_DUMPABLE:
+- error = current->mm->dumpable;
++ error = get_dumpable(current->mm);
+ break;
+ case PR_SET_DUMPABLE:
+ if (arg2 < 0 || arg2 > 1) {
+ error = -EINVAL;
+ break;
+ }
+- current->mm->dumpable = arg2;
++ set_dumpable(current->mm, arg2);
+ break;
+
+ case PR_SET_UNALIGN:
+@@ -2261,6 +2262,13 @@
+ error = SET_ENDIAN(current, arg2);
+ break;
+
++ case PR_GET_SECCOMP:
++ error = prctl_get_seccomp();
++ break;
++ case PR_SET_SECCOMP:
++ error = prctl_set_seccomp(arg2);
++ break;
++
+ default:
+ error = -EINVAL;
+ break;
+@@ -2297,3 +2305,61 @@
+ }
+ return err ? -EFAULT : 0;
+ }
++
++char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
++
++static void argv_cleanup(char **argv, char **envp)
++{
++ argv_free(argv);
++}
++
++/**
++ * orderly_poweroff - Trigger an orderly system poweroff
++ * @force: force poweroff if command execution fails
++ *
++ * This may be called from any context to trigger a system shutdown.
++ * If the orderly shutdown fails, it will force an immediate shutdown.
++ */
++int orderly_poweroff(bool force)
++{
++ int argc;
++ char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
++ static char *envp[] = {
++ "HOME=/",
++ "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
++ NULL
++ };
++ int ret = -ENOMEM;
++ struct subprocess_info *info;
++
++ if (argv == NULL) {
++ printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
++ __func__, poweroff_cmd);
++ goto out;
++ }
++
++ info = call_usermodehelper_setup(argv[0], argv, envp);
++ if (info == NULL) {
++ argv_free(argv);
++ goto out;
++ }
++
++ call_usermodehelper_setcleanup(info, argv_cleanup);
++
++ ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
++
++ out:
++ if (ret && force) {
++ printk(KERN_WARNING "Failed to start orderly shutdown: "
++ "forcing the issue\n");
++
++ /* I guess this should try to kick off some daemon to
++ sync and poweroff asap. Or not even bother syncing
++ if we're doing an emergency shutdown? */
++ emergency_sync();
++ kernel_power_off();
++ }
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(orderly_poweroff);
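orderly_poweroff() gives in-kernel callers a safe way to request a clean
userspace-driven shutdown; the command it runs is the poweroff_cmd string
exported as a sysctl in the kernel/sysctl.c hunk below. A sketch of a
caller, e.g. an emergency-thermal path (my_overtemp_handler is
illustrative; the declaration is assumed to live in <linux/reboot.h>):

#include <linux/reboot.h>

static void my_overtemp_handler(void)
{
	/* try the configured userspace helper first; force == true
	 * falls back to emergency_sync() + kernel_power_off() if the
	 * helper cannot even be started */
	orderly_poweroff(true);
}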
+diff -Nurb linux-2.6.22-570/kernel/sysctl.c linux-2.6.22-591/kernel/sysctl.c
+--- linux-2.6.22-570/kernel/sysctl.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/sysctl.c 2007-12-21 15:36:15.000000000 -0500
+@@ -45,13 +45,13 @@
+ #include <linux/syscalls.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/acpi.h>
++#include <linux/reboot.h>
++#include <linux/fs.h>
++#include <net/net_namespace.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+
+-extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
+- void __user *buffer, size_t *lenp, loff_t *ppos);
+-
+ #ifdef CONFIG_X86
+ #include <asm/nmi.h>
+ #include <asm/stacktrace.h>
+@@ -140,6 +140,10 @@
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+ #endif
+
++#ifdef CONFIG_NET
++static void sysctl_net_init(struct net *net);
++#endif
++
+ static ctl_table root_table[];
+ static struct ctl_table_header root_table_header =
+ { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
+@@ -203,7 +207,10 @@
+ .mode = 0555,
+ .child = dev_table,
+ },
+-
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ { .ctl_name = 0 }
+ };
+
+@@ -217,6 +224,15 @@
+ .proc_handler = &proc_dointvec,
+ },
+ {
++ .ctl_name = KERN_POWEROFF_CMD,
++ .procname = "poweroff_cmd",
++ .data = &poweroff_cmd,
++ .maxlen = POWEROFF_CMD_PATH_LEN,
++ .mode = 0644,
++ .proc_handler = &proc_dostring,
++ .strategy = &sysctl_string,
++ },
++ {
+ .ctl_name = KERN_CORE_USES_PID,
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+@@ -625,7 +641,20 @@
+ .proc_handler = &proc_dointvec,
+ },
+ #endif
+-
++#ifdef CONFIG_SECURITY
++ {
++ .ctl_name = CTL_UNNUMBERED,
++ .procname = "mmap_min_addr",
++ .data = &mmap_min_addr,
++ .maxlen = sizeof(unsigned long),
++ .mode = 0644,
++ .proc_handler = &proc_doulongvec_minmax,
++ },
++#endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ { .ctl_name = 0 }
+ };
+
+@@ -744,6 +773,14 @@
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
++ {
++ .ctl_name = VM_HUGETLB_TREAT_MOVABLE,
++ .procname = "hugepages_treat_as_movable",
++ .data = &hugepages_treat_as_movable,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &hugetlb_treat_movable_handler,
++ },
+ #endif
+ {
+ .ctl_name = VM_LOWMEM_RESERVE_RATIO,
+@@ -892,6 +929,10 @@
+ .extra1 = &zero,
+ },
+ #endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ { .ctl_name = 0 }
+ };
+
+@@ -1032,10 +1073,28 @@
+ .child = binfmt_misc_table,
+ },
+ #endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ { .ctl_name = 0 }
+ };
+
+ static ctl_table debug_table[] = {
++#ifdef CONFIG_X86
++ {
++ .ctl_name = DEBUG_UNHANDLED_SIGNALS,
++ .procname = "show-unhandled-signals",
++ .data = &show_unhandled_signals,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = proc_dointvec
++ },
++#endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ { .ctl_name = 0 }
+ };
+
+@@ -1097,6 +1156,11 @@
+ {
+ struct ctl_table_header *head;
+ struct list_head *tmp;
++ struct net *net = current->nsproxy->net_ns;
++
++ if (!net->net_table_header.ctl_table)
++ sysctl_net_init(net);
++
+ spin_lock(&sysctl_lock);
+ if (prev) {
+ tmp = &prev->ctl_entry;
+@@ -1114,6 +1178,10 @@
+ next:
+ tmp = tmp->next;
+ if (tmp == &root_table_header.ctl_entry)
++#ifdef CONFIG_NET
++ tmp = &net->net_table_header.ctl_entry;
++ else if (tmp == &net->net_table_header.ctl_entry)
++#endif
+ break;
+ }
+ spin_unlock(&sysctl_lock);
+@@ -1229,7 +1297,6 @@
+ void __user *newval, size_t newlen)
+ {
+ int op = 0, rc;
+- size_t len;
+
+ if (oldval)
+ op |= 004;
+@@ -1250,25 +1317,10 @@
+ /* If there is no strategy routine, or if the strategy returns
+ * zero, proceed with automatic r/w */
+ if (table->data && table->maxlen) {
+- if (oldval && oldlenp) {
+- if (get_user(len, oldlenp))
+- return -EFAULT;
+- if (len) {
+- if (len > table->maxlen)
+- len = table->maxlen;
+- if(copy_to_user(oldval, table->data, len))
+- return -EFAULT;
+- if(put_user(len, oldlenp))
+- return -EFAULT;
+- }
+- }
+- if (newval && newlen) {
+- len = newlen;
+- if (len > table->maxlen)
+- len = table->maxlen;
+- if(copy_from_user(table->data, newval, len))
+- return -EFAULT;
+- }
++ rc = sysctl_data(table, name, nlen, oldval, oldlenp,
++ newval, newlen);
++ if (rc < 0)
++ return rc;
+ }
+ return 0;
+ }
+@@ -1359,7 +1411,8 @@
+ * This routine returns %NULL on a failure to register, and a pointer
+ * to the table header on success.
+ */
+-struct ctl_table_header *register_sysctl_table(ctl_table * table)
++static struct ctl_table_header *__register_sysctl_table(
++ struct ctl_table_header *root, ctl_table * table)
+ {
+ struct ctl_table_header *tmp;
+ tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
+@@ -1371,11 +1424,16 @@
+ tmp->unregistering = NULL;
+ sysctl_set_parent(NULL, table);
+ spin_lock(&sysctl_lock);
+- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
++ list_add_tail(&tmp->ctl_entry, &root->ctl_entry);
+ spin_unlock(&sysctl_lock);
+ return tmp;
+ }
+
++struct ctl_table_header *register_sysctl_table(ctl_table *table)
++{
++ return __register_sysctl_table(&root_table_header, table);
++}
++
+ /**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+@@ -1392,6 +1450,92 @@
+ kfree(header);
+ }
+
++#ifdef CONFIG_NET
++
++static void *fixup_table_addr(void *addr,
++ const char *start, size_t size, const char *new)
++{
++ char *ptr = addr;
++ if ((ptr >= start) && (ptr < (start + size)))
++ ptr += new - start;
++ return ptr;
++}
++
++static void table_fixup(struct ctl_table *table,
++ const void *start, size_t size, const void *new)
++{
++ for (; table->ctl_name || table->procname; table++) {
++ table->data = fixup_table_addr(table->data, start, size, new);
++ table->extra1 = fixup_table_addr(table->extra1, start, size, new);
++ table->extra2 = fixup_table_addr(table->extra2, start, size, new);
++
++ /* Whee recursive functions on the kernel stack */
++ if (table->child)
++ table_fixup(table->child, start, size, new);
++ }
++}
++
++static unsigned count_table_entries(struct ctl_table *table)
++{
++ unsigned entries = 0;
++ for (; table->ctl_name || table->procname; table++) {
++ entries += 1;
++
++ if (table->child)
++ entries += count_table_entries(table->child);
++ }
++ entries += 1; /* Null terminating entry */
++ return entries;
++}
++
++static struct ctl_table *copy_table_entries(
++ struct ctl_table *dest, struct ctl_table *src)
++{
++ struct ctl_table *table = dest;
++ for (; src->ctl_name || src->procname; src++) {
++		*dest++ = *src;
++ }
++ dest++; /* Null terminating entry */
++ for (; table->ctl_name || table->procname; table++) {
++ if (table->child)
++ dest = copy_table_entries(dest, table->child);
++ }
++ return dest;
++}
++
++static void sysctl_net_init(struct net *net)
++{
++ unsigned entries;
++ struct ctl_table *table;
++
++ entries = count_table_entries(net_root_table);
++	table = kzalloc(entries * sizeof(*table), GFP_KERNEL);
++ /* FIXME free table... */
++
++ copy_table_entries(table, net_root_table);
++ table_fixup(table, &init_net, sizeof(init_net), net);
++
++ net->net_table_header.ctl_table = table;
++ INIT_LIST_HEAD(&net->net_table_header.ctl_entry);
++}
++
++struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table)
++{
++ if (!net->net_table_header.ctl_table)
++ sysctl_net_init(net);
++ table_fixup(table, &init_net, sizeof(init_net), net);
++ return __register_sysctl_table(&net->net_table_header, table);
++}
++EXPORT_SYMBOL_GPL(register_net_sysctl_table);
++
++void unregister_net_sysctl_table(struct ctl_table_header *header)
++{
++ return unregister_sysctl_table(header);
++}
++EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
++#endif
++
++
+ #else /* !CONFIG_SYSCTL */
+ struct ctl_table_header *register_sysctl_table(ctl_table * table)
+ {
+@@ -2167,6 +2311,40 @@
+ * General sysctl support routines
+ */
+
++/* The generic sysctl data routine (used if no strategy routine supplied) */
++int sysctl_data(ctl_table *table, int __user *name, int nlen,
++ void __user *oldval, size_t __user *oldlenp,
++ void __user *newval, size_t newlen)
++{
++ size_t len;
++
++ /* Get out of I don't have a variable */
++ if (!table->data || !table->maxlen)
++ return -ENOTDIR;
++
++ if (oldval && oldlenp) {
++ if (get_user(len, oldlenp))
++ return -EFAULT;
++ if (len) {
++ if (len > table->maxlen)
++ len = table->maxlen;
++ if (copy_to_user(oldval, table->data, len))
++ return -EFAULT;
++ if (put_user(len, oldlenp))
++ return -EFAULT;
++ }
++ }
++
++ if (newval && newlen) {
++ if (newlen > table->maxlen)
++ newlen = table->maxlen;
++
++ if (copy_from_user(table->data, newval, newlen))
++ return -EFAULT;
++ }
++ return 1;
++}
++
+ /* The generic string strategy routine: */
+ int sysctl_string(ctl_table *table, int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+@@ -2355,6 +2533,13 @@
+ return -ENOSYS;
+ }
+
++int sysctl_data(ctl_table *table, int __user *name, int nlen,
++ void __user *oldval, size_t __user *oldlenp,
++ void __user *newval, size_t newlen)
++{
++ return -ENOSYS;
++}
++
+ int sysctl_string(ctl_table *table, int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+@@ -2402,4 +2587,5 @@
+ EXPORT_SYMBOL(sysctl_jiffies);
+ EXPORT_SYMBOL(sysctl_ms_jiffies);
+ EXPORT_SYMBOL(sysctl_string);
++EXPORT_SYMBOL(sysctl_data);
+ EXPORT_SYMBOL(unregister_sysctl_table);
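register_net_sysctl_table() depends on table_fixup() relocating every
.data/.extra pointer that falls inside init_net to the matching offset in
the target namespace, so per-namespace tables must point their data at
fields of struct net. A sketch, assuming a hypothetical 'int my_param'
member of struct net (no such field exists in this patch):

#include <linux/sysctl.h>
#include <net/net_namespace.h>

static struct ctl_table my_net_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "my_param",
		.data		= &init_net.my_param,	/* relocated per net */
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

static int my_net_sysctl_init(struct net *net)
{
	struct ctl_table_header *hdr;

	hdr = register_net_sysctl_table(net, my_net_table);
	return hdr ? 0 : -ENOMEM;
}

Note that, as written above, register_net_sysctl_table() rewrites the
caller's table in place via table_fixup(), so a static table like this
can only be registered against one namespace at a time.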
+diff -Nurb linux-2.6.22-570/kernel/taskstats.c linux-2.6.22-591/kernel/taskstats.c
+--- linux-2.6.22-570/kernel/taskstats.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/taskstats.c 2007-12-21 15:36:12.000000000 -0500
+@@ -196,6 +196,8 @@
+
+ /* fill in basic acct fields */
+ stats->version = TASKSTATS_VERSION;
++ stats->nvcsw = tsk->nvcsw;
++ stats->nivcsw = tsk->nivcsw;
+ bacct_add_tsk(stats, tsk);
+
+ /* fill in extended acct fields */
+@@ -242,6 +244,8 @@
+ */
+ delayacct_add_tsk(stats, tsk);
+
++ stats->nvcsw += tsk->nvcsw;
++ stats->nivcsw += tsk->nivcsw;
+ } while_each_thread(first, tsk);
+
+ unlock_task_sighand(first, &flags);
+diff -Nurb linux-2.6.22-570/kernel/time/tick-sched.c linux-2.6.22-591/kernel/time/tick-sched.c
+--- linux-2.6.22-570/kernel/time/tick-sched.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/time/tick-sched.c 2007-12-21 15:36:12.000000000 -0500
+@@ -153,6 +153,7 @@
+ unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+ struct tick_sched *ts;
+ ktime_t last_update, expires, now, delta;
++ struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ int cpu;
+
+ local_irq_save(flags);
+@@ -290,11 +291,34 @@
+ out:
+ ts->next_jiffies = next_jiffies;
+ ts->last_jiffies = last_jiffies;
++ ts->sleep_length = ktime_sub(dev->next_event, now);
+ end:
+ local_irq_restore(flags);
+ }
+
+ /**
++ * tick_nohz_get_sleep_length - return the length of the current sleep
++ *
++ * Called from power state control code with interrupts disabled
++ */
++ktime_t tick_nohz_get_sleep_length(void)
++{
++ struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
++
++ return ts->sleep_length;
++}
++
++/**
++ * tick_nohz_get_idle_jiffies - returns the current idle jiffie count
++ */
++unsigned long tick_nohz_get_idle_jiffies(void)
++{
++ struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
++
++ return ts->idle_jiffies;
++}
++
++/**
+ * nohz_restart_sched_tick - restart the idle tick from the idle task
+ *
+ * Restart the idle tick when the CPU is woken up from idle
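tick_nohz_get_sleep_length() is meant for idle/power governors: with
interrupts disabled they can ask how long the CPU is expected to stay
idle and pick an idle state of matching depth. A sketch of such a
consumer, assuming the declaration is exported via <linux/tick.h>; the
state numbering and thresholds are illustrative:

#include <linux/ktime.h>
#include <linux/tick.h>

static int my_pick_idle_state(void)
{
	/* interrupts must be disabled here, per the comment above */
	ktime_t sleep = tick_nohz_get_sleep_length();
	s64 us = ktime_to_us(sleep);

	/* deeper states have a higher entry/exit cost, so they only
	 * pay off for longer expected sleeps */
	if (us > 5000)
		return 3;
	if (us > 500)
		return 2;
	return 1;
}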
+diff -Nurb linux-2.6.22-570/kernel/time/timekeeping.c linux-2.6.22-591/kernel/time/timekeeping.c
+--- linux-2.6.22-570/kernel/time/timekeeping.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/time/timekeeping.c 2007-12-21 15:36:15.000000000 -0500
+@@ -39,7 +39,7 @@
+ */
+ struct timespec xtime __attribute__ ((aligned (16)));
+ struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
+-
++static unsigned long total_sleep_time;
+ EXPORT_SYMBOL(xtime);
+
+
+@@ -251,6 +251,7 @@
+ xtime.tv_nsec = 0;
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
++ total_sleep_time = 0;
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ }
+@@ -282,6 +283,7 @@
+
+ xtime.tv_sec += sleep_length;
+ wall_to_monotonic.tv_sec -= sleep_length;
++ total_sleep_time += sleep_length;
+ }
+ /* re-base the last cycle value */
+ clock->cycle_last = clocksource_read(clock);
+@@ -476,3 +478,34 @@
+ change_clocksource();
+ update_vsyscall(&xtime, clock);
+ }
++
++/**
++ * getboottime - Return the real time of system boot.
++ * @ts: pointer to the timespec to be set
++ *
++ * Returns the time of day in a timespec.
++ *
++ * This is based on the wall_to_monotonic offset and the total suspend
++ * time. Calls to settimeofday will affect the value returned (which
++ * basically means that however wrong your real time clock is at boot time,
++ * you get the right time here).
++ */
++void getboottime(struct timespec *ts)
++{
++ set_normalized_timespec(ts,
++ - (wall_to_monotonic.tv_sec + total_sleep_time),
++ - wall_to_monotonic.tv_nsec);
++}
++
++EXPORT_SYMBOL(getboottime);
++
++/**
++ * monotonic_to_bootbased - Convert the monotonic time to boot based.
++ * @ts: pointer to the timespec to be converted
++ */
++void monotonic_to_bootbased(struct timespec *ts)
++{
++ ts->tv_sec += total_sleep_time;
++}
++
++EXPORT_SYMBOL(monotonic_to_bootbased);
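++
++/*
++ * Usage sketch (hypothetical helper): a boot-based clock reading is a
++ * monotonic reading plus the accumulated suspend time.
++ */
++static inline void get_time_since_boot(struct timespec *ts)
++{
++ ktime_get_ts(ts); /* CLOCK_MONOTONIC */
++ monotonic_to_bootbased(ts); /* add total_sleep_time */
++}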
+diff -Nurb linux-2.6.22-570/kernel/timer.c linux-2.6.22-591/kernel/timer.c
+--- linux-2.6.22-570/kernel/timer.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/timer.c 2007-12-21 15:36:15.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/delay.h>
+ #include <linux/tick.h>
+ #include <linux/kallsyms.h>
++#include <linux/kgdb.h>
+ #include <linux/vs_base.h>
+ #include <linux/vs_cvirt.h>
+ #include <linux/vs_pid.h>
+@@ -886,7 +887,11 @@
+ */
+ void run_local_timers(void)
+ {
++ int this_cpu = smp_processor_id();
+ raise_softirq(TIMER_SOFTIRQ);
++#ifdef CONFIG_KGDB
++ if(!atomic_read(&kgdb_sync_softlockup[this_cpu]))
++#endif
+ softlockup_tick();
+ }
+
+@@ -1125,6 +1130,7 @@
+ getnstimeofday(&tp);
+ tp.tv_sec += wall_to_monotonic.tv_sec;
+ tp.tv_nsec += wall_to_monotonic.tv_nsec;
++ monotonic_to_bootbased(&tp);
+ if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
+ tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
+ tp.tv_sec++;
+diff -Nurb linux-2.6.22-570/kernel/unwind.c linux-2.6.22-591/kernel/unwind.c
+--- linux-2.6.22-570/kernel/unwind.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/unwind.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,1288 @@
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ * Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ *
++ * A simple API for unwinding kernel stacks. This is used for
++ * debugging and error reporting purposes. The kernel doesn't need
++ * full-blown stack unwinding with all the bells and whistles, so there
++ * is not much point in implementing the full Dwarf2 unwind API.
++ */
++
++#include <linux/unwind.h>
++#include <linux/module.h>
++#include <linux/bootmem.h>
++#include <linux/sort.h>
++#include <linux/stop_machine.h>
++#include <linux/uaccess.h>
++#include <asm/sections.h>
++#include <asm/uaccess.h>
++#include <asm/unaligned.h>
++
++extern const char __start_unwind[], __end_unwind[];
++extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
++
++#define MAX_STACK_DEPTH 8
++
++#define EXTRA_INFO(f) { \
++ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
++ % FIELD_SIZEOF(struct unwind_frame_info, f)) \
++ + offsetof(struct unwind_frame_info, f) \
++ / FIELD_SIZEOF(struct unwind_frame_info, f), \
++ FIELD_SIZEOF(struct unwind_frame_info, f) \
++ }
++#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
++
++static const struct {
++ unsigned offs:BITS_PER_LONG / 2;
++ unsigned width:BITS_PER_LONG / 2;
++} reg_info[] = {
++ UNW_REGISTER_INFO
++};
++
++#undef PTREGS_INFO
++#undef EXTRA_INFO
++
++#ifndef REG_INVALID
++#define REG_INVALID(r) (reg_info[r].width == 0)
++#endif
++
++#define DW_CFA_nop 0x00
++#define DW_CFA_set_loc 0x01
++#define DW_CFA_advance_loc1 0x02
++#define DW_CFA_advance_loc2 0x03
++#define DW_CFA_advance_loc4 0x04
++#define DW_CFA_offset_extended 0x05
++#define DW_CFA_restore_extended 0x06
++#define DW_CFA_undefined 0x07
++#define DW_CFA_same_value 0x08
++#define DW_CFA_register 0x09
++#define DW_CFA_remember_state 0x0a
++#define DW_CFA_restore_state 0x0b
++#define DW_CFA_def_cfa 0x0c
++#define DW_CFA_def_cfa_register 0x0d
++#define DW_CFA_def_cfa_offset 0x0e
++#define DW_CFA_def_cfa_expression 0x0f
++#define DW_CFA_expression 0x10
++#define DW_CFA_offset_extended_sf 0x11
++#define DW_CFA_def_cfa_sf 0x12
++#define DW_CFA_def_cfa_offset_sf 0x13
++#define DW_CFA_val_offset 0x14
++#define DW_CFA_val_offset_sf 0x15
++#define DW_CFA_val_expression 0x16
++#define DW_CFA_lo_user 0x1c
++#define DW_CFA_GNU_window_save 0x2d
++#define DW_CFA_GNU_args_size 0x2e
++#define DW_CFA_GNU_negative_offset_extended 0x2f
++#define DW_CFA_hi_user 0x3f
++
++#define DW_EH_PE_FORM 0x07
++#define DW_EH_PE_native 0x00
++#define DW_EH_PE_leb128 0x01
++#define DW_EH_PE_data2 0x02
++#define DW_EH_PE_data4 0x03
++#define DW_EH_PE_data8 0x04
++#define DW_EH_PE_signed 0x08
++#define DW_EH_PE_ADJUST 0x70
++#define DW_EH_PE_abs 0x00
++#define DW_EH_PE_pcrel 0x10
++#define DW_EH_PE_textrel 0x20
++#define DW_EH_PE_datarel 0x30
++#define DW_EH_PE_funcrel 0x40
++#define DW_EH_PE_aligned 0x50
++#define DW_EH_PE_indirect 0x80
++#define DW_EH_PE_omit 0xff
++
++typedef unsigned long uleb128_t;
++typedef signed long sleb128_t;
++#define sleb128abs __builtin_labs
++
++static struct unwind_table {
++ struct {
++ unsigned long pc;
++ unsigned long range;
++ } core, init;
++ const void *address;
++ unsigned long size;
++ const unsigned char *header;
++ unsigned long hdrsz;
++ struct unwind_table *link;
++ const char *name;
++} root_table;
++
++struct unwind_item {
++ enum item_location {
++ Nowhere,
++ Memory,
++ Register,
++ Value
++ } where;
++ uleb128_t value;
++};
++
++struct unwind_state {
++ uleb128_t loc, org;
++ const u8 *cieStart, *cieEnd;
++ uleb128_t codeAlign;
++ sleb128_t dataAlign;
++ struct cfa {
++ uleb128_t reg, offs;
++ } cfa;
++ struct unwind_item regs[ARRAY_SIZE(reg_info)];
++ unsigned stackDepth:8;
++ unsigned version:8;
++ const u8 *label;
++ const u8 *stack[MAX_STACK_DEPTH];
++};
++
++static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
++
++static unsigned unwind_debug;
++static int __init unwind_debug_setup(char *s)
++{
++ unwind_debug = simple_strtoul(s, NULL, 0);
++ return 1;
++}
++__setup("unwind_debug=", unwind_debug_setup);
++#define dprintk(lvl, fmt, args...) \
++ ((void)(lvl > unwind_debug \
++ || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
++
++static struct unwind_table *find_table(unsigned long pc)
++{
++ struct unwind_table *table;
++
++ for (table = &root_table; table; table = table->link)
++ if ((pc >= table->core.pc
++ && pc < table->core.pc + table->core.range)
++ || (pc >= table->init.pc
++ && pc < table->init.pc + table->init.range))
++ break;
++
++ return table;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++ const void *end,
++ signed ptrType,
++ unsigned long text_base,
++ unsigned long data_base);
++
++static void init_unwind_table(struct unwind_table *table,
++ const char *name,
++ const void *core_start,
++ unsigned long core_size,
++ const void *init_start,
++ unsigned long init_size,
++ const void *table_start,
++ unsigned long table_size,
++ const u8 *header_start,
++ unsigned long header_size)
++{
++ const u8 *ptr = header_start + 4;
++ const u8 *end = header_start + header_size;
++
++ table->core.pc = (unsigned long)core_start;
++ table->core.range = core_size;
++ table->init.pc = (unsigned long)init_start;
++ table->init.range = init_size;
++ table->address = table_start;
++ table->size = table_size;
++ /* See if the linker provided table looks valid. */
++ if (header_size <= 4
++ || header_start[0] != 1
++ || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
++ != table_start
++ || !read_pointer(&ptr, end, header_start[2], 0, 0)
++ || !read_pointer(&ptr, end, header_start[3], 0,
++ (unsigned long)header_start)
++ || !read_pointer(&ptr, end, header_start[3], 0,
++ (unsigned long)header_start))
++ header_start = NULL;
++ table->hdrsz = header_size;
++ smp_wmb();
++ table->header = header_start;
++ table->link = NULL;
++ table->name = name;
++}
++
++void __init unwind_init(void)
++{
++ init_unwind_table(&root_table, "kernel",
++ _text, _end - _text,
++ NULL, 0,
++ __start_unwind, __end_unwind - __start_unwind,
++ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
++}
++
++static const u32 bad_cie, not_fde;
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
++static signed fde_pointer_type(const u32 *cie);
++
++struct eh_frame_hdr_table_entry {
++ unsigned long start, fde;
++};
++
++static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
++{
++ const struct eh_frame_hdr_table_entry *e1 = p1;
++ const struct eh_frame_hdr_table_entry *e2 = p2;
++
++ return (e1->start > e2->start) - (e1->start < e2->start);
++}
++
++static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
++{
++ struct eh_frame_hdr_table_entry *e1 = p1;
++ struct eh_frame_hdr_table_entry *e2 = p2;
++ unsigned long v;
++
++ v = e1->start;
++ e1->start = e2->start;
++ e2->start = v;
++ v = e1->fde;
++ e1->fde = e2->fde;
++ e2->fde = v;
++}
++
++static void __init setup_unwind_table(struct unwind_table *table,
++ void *(*alloc)(unsigned long))
++{
++ const u8 *ptr;
++ unsigned long tableSize = table->size, hdrSize;
++ unsigned n;
++ const u32 *fde;
++ struct {
++ u8 version;
++ u8 eh_frame_ptr_enc;
++ u8 fde_count_enc;
++ u8 table_enc;
++ unsigned long eh_frame_ptr;
++ unsigned int fde_count;
++ struct eh_frame_hdr_table_entry table[];
++ } __attribute__((__packed__)) *header;
++
++ if (table->header)
++ return;
++
++ if (table->hdrsz)
++ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
++ table->name);
++
++ if (tableSize & (sizeof(*fde) - 1))
++ return;
++
++ for (fde = table->address, n = 0;
++ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
++ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++ const u32 *cie = cie_for_fde(fde, table);
++ signed ptrType;
++
++ if (cie == ¬_fde)
++ continue;
++ if (cie == NULL
++ || cie == &bad_cie
++ || (ptrType = fde_pointer_type(cie)) < 0)
++ return;
++ ptr = (const u8 *)(fde + 2);
++ if (!read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0))
++ return;
++ ++n;
++ }
++
++ if (tableSize || !n)
++ return;
++
++ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
++ + 2 * n * sizeof(unsigned long);
++ dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
++ header = alloc(hdrSize);
++ if (!header)
++ return;
++ header->version = 1;
++ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
++ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
++ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
++ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
++ BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
++ % __alignof(typeof(header->fde_count)));
++ header->fde_count = n;
++
++ BUILD_BUG_ON(offsetof(typeof(*header), table)
++ % __alignof(typeof(*header->table)));
++ for (fde = table->address, tableSize = table->size, n = 0;
++ tableSize;
++ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
++
++ if (!fde[1])
++ continue; /* this is a CIE */
++ ptr = (const u8 *)(fde + 2);
++ header->table[n].start = read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ fde_pointer_type(cie), 0, 0);
++ header->table[n].fde = (unsigned long)fde;
++ ++n;
++ }
++ WARN_ON(n != header->fde_count);
++
++ sort(header->table,
++ n,
++ sizeof(*header->table),
++ cmp_eh_frame_hdr_table_entries,
++ swap_eh_frame_hdr_table_entries);
++
++ table->hdrsz = hdrSize;
++ smp_wmb();
++ table->header = (const void *)header;
++}
++
++static void *__init balloc(unsigned long sz)
++{
++ return __alloc_bootmem_nopanic(sz,
++ sizeof(unsigned int),
++ __pa(MAX_DMA_ADDRESS));
++}
++
++void __init unwind_setup(void)
++{
++ setup_unwind_table(&root_table, balloc);
++}
++
++#ifdef CONFIG_MODULES
++
++static struct unwind_table *last_table;
++
++/* Must be called with module_mutex held. */
++void *unwind_add_table(struct module *module,
++ const void *table_start,
++ unsigned long table_size)
++{
++ struct unwind_table *table;
++
++ if (table_size <= 0)
++ return NULL;
++
++ table = kmalloc(sizeof(*table), GFP_KERNEL);
++ if (!table)
++ return NULL;
++
++ init_unwind_table(table, module->name,
++ module->module_core, module->core_size,
++ module->module_init, module->init_size,
++ table_start, table_size,
++ NULL, 0);
++
++ if (last_table)
++ last_table->link = table;
++ else
++ root_table.link = table;
++ last_table = table;
++
++ return table;
++}
++
++struct unlink_table_info
++{
++ struct unwind_table *table;
++ int init_only;
++};
++
++static int unlink_table(void *arg)
++{
++ struct unlink_table_info *info = arg;
++ struct unwind_table *table = info->table, *prev;
++
++ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
++ ;
++
++ if (prev->link) {
++ if (info->init_only) {
++ table->init.pc = 0;
++ table->init.range = 0;
++ info->table = NULL;
++ } else {
++ prev->link = table->link;
++ if (!prev->link)
++ last_table = prev;
++ }
++ } else
++ info->table = NULL;
++
++ return 0;
++}
++
++/* Must be called with module_mutex held. */
++void unwind_remove_table(void *handle, int init_only)
++{
++ struct unwind_table *table = handle;
++ struct unlink_table_info info;
++
++ if (!table || table == &root_table)
++ return;
++
++ if (init_only && table == last_table) {
++ table->init.pc = 0;
++ table->init.range = 0;
++ return;
++ }
++
++ info.table = table;
++ info.init_only = init_only;
++ stop_machine_run(unlink_table, &info, NR_CPUS);
++
++ if (info.table)
++ kfree(table);
++}
++
++#endif /* CONFIG_MODULES */
++
++static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
++{
++ const u8 *cur = *pcur;
++ uleb128_t value;
++ unsigned shift;
++
++ for (shift = 0, value = 0; cur < end; shift += 7) {
++ if (shift + 7 > 8 * sizeof(value)
++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++ cur = end + 1;
++ break;
++ }
++ value |= (uleb128_t)(*cur & 0x7f) << shift;
++ if (!(*cur++ & 0x80))
++ break;
++ }
++ *pcur = cur;
++
++ return value;
++}
++
++static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
++{
++ const u8 *cur = *pcur;
++ sleb128_t value;
++ unsigned shift;
++
++ for (shift = 0, value = 0; cur < end; shift += 7) {
++ if (shift + 7 > 8 * sizeof(value)
++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++ cur = end + 1;
++ break;
++ }
++ value |= (sleb128_t)(*cur & 0x7f) << shift;
++ if (!(*cur & 0x80)) {
++ value |= -(*cur++ & 0x40) << shift;
++ break;
++ }
++ }
++ *pcur = cur;
++
++ return value;
++}
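++
++/*
++ * Worked example: the ULEB128 byte sequence e5 8e 26 decodes to
++ * 0x65 + (0x0e << 7) + (0x26 << 14) = 101 + 1792 + 622592 = 624485,
++ * the sample value used in the DWARF specification.
++ */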
++
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
++{
++ const u32 *cie;
++
++ if (!*fde || (*fde & (sizeof(*fde) - 1)))
++ return &bad_cie;
++ if (!fde[1])
++ return ¬_fde; /* this is a CIE */
++ if ((fde[1] & (sizeof(*fde) - 1))
++ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
++ return NULL; /* this is not a valid FDE */
++ cie = fde + 1 - fde[1] / sizeof(*fde);
++ if (*cie <= sizeof(*cie) + 4
++ || *cie >= fde[1] - sizeof(*fde)
++ || (*cie & (sizeof(*cie) - 1))
++ || cie[1])
++ return NULL; /* this is not a (valid) CIE */
++ return cie;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++ const void *end,
++ signed ptrType,
++ unsigned long text_base,
++ unsigned long data_base)
++{
++ unsigned long value = 0;
++ union {
++ const u8 *p8;
++ const u16 *p16u;
++ const s16 *p16s;
++ const u32 *p32u;
++ const s32 *p32s;
++ const unsigned long *pul;
++ } ptr;
++
++ if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
++ dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
++ return 0;
++ }
++ ptr.p8 = *pLoc;
++ switch(ptrType & DW_EH_PE_FORM) {
++ case DW_EH_PE_data2:
++ if (end < (const void *)(ptr.p16u + 1)) {
++ dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ if(ptrType & DW_EH_PE_signed)
++ value = get_unaligned(ptr.p16s++);
++ else
++ value = get_unaligned(ptr.p16u++);
++ break;
++ case DW_EH_PE_data4:
++#ifdef CONFIG_64BIT
++ if (end < (const void *)(ptr.p32u + 1)) {
++ dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ if(ptrType & DW_EH_PE_signed)
++ value = get_unaligned(ptr.p32s++);
++ else
++ value = get_unaligned(ptr.p32u++);
++ break;
++ case DW_EH_PE_data8:
++ BUILD_BUG_ON(sizeof(u64) != sizeof(value));
++#else
++ BUILD_BUG_ON(sizeof(u32) != sizeof(value));
++#endif
++ case DW_EH_PE_native:
++ if (end < (const void *)(ptr.pul + 1)) {
++ dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ value = get_unaligned(ptr.pul++);
++ break;
++ case DW_EH_PE_leb128:
++ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
++ value = ptrType & DW_EH_PE_signed
++ ? get_sleb128(&ptr.p8, end)
++ : get_uleb128(&ptr.p8, end);
++ if ((const void *)ptr.p8 > end) {
++ dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ break;
++ default:
++ dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
++ ptrType, ptr.p8, end);
++ return 0;
++ }
++ switch(ptrType & DW_EH_PE_ADJUST) {
++ case DW_EH_PE_abs:
++ break;
++ case DW_EH_PE_pcrel:
++ value += (unsigned long)*pLoc;
++ break;
++ case DW_EH_PE_textrel:
++ if (likely(text_base)) {
++ value += text_base;
++ break;
++ }
++ dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
++ ptrType, *pLoc, end);
++ return 0;
++ case DW_EH_PE_datarel:
++ if (likely(data_base)) {
++ value += data_base;
++ break;
++ }
++ dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
++ ptrType, *pLoc, end);
++ return 0;
++ default:
++ dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
++ ptrType, *pLoc, end);
++ return 0;
++ }
++ if ((ptrType & DW_EH_PE_indirect)
++ && probe_kernel_address((unsigned long *)value, value)) {
++ dprintk(1, "Cannot read indirect value %lx (%p,%p).",
++ value, *pLoc, end);
++ return 0;
++ }
++ *pLoc = ptr.p8;
++
++ return value;
++}
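++
++/*
++ * Example: an encoding byte of 0x1b (DW_EH_PE_pcrel | DW_EH_PE_signed |
++ * DW_EH_PE_data4) reads a signed 32-bit value and adds the address of
++ * the field itself, the usual form for .eh_frame_hdr table entries.
++ */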
++
++static signed fde_pointer_type(const u32 *cie)
++{
++ const u8 *ptr = (const u8 *)(cie + 2);
++ unsigned version = *ptr;
++
++ if (version != 1)
++ return -1; /* unsupported */
++ if (*++ptr) {
++ const char *aug;
++ const u8 *end = (const u8 *)(cie + 1) + *cie;
++ uleb128_t len;
++
++ /* check if augmentation size is first (and thus present) */
++ if (*ptr != 'z')
++ return -1;
++ /* check if augmentation string is nul-terminated */
++ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
++ return -1;
++ ++ptr; /* skip terminator */
++ get_uleb128(&ptr, end); /* skip code alignment */
++ get_sleb128(&ptr, end); /* skip data alignment */
++ /* skip return address column */
++ version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
++ len = get_uleb128(&ptr, end); /* augmentation length */
++ if (ptr + len < ptr || ptr + len > end)
++ return -1;
++ end = ptr + len;
++ while (*++aug) {
++ if (ptr >= end)
++ return -1;
++ switch(*aug) {
++ case 'L':
++ ++ptr;
++ break;
++ case 'P': {
++ signed ptrType = *ptr++;
++
++ if (!read_pointer(&ptr, end, ptrType, 0, 0)
++ || ptr > end)
++ return -1;
++ }
++ break;
++ case 'R':
++ return *ptr;
++ default:
++ return -1;
++ }
++ }
++ }
++ return DW_EH_PE_native|DW_EH_PE_abs;
++}
++
++static int advance_loc(unsigned long delta, struct unwind_state *state)
++{
++ state->loc += delta * state->codeAlign;
++
++ return delta > 0;
++}
++
++static void set_rule(uleb128_t reg,
++ enum item_location where,
++ uleb128_t value,
++ struct unwind_state *state)
++{
++ if (reg < ARRAY_SIZE(state->regs)) {
++ state->regs[reg].where = where;
++ state->regs[reg].value = value;
++ }
++}
++
++static int processCFI(const u8 *start,
++ const u8 *end,
++ unsigned long targetLoc,
++ signed ptrType,
++ struct unwind_state *state)
++{
++ union {
++ const u8 *p8;
++ const u16 *p16;
++ const u32 *p32;
++ } ptr;
++ int result = 1;
++
++ if (start != state->cieStart) {
++ state->loc = state->org;
++ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
++ if (targetLoc == 0 && state->label == NULL)
++ return result;
++ }
++ for (ptr.p8 = start; result && ptr.p8 < end; ) {
++ switch(*ptr.p8 >> 6) {
++ uleb128_t value;
++
++ case 0:
++ switch(*ptr.p8++) {
++ case DW_CFA_nop:
++ break;
++ case DW_CFA_set_loc:
++ state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
++ if (state->loc == 0)
++ result = 0;
++ break;
++ case DW_CFA_advance_loc1:
++ result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
++ break;
++ case DW_CFA_advance_loc2:
++ result = ptr.p8 <= end + 2
++ && advance_loc(*ptr.p16++, state);
++ break;
++ case DW_CFA_advance_loc4:
++ result = ptr.p8 <= end + 4
++ && advance_loc(*ptr.p32++, state);
++ break;
++ case DW_CFA_offset_extended:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_val_offset:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_offset_extended_sf:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_val_offset_sf:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_restore_extended:
++ case DW_CFA_undefined:
++ case DW_CFA_same_value:
++ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
++ break;
++ case DW_CFA_register:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value,
++ Register,
++ get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_remember_state:
++ if (ptr.p8 == state->label) {
++ state->label = NULL;
++ return 1;
++ }
++ if (state->stackDepth >= MAX_STACK_DEPTH) {
++ dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ state->stack[state->stackDepth++] = ptr.p8;
++ break;
++ case DW_CFA_restore_state:
++ if (state->stackDepth) {
++ const uleb128_t loc = state->loc;
++ const u8 *label = state->label;
++
++ state->label = state->stack[state->stackDepth - 1];
++ memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
++ memset(state->regs, 0, sizeof(state->regs));
++ state->stackDepth = 0;
++ result = processCFI(start, end, 0, ptrType, state);
++ state->loc = loc;
++ state->label = label;
++ } else {
++ dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
++ return 0;
++ }
++ break;
++ case DW_CFA_def_cfa:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ /*nobreak*/
++ case DW_CFA_def_cfa_offset:
++ state->cfa.offs = get_uleb128(&ptr.p8, end);
++ break;
++ case DW_CFA_def_cfa_sf:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ /*nobreak*/
++ case DW_CFA_def_cfa_offset_sf:
++ state->cfa.offs = get_sleb128(&ptr.p8, end)
++ * state->dataAlign;
++ break;
++ case DW_CFA_def_cfa_register:
++ state->cfa.reg = get_uleb128(&ptr.p8, end);
++ break;
++ /*todo case DW_CFA_def_cfa_expression: */
++ /*todo case DW_CFA_expression: */
++ /*todo case DW_CFA_val_expression: */
++ case DW_CFA_GNU_args_size:
++ get_uleb128(&ptr.p8, end);
++ break;
++ case DW_CFA_GNU_negative_offset_extended:
++ value = get_uleb128(&ptr.p8, end);
++ set_rule(value,
++ Memory,
++ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
++ break;
++ case DW_CFA_GNU_window_save:
++ default:
++ dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
++ result = 0;
++ break;
++ }
++ break;
++ case 1:
++ result = advance_loc(*ptr.p8++ & 0x3f, state);
++ break;
++ case 2:
++ value = *ptr.p8++ & 0x3f;
++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++ break;
++ case 3:
++ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
++ break;
++ }
++ if (ptr.p8 > end) {
++ dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
++ result = 0;
++ }
++ if (result && targetLoc != 0 && targetLoc < state->loc)
++ return 1;
++ }
++
++ if (result && ptr.p8 < end)
++ dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
++
++ return result
++ && ptr.p8 == end
++ && (targetLoc == 0
++ || (/*todo While in theory this should apply, gcc in practice omits
++ everything past the function prolog, and hence the location
++ never reaches the end of the function.
++ targetLoc < state->loc &&*/ state->label == NULL));
++}
++
++/* Unwind to the previous frame. Returns 0 if successful, negative
++ * number in case of an error. */
++int unwind(struct unwind_frame_info *frame)
++{
++#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
++ const u32 *fde = NULL, *cie = NULL;
++ const u8 *ptr = NULL, *end = NULL;
++ unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
++ unsigned long startLoc = 0, endLoc = 0, cfa;
++ unsigned i;
++ signed ptrType = -1;
++ uleb128_t retAddrReg = 0;
++ const struct unwind_table *table;
++ struct unwind_state state;
++
++ if (UNW_PC(frame) == 0)
++ return -EINVAL;
++ if ((table = find_table(pc)) != NULL
++ && !(table->size & (sizeof(*fde) - 1))) {
++ const u8 *hdr = table->header;
++ unsigned long tableSize;
++
++ smp_rmb();
++ if (hdr && hdr[0] == 1) {
++ switch(hdr[3] & DW_EH_PE_FORM) {
++ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
++ case DW_EH_PE_data2: tableSize = 2; break;
++ case DW_EH_PE_data4: tableSize = 4; break;
++ case DW_EH_PE_data8: tableSize = 8; break;
++ default: tableSize = 0; break;
++ }
++ ptr = hdr + 4;
++ end = hdr + table->hdrsz;
++ if (tableSize
++ && read_pointer(&ptr, end, hdr[1], 0, 0)
++ == (unsigned long)table->address
++ && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
++ && i == (end - ptr) / (2 * tableSize)
++ && !((end - ptr) % (2 * tableSize))) {
++ do {
++ const u8 *cur = ptr + (i / 2) * (2 * tableSize);
++
++ startLoc = read_pointer(&cur,
++ cur + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr);
++ if (pc < startLoc)
++ i /= 2;
++ else {
++ ptr = cur - tableSize;
++ i = (i + 1) / 2;
++ }
++ } while (startLoc && i > 1);
++ if (i == 1
++ && (startLoc = read_pointer(&ptr,
++ ptr + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr)) != 0
++ && pc >= startLoc)
++ fde = (void *)read_pointer(&ptr,
++ ptr + tableSize,
++ hdr[3], 0,
++ (unsigned long)hdr);
++ }
++ }
++ if(hdr && !fde)
++ dprintk(3, "Binary lookup for %lx failed.", pc);
++
++ if (fde != NULL) {
++ cie = cie_for_fde(fde, table);
++ ptr = (const u8 *)(fde + 2);
++ if(cie != NULL
++ && cie != &bad_cie
++ && cie != ¬_fde
++ && (ptrType = fde_pointer_type(cie)) >= 0
++ && read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0) == startLoc) {
++ if (!(ptrType & DW_EH_PE_indirect))
++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++ endLoc = startLoc
++ + read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if(pc >= endLoc)
++ fde = NULL;
++ } else
++ fde = NULL;
++ if(!fde)
++ dprintk(1, "Binary lookup result for %lx discarded.", pc);
++ }
++ if (fde == NULL) {
++ for (fde = table->address, tableSize = table->size;
++ cie = NULL, tableSize > sizeof(*fde)
++ && tableSize - sizeof(*fde) >= *fde;
++ tableSize -= sizeof(*fde) + *fde,
++ fde += 1 + *fde / sizeof(*fde)) {
++ cie = cie_for_fde(fde, table);
++ if (cie == &bad_cie) {
++ cie = NULL;
++ break;
++ }
++ if (cie == NULL
++ || cie == ¬_fde
++ || (ptrType = fde_pointer_type(cie)) < 0)
++ continue;
++ ptr = (const u8 *)(fde + 2);
++ startLoc = read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if (!startLoc)
++ continue;
++ if (!(ptrType & DW_EH_PE_indirect))
++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++ endLoc = startLoc
++ + read_pointer(&ptr,
++ (const u8 *)(fde + 1) + *fde,
++ ptrType, 0, 0);
++ if (pc >= startLoc && pc < endLoc)
++ break;
++ }
++ if(!fde)
++ dprintk(3, "Linear lookup for %lx failed.", pc);
++ }
++ }
++ if (cie != NULL) {
++ memset(&state, 0, sizeof(state));
++ state.cieEnd = ptr; /* keep here temporarily */
++ ptr = (const u8 *)(cie + 2);
++ end = (const u8 *)(cie + 1) + *cie;
++ frame->call_frame = 1;
++ if ((state.version = *ptr) != 1)
++ cie = NULL; /* unsupported version */
++ else if (*++ptr) {
++ /* check if augmentation size is first (and thus present) */
++ if (*ptr == 'z') {
++ while (++ptr < end && *ptr) {
++ switch(*ptr) {
++ /* check for ignorable (or already handled)
++ * nul-terminated augmentation string */
++ case 'L':
++ case 'P':
++ case 'R':
++ continue;
++ case 'S':
++ frame->call_frame = 0;
++ continue;
++ default:
++ break;
++ }
++ break;
++ }
++ }
++ if (ptr >= end || *ptr)
++ cie = NULL;
++ }
++ if(!cie)
++ dprintk(1, "CIE unusable (%p,%p).", ptr, end);
++ ++ptr;
++ }
++ if (cie != NULL) {
++ /* get code alignment factor */
++ state.codeAlign = get_uleb128(&ptr, end);
++ /* get data alignment factor */
++ state.dataAlign = get_sleb128(&ptr, end);
++ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
++ cie = NULL;
++ else if (UNW_PC(frame) % state.codeAlign
++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++ dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
++ UNW_PC(frame), UNW_SP(frame));
++ return -EPERM;
++ } else {
++ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
++ /* skip augmentation */
++ if (((const char *)(cie + 2))[1] == 'z') {
++ uleb128_t augSize = get_uleb128(&ptr, end);
++
++ ptr += augSize;
++ }
++ if (ptr > end
++ || retAddrReg >= ARRAY_SIZE(reg_info)
++ || REG_INVALID(retAddrReg)
++ || reg_info[retAddrReg].width != sizeof(unsigned long))
++ cie = NULL;
++ }
++ if(!cie)
++ dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
++ }
++ if (cie != NULL) {
++ state.cieStart = ptr;
++ ptr = state.cieEnd;
++ state.cieEnd = end;
++ end = (const u8 *)(fde + 1) + *fde;
++ /* skip augmentation */
++ if (((const char *)(cie + 2))[1] == 'z') {
++ uleb128_t augSize = get_uleb128(&ptr, end);
++
++ if ((ptr += augSize) > end)
++ fde = NULL;
++ }
++ if(!fde)
++ dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
++ }
++ if (cie == NULL || fde == NULL) {
++#ifdef CONFIG_FRAME_POINTER
++ unsigned long top, bottom;
++
++ if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
++ return -EPERM;
++ top = STACK_TOP(frame->task);
++ bottom = STACK_BOTTOM(frame->task);
++# if FRAME_RETADDR_OFFSET < 0
++ if (UNW_SP(frame) < top
++ && UNW_FP(frame) <= UNW_SP(frame)
++ && bottom < UNW_FP(frame)
++# else
++ if (UNW_SP(frame) > top
++ && UNW_FP(frame) >= UNW_SP(frame)
++ && bottom > UNW_FP(frame)
++# endif
++ && !((UNW_SP(frame) | UNW_FP(frame))
++ & (sizeof(unsigned long) - 1))) {
++ unsigned long link;
++
++ if (!probe_kernel_address(
++ (unsigned long *)(UNW_FP(frame)
++ + FRAME_LINK_OFFSET),
++ link)
++# if FRAME_RETADDR_OFFSET < 0
++ && link > bottom && link < UNW_FP(frame)
++# else
++ && link > UNW_FP(frame) && link < bottom
++# endif
++ && !(link & (sizeof(link) - 1))
++ && !probe_kernel_address(
++ (unsigned long *)(UNW_FP(frame)
++ + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
++ UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
++# if FRAME_RETADDR_OFFSET < 0
++ -
++# else
++ +
++# endif
++ sizeof(UNW_PC(frame));
++ UNW_FP(frame) = link;
++ return 0;
++ }
++ }
++#endif
++ return -ENXIO;
++ }
++ state.org = startLoc;
++ memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
++ /* process instructions */
++ if (!processCFI(ptr, end, pc, ptrType, &state)
++ || state.loc > endLoc
++ || state.regs[retAddrReg].where == Nowhere
++ || state.cfa.reg >= ARRAY_SIZE(reg_info)
++ || reg_info[state.cfa.reg].width != sizeof(unsigned long)
++ || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
++ || state.cfa.offs % sizeof(unsigned long)) {
++ dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
++ return -EIO;
++ }
++ /* update frame */
++#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
++ if(frame->call_frame
++ && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
++ frame->call_frame = 0;
++#endif
++ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
++ startLoc = min((unsigned long)UNW_SP(frame), cfa);
++ endLoc = max((unsigned long)UNW_SP(frame), cfa);
++ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
++ startLoc = min(STACK_LIMIT(cfa), cfa);
++ endLoc = max(STACK_LIMIT(cfa), cfa);
++ }
++#ifndef CONFIG_64BIT
++# define CASES CASE(8); CASE(16); CASE(32)
++#else
++# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
++#endif
++ pc = UNW_PC(frame);
++ sp = UNW_SP(frame);
++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++ if (REG_INVALID(i)) {
++ if (state.regs[i].where == Nowhere)
++ continue;
++ dprintk(1, "Cannot restore register %u (%d).",
++ i, state.regs[i].where);
++ return -EIO;
++ }
++ switch(state.regs[i].where) {
++ default:
++ break;
++ case Register:
++ if (state.regs[i].value >= ARRAY_SIZE(reg_info)
++ || REG_INVALID(state.regs[i].value)
++ || reg_info[i].width > reg_info[state.regs[i].value].width) {
++ dprintk(1, "Cannot restore register %u from register %lu.",
++ i, state.regs[i].value);
++ return -EIO;
++ }
++ switch(reg_info[state.regs[i].value].width) {
++#define CASE(n) \
++ case sizeof(u##n): \
++ state.regs[i].value = FRAME_REG(state.regs[i].value, \
++ const u##n); \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported register size %u (%lu).",
++ reg_info[state.regs[i].value].width,
++ state.regs[i].value);
++ return -EIO;
++ }
++ break;
++ }
++ }
++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++ if (REG_INVALID(i))
++ continue;
++ switch(state.regs[i].where) {
++ case Nowhere:
++ if (reg_info[i].width != sizeof(UNW_SP(frame))
++ || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
++ != &UNW_SP(frame))
++ continue;
++ UNW_SP(frame) = cfa;
++ break;
++ case Register:
++ switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++ FRAME_REG(i, u##n) = state.regs[i].value; \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported register size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ break;
++ case Value:
++ if (reg_info[i].width != sizeof(unsigned long)) {
++ dprintk(1, "Unsupported value size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
++ * state.dataAlign;
++ break;
++ case Memory: {
++ unsigned long addr = cfa + state.regs[i].value
++ * state.dataAlign;
++
++ if ((state.regs[i].value * state.dataAlign)
++ % sizeof(unsigned long)
++ || addr < startLoc
++ || addr + sizeof(unsigned long) < addr
++ || addr + sizeof(unsigned long) > endLoc) {
++ dprintk(1, "Bad memory location %lx (%lx).",
++ addr, state.regs[i].value);
++ return -EIO;
++ }
++ switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++ probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
++ break
++ CASES;
++#undef CASE
++ default:
++ dprintk(1, "Unsupported memory size %u (%u).",
++ reg_info[i].width, i);
++ return -EIO;
++ }
++ }
++ break;
++ }
++ }
++
++ if (UNW_PC(frame) % state.codeAlign
++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++ dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
++ UNW_PC(frame), UNW_SP(frame));
++ return -EIO;
++ }
++ if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
++ dprintk(1, "No progress (%lx,%lx).", pc, sp);
++ return -EIO;
++ }
++
++ return 0;
++#undef CASES
++#undef FRAME_REG
++}
++EXPORT_SYMBOL(unwind);
++
++int unwind_init_frame_info(struct unwind_frame_info *info,
++ struct task_struct *tsk,
++ /*const*/ struct pt_regs *regs)
++{
++ info->task = tsk;
++ info->call_frame = 0;
++ arch_unw_init_frame_info(info, regs);
++
++ return 0;
++}
++EXPORT_SYMBOL(unwind_init_frame_info);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++int unwind_init_blocked(struct unwind_frame_info *info,
++ struct task_struct *tsk)
++{
++ info->task = tsk;
++ info->call_frame = 0;
++ arch_unw_init_blocked(info);
++
++ return 0;
++}
++EXPORT_SYMBOL(unwind_init_blocked);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++int unwind_init_running(struct unwind_frame_info *info,
++ asmlinkage int (*callback)(struct unwind_frame_info *,
++ void *arg),
++ void *arg)
++{
++ info->task = current;
++ info->call_frame = 0;
++
++ return arch_unwind_init_running(info, callback, arg);
++}
++EXPORT_SYMBOL(unwind_init_running);
++
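++/*
++ * Usage sketch (hypothetical caller): walk a blocked task's stack and
++ * print each return address until unwinding fails or the PC is zero.
++ */
++static void dump_task_stack(struct task_struct *tsk)
++{
++ struct unwind_frame_info info;
++
++ if (unwind_init_blocked(&info, tsk))
++ return;
++ while (unwind(&info) == 0 && UNW_PC(&info))
++ printk(KERN_DEBUG " [<%lx>]\n", UNW_PC(&info));
++}
++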
+diff -Nurb linux-2.6.22-570/kernel/user.c linux-2.6.22-591/kernel/user.c
+--- linux-2.6.22-570/kernel/user.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/user.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,17 +14,17 @@
+ #include <linux/bitops.h>
+ #include <linux/key.h>
+ #include <linux/interrupt.h>
++#include <linux/module.h>
++#include <linux/user_namespace.h>
+
+ /*
+ * UID task count cache, to get fast user lookup in "alloc_uid"
+ * when changing user ID's (ie setuid() and friends).
+ */
+
+-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
+-#define UIDHASH_SZ (1 << UIDHASH_BITS)
+ #define UIDHASH_MASK (UIDHASH_SZ - 1)
+ #define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
+-#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid)))
++#define uidhashentry(ns, xid, uid) ((ns)->uidhash_table + __uidhashfn(xid, uid))
+
+ static struct kmem_cache *uid_cachep;
+ static struct list_head uidhash_table[UIDHASH_SZ];
+@@ -94,9 +94,10 @@
+ {
+ struct user_struct *ret;
+ unsigned long flags;
++ struct user_namespace *ns = current->nsproxy->user_ns;
+
+ spin_lock_irqsave(&uidhash_lock, flags);
+- ret = uid_hash_find(xid, uid, uidhashentry(xid, uid));
++ ret = uid_hash_find(xid, uid, uidhashentry(ns, xid, uid));
+ spin_unlock_irqrestore(&uidhash_lock, flags);
+ return ret;
+ }
+@@ -122,7 +123,8 @@
+
+ struct user_struct * alloc_uid(xid_t xid, uid_t uid)
+ {
+- struct list_head *hashent = uidhashentry(xid, uid);
++ struct user_namespace *ns = current->nsproxy->user_ns;
++ struct list_head *hashent = uidhashentry(ns, xid, uid);
+ struct user_struct *up;
+
+ spin_lock_irq(&uidhash_lock);
+@@ -212,11 +214,11 @@
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+
+ for(n = 0; n < UIDHASH_SZ; ++n)
+- INIT_LIST_HEAD(uidhash_table + n);
++ INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
+
+ /* Insert the root user immediately (init already runs as root) */
+ spin_lock_irq(&uidhash_lock);
+- uid_hash_insert(&root_user, uidhashentry(0,0));
++ uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0, 0));
+ spin_unlock_irq(&uidhash_lock);
+
+ return 0;
+diff -Nurb linux-2.6.22-570/kernel/user_namespace.c linux-2.6.22-591/kernel/user_namespace.c
+--- linux-2.6.22-570/kernel/user_namespace.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/kernel/user_namespace.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,87 @@
++/*
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation, version 2 of the
++ * License.
++ */
++
++#include <linux/module.h>
++#include <linux/version.h>
++#include <linux/nsproxy.h>
++#include <linux/user_namespace.h>
++
++struct user_namespace init_user_ns = {
++ .kref = {
++ .refcount = ATOMIC_INIT(2),
++ },
++ .root_user = &root_user,
++};
++
++EXPORT_SYMBOL_GPL(init_user_ns);
++
++#ifdef CONFIG_USER_NS
++
++/*
++ * Clone a new ns copying an original user ns, setting refcount to 1
++ * @old_ns: namespace to clone
++ * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
++ */
++static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
++{
++ struct user_namespace *ns;
++ struct user_struct *new_user;
++ int n;
++
++ ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
++ if (!ns)
++ return ERR_PTR(-ENOMEM);
++
++ kref_init(&ns->kref);
++
++ for (n = 0; n < UIDHASH_SZ; ++n)
++ INIT_LIST_HEAD(ns->uidhash_table + n);
++
++ /* Insert new root user. */
++ ns->root_user = alloc_uid(ns, 0);
++ if (!ns->root_user) {
++ kfree(ns);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ /* Reset current->user with a new one */
++ new_user = alloc_uid(ns, current->uid);
++ if (!new_user) {
++ free_uid(ns->root_user);
++ kfree(ns);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ switch_uid(new_user);
++ return ns;
++}
++
++struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
++{
++ struct user_namespace *new_ns;
++
++ BUG_ON(!old_ns);
++ get_user_ns(old_ns);
++
++ if (!(flags & CLONE_NEWUSER))
++ return old_ns;
++
++ new_ns = clone_user_ns(old_ns);
++
++ put_user_ns(old_ns);
++ return new_ns;
++}
++
++void free_user_ns(struct kref *kref)
++{
++ struct user_namespace *ns;
++
++ ns = container_of(kref, struct user_namespace, kref);
++ kfree(ns);
++}
++
++#endif /* CONFIG_USER_NS */
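++
++/*
++ * Usage sketch (hypothetical caller; assumes put_user_ns() from the
++ * matching header): namespace lifetime follows the kref, so every
++ * reference obtained through copy_user_ns() must eventually be dropped.
++ */
++static void user_ns_ref_example(struct nsproxy *proxy)
++{
++ struct user_namespace *ns = copy_user_ns(0, proxy->user_ns);
++
++ if (!IS_ERR(ns))
++ put_user_ns(ns); /* balances the get in copy_user_ns() */
++}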
+diff -Nurb linux-2.6.22-570/kernel/utsname.c linux-2.6.22-591/kernel/utsname.c
+--- linux-2.6.22-570/kernel/utsname.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/kernel/utsname.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/utsname.h>
+ #include <linux/version.h>
+ #include <linux/vserver/global.h>
++#include <linux/err.h>
+
+ /*
+ * Clone a new ns copying an original utsname, setting refcount to 1
+@@ -25,11 +26,12 @@
+ struct uts_namespace *ns;
+
+ ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+- if (ns) {
++ if (!ns)
++ return ERR_PTR(-ENOMEM);
++
+ memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ kref_init(&ns->kref);
+- atomic_inc(&vs_global_uts_ns);
+- }
++
+ return ns;
+ }
+
+@@ -39,7 +41,7 @@
+ * utsname of this process won't be seen by parent, and vice
+ * versa.
+ */
+-struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
++struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
+ {
+ struct uts_namespace *new_ns;
+
+diff -Nurb linux-2.6.22-570/kernel/utsname_sysctl.c linux-2.6.22-591/kernel/utsname_sysctl.c
+--- linux-2.6.22-570/kernel/utsname_sysctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/kernel/utsname_sysctl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -18,10 +18,7 @@
+ static void *get_uts(ctl_table *table, int write)
+ {
+ char *which = table->data;
+-#ifdef CONFIG_UTS_NS
+- struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+- which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
+-#endif
++
+ if (!write)
+ down_read(&uts_sem);
+ else
+diff -Nurb linux-2.6.22-570/kernel/workqueue.c linux-2.6.22-591/kernel/workqueue.c
+--- linux-2.6.22-570/kernel/workqueue.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/kernel/workqueue.c 2007-12-21 15:36:12.000000000 -0500
+@@ -282,8 +282,8 @@
+ struct cpu_workqueue_struct *cwq = __cwq;
+ DEFINE_WAIT(wait);
+
+- if (!cwq->wq->freezeable)
+- current->flags |= PF_NOFREEZE;
++ if (cwq->wq->freezeable)
++ set_freezable();
+
+ set_user_nice(current, -5);
+
+diff -Nurb linux-2.6.22-570/lib/Kconfig.debug linux-2.6.22-591/lib/Kconfig.debug
+--- linux-2.6.22-570/lib/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/Kconfig.debug 2007-12-21 15:36:12.000000000 -0500
+@@ -364,6 +364,24 @@
+ some architectures or if you use external debuggers.
+ If you don't debug the kernel, you can say N.
+
++config UNWIND_INFO
++ bool "Compile the kernel with frame unwind information"
++ depends on !IA64 && !PARISC && !ARM
++ depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
++ help
++ If you say Y here the resulting kernel image will be slightly larger
++ but not slower, and it will give very useful debugging information.
++ If you don't debug the kernel, you can say N, but we may not be able
++ to solve problems without frame unwind information or frame pointers.
++
++config STACK_UNWIND
++ bool "Stack unwind support"
++ depends on UNWIND_INFO
++ depends on X86
++ help
++ This enables more precise stack traces, omitting all unrelated
++ occurrences of pointers into kernel code from the dump.
++
+ config FORCED_INLINING
+ bool "Force gcc to inline functions marked 'inline'"
+ depends on DEBUG_KERNEL
+@@ -409,6 +427,9 @@
+ config FAULT_INJECTION
+ bool "Fault-injection framework"
+ depends on DEBUG_KERNEL
++ # could support fp on X86_32 here too, but let's not
++ select UNWIND_INFO if X86
++ select STACK_UNWIND if X86
+ help
+ Provide fault-injection framework.
+ For more details, see Documentation/fault-injection/.
+@@ -445,3 +466,5 @@
+ select FRAME_POINTER
+ help
+ Provide stacktrace filter for fault-injection capabilities
++
++source "lib/Kconfig.kgdb"
+diff -Nurb linux-2.6.22-570/lib/Kconfig.kgdb linux-2.6.22-591/lib/Kconfig.kgdb
+--- linux-2.6.22-570/lib/Kconfig.kgdb 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/Kconfig.kgdb 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,255 @@
++
++config WANT_EXTRA_DEBUG_INFORMATION
++ bool
++ select DEBUG_INFO
++ select UNWIND_INFO
++ select FRAME_POINTER if X86 || SUPERH
++ default n
++
++config UNWIND_INFO
++ bool
++ default n
++
++config KGDB
++ bool "KGDB: kernel debugging with remote gdb"
++ select WANT_EXTRA_DEBUG_INFORMATION
++ select KGDB_ARCH_HAS_SHADOW_INFO if X86_64
++ depends on DEBUG_KERNEL && (ARM || X86 || MIPS || (SUPERH && !SUPERH64) || IA64 || PPC)
++ help
++ If you say Y here, it will be possible to remotely debug the
++ kernel using gdb. Documentation of kernel debugger is available
++ at http://kgdb.sourceforge.net as well as in DocBook form
++ in Documentation/DocBook/. If unsure, say N.
++
++config KGDB_ARCH_HAS_SHADOW_INFO
++ bool
++
++config KGDB_CONSOLE
++ bool "KGDB: Console messages through gdb"
++ depends on KGDB
++ help
++ If you say Y here, console messages will appear through gdb.
++ Other consoles such as tty or ttyS will continue to work as usual.
++ Note, that if you use this in conjunction with KGDB_ETH, if the
++ ethernet driver runs into an error condition during use with KGDB
++ it is possible to hit an infinite recursion, causing the kernel
++ to crash, and typically reboot. For this reason, it is preferable
++ to use NETCONSOLE in conjunction with KGDB_ETH instead of
++ KGDB_CONSOLE.
++
++choice
++ prompt "Method for KGDB communication"
++ depends on KGDB
++ default KGDB_MPSC if SERIAL_MPSC
++ default KGDB_CPM_UART if (CPM2 || 8xx)
++ default KGDB_SIBYTE if SIBYTE_SB1xxx_SOC
++ default KGDB_TXX9 if CPU_TX49XX
++ default KGDB_SH_SCI if SERIAL_SH_SCI
++ default KGDB_PXA_SERIAL if ARCH_PXA
++ default KGDB_AMBA_PL011 if ARM_AMBA
++ default KGDB_8250_NOMODULE
++ help
++ There are a number of different ways in which you can communicate
++ with KGDB. The most common is via serial, with the 8250 driver
++ (should your hardware have an 8250, or ns16550 style UART).
++ Another option is to use the NETPOLL framework and UDP, should
++ your ethernet card support this. Other options may exist.
++ You can elect to have one core I/O driver that is built into the
++ kernel for debugging as the kernel is booting, or to use only
++ kernel modules.
++
++config KGDB_ONLY_MODULES
++ bool "KGDB: Use only kernel modules for I/O"
++ depends on MODULES
++ help
++ Use only kernel modules to configure KGDB I/O after the
++ kernel is booted.
++
++config KGDB_8250_NOMODULE
++ bool "KGDB: On generic serial port (8250)"
++ select KGDB_8250
++ help
++ Uses generic serial port (8250) to communicate with the host
++ GDB. This is independent of the normal (SERIAL_8250) driver
++ for this chipset.
++
++config KGDBOE_NOMODULE
++ bool "KGDB: On ethernet - in kernel"
++ select KGDBOE
++ help
++ Uses the NETPOLL API to communicate with the host GDB via UDP.
++ In order for this to work, the ethernet interface specified must
++ support the NETPOLL API, and this must be initialized at boot.
++ See the documentation for syntax.
++
++config KGDB_MPSC
++ bool "KGDB: On MV64x60 MPSC"
++ depends on SERIAL_MPSC
++ help
++ Uses a Marvell GT64260B or MV64x60 Multi-Purpose Serial
++ Controller (MPSC) channel. Note that the GT64260A is not
++ supported.
++
++config KGDB_CPM_UART
++ bool "KGDB: On CPM UART"
++ depends on PPC && (CPM2 || 8xx)
++ help
++ Uses CPM UART to communicate with the host GDB.
++
++config KGDB_SIBYTE
++ bool "KGDB: On Broadcom SB1xxx serial port"
++ depends on MIPS && SIBYTE_SB1xxx_SOC
++
++config KGDB_TXX9
++ bool "KGDB: On TX49xx serial port"
++ depends on MIPS && CPU_TX49XX
++ help
++ Uses TX49xx serial port to communicate with the host GDB.
++
++config KGDB_SH_SCI
++ bool "KGDB: On SH SCI(F) serial port"
++ depends on SUPERH && SERIAL_SH_SCI
++ help
++ Uses the SH SCI(F) serial port to communicate with the host GDB.
++
++config KGDB_AMBA_PL011
++ bool "KGDB: On ARM AMBA PL011 Serial Port"
++ depends on ARM && ARCH_VERSATILE
++ help
++ Enables the KGDB serial driver for the AMBA bus PL011 serial
++ devices from ARM.
++
++config KGDB_PXA_SERIAL
++ bool "KGDB: On the PXA2xx serial port"
++ depends on ARCH_PXA
++ help
++ Enables the KGDB serial driver for the Intel PXA SoC.
++endchoice
++
++choice
++ prompt "PXA UART to use for KGDB"
++ depends on KGDB_PXA_SERIAL
++ default KGDB_PXA_FFUART
++
++config KGDB_PXA_FFUART
++ bool "FFUART"
++
++config KGDB_PXA_BTUART
++ bool "BTUART"
++
++config KGDB_PXA_STUART
++ bool "STUART"
++endchoice
++
++choice
++ prompt "SCC/SMC to use for KGDB"
++ depends on KGDB_CPM_UART
++ default KGDB_CPM_UART_SCC4 if ADS8272
++
++config KGDB_CPM_UART_SCC1
++ bool "SCC1"
++ depends on SERIAL_CPM_SCC1
++
++config KGDB_CPM_UART_SCC2
++ bool "SCC2"
++ depends on SERIAL_CPM_SCC2
++
++config KGDB_CPM_UART_SCC3
++ bool "SCC3"
++ depends on SERIAL_CPM_SCC3
++
++config KGDB_CPM_UART_SCC4
++ bool "SCC4"
++ depends on SERIAL_CPM_SCC4
++
++config KGDB_CPM_UART_SMC1
++ bool "SMC1"
++ depends on SERIAL_CPM_SMC1
++
++config KGDB_CPM_UART_SMC2
++ bool "SMC2"
++ depends on SERIAL_CPM_SMC2
++endchoice
++
++config KGDBOE
++ tristate "KGDB: On ethernet" if !KGDBOE_NOMODULE
++ depends on m && KGDB
++ select NETPOLL
++ select NETPOLL_TRAP
++ help
++ Uses the NETPOLL API to communicate with the host GDB via UDP.
++ In order for this to work, the ethernet interface specified must
++ support the NETPOLL API, and this must be initialized at boot.
++ See the documentation for syntax.
++
++config KGDB_8250
++ tristate "KGDB: On generic serial port (8250)" if !KGDB_8250_NOMODULE
++ depends on m && KGDB_ONLY_MODULES
++ help
++ Uses generic serial port (8250) to communicate with the host
++ GDB. This is independent of the normal (SERIAL_8250) driver
++ for this chipset.
++
++config KGDB_SIMPLE_SERIAL
++ bool "Simple selection of KGDB serial port"
++ depends on KGDB_8250_NOMODULE
++ default y
++ help
++ If you say Y here, you will only have to pick the baud rate
++ and port number that you wish to use for KGDB. Note that this
++ only works on architectures that register known serial ports
++ early on. If you say N, you will have to provide, either here
++ or on the command line, the type (I/O or MMIO), IRQ and
++ address to use. If in doubt, say Y.
++
++config KGDB_BAUDRATE
++ int "Debug serial port baud rate"
++ depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || \
++ KGDB_MPSC || KGDB_CPM_UART || \
++ KGDB_TXX9 || KGDB_PXA_SERIAL || KGDB_AMBA_PL011
++ default "115200"
++ help
++ gdb and the kernel stub need to agree on the baud rate to be
++ used. Standard rates from 9600 to 115200 are allowed, and this
++ may be overridden via the command line.
++
++config KGDB_PORT_NUM
++ int "Serial port number for KGDB"
++ range 0 1 if KGDB_MPSC
++ range 0 3
++ depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || KGDB_MPSC || KGDB_TXX9
++ default "1"
++ help
++ Pick the port number (0 based) for KGDB to use.
++
++config KGDB_AMBA_BASE
++ hex "AMBA PL011 Serial Port Base Address"
++ default 0x101f2000 if ARCH_VERSATILE
++ depends on KGDB_AMBA_PL011
++ help
++ Base address of the AMBA port that KGDB will use.
++
++config KGDB_AMBA_UARTCLK
++ int "AMBAPL011 Serial UART Clock Frequency"
++ default 24000000 if ARCH_VERSATILE
++ depends on KGDB_AMBA_PL011
++ help
++ Frequency (in Hz) of the ARM AMBA UART clock.
++
++config KGDB_AMBA_IRQ
++ int "AMBA PL011 Serial Port IRQ"
++ default 13 if ARCH_VERSATILE
++ depends on KGDB_AMBA_PL011
++ help
++ Pick the IRQ of the AMBA port that KGDB will use.
++
++config KGDB_8250_CONF_STRING
++ string "Configuration string for KGDB"
++ depends on KGDB_8250_NOMODULE && !KGDB_SIMPLE_SERIAL
++ default "io,2f8,115200,3" if X86
++ help
++ The format of this string should be <io or
++ mmio>,<address>,<baud rate>,<irq>. For example, to use the
++ serial port on an i386 box located at 0x2f8 and 115200 baud
++ on IRQ 3, use:
++ io,2f8,115200,3
+diff -Nurb linux-2.6.22-570/lib/Makefile linux-2.6.22-591/lib/Makefile
+--- linux-2.6.22-570/lib/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -5,9 +5,10 @@
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ rbtree.o radix-tree.o dump_stack.o \
+ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+- sha1.o irq_regs.o reciprocal_div.o
++ sha1.o irq_regs.o reciprocal_div.o argv_split.o \
++ check_signature.o
+
+-lib-$(CONFIG_MMU) += ioremap.o
++lib-$(CONFIG_MMU) += ioremap.o pagewalk.o
+ lib-$(CONFIG_SMP) += cpumask.o
+
+ lib-y += kobject.o kref.o kobject_uevent.o klist.o
+diff -Nurb linux-2.6.22-570/lib/argv_split.c linux-2.6.22-591/lib/argv_split.c
+--- linux-2.6.22-570/lib/argv_split.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/argv_split.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,159 @@
++/*
++ * Helper function for splitting a string into an argv-like array.
++ */
++
++#ifndef TEST
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/bug.h>
++#endif
++
++static const char *skip_sep(const char *cp)
++{
++ while (*cp && isspace(*cp))
++ cp++;
++
++ return cp;
++}
++
++static const char *skip_arg(const char *cp)
++{
++ while (*cp && !isspace(*cp))
++ cp++;
++
++ return cp;
++}
++
++static int count_argc(const char *str)
++{
++ int count = 0;
++
++ while (*str) {
++ str = skip_sep(str);
++ if (*str) {
++ count++;
++ str = skip_arg(str);
++ }
++ }
++
++ return count;
++}
++
++/**
++ * argv_free - free an argv
++ *
++ * @argv: the argument vector to be freed
++ *
++ * Frees an argv and the strings it points to.
++ */
++void argv_free(char **argv)
++{
++ char **p;
++ for (p = argv; *p; p++)
++ kfree(*p);
++
++ kfree(argv);
++}
++EXPORT_SYMBOL(argv_free);
++
++/**
++ * argv_split - split a string at whitespace, returning an argv
++ * @gfp: the GFP mask used to allocate memory
++ * @str: the string to be split
++ * @argcp: returned argument count
++ *
++ * Returns an array of pointers to strings which are split out from
++ * @str. This is performed by strictly splitting on white-space; no
++ * quote processing is performed. Multiple whitespace characters are
++ * considered to be a single argument separator. The returned array
++ * is always NULL-terminated. Returns NULL on memory allocation
++ * failure.
++ */
++char **argv_split(gfp_t gfp, const char *str, int *argcp)
++{
++ int argc = count_argc(str);
++ char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp);
++ char **argvp;
++
++ if (argv == NULL)
++ goto out;
++
++ *argcp = argc;
++ argvp = argv;
++
++ while (*str) {
++ str = skip_sep(str);
++
++ if (*str) {
++ const char *p = str;
++ char *t;
++
++ str = skip_arg(str);
++
++ t = kstrndup(p, str-p, gfp);
++ if (t == NULL)
++ goto fail;
++ *argvp++ = t;
++ }
++ }
++ *argvp = NULL;
++
++ out:
++ return argv;
++
++ fail:
++ argv_free(argv);
++ return NULL;
++}
++EXPORT_SYMBOL(argv_split);
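++
++/*
++ * Usage sketch (hypothetical caller): split a command line into words,
++ * use the vector, then release it.
++ */
++static int split_and_run(const char *cmdline)
++{
++ int argc;
++ char **argv = argv_split(GFP_KERNEL, cmdline, &argc);
++
++ if (!argv)
++ return -ENOMEM;
++ /* ... hand argv/argc to e.g. call_usermodehelper() ... */
++ argv_free(argv);
++ return 0;
++}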
++
++#ifdef TEST
++#define _GNU_SOURCE
++#include <ctype.h>
++#include <stdlib.h>
++#include <stdio.h>
++#include <string.h>
++
++typedef enum {
++ GFP_KERNEL,
++} gfp_t;
++#define kzalloc(size, x) malloc(size)
++#define kfree(x) free(x)
++#define kstrndup(s, n, gfp) strndup(s, n)
++#define BUG() abort()
++
++int main() {
++ const char *testvec[] = {
++ "",
++ "x",
++ "\"",
++ "\\\0",
++ "\"",
++ "test one two three",
++ "arg\"foo\"bar biff",
++ "one two\\ three four",
++ "one \"two three\" four",
++ NULL,
++ };
++ const char **t;
++
++ for (t = testvec; *t; t++) {
++ char **argv;
++ int argc;
++ char **a;
++
++ printf("%d: test [%s]\n", t-testvec, *t);
++
++ argv = argv_split(GFP_KERNEL, *t, &argc);
++
++ printf("argc=%d vec=", argc);
++ for (a = argv; *a; a++)
++ printf("[%s] ", *a);
++ printf("\n");
++
++ argv_free(argv);
++ }
++
++ return 0;
++}
++#endif
+diff -Nurb linux-2.6.22-570/lib/check_signature.c linux-2.6.22-591/lib/check_signature.c
+--- linux-2.6.22-570/lib/check_signature.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/check_signature.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,26 @@
++#include <linux/io.h>
++#include <linux/module.h>
++
++/**
++ * check_signature - find BIOS signatures
++ * @io_addr: mmio address to check
++ * @signature: signature block
++ * @length: length of signature
++ *
++ * Perform a signature comparison with the mmio address io_addr. This
++ * address should have been obtained by ioremap.
++ * Returns 1 on a match.
++ */
++
++int check_signature(const volatile void __iomem *io_addr,
++ const unsigned char *signature, int length)
++{
++ while (length--) {
++ if (readb(io_addr) != *signature)
++ return 0;
++ io_addr++;
++ signature++;
++ }
++ return 1;
++}
++EXPORT_SYMBOL(check_signature);
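++
++/*
++ * Usage sketch (hypothetical probe; "PCIR" is only an example signature):
++ * remap a ROM window, test for the signature, and unmap again.
++ */
++static int __init rom_has_pcir(unsigned long phys)
++{
++ void __iomem *p = ioremap(phys, 4);
++ int found;
++
++ if (!p)
++ return 0;
++ found = check_signature(p, (const unsigned char *)"PCIR", 4);
++ iounmap(p);
++ return found;
++}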
+diff -Nurb linux-2.6.22-570/lib/idr.c linux-2.6.22-591/lib/idr.c
+--- linux-2.6.22-570/lib/idr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/idr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -70,6 +70,26 @@
+ spin_unlock_irqrestore(&idp->lock, flags);
+ }
+
++static void idr_mark_full(struct idr_layer **pa, int id)
++{
++ struct idr_layer *p = pa[0];
++ int l = 0;
++
++ __set_bit(id & IDR_MASK, &p->bitmap);
++ /*
++ * If this layer is full mark the bit in the layer above to
++ * show that this part of the radix tree is full. This may
++ * complete the layer above and require walking up the radix
++ * tree.
++ */
++ while (p->bitmap == IDR_FULL) {
++ if (!(p = pa[++l]))
++ break;
++ id = id >> IDR_BITS;
++ __set_bit((id & IDR_MASK), &p->bitmap);
++ }
++}
++
+ /**
+ * idr_pre_get - reserve resources for idr allocation
+ * @idp: idr handle
+@@ -95,11 +115,10 @@
+ }
+ EXPORT_SYMBOL(idr_pre_get);
+
+-static int sub_alloc(struct idr *idp, void *ptr, int *starting_id)
++static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
+ {
+ int n, m, sh;
+ struct idr_layer *p, *new;
+- struct idr_layer *pa[MAX_LEVEL];
+ int l, id;
+ long bm;
+
+@@ -144,30 +163,13 @@
+ pa[l--] = p;
+ p = p->ary[m];
+ }
+- /*
+- * We have reached the leaf node, plant the
+- * users pointer and return the raw id.
+- */
+- p->ary[m] = (struct idr_layer *)ptr;
+- __set_bit(m, &p->bitmap);
+- p->count++;
+- /*
+- * If this layer is full mark the bit in the layer above
+- * to show that this part of the radix tree is full.
+- * This may complete the layer above and require walking
+- * up the radix tree.
+- */
+- n = id;
+- while (p->bitmap == IDR_FULL) {
+- if (!(p = pa[++l]))
+- break;
+- n = n >> IDR_BITS;
+- __set_bit((n & IDR_MASK), &p->bitmap);
+- }
+- return(id);
++
++ pa[l] = p;
++ return id;
+ }
+
+-static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
++static int idr_get_empty_slot(struct idr *idp, int starting_id,
++ struct idr_layer **pa)
+ {
+ struct idr_layer *p, *new;
+ int layers, v, id;
+@@ -213,12 +215,31 @@
+ }
+ idp->top = p;
+ idp->layers = layers;
+- v = sub_alloc(idp, ptr, &id);
++ v = sub_alloc(idp, &id, pa);
+ if (v == -2)
+ goto build_up;
+ return(v);
+ }
+
++static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
++{
++ struct idr_layer *pa[MAX_LEVEL];
++ int id;
++
++ id = idr_get_empty_slot(idp, starting_id, pa);
++ if (id >= 0) {
++ /*
++ * Successfully found an empty slot. Install the user
++ * pointer and mark the slot full.
++ */
++ pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
++ pa[0]->count++;
++ idr_mark_full(pa, id);
++ }
++
++ return id;
++}
++
+ /**
+ * idr_get_new_above - allocate new idr entry above or equal to a start id
+ * @idp: idr handle
+@@ -473,3 +494,248 @@
+ spin_lock_init(&idp->lock);
+ }
+ EXPORT_SYMBOL(idr_init);
++
++
++/*
++ * IDA - IDR based ID allocator
++ *
++ * This is an id allocator without id -> pointer translation. Memory
++ * usage is much lower than full blown idr because each id only
++ * occupies a bit. ida uses a custom leaf node which contains
++ * IDA_BITMAP_BITS slots.
++ *
++ * 2007-04-25 written by Tejun Heo <htejun@gmail.com>
++ */
++
++static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
++{
++ unsigned long flags;
++
++ if (!ida->free_bitmap) {
++ spin_lock_irqsave(&ida->idr.lock, flags);
++ if (!ida->free_bitmap) {
++ ida->free_bitmap = bitmap;
++ bitmap = NULL;
++ }
++ spin_unlock_irqrestore(&ida->idr.lock, flags);
++ }
++
++ kfree(bitmap);
++}
++
++/**
++ * ida_pre_get - reserve resources for ida allocation
++ * @ida: ida handle
++ * @gfp_mask: memory allocation flag
++ *
++ * This function should be called prior to locking and calling the
++ * following function. It preallocates enough memory to satisfy the
++ * worst possible allocation.
++ *
++ * If the system is REALLY out of memory this function returns 0,
++ * otherwise 1.
++ */
++int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
++{
++ /* allocate idr_layers */
++ if (!idr_pre_get(&ida->idr, gfp_mask))
++ return 0;
++
++ /* allocate free_bitmap */
++ if (!ida->free_bitmap) {
++ struct ida_bitmap *bitmap;
++
++ bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
++ if (!bitmap)
++ return 0;
++
++ free_bitmap(ida, bitmap);
++ }
++
++ return 1;
++}
++EXPORT_SYMBOL(ida_pre_get);
++
++/**
++ * ida_get_new_above - allocate new ID above or equal to a start id
++ * @ida: ida handle
++ * @starting_id: id to start search at
++ * @p_id: pointer to the allocated handle
++ *
++ * Allocate a new ID above or equal to @starting_id. It should be called with
++ * any required locks.
++ *
++ * If memory is required, it will return -EAGAIN; you should unlock
++ * and go back to the ida_pre_get() call. If the ida is full, it will
++ * return -ENOSPC.
++ *
++ * @p_id returns a value in the range 0 ... 0x7fffffff.
++ */
++int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
++{
++ struct idr_layer *pa[MAX_LEVEL];
++ struct ida_bitmap *bitmap;
++ unsigned long flags;
++ int idr_id = starting_id / IDA_BITMAP_BITS;
++ int offset = starting_id % IDA_BITMAP_BITS;
++ int t, id;
++
++ restart:
++ /* get vacant slot */
++ t = idr_get_empty_slot(&ida->idr, idr_id, pa);
++ if (t < 0) {
++ if (t == -1)
++ return -EAGAIN;
++ else /* will be -3 */
++ return -ENOSPC;
++ }
++
++ if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
++ return -ENOSPC;
++
++ if (t != idr_id)
++ offset = 0;
++ idr_id = t;
++
++ /* if bitmap isn't there, create a new one */
++ bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
++ if (!bitmap) {
++ spin_lock_irqsave(&ida->idr.lock, flags);
++ bitmap = ida->free_bitmap;
++ ida->free_bitmap = NULL;
++ spin_unlock_irqrestore(&ida->idr.lock, flags);
++
++ if (!bitmap)
++ return -EAGAIN;
++
++ memset(bitmap, 0, sizeof(struct ida_bitmap));
++ pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
++ pa[0]->count++;
++ }
++
++ /* look for an empty slot */
++ t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
++ if (t == IDA_BITMAP_BITS) {
++ /* no empty slot after offset, continue to the next chunk */
++ idr_id++;
++ offset = 0;
++ goto restart;
++ }
++
++ id = idr_id * IDA_BITMAP_BITS + t;
++ if (id >= MAX_ID_BIT)
++ return -ENOSPC;
++
++ __set_bit(t, bitmap->bitmap);
++ if (++bitmap->nr_busy == IDA_BITMAP_BITS)
++ idr_mark_full(pa, idr_id);
++
++ *p_id = id;
++
++ /* Each leaf node can handle nearly a thousand slots and the
++ * whole idea of ida is to have a small memory footprint.
++ * Throw away extra resources one by one after each successful
++ * allocation.
++ */
++ if (ida->idr.id_free_cnt || ida->free_bitmap) {
++ struct idr_layer *p = alloc_layer(&ida->idr);
++ if (p)
++ kmem_cache_free(idr_layer_cache, p);
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL(ida_get_new_above);
++
++/**
++ * ida_get_new - allocate new ID
++ * @ida: idr handle
++ * @p_id: pointer to the allocated handle
++ *
++ * Allocate new ID. It should be called with any required locks.
++ *
++ * If memory is required, it will return -EAGAIN; you should unlock
++ * and go back to the ida_pre_get() call. If the ida is full, it will
++ * return -ENOSPC.
++ *
++ * @p_id returns a value in the range 0 ... 0x7fffffff.
++ */
++int ida_get_new(struct ida *ida, int *p_id)
++{
++ return ida_get_new_above(ida, 0, p_id);
++}
++EXPORT_SYMBOL(ida_get_new);
++
++/**
++ * ida_remove - remove the given ID
++ * @ida: ida handle
++ * @id: ID to free
++ */
++void ida_remove(struct ida *ida, int id)
++{
++ struct idr_layer *p = ida->idr.top;
++ int shift = (ida->idr.layers - 1) * IDR_BITS;
++ int idr_id = id / IDA_BITMAP_BITS;
++ int offset = id % IDA_BITMAP_BITS;
++ int n;
++ struct ida_bitmap *bitmap;
++
++ /* clear full bits while looking up the leaf idr_layer */
++ while ((shift > 0) && p) {
++ n = (idr_id >> shift) & IDR_MASK;
++ __clear_bit(n, &p->bitmap);
++ p = p->ary[n];
++ shift -= IDR_BITS;
++ }
++
++ if (p == NULL)
++ goto err;
++
++ n = idr_id & IDR_MASK;
++ __clear_bit(n, &p->bitmap);
++
++ bitmap = (void *)p->ary[n];
++ if (!test_bit(offset, bitmap->bitmap))
++ goto err;
++
++ /* update bitmap and remove it if empty */
++ __clear_bit(offset, bitmap->bitmap);
++ if (--bitmap->nr_busy == 0) {
++ __set_bit(n, &p->bitmap); /* to please idr_remove() */
++ idr_remove(&ida->idr, idr_id);
++ free_bitmap(ida, bitmap);
++ }
++
++ return;
++
++ err:
++ printk(KERN_WARNING
++ "ida_remove called for id=%d which is not allocated.\n", id);
++}
++EXPORT_SYMBOL(ida_remove);
++
++/**
++ * ida_destroy - release all cached layers within an ida tree
++ * @ida: ida handle
++ */
++void ida_destroy(struct ida *ida)
++{
++ idr_destroy(&ida->idr);
++ kfree(ida->free_bitmap);
++}
++EXPORT_SYMBOL(ida_destroy);
++
++/**
++ * ida_init - initialize ida handle
++ * @ida: ida handle
++ *
++ * This function is used to set up the handle (@ida) that you will pass
++ * to the rest of the functions.
++ */
++void ida_init(struct ida *ida)
++{
++ memset(ida, 0, sizeof(struct ida));
++ idr_init(&ida->idr);
++}
++EXPORT_SYMBOL(ida_init);
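
Putting the ida kerneldoc together, a caller follows the pre-get/lock/retry protocol described above; a minimal sketch, assuming the caller provides its own lock and has already run ida_init():

    static DEFINE_SPINLOCK(my_lock);   /* hypothetical caller lock */
    static struct ida my_ida;          /* assumed ida_init()ed at setup */

    static int my_alloc_id(void)
    {
        int id, err;

        do {
            if (!ida_pre_get(&my_ida, GFP_KERNEL))
                return -ENOMEM;
            spin_lock(&my_lock);
            err = ida_get_new(&my_ida, &id);
            spin_unlock(&my_lock);
        } while (err == -EAGAIN);      /* ran out of preallocated memory */

        return err ? err : id;
    }
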
+diff -Nurb linux-2.6.22-570/lib/kobject.c linux-2.6.22-591/lib/kobject.c
+--- linux-2.6.22-570/lib/kobject.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/kobject.c 2007-12-21 15:36:15.000000000 -0500
+@@ -44,11 +44,11 @@
+ return error;
+ }
+
+-static int create_dir(struct kobject * kobj, struct dentry *shadow_parent)
++static int create_dir(struct kobject * kobj)
+ {
+ int error = 0;
+ if (kobject_name(kobj)) {
+- error = sysfs_create_dir(kobj, shadow_parent);
++ error = sysfs_create_dir(kobj);
+ if (!error) {
+ if ((error = populate_dir(kobj)))
+ sysfs_remove_dir(kobj);
+@@ -157,12 +157,11 @@
+ }
+
+ /**
+- * kobject_shadow_add - add an object to the hierarchy.
++ * kobject_add - add an object to the hierarchy.
+ * @kobj: object.
+- * @shadow_parent: sysfs directory to add to.
+ */
+
+-int kobject_shadow_add(struct kobject * kobj, struct dentry *shadow_parent)
++int kobject_add(struct kobject * kobj)
+ {
+ int error = 0;
+ struct kobject * parent;
+@@ -194,7 +193,7 @@
+ kobj->parent = parent;
+ }
+
+- error = create_dir(kobj, shadow_parent);
++ error = create_dir(kobj);
+ if (error) {
+ /* unlink does the kobject_put() for us */
+ unlink(kobj);
+@@ -216,16 +215,6 @@
+ }
+
+ /**
+- * kobject_add - add an object to the hierarchy.
+- * @kobj: object.
+- */
+-int kobject_add(struct kobject * kobj)
+-{
+- return kobject_shadow_add(kobj, NULL);
+-}
+-
+-
+-/**
+ * kobject_register - initialize and add an object.
+ * @kobj: object in question.
+ */
+@@ -338,7 +327,7 @@
+ /* Note : if we want to send the new name alone, not the full path,
+ * we could probably use kobject_name(kobj); */
+
+- error = sysfs_rename_dir(kobj, kobj->parent->dentry, new_name);
++ error = sysfs_rename_dir(kobj, new_name);
+
+ /* This function is mostly/only used for network interface.
+ * Some hotplug package track interfaces by their name and
+@@ -355,27 +344,6 @@
+ }
+
+ /**
+- * kobject_rename - change the name of an object
+- * @kobj: object in question.
+- * @new_parent: object's new parent
+- * @new_name: object's new name
+- */
+-
+-int kobject_shadow_rename(struct kobject * kobj, struct dentry *new_parent,
+- const char *new_name)
+-{
+- int error = 0;
+-
+- kobj = kobject_get(kobj);
+- if (!kobj)
+- return -EINVAL;
+- error = sysfs_rename_dir(kobj, new_parent, new_name);
+- kobject_put(kobj);
+-
+- return error;
+-}
+-
+-/**
+ * kobject_move - move object to another parent
+ * @kobj: object in question.
+ * @new_parent: object's new parent (can be NULL)
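
With the shadow-directory variants removed above, registration reduces to the plain entry points; a sketch in which the kset and name are hypothetical:

    static int example_register(struct kset *my_kset)
    {
        struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);

        if (!kobj)
            return -ENOMEM;
        kobject_set_name(kobj, "example");   /* hypothetical name */
        kobj->kset = my_kset;
        kobject_init(kobj);
        return kobject_add(kobj);   /* was kobject_shadow_add(kobj, NULL) */
    }
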
+diff -Nurb linux-2.6.22-570/lib/kobject_uevent.c linux-2.6.22-591/lib/kobject_uevent.c
+--- linux-2.6.22-570/lib/kobject_uevent.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/kobject_uevent.c 2007-12-21 15:36:15.000000000 -0500
+@@ -208,7 +208,7 @@
+ argv [0] = uevent_helper;
+ argv [1] = (char *)subsystem;
+ argv [2] = NULL;
+- call_usermodehelper (argv[0], argv, envp, 0);
++ call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC);
+ }
+
+ exit:
+@@ -290,9 +290,8 @@
+ #if defined(CONFIG_NET)
+ static int __init kobject_uevent_init(void)
+ {
+- uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
+- NULL, THIS_MODULE);
+-
++ uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
++ 1, NULL, NULL, THIS_MODULE);
+ if (!uevent_sock) {
+ printk(KERN_ERR
+ "kobject_uevent: unable to create netlink socket!\n");
+diff -Nurb linux-2.6.22-570/lib/pagewalk.c linux-2.6.22-591/lib/pagewalk.c
+--- linux-2.6.22-570/lib/pagewalk.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/pagewalk.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,112 @@
++#include <linux/mm.h>
++#include <linux/highmem.h>
++
++static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pte_t *pte;
++ int err;
++
++ for (pte = pte_offset_map(pmd, addr); addr != end;
++ addr += PAGE_SIZE, pte++) {
++ if (pte_none(*pte))
++ continue;
++ err = walk->pte_entry(pte, addr, addr, private);
++ if (err) {
++ pte_unmap(pte);
++ return err;
++ }
++ }
++ pte_unmap(pte);
++ return 0;
++}
++
++static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pmd_t *pmd;
++ unsigned long next;
++ int err;
++
++ for (pmd = pmd_offset(pud, addr); addr != end;
++ pmd++, addr = next) {
++ next = pmd_addr_end(addr, end);
++ if (pmd_none_or_clear_bad(pmd))
++ continue;
++ if (walk->pmd_entry) {
++ err = walk->pmd_entry(pmd, addr, next, private);
++ if (err)
++ return err;
++ }
++ if (walk->pte_entry) {
++ err = walk_pte_range(pmd, addr, next, walk, private);
++ if (err)
++ return err;
++ }
++ }
++ return 0;
++}
++
++static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pud_t *pud;
++ unsigned long next;
++ int err;
++
++ for (pud = pud_offset(pgd, addr); addr != end;
++ pud++, addr = next) {
++ next = pud_addr_end(addr, end);
++ if (pud_none_or_clear_bad(pud))
++ continue;
++ if (walk->pud_entry) {
++ err = walk->pud_entry(pud, addr, next, private);
++ if (err)
++ return err;
++ }
++ if (walk->pmd_entry || walk->pte_entry) {
++ err = walk_pmd_range(pud, addr, next, walk, private);
++ if (err)
++ return err;
++ }
++ }
++ return 0;
++}
++
++/**
++ * walk_page_range - walk a memory map's page tables with a callback
++ * @mm: memory map to walk
++ * @addr: starting address
++ * @end: ending address
++ * @walk: set of callbacks to invoke for each level of the tree
++ * @private: private data passed to the callback function
++ *
++ * Recursively walk the page table for the memory area in a VMA, calling
++ * a callback for every bottom-level (PTE) page table.
++ */
++int walk_page_range(struct mm_struct *mm,
++ unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pgd_t *pgd;
++ unsigned long next;
++ int err;
++
++ for (pgd = pgd_offset(mm, addr); addr != end;
++ pgd++, addr = next) {
++ next = pgd_addr_end(addr, end);
++ if (pgd_none_or_clear_bad(pgd))
++ continue;
++ if (walk->pgd_entry) {
++ err = walk->pgd_entry(pgd, addr, next, private);
++ if (err)
++ return err;
++ }
++ if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) {
++ err = walk_pud_range(pgd, addr, next, walk, private);
++ if (err)
++ return err;
++ }
++ }
++ return 0;
++}
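
A caller sketch for walk_page_range(); the callback and counter are hypothetical, and struct mm_walk is assumed to carry the pgd/pud/pmd/pte callback slots used above (added to the headers elsewhere in this series). Setting only pte_entry makes the walker descend to PTE level:

    /* Count present PTEs in [start, end) -- a hypothetical pte_entry user. */
    static int count_pte(pte_t *pte, unsigned long addr,
                         unsigned long end, void *private)
    {
        if (pte_present(*pte))
            (*(unsigned long *)private)++;
        return 0;                  /* non-zero would abort the walk */
    }

    static unsigned long count_present(struct mm_struct *mm,
                                       unsigned long start, unsigned long end)
    {
        unsigned long pages = 0;
        struct mm_walk walk = { .pte_entry = count_pte };

        walk_page_range(mm, start, end, &walk, &pages);
        return pages;
    }
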
+diff -Nurb linux-2.6.22-570/lib/radix-tree.c linux-2.6.22-591/lib/radix-tree.c
+--- linux-2.6.22-570/lib/radix-tree.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/radix-tree.c 2007-12-21 15:36:12.000000000 -0500
+@@ -93,7 +93,8 @@
+ struct radix_tree_node *ret;
+ gfp_t gfp_mask = root_gfp_mask(root);
+
+- ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
++ ret = kmem_cache_alloc(radix_tree_node_cachep,
++ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
+ if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
+ struct radix_tree_preload *rtp;
+
+@@ -137,7 +138,8 @@
+ rtp = &__get_cpu_var(radix_tree_preloads);
+ while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
+ preempt_enable();
+- node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
++ node = kmem_cache_alloc(radix_tree_node_cachep,
++ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
+ if (node == NULL)
+ goto out;
+ preempt_disable();
+diff -Nurb linux-2.6.22-570/mm/filemap.c linux-2.6.22-591/mm/filemap.c
+--- linux-2.6.22-570/mm/filemap.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/filemap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1334,39 +1334,38 @@
+ #define MMAP_LOTSAMISS (100)
+
+ /**
+- * filemap_nopage - read in file data for page fault handling
+- * @area: the applicable vm_area
+- * @address: target address to read in
+- * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
++ * filemap_fault - read in file data for page fault handling
++ * @vma: user vma (not used)
++ * @fdata: the applicable fault_data
+ *
+- * filemap_nopage() is invoked via the vma operations vector for a
++ * filemap_fault() is invoked via the vma operations vector for a
+ * mapped memory region to read in file data during a page fault.
+ *
+ * The goto's are kind of ugly, but this streamlines the normal case of having
+ * it in the page cache, and handles the special cases reasonably without
+ * having a lot of duplicated code.
+ */
+-struct page *filemap_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+ {
+ int error;
+- struct file *file = area->vm_file;
++ struct file *file = vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ struct file_ra_state *ra = &file->f_ra;
+ struct inode *inode = mapping->host;
+ struct page *page;
+- unsigned long size, pgoff;
+- int did_readaround = 0, majmin = VM_FAULT_MINOR;
++ unsigned long size;
++ int did_readaround = 0;
+
+- pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
++ fdata->type = VM_FAULT_MINOR;
++
++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
+-retry_all:
+ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+- if (pgoff >= size)
++ if (fdata->pgoff >= size)
+ goto outside_data_content;
+
+ /* If we don't want any read-ahead, don't bother */
+- if (VM_RandomReadHint(area))
++ if (VM_RandomReadHint(vma))
+ goto no_cached_page;
+
+ /*
+@@ -1375,19 +1374,19 @@
+ *
+ * For sequential accesses, we use the generic readahead logic.
+ */
+- if (VM_SequentialReadHint(area))
+- page_cache_readahead(mapping, ra, file, pgoff, 1);
++ if (VM_SequentialReadHint(vma))
++ page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
+
+ /*
+ * Do we have something in the page cache already?
+ */
+ retry_find:
+- page = find_get_page(mapping, pgoff);
++ page = find_lock_page(mapping, fdata->pgoff);
+ if (!page) {
+ unsigned long ra_pages;
+
+- if (VM_SequentialReadHint(area)) {
+- handle_ra_miss(mapping, ra, pgoff);
++ if (VM_SequentialReadHint(vma)) {
++ handle_ra_miss(mapping, ra, fdata->pgoff);
+ goto no_cached_page;
+ }
+ ra->mmap_miss++;
+@@ -1404,7 +1403,7 @@
+ * check did_readaround, as this is an inner loop.
+ */
+ if (!did_readaround) {
+- majmin = VM_FAULT_MAJOR;
++ fdata->type = VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ }
+ did_readaround = 1;
+@@ -1412,11 +1411,11 @@
+ if (ra_pages) {
+ pgoff_t start = 0;
+
+- if (pgoff > ra_pages / 2)
+- start = pgoff - ra_pages / 2;
++ if (fdata->pgoff > ra_pages / 2)
++ start = fdata->pgoff - ra_pages / 2;
+ do_page_cache_readahead(mapping, file, start, ra_pages);
+ }
+- page = find_get_page(mapping, pgoff);
++ page = find_lock_page(mapping, fdata->pgoff);
+ if (!page)
+ goto no_cached_page;
+ }
+@@ -1425,19 +1424,23 @@
+ ra->mmap_hit++;
+
+ /*
+- * Ok, found a page in the page cache, now we need to check
+- * that it's up-to-date.
++ * We have a locked page in the page cache, now we need to check
++ * that it's up-to-date. If not, it is going to be due to an error.
+ */
+- if (!PageUptodate(page))
++ if (unlikely(!PageUptodate(page)))
+ goto page_not_uptodate;
+
+-success:
++ /* Must recheck i_size under page lock */
++ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++ if (unlikely(fdata->pgoff >= size)) {
++ unlock_page(page);
++ goto outside_data_content;
++ }
++
+ /*
+ * Found the page and have a reference on it.
+ */
+ mark_page_accessed(page);
+- if (type)
+- *type = majmin;
+ return page;
+
+ outside_data_content:
+@@ -1445,15 +1448,17 @@
+ * An external ptracer can access pages that normally aren't
+ * accessible..
+ */
+- if (area->vm_mm == current->mm)
+- return NOPAGE_SIGBUS;
++ if (vma->vm_mm == current->mm) {
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
++ }
+ /* Fall through to the non-read-ahead case */
+ no_cached_page:
+ /*
+ * We're only likely to ever get here if MADV_RANDOM is in
+ * effect.
+ */
+- error = page_cache_read(file, pgoff);
++ error = page_cache_read(file, fdata->pgoff);
+
+ /*
+ * The page we want has now been added to the page cache.
+@@ -1469,12 +1474,15 @@
+ * to schedule I/O.
+ */
+ if (error == -ENOMEM)
+- return NOPAGE_OOM;
+- return NOPAGE_SIGBUS;
++ fdata->type = VM_FAULT_OOM;
++ else
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
+
+ page_not_uptodate:
++ /* IO error path */
+ if (!did_readaround) {
+- majmin = VM_FAULT_MAJOR;
++ fdata->type = VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ }
+
+@@ -1484,38 +1492,39 @@
+ * because there really aren't any performance issues here
+ * and we need to check for errors.
+ */
+- lock_page(page);
+-
+- /* Somebody truncated the page on us? */
+- if (!page->mapping) {
+- unlock_page(page);
+- page_cache_release(page);
+- goto retry_all;
+- }
+-
+- /* Somebody else successfully read it in? */
+- if (PageUptodate(page)) {
+- unlock_page(page);
+- goto success;
+- }
+ ClearPageError(page);
+ error = mapping->a_ops->readpage(file, page);
+- if (!error) {
+- wait_on_page_locked(page);
+- if (PageUptodate(page))
+- goto success;
+- } else if (error == AOP_TRUNCATED_PAGE) {
+ page_cache_release(page);
++
++ if (!error || error == AOP_TRUNCATED_PAGE)
+ goto retry_find;
+- }
+
+- /*
+- * Things didn't work out. Return zero to tell the
+- * mm layer so, possibly freeing the page cache page first.
+- */
++ /* Things didn't work out. Return zero to tell the mm layer so. */
+ shrink_readahead_size_eio(file, ra);
+- page_cache_release(page);
+- return NOPAGE_SIGBUS;
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
++}
++EXPORT_SYMBOL(filemap_fault);
++
++/*
++ * filemap_nopage and filemap_populate are legacy exports that are not used
++ * in tree. Scheduled for removal.
++ */
++struct page *filemap_nopage(struct vm_area_struct *area,
++ unsigned long address, int *type)
++{
++ struct page *page;
++ struct fault_data fdata;
++ fdata.address = address;
++ fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
++ + area->vm_pgoff;
++ fdata.flags = 0;
++
++ page = filemap_fault(area, &fdata);
++ if (type)
++ *type = fdata.type;
++
++ return page;
+ }
+ EXPORT_SYMBOL(filemap_nopage);
+
+@@ -1693,8 +1702,7 @@
+ EXPORT_SYMBOL(filemap_populate);
+
+ struct vm_operations_struct generic_file_vm_ops = {
+- .nopage = filemap_nopage,
+- .populate = filemap_populate,
++ .fault = filemap_fault,
+ };
+
+ /* This is used for a general mmap of a disk file */
+@@ -1707,6 +1715,7 @@
+ return -ENOEXEC;
+ file_accessed(file);
+ vma->vm_ops = &generic_file_vm_ops;
++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ return 0;
+ }
+
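
For drivers converting from ->nopage to the ->fault contract shown above, the handler reports its status through fdata->type instead of the NOPAGE_* return values; a sketch in which everything mydrv_* is hypothetical:

    static struct page *mydrv_fault(struct vm_area_struct *vma,
                                    struct fault_data *fdata)
    {
        /* mydrv_find_page() is a hypothetical pgoff -> page lookup. */
        struct page *page = mydrv_find_page(fdata->pgoff);

        if (!page) {
            fdata->type = VM_FAULT_SIGBUS;  /* was: return NOPAGE_SIGBUS */
            return NULL;
        }
        get_page(page);
        fdata->type = VM_FAULT_MINOR;
        return page;
    }

    static struct vm_operations_struct mydrv_vm_ops = {
        .fault = mydrv_fault,      /* replaces .nopage/.populate */
    };
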
+diff -Nurb linux-2.6.22-570/mm/filemap_xip.c linux-2.6.22-591/mm/filemap_xip.c
+--- linux-2.6.22-570/mm/filemap_xip.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/filemap_xip.c 2007-12-21 15:36:12.000000000 -0500
+@@ -228,62 +228,67 @@
+ }
+
+ /*
+- * xip_nopage() is invoked via the vma operations vector for a
++ * xip_fault() is invoked via the vma operations vector for a
+ * mapped memory region to read in file data during a page fault.
+ *
+- * This function is derived from filemap_nopage, but used for execute in place
++ * This function is derived from filemap_fault, but used for execute in place
+ */
+-static struct page *
+-xip_file_nopage(struct vm_area_struct * area,
+- unsigned long address,
+- int *type)
++static struct page *xip_file_fault(struct vm_area_struct *area,
++ struct fault_data *fdata)
+ {
+ struct file *file = area->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ struct page *page;
+- unsigned long size, pgoff, endoff;
++ pgoff_t size;
+
+- pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+- + area->vm_pgoff;
+- endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
+- + area->vm_pgoff;
++ /* XXX: are VM_FAULT_ codes OK? */
+
+ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+- if (pgoff >= size)
+- return NOPAGE_SIGBUS;
++ if (fdata->pgoff >= size) {
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
++ }
+
+- page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
++ page = mapping->a_ops->get_xip_page(mapping,
++ fdata->pgoff*(PAGE_SIZE/512), 0);
+ if (!IS_ERR(page))
+ goto out;
+- if (PTR_ERR(page) != -ENODATA)
+- return NOPAGE_SIGBUS;
++ if (PTR_ERR(page) != -ENODATA) {
++ fdata->type = VM_FAULT_OOM;
++ return NULL;
++ }
+
+ /* sparse block */
+ if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
+ (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
+ (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
+ /* maybe shared writable, allocate new block */
+- page = mapping->a_ops->get_xip_page (mapping,
+- pgoff*(PAGE_SIZE/512), 1);
+- if (IS_ERR(page))
+- return NOPAGE_SIGBUS;
++ page = mapping->a_ops->get_xip_page(mapping,
++ fdata->pgoff*(PAGE_SIZE/512), 1);
++ if (IS_ERR(page)) {
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
++ }
+ /* unmap page at pgoff from all other vmas */
+- __xip_unmap(mapping, pgoff);
++ __xip_unmap(mapping, fdata->pgoff);
+ } else {
+ /* not shared and writable, use xip_sparse_page() */
+ page = xip_sparse_page();
+- if (!page)
+- return NOPAGE_OOM;
++ if (!page) {
++ fdata->type = VM_FAULT_OOM;
++ return NULL;
++ }
+ }
+
+ out:
++ fdata->type = VM_FAULT_MINOR;
+ page_cache_get(page);
+ return page;
+ }
+
+ static struct vm_operations_struct xip_file_vm_ops = {
+- .nopage = xip_file_nopage,
++ .fault = xip_file_fault,
+ };
+
+ int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
+@@ -292,6 +297,7 @@
+
+ file_accessed(file);
+ vma->vm_ops = &xip_file_vm_ops;
++ vma->vm_flags |= VM_CAN_NONLINEAR;
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(xip_file_mmap);
+diff -Nurb linux-2.6.22-570/mm/fremap.c linux-2.6.22-591/mm/fremap.c
+--- linux-2.6.22-570/mm/fremap.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/fremap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -129,6 +129,25 @@
+ return err;
+ }
+
++static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
++ unsigned long addr, unsigned long size, pgoff_t pgoff)
++{
++ int err;
++
++ do {
++ err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
++ if (err)
++ return err;
++
++ size -= PAGE_SIZE;
++ addr += PAGE_SIZE;
++ pgoff++;
++ } while (size);
++
++ return 0;
++
++}
++
+ /***
+ * sys_remap_file_pages - remap arbitrary pages of a shared backing store
+ * file within an existing vma.
+@@ -186,15 +205,27 @@
+ * the single existing vma. vm_private_data is used as a
+ * swapout cursor in a VM_NONLINEAR vma.
+ */
+- if (vma && (vma->vm_flags & VM_SHARED) &&
+- (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
+- vma->vm_ops && vma->vm_ops->populate &&
+- end > start && start >= vma->vm_start &&
+- end <= vma->vm_end) {
++ if (!vma || !(vma->vm_flags & VM_SHARED))
++ goto out;
++
++ if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
++ goto out;
++
++ if ((!vma->vm_ops || !vma->vm_ops->populate) &&
++ !(vma->vm_flags & VM_CAN_NONLINEAR))
++ goto out;
++
++ if (end <= start || start < vma->vm_start || end > vma->vm_end)
++ goto out;
+
+ /* Must set VM_NONLINEAR before any pages are populated. */
+- if (pgoff != linear_page_index(vma, start) &&
+- !(vma->vm_flags & VM_NONLINEAR)) {
++ if (!(vma->vm_flags & VM_NONLINEAR)) {
++ /* Don't need a nonlinear mapping, exit success */
++ if (pgoff == linear_page_index(vma, start)) {
++ err = 0;
++ goto out;
++ }
++
+ if (!has_write_lock) {
+ up_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+@@ -211,8 +242,17 @@
+ spin_unlock(&mapping->i_mmap_lock);
+ }
+
+- err = vma->vm_ops->populate(vma, start, size,
+- vma->vm_page_prot,
++ if (vma->vm_flags & VM_CAN_NONLINEAR) {
++ err = populate_range(mm, vma, start, size, pgoff);
++ if (!err && !(flags & MAP_NONBLOCK)) {
++ if (unlikely(has_write_lock)) {
++ downgrade_write(&mm->mmap_sem);
++ has_write_lock = 0;
++ }
++ make_pages_present(start, start+size);
++ }
++ } else
++ err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
+ pgoff, flags & MAP_NONBLOCK);
+
+ /*
+@@ -220,7 +260,8 @@
+ * it after ->populate completes, and that would prevent
+ * downgrading the lock. (Locks can't be upgraded).
+ */
+- }
++
++out:
+ if (likely(!has_write_lock))
+ up_read(&mm->mmap_sem);
+ else
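
For the VM_CAN_NONLINEAR path above, populate_range() installs one file PTE per page; from userspace the corresponding call is remap_file_pages(2). A sketch (fd and sizes illustrative; the prot argument must be 0):

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <stddef.h>

    /* Map a 4-page shared window, then show file page 3 at window page 0. */
    static char *map_nonlinear(int fd)
    {
        char *win = mmap(NULL, 4 * 4096, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
        if (win == MAP_FAILED)
            return NULL;
        if (remap_file_pages(win, 4096, 0, 3, 0) != 0) {
            munmap(win, 4 * 4096);
            return NULL;
        }
        return win;   /* win[0..4095] now reads file offset 3 * 4096 */
    }
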
+diff -Nurb linux-2.6.22-570/mm/hugetlb.c linux-2.6.22-591/mm/hugetlb.c
+--- linux-2.6.22-570/mm/hugetlb.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/hugetlb.c 2007-12-21 15:36:12.000000000 -0500
+@@ -28,6 +28,9 @@
+ static struct list_head hugepage_freelists[MAX_NUMNODES];
+ static unsigned int nr_huge_pages_node[MAX_NUMNODES];
+ static unsigned int free_huge_pages_node[MAX_NUMNODES];
++gfp_t htlb_alloc_mask = GFP_HIGHUSER;
++unsigned long hugepages_treat_as_movable;
++
+ /*
+ * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
+ */
+@@ -67,14 +70,15 @@
+ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+ unsigned long address)
+ {
+- int nid = numa_node_id();
++ int nid;
+ struct page *page = NULL;
+- struct zonelist *zonelist = huge_zonelist(vma, address);
++ struct zonelist *zonelist = huge_zonelist(vma, address,
++ htlb_alloc_mask);
+ struct zone **z;
+
+ for (z = zonelist->zones; *z; z++) {
+ nid = zone_to_nid(*z);
+- if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
++ if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+ !list_empty(&hugepage_freelists[nid]))
+ break;
+ }
+@@ -114,7 +118,7 @@
+ prev_nid = nid;
+ spin_unlock(&nid_lock);
+
+- page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
++ page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+ HUGETLB_PAGE_ORDER);
+ if (page) {
+ set_compound_page_dtor(page, free_huge_page);
+@@ -264,6 +268,19 @@
+ max_huge_pages = set_max_huge_pages(max_huge_pages);
+ return 0;
+ }
++
++int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
++ struct file *file, void __user *buffer,
++ size_t *length, loff_t *ppos)
++{
++ proc_dointvec(table, write, file, buffer, length, ppos);
++ if (hugepages_treat_as_movable)
++ htlb_alloc_mask = GFP_HIGH_MOVABLE;
++ else
++ htlb_alloc_mask = GFP_HIGHUSER;
++ return 0;
++}
++
+ #endif /* CONFIG_SYSCTL */
+
+ int hugetlb_report_meminfo(char *buf)
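
The handler above is wired to a sysctl; the proc path below is an assumption, since the ctl_table entry is outside this hunk:

    /*
     * Assumed knob: echo 1 > /proc/sys/vm/hugepages_treat_as_movable
     * switches htlb_alloc_mask to GFP_HIGH_MOVABLE, so subsequent huge-page
     * allocations may come from movable areas; echo 0 reverts the mask to
     * GFP_HIGHUSER.
     */
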
+diff -Nurb linux-2.6.22-570/mm/memory.c linux-2.6.22-591/mm/memory.c
+--- linux-2.6.22-570/mm/memory.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/memory.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1052,7 +1052,8 @@
+ if (pages)
+ foll_flags |= FOLL_GET;
+ if (!write && !(vma->vm_flags & VM_LOCKED) &&
+- (!vma->vm_ops || !vma->vm_ops->nopage))
++ (!vma->vm_ops || (!vma->vm_ops->nopage &&
++ !vma->vm_ops->fault)))
+ foll_flags |= FOLL_ANON;
+
+ do {
+@@ -1712,11 +1713,11 @@
+ if (unlikely(anon_vma_prepare(vma)))
+ goto oom;
+ if (old_page == ZERO_PAGE(address)) {
+- new_page = alloc_zeroed_user_highpage(vma, address);
++ new_page = alloc_zeroed_user_highpage_movable(vma, address);
+ if (!new_page)
+ goto oom;
+ } else {
+- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+ if (!new_page)
+ goto oom;
+ cow_user_page(new_page, old_page, address, vma);
+@@ -1828,6 +1829,13 @@
+ unsigned long restart_addr;
+ int need_break;
+
++ /*
++ * files that support invalidating or truncating portions of the
++ * file from under mmapped areas must set the VM_CAN_INVALIDATE flag, and
++ * have their fault handler (->fault, or legacy ->nopage) return the page locked.
++ */
++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
++
+ again:
+ restart_addr = vma->vm_truncate_count;
+ if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
+@@ -1956,17 +1964,8 @@
+
+ spin_lock(&mapping->i_mmap_lock);
+
+- /* serialize i_size write against truncate_count write */
+- smp_wmb();
+- /* Protect against page faults, and endless unmapping loops */
++ /* Protect against endless unmapping loops */
+ mapping->truncate_count++;
+- /*
+- * For archs where spin_lock has inclusive semantics like ia64
+- * this smp_mb() will prevent to read pagetable contents
+- * before the truncate_count increment is visible to
+- * other cpus.
+- */
+- smp_mb();
+ if (unlikely(is_restart_addr(mapping->truncate_count))) {
+ if (mapping->truncate_count == 0)
+ reset_vma_truncate_counts(mapping);
+@@ -2005,8 +2004,18 @@
+ if (IS_SWAPFILE(inode))
+ goto out_busy;
+ i_size_write(inode, offset);
++
++ /*
++ * unmap_mapping_range is called twice, first simply for efficiency
++ * so that truncate_inode_pages does fewer single-page unmaps. However
++ * after this first call, and before truncate_inode_pages finishes,
++ * it is possible for private pages to be COWed, which remain after
++ * truncate_inode_pages finishes, hence the second unmap_mapping_range
++ * call must be made for correctness.
++ */
+ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(mapping, offset);
++ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ goto out_truncate;
+
+ do_expand:
+@@ -2046,6 +2055,7 @@
+ down_write(&inode->i_alloc_sem);
+ unmap_mapping_range(mapping, offset, (end - offset), 1);
+ truncate_inode_pages_range(mapping, offset, end);
++ unmap_mapping_range(mapping, offset, (end - offset), 1);
+ inode->i_op->truncate_range(inode, offset, end);
+ up_write(&inode->i_alloc_sem);
+ mutex_unlock(&inode->i_mutex);
+@@ -2208,7 +2218,6 @@
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, address, pte);
+- lazy_mmu_prot_update(pte);
+ unlock:
+ pte_unmap_unlock(page_table, ptl);
+ out:
+@@ -2241,7 +2250,7 @@
+ goto oom;
+ if (unlikely(anon_vma_prepare(vma)))
+ goto oom;
+- page = alloc_zeroed_user_highpage(vma, address);
++ page = alloc_zeroed_user_highpage_movable(vma, address);
+ if (!page)
+ goto oom;
+
+@@ -2284,10 +2293,10 @@
+ }
+
+ /*
+- * do_no_page() tries to create a new page mapping. It aggressively
++ * __do_fault() tries to create a new page mapping. It aggressively
+ * tries to share with existing pages, but makes a separate copy if
+- * the "write_access" parameter is true in order to avoid the next
+- * page fault.
++ * FAULT_FLAG_WRITE is set in the flags parameter, in order to avoid
++ * the next page fault.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+@@ -2296,92 +2305,85 @@
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+-static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
++static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+- int write_access)
+-{
++ pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
++{
+ spinlock_t *ptl;
+- struct page *new_page;
+- struct address_space *mapping = NULL;
++ struct page *page, *faulted_page;
+ pte_t entry;
+- unsigned int sequence = 0;
+- int ret = VM_FAULT_MINOR;
+ int anon = 0;
+ struct page *dirty_page = NULL;
++ struct fault_data fdata;
++
++ fdata.address = address & PAGE_MASK;
++ fdata.pgoff = pgoff;
++ fdata.flags = flags;
+
+ pte_unmap(page_table);
+ BUG_ON(vma->vm_flags & VM_PFNMAP);
+
+- if (!vx_rss_avail(mm, 1))
++ if (likely(vma->vm_ops->fault)) {
++ fdata.type = -1;
++ faulted_page = vma->vm_ops->fault(vma, &fdata);
++ WARN_ON(fdata.type == -1);
++ if (unlikely(!faulted_page))
++ return fdata.type;
++ } else {
++ /* Legacy ->nopage path */
++ fdata.type = VM_FAULT_MINOR;
++ faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
++ &fdata.type);
++ /* no page was available -- either SIGBUS or OOM */
++ if (unlikely(faulted_page == NOPAGE_SIGBUS))
++ return VM_FAULT_SIGBUS;
++ else if (unlikely(faulted_page == NOPAGE_OOM))
+ return VM_FAULT_OOM;
++ }
+
+- if (vma->vm_file) {
+- mapping = vma->vm_file->f_mapping;
+- sequence = mapping->truncate_count;
+- smp_rmb(); /* serializes i_size against truncate_count */
+- }
+-retry:
+- new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
+- /*
+- * No smp_rmb is needed here as long as there's a full
+- * spin_lock/unlock sequence inside the ->nopage callback
+- * (for the pagecache lookup) that acts as an implicit
+- * smp_mb() and prevents the i_size read to happen
+- * after the next truncate_count read.
++ /*
++ * For consistency in subsequent calls, ensure faulted_page is always
++ * locked.
+ */
+-
+- /* no page was available -- either SIGBUS, OOM or REFAULT */
+- if (unlikely(new_page == NOPAGE_SIGBUS))
+- return VM_FAULT_SIGBUS;
+- else if (unlikely(new_page == NOPAGE_OOM))
+- return VM_FAULT_OOM;
+- else if (unlikely(new_page == NOPAGE_REFAULT))
+- return VM_FAULT_MINOR;
++ if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
++ lock_page(faulted_page);
++ else
++ BUG_ON(!PageLocked(faulted_page));
+
+ /*
+ * Should we do an early C-O-W break?
+ */
+- if (write_access) {
++ page = faulted_page;
++ if (flags & FAULT_FLAG_WRITE) {
+ if (!(vma->vm_flags & VM_SHARED)) {
+- struct page *page;
+-
+- if (unlikely(anon_vma_prepare(vma)))
+- goto oom;
+- page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+- if (!page)
+- goto oom;
+- copy_user_highpage(page, new_page, address, vma);
+- page_cache_release(new_page);
+- new_page = page;
+ anon = 1;
+-
++ if (unlikely(anon_vma_prepare(vma))) {
++ fdata.type = VM_FAULT_OOM;
++ goto out;
++ }
++ page = alloc_page_vma(GFP_HIGHUSER, vma, address);
++ if (!page) {
++ fdata.type = VM_FAULT_OOM;
++ goto out;
++ }
++ copy_user_highpage(page, faulted_page, address, vma);
+ } else {
+- /* if the page will be shareable, see if the backing
++ /*
++ * If the page will be shareable, see if the backing
+ * address space wants to know that the page is about
+- * to become writable */
++ * to become writable
++ */
+ if (vma->vm_ops->page_mkwrite &&
+- vma->vm_ops->page_mkwrite(vma, new_page) < 0
+- ) {
+- page_cache_release(new_page);
+- return VM_FAULT_SIGBUS;
++ vma->vm_ops->page_mkwrite(vma, page) < 0) {
++ fdata.type = VM_FAULT_SIGBUS;
++ anon = 1; /* no anon but release faulted_page */
++ goto out;
+ }
+ }
++
+ }
+
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+- /*
+- * For a file-backed vma, someone could have truncated or otherwise
+- * invalidated this page. If unmap_mapping_range got called,
+- * retry getting the page.
+- */
+- if (mapping && unlikely(sequence != mapping->truncate_count)) {
+- pte_unmap_unlock(page_table, ptl);
+- page_cache_release(new_page);
+- cond_resched();
+- sequence = mapping->truncate_count;
+- smp_rmb();
+- goto retry;
+- }
+
+ /*
+ * This silly early PAGE_DIRTY setting removes a race
+@@ -2394,43 +2396,68 @@
+ * handle that later.
+ */
+ /* Only go through if we didn't race with anybody else... */
+- if (pte_none(*page_table)) {
+- flush_icache_page(vma, new_page);
+- entry = mk_pte(new_page, vma->vm_page_prot);
+- if (write_access)
++ if (likely(pte_same(*page_table, orig_pte))) {
++ flush_icache_page(vma, page);
++ entry = mk_pte(page, vma->vm_page_prot);
++ if (flags & FAULT_FLAG_WRITE)
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ set_pte_at(mm, address, page_table, entry);
+ if (anon) {
+ inc_mm_counter(mm, anon_rss);
+- lru_cache_add_active(new_page);
+- page_add_new_anon_rmap(new_page, vma, address);
++ lru_cache_add_active(page);
++ page_add_new_anon_rmap(page, vma, address);
+ } else {
+ inc_mm_counter(mm, file_rss);
+- page_add_file_rmap(new_page);
+- if (write_access) {
+- dirty_page = new_page;
++ page_add_file_rmap(page);
++ if (flags & FAULT_FLAG_WRITE) {
++ dirty_page = page;
+ get_page(dirty_page);
+ }
+ }
+- } else {
+- /* One of our sibling threads was faster, back out. */
+- page_cache_release(new_page);
+- goto unlock;
+- }
+
+- /* no need to invalidate: a not-present page shouldn't be cached */
++ /* no need to invalidate: a not-present page won't be cached */
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
+-unlock:
++ } else {
++ if (anon)
++ page_cache_release(page);
++ else
++ anon = 1; /* no anon but release faulted_page */
++ }
++
+ pte_unmap_unlock(page_table, ptl);
+- if (dirty_page) {
++
++out:
++ unlock_page(faulted_page);
++ if (anon)
++ page_cache_release(faulted_page);
++ else if (dirty_page) {
+ set_page_dirty_balance(dirty_page);
+ put_page(dirty_page);
+ }
+- return ret;
+-oom:
+- page_cache_release(new_page);
+- return VM_FAULT_OOM;
++
++ return fdata.type;
++}
++
++static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++ unsigned long address, pte_t *page_table, pmd_t *pmd,
++ int write_access, pte_t orig_pte)
++{
++ pgoff_t pgoff = (((address & PAGE_MASK)
++ - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
++ unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
++
++ return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
++}
++
++static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++ unsigned long address, pte_t *page_table, pmd_t *pmd,
++ int write_access, pgoff_t pgoff, pte_t orig_pte)
++{
++ unsigned int flags = FAULT_FLAG_NONLINEAR |
++ (write_access ? FAULT_FLAG_WRITE : 0);
++
++ return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
+ }
+
+ /*
+@@ -2509,9 +2536,14 @@
+ print_bad_pte(vma, orig_pte, address);
+ return VM_FAULT_OOM;
+ }
+- /* We can then assume vm->vm_ops && vma->vm_ops->populate */
+
+ pgoff = pte_to_pgoff(orig_pte);
++
++ if (vma->vm_ops && vma->vm_ops->fault)
++ return do_nonlinear_fault(mm, vma, address, page_table, pmd,
++ write_access, pgoff, orig_pte);
++
++ /* We can then assume vm->vm_ops && vma->vm_ops->populate */
+ err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+ vma->vm_page_prot, pgoff, 0);
+ if (err == -ENOMEM)
+@@ -2546,10 +2578,9 @@
+ if (!pte_present(entry)) {
+ if (pte_none(entry)) {
+ if (vma->vm_ops) {
+- if (vma->vm_ops->nopage)
+- return do_no_page(mm, vma, address,
+- pte, pmd,
+- write_access);
++ if (vma->vm_ops->fault || vma->vm_ops->nopage)
++ return do_linear_fault(mm, vma, address,
++ pte, pmd, write_access, entry);
+ if (unlikely(vma->vm_ops->nopfn))
+ return do_no_pfn(mm, vma, address, pte,
+ pmd, write_access);
+diff -Nurb linux-2.6.22-570/mm/mempolicy.c linux-2.6.22-591/mm/mempolicy.c
+--- linux-2.6.22-570/mm/mempolicy.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/mempolicy.c 2007-12-21 15:36:12.000000000 -0500
+@@ -594,7 +594,7 @@
+
+ static struct page *new_node_page(struct page *page, unsigned long node, int **x)
+ {
+- return alloc_pages_node(node, GFP_HIGHUSER, 0);
++ return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+ }
+
+ /*
+@@ -710,7 +710,8 @@
+ {
+ struct vm_area_struct *vma = (struct vm_area_struct *)private;
+
+- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
++ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
++ page_address_in_vma(page, vma));
+ }
+ #else
+
+@@ -1202,7 +1203,8 @@
+
+ #ifdef CONFIG_HUGETLBFS
+ /* Return a zonelist suitable for a huge page allocation. */
+-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
++struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
++ gfp_t gfp_flags)
+ {
+ struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+@@ -1210,7 +1212,7 @@
+ unsigned nid;
+
+ nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+- return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
++ return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
+ }
+ return zonelist_policy(GFP_HIGHUSER, pol);
+ }
+@@ -1309,7 +1311,6 @@
+ * keeps mempolicies cpuset relative after its cpuset moves. See
+ * further kernel/cpuset.c update_nodemask().
+ */
+-void *cpuset_being_rebound;
+
+ /* Slow path of a mempolicy copy */
+ struct mempolicy *__mpol_copy(struct mempolicy *old)
+@@ -1908,4 +1909,3 @@
+ m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
+ return 0;
+ }
+-
+diff -Nurb linux-2.6.22-570/mm/migrate.c linux-2.6.22-591/mm/migrate.c
+--- linux-2.6.22-570/mm/migrate.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/migrate.c 2007-12-21 15:36:12.000000000 -0500
+@@ -761,7 +761,8 @@
+
+ *result = &pm->status;
+
+- return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
++ return alloc_pages_node(pm->node,
++ GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+ }
+
+ /*
+diff -Nurb linux-2.6.22-570/mm/mmap.c linux-2.6.22-591/mm/mmap.c
+--- linux-2.6.22-570/mm/mmap.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/mmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -202,6 +202,17 @@
+ }
+
+ /*
++ * Requires inode->i_mapping->i_mmap_lock
++ */
++void __unlink_file_vma(struct vm_area_struct *vma)
++{
++ struct file *file = vma->vm_file;
++ struct address_space *mapping = file->f_mapping;
++
++ __remove_shared_vm_struct(vma, file, mapping);
++}
++
++/*
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
+ */
+@@ -1023,7 +1034,7 @@
+ }
+ }
+
+- error = security_file_mmap(file, reqprot, prot, flags);
++ error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+ if (error)
+ return error;
+
+@@ -1150,12 +1161,8 @@
+ vx_vmlocked_add(mm, len >> PAGE_SHIFT);
+ make_pages_present(addr, addr + len);
+ }
+- if (flags & MAP_POPULATE) {
+- up_write(&mm->mmap_sem);
+- sys_remap_file_pages(addr, len, 0,
+- pgoff, flags & MAP_NONBLOCK);
+- down_write(&mm->mmap_sem);
+- }
++ if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
++ make_pages_present(addr, addr + len);
+ return addr;
+
+ unmap_and_free_vma:
+diff -Nurb linux-2.6.22-570/mm/mremap.c linux-2.6.22-591/mm/mremap.c
+--- linux-2.6.22-570/mm/mremap.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/mremap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -292,6 +292,10 @@
+ if ((addr <= new_addr) && (addr+old_len) > new_addr)
+ goto out;
+
++ ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
++ if (ret)
++ goto out;
++
+ ret = do_munmap(mm, new_addr, new_len);
+ if (ret)
+ goto out;
+@@ -394,8 +398,13 @@
+
+ new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
+ vma->vm_pgoff, map_flags);
++ if (new_addr & ~PAGE_MASK) {
+ ret = new_addr;
+- if (new_addr & ~PAGE_MASK)
++ goto out;
++ }
++
++ ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
++ if (ret)
+ goto out;
+ }
+ ret = move_vma(vma, addr, old_len, new_len, new_addr);
+diff -Nurb linux-2.6.22-570/mm/nommu.c linux-2.6.22-591/mm/nommu.c
+--- linux-2.6.22-570/mm/nommu.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/nommu.c 2007-12-21 15:36:12.000000000 -0500
+@@ -639,7 +639,7 @@
+ }
+
+ /* allow the security API to have its say */
+- ret = security_file_mmap(file, reqprot, prot, flags);
++ ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+ if (ret < 0)
+ return ret;
+
+@@ -1336,8 +1336,7 @@
+ return 0;
+ }
+
+-struct page *filemap_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+ {
+ BUG();
+ return NULL;
+diff -Nurb linux-2.6.22-570/mm/page_alloc.c linux-2.6.22-591/mm/page_alloc.c
+--- linux-2.6.22-570/mm/page_alloc.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/page_alloc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -143,6 +143,42 @@
+ EXPORT_SYMBOL(nr_node_ids);
+ #endif
+
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++static inline int get_pageblock_migratetype(struct page *page)
++{
++ return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
++}
++
++static void set_pageblock_migratetype(struct page *page, int migratetype)
++{
++ set_pageblock_flags_group(page, (unsigned long)migratetype,
++ PB_migrate, PB_migrate_end);
++}
++
++static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
++{
++ WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
++
++ return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
++ ((gfp_flags & __GFP_RECLAIMABLE) != 0);
++}
++
++#else
++static inline int get_pageblock_migratetype(struct page *page)
++{
++ return MIGRATE_UNMOVABLE;
++}
++
++static void set_pageblock_migratetype(struct page *page, int migratetype)
++{
++}
++
++static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
++{
++ return MIGRATE_UNMOVABLE;
++}
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
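
A worked example of the encoding above; the enum values are assumed from the ordering implied by the fallback table further down (MIGRATE_UNMOVABLE=0, MIGRATE_RECLAIMABLE=1, MIGRATE_MOVABLE=2):

    /*
     * gfpflags_to_migratetype() packs the two hint bits:
     *   GFP_KERNEL                      -> 0  (MIGRATE_UNMOVABLE)
     *   GFP_KERNEL | __GFP_RECLAIMABLE  -> 1  (MIGRATE_RECLAIMABLE)
     *   GFP_HIGHUSER_MOVABLE            -> 2  (MIGRATE_MOVABLE)
     * Setting both hints at once is invalid and trips the WARN_ON above.
     */
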
++
+ #ifdef CONFIG_DEBUG_VM
+ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
+ {
+@@ -397,6 +433,7 @@
+ {
+ unsigned long page_idx;
+ int order_size = 1 << order;
++ int migratetype = get_pageblock_migratetype(page);
+
+ if (unlikely(PageCompound(page)))
+ destroy_compound_page(page, order);
+@@ -409,7 +446,6 @@
+ __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
+ while (order < MAX_ORDER-1) {
+ unsigned long combined_idx;
+- struct free_area *area;
+ struct page *buddy;
+
+ buddy = __page_find_buddy(page, page_idx, order);
+@@ -417,8 +453,7 @@
+ break; /* Move the buddy up one level. */
+
+ list_del(&buddy->lru);
+- area = zone->free_area + order;
+- area->nr_free--;
++ zone->free_area[order].nr_free--;
+ rmv_page_order(buddy);
+ combined_idx = __find_combined_index(page_idx, order);
+ page = page + (combined_idx - page_idx);
+@@ -426,7 +461,8 @@
+ order++;
+ }
+ set_page_order(page, order);
+- list_add(&page->lru, &zone->free_area[order].free_list);
++ list_add(&page->lru,
++ &zone->free_area[order].free_list[migratetype]);
+ zone->free_area[order].nr_free++;
+ }
+
+@@ -566,7 +602,8 @@
+ * -- wli
+ */
+ static inline void expand(struct zone *zone, struct page *page,
+- int low, int high, struct free_area *area)
++ int low, int high, struct free_area *area,
++ int migratetype)
+ {
+ unsigned long size = 1 << high;
+
+@@ -575,7 +612,7 @@
+ high--;
+ size >>= 1;
+ VM_BUG_ON(bad_range(zone, &page[size]));
+- list_add(&page[size].lru, &area->free_list);
++ list_add(&page[size].lru, &area->free_list[migratetype]);
+ area->nr_free++;
+ set_page_order(&page[size], high);
+ }
+@@ -628,31 +665,172 @@
+ return 0;
+ }
+
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++/*
++ * This array describes the order in which the free lists are fallen
++ * back on when the free lists for the desired migrate type are depleted.
++ */
++static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
++ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE },
++ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
++ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
++};
++
++/*
++ * Move the free pages in a range to the free lists of the requested type.
++ * Note that start_page and end_page are not aligned on a MAX_ORDER_NR_PAGES
++ * boundary. If alignment is required, use move_freepages_block().
++ */
++int move_freepages(struct zone *zone,
++ struct page *start_page, struct page *end_page,
++ int migratetype)
++{
++ struct page *page;
++ unsigned long order;
++ int blocks_moved = 0;
++
++#ifndef CONFIG_HOLES_IN_ZONE
++ /*
++ * page_zone is not safe to call in this context when
++ * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
++ * anyway as we check zone boundaries in move_freepages_block().
++ * Remove at a later date when no bug reports exist related to
++ * CONFIG_PAGE_GROUP_BY_MOBILITY
++ */
++ BUG_ON(page_zone(start_page) != page_zone(end_page));
++#endif
++
++ for (page = start_page; page <= end_page;) {
++ if (!pfn_valid_within(page_to_pfn(page))) {
++ page++;
++ continue;
++ }
++
++ if (!PageBuddy(page)) {
++ page++;
++ continue;
++ }
++
++ order = page_order(page);
++ list_del(&page->lru);
++ list_add(&page->lru,
++ &zone->free_area[order].free_list[migratetype]);
++ page += 1 << order;
++ blocks_moved++;
++ }
++
++ return blocks_moved;
++}
++
++int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
++{
++ unsigned long start_pfn, end_pfn;
++ struct page *start_page, *end_page;
++
++ start_pfn = page_to_pfn(page);
++ start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1);
++ start_page = pfn_to_page(start_pfn);
++ end_page = start_page + MAX_ORDER_NR_PAGES - 1;
++ end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1;
++
++ /* Do not cross zone boundaries */
++ if (start_pfn < zone->zone_start_pfn)
++ start_page = page;
++ if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
++ return 0;
++
++ return move_freepages(zone, start_page, end_page, migratetype);
++}
++
++/* Remove an element from the buddy allocator from the fallback list */
++static struct page *__rmqueue_fallback(struct zone *zone, int order,
++ int start_migratetype)
++{
++ struct free_area * area;
++ int current_order;
++ struct page *page;
++ int migratetype, i;
++
++ /* Find the largest possible block of pages in the other list */
++ for (current_order = MAX_ORDER-1; current_order >= order;
++ --current_order) {
++ for (i = 0; i < MIGRATE_TYPES - 1; i++) {
++ migratetype = fallbacks[start_migratetype][i];
++
++ area = &(zone->free_area[current_order]);
++ if (list_empty(&area->free_list[migratetype]))
++ continue;
++
++ page = list_entry(area->free_list[migratetype].next,
++ struct page, lru);
++ area->nr_free--;
++
++ /*
++ * If breaking a large block of pages, move all free
++ * pages to the preferred allocation list
++ */
++ if (unlikely(current_order >= MAX_ORDER / 2)) {
++ migratetype = start_migratetype;
++ move_freepages_block(zone, page, migratetype);
++ }
++
++ /* Remove the page from the freelists */
++ list_del(&page->lru);
++ rmv_page_order(page);
++ __mod_zone_page_state(zone, NR_FREE_PAGES,
++ -(1UL << order));
++
++ if (current_order == MAX_ORDER - 1)
++ set_pageblock_migratetype(page,
++ start_migratetype);
++
++ expand(zone, page, order, current_order, area, migratetype);
++ return page;
++ }
++ }
++
++ return NULL;
++}
++#else
++static struct page *__rmqueue_fallback(struct zone *zone, int order,
++ int start_migratetype)
++{
++ return NULL;
++}
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
++
+ /*
+ * Do the hard work of removing an element from the buddy allocator.
+ * Call me with the zone->lock already held.
+ */
+-static struct page *__rmqueue(struct zone *zone, unsigned int order)
++static struct page *__rmqueue(struct zone *zone, unsigned int order,
++ int migratetype)
+ {
+ struct free_area * area;
+ unsigned int current_order;
+ struct page *page;
+
++ /* Find a page of the appropriate size in the preferred list */
+ for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+- area = zone->free_area + current_order;
+- if (list_empty(&area->free_list))
++ area = &(zone->free_area[current_order]);
++ if (list_empty(&area->free_list[migratetype]))
+ continue;
+
+- page = list_entry(area->free_list.next, struct page, lru);
++ page = list_entry(area->free_list[migratetype].next,
++ struct page, lru);
+ list_del(&page->lru);
+ rmv_page_order(page);
+ area->nr_free--;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
+- expand(zone, page, order, current_order, area);
+- return page;
++ expand(zone, page, order, current_order, area, migratetype);
++ goto got_page;
+ }
+
+- return NULL;
++ page = __rmqueue_fallback(zone, order, migratetype);
++
++got_page:
++
++ return page;
+ }
+
+ /*
+@@ -661,16 +839,18 @@
+ * Returns the number of new pages which were placed at *list.
+ */
+ static int rmqueue_bulk(struct zone *zone, unsigned int order,
+- unsigned long count, struct list_head *list)
++ unsigned long count, struct list_head *list,
++ int migratetype)
+ {
+ int i;
+
+ spin_lock(&zone->lock);
+ for (i = 0; i < count; ++i) {
+- struct page *page = __rmqueue(zone, order);
++ struct page *page = __rmqueue(zone, order, migratetype);
+ if (unlikely(page == NULL))
+ break;
+- list_add_tail(&page->lru, list);
++ list_add(&page->lru, list);
++ set_page_private(page, migratetype);
+ }
+ spin_unlock(&zone->lock);
+ return i;
+@@ -732,7 +912,7 @@
+ {
+ unsigned long pfn, max_zone_pfn;
+ unsigned long flags;
+- int order;
++ int order, t;
+ struct list_head *curr;
+
+ if (!zone->spanned_pages)
+@@ -749,15 +929,15 @@
+ swsusp_unset_page_free(page);
+ }
+
+- for (order = MAX_ORDER - 1; order >= 0; --order)
+- list_for_each(curr, &zone->free_area[order].free_list) {
++ for_each_migratetype_order(order, t) {
++ list_for_each(curr, &zone->free_area[order].free_list[t]) {
+ unsigned long i;
+
+ pfn = page_to_pfn(list_entry(curr, struct page, lru));
+ for (i = 0; i < (1UL << order); i++)
+ swsusp_set_page_free(pfn_to_page(pfn + i));
+ }
+-
++ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+
+@@ -797,6 +977,7 @@
+ local_irq_save(flags);
+ __count_vm_event(PGFREE);
+ list_add(&page->lru, &pcp->list);
++ set_page_private(page, get_pageblock_migratetype(page));
+ pcp->count++;
+ if (pcp->count >= pcp->high) {
+ free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+@@ -846,6 +1027,7 @@
+ struct page *page;
+ int cold = !!(gfp_flags & __GFP_COLD);
+ int cpu;
++ int migratetype = gfpflags_to_migratetype(gfp_flags);
+
+ again:
+ cpu = get_cpu();
+@@ -856,16 +1038,32 @@
+ local_irq_save(flags);
+ if (!pcp->count) {
+ pcp->count = rmqueue_bulk(zone, 0,
+- pcp->batch, &pcp->list);
++ pcp->batch, &pcp->list, migratetype);
+ if (unlikely(!pcp->count))
+ goto failed;
+ }
++
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++ /* Find a page of the appropriate migrate type */
++ list_for_each_entry(page, &pcp->list, lru)
++ if (page_private(page) == migratetype)
++ break;
++
++ /* Allocate more to the pcp list if necessary */
++ if (unlikely(&page->lru == &pcp->list)) {
++ pcp->count += rmqueue_bulk(zone, 0,
++ pcp->batch, &pcp->list, migratetype);
++ page = list_entry(pcp->list.next, struct page, lru);
++ }
++#else
+ page = list_entry(pcp->list.next, struct page, lru);
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
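++
++ /*
++ * Note on the search above (an editorial sketch of the logic, not
++ * from the original patch): the list_for_each_entry() walk ends with
++ * &page->lru == &pcp->list when no cached page of the wanted
++ * migratetype exists, and rmqueue_bulk() then refills with that type.
++ * Because pcp->count was verified non-zero earlier, pcp->list is
++ * never empty here, so taking pcp->list.next after a failed refill
++ * falls back to a page of another migratetype rather than crashing.
++ */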
++
+ list_del(&page->lru);
+ pcp->count--;
+ } else {
+ spin_lock_irqsave(&zone->lock, flags);
+- page = __rmqueue(zone, order);
++ page = __rmqueue(zone, order, migratetype);
+ spin_unlock(&zone->lock);
+ if (!page)
+ goto failed;
+@@ -1952,6 +2150,16 @@
+ init_page_count(page);
+ reset_page_mapcount(page);
+ SetPageReserved(page);
++
++ /*
++ * Mark the block movable so that blocks are reserved for
++ * movable pages at startup. This forces the many long-lived
++ * kernel allocations made during boot to claim blocks of
++ * their own rather than leak throughout the address space.
++ */
++ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
++
+ INIT_LIST_HEAD(&page->lru);
+ #ifdef WANT_PAGE_VIRTUAL
+ /* The shift won't overflow because ZONE_NORMAL is below 4G. */
+@@ -1964,9 +2172,9 @@
+ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+ unsigned long size)
+ {
+- int order;
+- for (order = 0; order < MAX_ORDER ; order++) {
+- INIT_LIST_HEAD(&zone->free_area[order].free_list);
++ int order, t;
++ for_each_migratetype_order(order, t) {
++ INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
+ zone->free_area[order].nr_free = 0;
+ }
+ }
+@@ -2584,6 +2792,41 @@
+ realtotalpages);
+ }
+
++#ifndef CONFIG_SPARSEMEM
++/*
++ * Calculate the size of the zone->pageblock_flags bitmap in bytes.
++ * Start by rounding zonesize up to a multiple of MAX_ORDER_NR_PAGES,
++ * then allow NR_PAGEBLOCK_BITS bits per MAX_ORDER_NR_PAGES block of
++ * pages, round what is now in bits up to the nearest unsigned long,
++ * and finally return the result in bytes.
++ */
++static unsigned long __init usemap_size(unsigned long zonesize)
++{
++ unsigned long usemapsize;
++
++ usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES);
++ usemapsize = usemapsize >> (MAX_ORDER-1);
++ usemapsize *= NR_PAGEBLOCK_BITS;
++ usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
++
++ return usemapsize / 8;
++}
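++
++/*
++ * Worked example (an editorial sketch; MAX_ORDER == 11,
++ * NR_PAGEBLOCK_BITS == 4 and 32-bit longs are assumptions not shown in
++ * this hunk): a 4GB zone of 1048576 4K pages is already a multiple of
++ * MAX_ORDER_NR_PAGES (1024), shifts down to 1024 blocks, needs
++ * 1024 * 4 = 4096 bits, which is already long-aligned, giving
++ * 4096 / 8 = 512 bytes of pageblock flags.
++ */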
++
++static void __init setup_usemap(struct pglist_data *pgdat,
++ struct zone *zone, unsigned long zonesize)
++{
++ unsigned long usemapsize = usemap_size(zonesize);
++ zone->pageblock_flags = NULL;
++ if (usemapsize) {
++ zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
++ memset(zone->pageblock_flags, 0, usemapsize);
++ }
++}
++#else
++static inline void setup_usemap(struct pglist_data *pgdat,
++ struct zone *zone, unsigned long zonesize) {}
++#endif /* CONFIG_SPARSEMEM */
++
+ /*
+ * Set up the zone data structures:
+ * - mark all pages reserved
+@@ -2664,6 +2907,7 @@
+ if (!size)
+ continue;
+
++ setup_usemap(pgdat, zone, size);
+ ret = init_currently_empty_zone(zone, zone_start_pfn,
+ size, MEMMAP_EARLY);
+ BUG_ON(ret);
+@@ -3363,6 +3607,21 @@
+ for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++)
+ ;
+ table = (void*) __get_free_pages(GFP_ATOMIC, order);
++ /*
++ * If bucketsize is not a power-of-two, we may free
++ * some pages at the end of hash table.
++ */
++ if (table) {
++ unsigned long alloc_end = (unsigned long)table +
++ (PAGE_SIZE << order);
++ unsigned long used = (unsigned long)table +
++ PAGE_ALIGN(size);
++ split_page(virt_to_page(table), order);
++ while (used < alloc_end) {
++ free_page(used);
++ used += PAGE_SIZE;
++ }
++ }
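++ /*
++ * Worked example (an editorial sketch, not from the original
++ * patch): for size == 3 * PAGE_SIZE the loop above picks
++ * order 2 (4 pages); split_page() turns the compound
++ * allocation into 4 independent pages, and the loop frees the
++ * one page between PAGE_ALIGN(size) and alloc_end, so only
++ * 3 pages stay allocated for the hash table.
++ */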
+ }
+ } while (!table && size > PAGE_SIZE && --log2qty);
+
+@@ -3396,4 +3655,79 @@
+ EXPORT_SYMBOL(page_to_pfn);
+ #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
+
++/* Return a pointer to the bitmap storing bits affecting a block of pages */
++static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
++ unsigned long pfn)
++{
++#ifdef CONFIG_SPARSEMEM
++ return __pfn_to_section(pfn)->pageblock_flags;
++#else
++ return zone->pageblock_flags;
++#endif /* CONFIG_SPARSEMEM */
++}
+
++static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
++{
++#ifdef CONFIG_SPARSEMEM
++ pfn &= (PAGES_PER_SECTION-1);
++ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
++#else
++ pfn = pfn - zone->zone_start_pfn;
++ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
++#endif /* CONFIG_SPARSEMEM */
++}
++
++/**
++ * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages
++ * @page: The page within the block of interest
++ * @start_bitidx: The first bit of interest to retrieve
++ * @end_bitidx: The last bit of interest
++ * returns pageblock_bits flags
++ */
++unsigned long get_pageblock_flags_group(struct page *page,
++ int start_bitidx, int end_bitidx)
++{
++ struct zone *zone;
++ unsigned long *bitmap;
++ unsigned long pfn, bitidx;
++ unsigned long flags = 0;
++ unsigned long value = 1;
++
++ zone = page_zone(page);
++ pfn = page_to_pfn(page);
++ bitmap = get_pageblock_bitmap(zone, pfn);
++ bitidx = pfn_to_bitidx(zone, pfn);
++
++ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
++ if (test_bit(bitidx + start_bitidx, bitmap))
++ flags |= value;
++
++ return flags;
++}
++
++/**
++ * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages
++ * @page: The page within the block of interest
++ * @start_bitidx: The first bit of interest
++ * @end_bitidx: The last bit of interest
++ * @flags: The flags to set
++ */
++void set_pageblock_flags_group(struct page *page, unsigned long flags,
++ int start_bitidx, int end_bitidx)
++{
++ struct zone *zone;
++ unsigned long *bitmap;
++ unsigned long pfn, bitidx;
++ unsigned long value = 1;
++
++ zone = page_zone(page);
++ pfn = page_to_pfn(page);
++ bitmap = get_pageblock_bitmap(zone, pfn);
++ bitidx = pfn_to_bitidx(zone, pfn);
++
++ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
++ if (flags & value)
++ __set_bit(bitidx + start_bitidx, bitmap);
++ else
++ __clear_bit(bitidx + start_bitidx, bitmap);
++}
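++
++/*
++ * Usage sketch (editorial; PB_migrate and PB_migrate_end are assumed
++ * names not shown in this patch): the migratetype helpers used
++ * elsewhere in this file presumably reduce to these bit-range
++ * accessors over the pageblock bitmap, e.g.
++ *
++ * #define get_pageblock_migratetype(page) \
++ * get_pageblock_flags_group(page, PB_migrate, PB_migrate_end)
++ * #define set_pageblock_migratetype(page, t) \
++ * set_pageblock_flags_group(page, (unsigned long)(t), \
++ * PB_migrate, PB_migrate_end)
++ */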
+diff -Nurb linux-2.6.22-570/mm/pdflush.c linux-2.6.22-591/mm/pdflush.c
+--- linux-2.6.22-570/mm/pdflush.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/pdflush.c 2007-12-21 15:36:12.000000000 -0500
+@@ -92,6 +92,7 @@
+ static int __pdflush(struct pdflush_work *my_work)
+ {
+ current->flags |= PF_FLUSHER | PF_SWAPWRITE;
++ set_freezable();
+ my_work->fn = NULL;
+ my_work->who = current;
+ INIT_LIST_HEAD(&my_work->list);
+diff -Nurb linux-2.6.22-570/mm/rmap.c linux-2.6.22-591/mm/rmap.c
+--- linux-2.6.22-570/mm/rmap.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/rmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -622,8 +622,10 @@
+ printk (KERN_EMERG " page->count = %x\n", page_count(page));
+ printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
+ print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
+- if (vma->vm_ops)
++ if (vma->vm_ops) {
+ print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage);
++ print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
++ }
+ if (vma->vm_file && vma->vm_file->f_op)
+ print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
+ BUG();
+diff -Nurb linux-2.6.22-570/mm/shmem.c linux-2.6.22-591/mm/shmem.c
+--- linux-2.6.22-570/mm/shmem.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/shmem.c 2007-12-21 15:36:12.000000000 -0500
+@@ -81,6 +81,7 @@
+ SGP_READ, /* don't exceed i_size, don't allocate page */
+ SGP_CACHE, /* don't exceed i_size, may allocate page */
+ SGP_WRITE, /* may exceed i_size, may allocate page */
++ SGP_FAULT, /* same as SGP_CACHE, return with page locked */
+ };
+
+ static int shmem_getpage(struct inode *inode, unsigned long idx,
+@@ -92,8 +93,11 @@
+ * The above definition of ENTRIES_PER_PAGE, and the use of
+ * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
+ * might be reconsidered if it ever diverges from PAGE_SIZE.
++ *
++ * Mobility flags are masked out because swap vectors cannot move.
+ */
+- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
++ return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
++ PAGE_CACHE_SHIFT-PAGE_SHIFT);
+ }
+
+ static inline void shmem_dir_free(struct page *page)
+@@ -371,7 +375,7 @@
+ }
+
+ spin_unlock(&info->lock);
+- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
+ if (page)
+ set_page_private(page, 0);
+ spin_lock(&info->lock);
+@@ -1110,6 +1114,10 @@
+
+ if (idx >= SHMEM_MAX_INDEX)
+ return -EFBIG;
++
++ if (type)
++ *type = VM_FAULT_MINOR;
++
+ /*
+ * Normally, filepage is NULL on entry, and either found
+ * uptodate immediately, or allocated and zeroed, or read
+@@ -1299,8 +1307,10 @@
+ }
+ done:
+ if (*pagep != filepage) {
+- unlock_page(filepage);
+ *pagep = filepage;
++ if (sgp != SGP_FAULT)
++ unlock_page(filepage);
+ }
+ return 0;
+
+@@ -1312,72 +1322,29 @@
+ return error;
+ }
+
+-static struct page *shmem_nopage(struct vm_area_struct *vma,
+- unsigned long address, int *type)
++static struct page *shmem_fault(struct vm_area_struct *vma,
++ struct fault_data *fdata)
+ {
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct page *page = NULL;
+- unsigned long idx;
+ int error;
+
+- idx = (address - vma->vm_start) >> PAGE_SHIFT;
+- idx += vma->vm_pgoff;
+- idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
+- if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+- return NOPAGE_SIGBUS;
+-
+- error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+- if (error)
+- return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
+-
+- mark_page_accessed(page);
+- return page;
+-}
+-
+-static int shmem_populate(struct vm_area_struct *vma,
+- unsigned long addr, unsigned long len,
+- pgprot_t prot, unsigned long pgoff, int nonblock)
+-{
+- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+- struct mm_struct *mm = vma->vm_mm;
+- enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
+- unsigned long size;
+-
+- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
+- return -EINVAL;
++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
+- while ((long) len > 0) {
+- struct page *page = NULL;
+- int err;
+- /*
+- * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
+- */
+- err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
+- if (err)
+- return err;
+- /* Page may still be null, but only if nonblock was set. */
+- if (page) {
+- mark_page_accessed(page);
+- err = install_page(mm, vma, addr, page, prot);
+- if (err) {
+- page_cache_release(page);
+- return err;
+- }
+- } else if (vma->vm_flags & VM_NONLINEAR) {
+- /* No page was found just because we can't read it in
+- * now (being here implies nonblock != 0), but the page
+- * may exist, so set the PTE to fault it in later. */
+- err = install_file_pte(mm, vma, addr, pgoff, prot);
+- if (err)
+- return err;
++ if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
++ fdata->type = VM_FAULT_SIGBUS;
++ return NULL;
+ }
+
+- len -= PAGE_SIZE;
+- addr += PAGE_SIZE;
+- pgoff++;
++ error = shmem_getpage(inode, fdata->pgoff, &page,
++ SGP_FAULT, &fdata->type);
++ if (error) {
++ fdata->type = (error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
++ return NULL;
+ }
+- return 0;
++
++ mark_page_accessed(page);
++ return page;
+ }
+
+ #ifdef CONFIG_NUMA
+@@ -1424,6 +1391,7 @@
+ {
+ file_accessed(file);
+ vma->vm_ops = &shmem_vm_ops;
++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ return 0;
+ }
+
+@@ -2477,8 +2445,7 @@
+ };
+
+ static struct vm_operations_struct shmem_vm_ops = {
+- .nopage = shmem_nopage,
+- .populate = shmem_populate,
++ .fault = shmem_fault,
+ #ifdef CONFIG_NUMA
+ .set_policy = shmem_set_policy,
+ .get_policy = shmem_get_policy,
+@@ -2614,5 +2581,6 @@
+ fput(vma->vm_file);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
++ vma->vm_flags |= VM_CAN_INVALIDATE;
+ return 0;
+ }
+- size_t count, read_actor_t actor, void *target)
+-{
+- read_descriptor_t desc;
+-
+- if (!count)
+- return 0;
+-
+- desc.written = 0;
+- desc.count = count;
+- desc.arg.data = target;
+- desc.error = 0;
+-
+- do_shmem_file_read(in_file, ppos, &desc, actor);
+- if (desc.written)
+- return desc.written;
+- return desc.error;
+-}
+-
+-static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
+-{
+- struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
+-
+- buf->f_type = TMPFS_MAGIC;
+- buf->f_bsize = PAGE_CACHE_SIZE;
+- buf->f_namelen = NAME_MAX;
+- spin_lock(&sbinfo->stat_lock);
+- if (sbinfo->max_blocks) {
+- buf->f_blocks = sbinfo->max_blocks;
+- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+- }
+- if (sbinfo->max_inodes) {
+- buf->f_files = sbinfo->max_inodes;
+- buf->f_ffree = sbinfo->free_inodes;
+- }
+- /* else leave those fields 0 like simple_statfs */
+- spin_unlock(&sbinfo->stat_lock);
+- return 0;
+-}
+-
+-/*
+- * File creation. Allocate an inode, and we're done..
+- */
+-static int
+-shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+-{
+- struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
+- int error = -ENOSPC;
+-
+- if (inode) {
+- error = security_inode_init_security(inode, dir, NULL, NULL,
+- NULL);
+- if (error) {
+- if (error != -EOPNOTSUPP) {
+- iput(inode);
+- return error;
+- }
+- }
+- error = shmem_acl_init(inode, dir);
+- if (error) {
+- iput(inode);
+- return error;
+- }
+- if (dir->i_mode & S_ISGID) {
+- inode->i_gid = dir->i_gid;
+- if (S_ISDIR(mode))
+- inode->i_mode |= S_ISGID;
+- }
+- dir->i_size += BOGO_DIRENT_SIZE;
+- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- d_instantiate(dentry, inode);
+- dget(dentry); /* Extra count - pin the dentry in core */
+- }
+- return error;
+-}
+-
+-static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+-{
+- int error;
+-
+- if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
+- return error;
+- inc_nlink(dir);
+- return 0;
+-}
+-
+-static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
+- struct nameidata *nd)
+-{
+- return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
+-}
+-
+-/*
+- * Link a file..
+- */
+-static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+-{
+- struct inode *inode = old_dentry->d_inode;
+- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+-
+- /*
+- * No ordinary (disk based) filesystem counts links as inodes;
+- * but each new link needs a new dentry, pinning lowmem, and
+- * tmpfs dentries cannot be pruned until they are unlinked.
+- */
+- if (sbinfo->max_inodes) {
+- spin_lock(&sbinfo->stat_lock);
+- if (!sbinfo->free_inodes) {
+- spin_unlock(&sbinfo->stat_lock);
+- return -ENOSPC;
+- }
+- sbinfo->free_inodes--;
+- spin_unlock(&sbinfo->stat_lock);
+- }
+-
+- dir->i_size += BOGO_DIRENT_SIZE;
+- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- inc_nlink(inode);
+- atomic_inc(&inode->i_count); /* New dentry reference */
+- dget(dentry); /* Extra pinning count for the created dentry */
+- d_instantiate(dentry, inode);
+- return 0;
+-}
+-
+-static int shmem_unlink(struct inode *dir, struct dentry *dentry)
+-{
+- struct inode *inode = dentry->d_inode;
+-
+- if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) {
+- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+- if (sbinfo->max_inodes) {
+- spin_lock(&sbinfo->stat_lock);
+- sbinfo->free_inodes++;
+- spin_unlock(&sbinfo->stat_lock);
+- }
+- }
+-
+- dir->i_size -= BOGO_DIRENT_SIZE;
+- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- drop_nlink(inode);
+- dput(dentry); /* Undo the count from "create" - this does all the work */
+- return 0;
+-}
+-
+-static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
+-{
+- if (!simple_empty(dentry))
+- return -ENOTEMPTY;
+-
+- drop_nlink(dentry->d_inode);
+- drop_nlink(dir);
+- return shmem_unlink(dir, dentry);
+-}
+-
+-/*
+- * The VFS layer already does all the dentry stuff for rename,
+- * we just have to decrement the usage count for the target if
+- * it exists so that the VFS layer correctly frees it when it
+- * gets overwritten.
+- */
+-static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
+-{
+- struct inode *inode = old_dentry->d_inode;
+- int they_are_dirs = S_ISDIR(inode->i_mode);
+-
+- if (!simple_empty(new_dentry))
+- return -ENOTEMPTY;
+-
+- if (new_dentry->d_inode) {
+- (void) shmem_unlink(new_dir, new_dentry);
+- if (they_are_dirs)
+- drop_nlink(old_dir);
+- } else if (they_are_dirs) {
+- drop_nlink(old_dir);
+- inc_nlink(new_dir);
+- }
+-
+- old_dir->i_size -= BOGO_DIRENT_SIZE;
+- new_dir->i_size += BOGO_DIRENT_SIZE;
+- old_dir->i_ctime = old_dir->i_mtime =
+- new_dir->i_ctime = new_dir->i_mtime =
+- inode->i_ctime = CURRENT_TIME;
+- return 0;
+-}
+-
+-static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+-{
+- int error;
+- int len;
+- struct inode *inode;
+- struct page *page = NULL;
+- char *kaddr;
+- struct shmem_inode_info *info;
+-
+- len = strlen(symname) + 1;
+- if (len > PAGE_CACHE_SIZE)
+- return -ENAMETOOLONG;
+-
+- inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+- if (!inode)
+- return -ENOSPC;
+-
+- error = security_inode_init_security(inode, dir, NULL, NULL,
+- NULL);
+- if (error) {
+- if (error != -EOPNOTSUPP) {
+- iput(inode);
+- return error;
+- }
+- error = 0;
+- }
+-
+- info = SHMEM_I(inode);
+- inode->i_size = len-1;
+- if (len <= (char *)inode - (char *)info) {
+- /* do it inline */
+- memcpy(info, symname, len);
+- inode->i_op = &shmem_symlink_inline_operations;
+- } else {
+- error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
+- if (error) {
+- iput(inode);
+- return error;
+- }
+- inode->i_op = &shmem_symlink_inode_operations;
+- kaddr = kmap_atomic(page, KM_USER0);
+- memcpy(kaddr, symname, len);
+- kunmap_atomic(kaddr, KM_USER0);
+- set_page_dirty(page);
+- page_cache_release(page);
+- }
+- if (dir->i_mode & S_ISGID)
+- inode->i_gid = dir->i_gid;
+- dir->i_size += BOGO_DIRENT_SIZE;
+- dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+- d_instantiate(dentry, inode);
+- dget(dentry);
+- return 0;
+-}
+-
+-static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+-{
+- nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode));
+- return NULL;
+-}
+-
+-static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
+-{
+- struct page *page = NULL;
+- int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
+- nd_set_link(nd, res ? ERR_PTR(res) : kmap(page));
+- return page;
+-}
+-
+-static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+-{
+- if (!IS_ERR(nd_get_link(nd))) {
+- struct page *page = cookie;
+- kunmap(page);
+- mark_page_accessed(page);
+- page_cache_release(page);
+- }
+-}
+-
+-static const struct inode_operations shmem_symlink_inline_operations = {
+- .readlink = generic_readlink,
+- .follow_link = shmem_follow_link_inline,
+-};
+-
+-static const struct inode_operations shmem_symlink_inode_operations = {
+- .truncate = shmem_truncate,
+- .readlink = generic_readlink,
+- .follow_link = shmem_follow_link,
+- .put_link = shmem_put_link,
+-};
+-
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+-/**
+- * Superblocks without xattr inode operations will get security.* xattr
+- * support from the VFS "for free". As soon as we have any other xattrs
+- * like ACLs, we also need to implement the security.* handlers at
+- * filesystem level, though.
+- */
+-
+-static size_t shmem_xattr_security_list(struct inode *inode, char *list,
+- size_t list_len, const char *name,
+- size_t name_len)
+-{
+- return security_inode_listsecurity(inode, list, list_len);
+-}
+-
+-static int shmem_xattr_security_get(struct inode *inode, const char *name,
+- void *buffer, size_t size)
+-{
+- if (strcmp(name, "") == 0)
+- return -EINVAL;
+- return security_inode_getsecurity(inode, name, buffer, size,
+- -EOPNOTSUPP);
+-}
+-
+-static int shmem_xattr_security_set(struct inode *inode, const char *name,
+- const void *value, size_t size, int flags)
+-{
+- if (strcmp(name, "") == 0)
+- return -EINVAL;
+- return security_inode_setsecurity(inode, name, value, size, flags);
+-}
+-
+-static struct xattr_handler shmem_xattr_security_handler = {
+- .prefix = XATTR_SECURITY_PREFIX,
+- .list = shmem_xattr_security_list,
+- .get = shmem_xattr_security_get,
+- .set = shmem_xattr_security_set,
+-};
+-
+-static struct xattr_handler *shmem_xattr_handlers[] = {
+- &shmem_xattr_acl_access_handler,
+- &shmem_xattr_acl_default_handler,
+- &shmem_xattr_security_handler,
+- NULL
+-};
+-#endif
+-
+-static struct dentry *shmem_get_parent(struct dentry *child)
+-{
+- return ERR_PTR(-ESTALE);
+-}
+-
+-static int shmem_match(struct inode *ino, void *vfh)
+-{
+- __u32 *fh = vfh;
+- __u64 inum = fh[2];
+- inum = (inum << 32) | fh[1];
+- return ino->i_ino == inum && fh[0] == ino->i_generation;
+-}
+-
+-static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh)
+-{
+- struct dentry *de = NULL;
+- struct inode *inode;
+- __u32 *fh = vfh;
+- __u64 inum = fh[2];
+- inum = (inum << 32) | fh[1];
+-
+- inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh);
+- if (inode) {
+- de = d_find_alias(inode);
+- iput(inode);
+- }
+-
+- return de? de: ERR_PTR(-ESTALE);
+-}
+-
+-static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh,
+- int len, int type,
+- int (*acceptable)(void *context, struct dentry *de),
+- void *context)
+-{
+- if (len < 3)
+- return ERR_PTR(-ESTALE);
+-
+- return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable,
+- context);
+-}
+-
+-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+- int connectable)
+-{
+- struct inode *inode = dentry->d_inode;
+-
+- if (*len < 3)
+- return 255;
+-
+- if (hlist_unhashed(&inode->i_hash)) {
+- /* Unfortunately insert_inode_hash is not idempotent,
+- * so as we hash inodes here rather than at creation
+- * time, we need a lock to ensure we only try
+- * to do it once
+- */
+- static DEFINE_SPINLOCK(lock);
+- spin_lock(&lock);
+- if (hlist_unhashed(&inode->i_hash))
+- __insert_inode_hash(inode,
+- inode->i_ino + inode->i_generation);
+- spin_unlock(&lock);
+- }
+-
+- fh[0] = inode->i_generation;
+- fh[1] = inode->i_ino;
+- fh[2] = ((__u64)inode->i_ino) >> 32;
+-
+- *len = 3;
+- return 1;
+-}
+-
+-static struct export_operations shmem_export_ops = {
+- .get_parent = shmem_get_parent,
+- .get_dentry = shmem_get_dentry,
+- .encode_fh = shmem_encode_fh,
+- .decode_fh = shmem_decode_fh,
+-};
+-
+-static int shmem_parse_options(char *options, int *mode, uid_t *uid,
+- gid_t *gid, unsigned long *blocks, unsigned long *inodes,
+- int *policy, nodemask_t *policy_nodes)
+-{
+- char *this_char, *value, *rest;
+-
+- while (options != NULL) {
+- this_char = options;
+- for (;;) {
+- /*
+- * NUL-terminate this option: unfortunately,
+- * mount options form a comma-separated list,
+- * but mpol's nodelist may also contain commas.
+- */
+- options = strchr(options, ',');
+- if (options == NULL)
+- break;
+- options++;
+- if (!isdigit(*options)) {
+- options[-1] = '\0';
+- break;
+- }
+- }
+- if (!*this_char)
+- continue;
+- if ((value = strchr(this_char,'=')) != NULL) {
+- *value++ = 0;
+- } else {
+- printk(KERN_ERR
+- "tmpfs: No value for mount option '%s'\n",
+- this_char);
+- return 1;
+- }
+-
+- if (!strcmp(this_char,"size")) {
+- unsigned long long size;
+- size = memparse(value,&rest);
+- if (*rest == '%') {
+- size <<= PAGE_SHIFT;
+- size *= totalram_pages;
+- do_div(size, 100);
+- rest++;
+- }
+- if (*rest)
+- goto bad_val;
+- *blocks = size >> PAGE_CACHE_SHIFT;
+- } else if (!strcmp(this_char,"nr_blocks")) {
+- *blocks = memparse(value,&rest);
+- if (*rest)
+- goto bad_val;
+- } else if (!strcmp(this_char,"nr_inodes")) {
+- *inodes = memparse(value,&rest);
+- if (*rest)
+- goto bad_val;
+- } else if (!strcmp(this_char,"mode")) {
+- if (!mode)
+- continue;
+- *mode = simple_strtoul(value,&rest,8);
+- if (*rest)
+- goto bad_val;
+- } else if (!strcmp(this_char,"uid")) {
+- if (!uid)
+- continue;
+- *uid = simple_strtoul(value,&rest,0);
+- if (*rest)
+- goto bad_val;
+- } else if (!strcmp(this_char,"gid")) {
+- if (!gid)
+- continue;
+- *gid = simple_strtoul(value,&rest,0);
+- if (*rest)
+- goto bad_val;
+- } else if (!strcmp(this_char,"mpol")) {
+- if (shmem_parse_mpol(value,policy,policy_nodes))
+- goto bad_val;
+- } else {
+- printk(KERN_ERR "tmpfs: Bad mount option %s\n",
+- this_char);
+- return 1;
+- }
+- }
+- return 0;
+-
+-bad_val:
+- printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
+- value, this_char);
+- return 1;
+-
+-}
+-
+-static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
+-{
+- struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+- unsigned long max_blocks = sbinfo->max_blocks;
+- unsigned long max_inodes = sbinfo->max_inodes;
+- int policy = sbinfo->policy;
+- nodemask_t policy_nodes = sbinfo->policy_nodes;
+- unsigned long blocks;
+- unsigned long inodes;
+- int error = -EINVAL;
+-
+- if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks,
+- &max_inodes, &policy, &policy_nodes))
+- return error;
+-
+- spin_lock(&sbinfo->stat_lock);
+- blocks = sbinfo->max_blocks - sbinfo->free_blocks;
+- inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+- if (max_blocks < blocks)
+- goto out;
+- if (max_inodes < inodes)
+- goto out;
+- /*
+- * Those tests also disallow limited->unlimited while any are in
+- * use, so i_blocks will always be zero when max_blocks is zero;
+- * but we must separately disallow unlimited->limited, because
+- * in that case we have no record of how much is already in use.
+- */
+- if (max_blocks && !sbinfo->max_blocks)
+- goto out;
+- if (max_inodes && !sbinfo->max_inodes)
+- goto out;
+-
+- error = 0;
+- sbinfo->max_blocks = max_blocks;
+- sbinfo->free_blocks = max_blocks - blocks;
+- sbinfo->max_inodes = max_inodes;
+- sbinfo->free_inodes = max_inodes - inodes;
+- sbinfo->policy = policy;
+- sbinfo->policy_nodes = policy_nodes;
+-out:
+- spin_unlock(&sbinfo->stat_lock);
+- return error;
+-}
+-#endif
+-
+-static void shmem_put_super(struct super_block *sb)
+-{
+- kfree(sb->s_fs_info);
+- sb->s_fs_info = NULL;
+-}
+-
+-static int shmem_fill_super(struct super_block *sb,
+- void *data, int silent)
+-{
+- struct inode *inode;
+- struct dentry *root;
+- int mode = S_IRWXUGO | S_ISVTX;
+- uid_t uid = current->fsuid;
+- gid_t gid = current->fsgid;
+- int err = -ENOMEM;
+- struct shmem_sb_info *sbinfo;
+- unsigned long blocks = 0;
+- unsigned long inodes = 0;
+- int policy = MPOL_DEFAULT;
+- nodemask_t policy_nodes = node_online_map;
+-
+-#ifdef CONFIG_TMPFS
+- /*
+- * By default we only allow half of the physical ram per
+- * tmpfs instance, limiting inodes to one per page of lowmem;
+- * but the internal instance is left unlimited.
+- */
+- if (!(sb->s_flags & MS_NOUSER)) {
+- blocks = totalram_pages / 2;
+- inodes = totalram_pages - totalhigh_pages;
+- if (inodes > blocks)
+- inodes = blocks;
+- if (shmem_parse_options(data, &mode, &uid, &gid, &blocks,
+- &inodes, &policy, &policy_nodes))
+- return -EINVAL;
+- }
+- sb->s_export_op = &shmem_export_ops;
+-#else
+- sb->s_flags |= MS_NOUSER;
+-#endif
+-
+- /* Round up to L1_CACHE_BYTES to resist false sharing */
+- sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info),
+- L1_CACHE_BYTES), GFP_KERNEL);
+- if (!sbinfo)
+- return -ENOMEM;
+-
+- spin_lock_init(&sbinfo->stat_lock);
+- sbinfo->max_blocks = blocks;
+- sbinfo->free_blocks = blocks;
+- sbinfo->max_inodes = inodes;
+- sbinfo->free_inodes = inodes;
+- sbinfo->policy = policy;
+- sbinfo->policy_nodes = policy_nodes;
+-
+- sb->s_fs_info = sbinfo;
+- sb->s_maxbytes = SHMEM_MAX_BYTES;
+- sb->s_blocksize = PAGE_CACHE_SIZE;
+- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+- sb->s_magic = TMPFS_MAGIC;
+- sb->s_op = &shmem_ops;
+- sb->s_time_gran = 1;
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+- sb->s_xattr = shmem_xattr_handlers;
+- sb->s_flags |= MS_POSIXACL;
+-#endif
+-
+- inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
+- if (!inode)
+- goto failed;
+- inode->i_uid = uid;
+- inode->i_gid = gid;
+- root = d_alloc_root(inode);
+- if (!root)
+- goto failed_iput;
+- sb->s_root = root;
+- return 0;
+-
+-failed_iput:
+- iput(inode);
+-failed:
+- shmem_put_super(sb);
+- return err;
+-}
+-
+-static struct kmem_cache *shmem_inode_cachep;
+-
+-static struct inode *shmem_alloc_inode(struct super_block *sb)
+-{
+- struct shmem_inode_info *p;
+- p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
+- if (!p)
+- return NULL;
+- return &p->vfs_inode;
+-}
+-
+-static void shmem_destroy_inode(struct inode *inode)
+-{
+- if ((inode->i_mode & S_IFMT) == S_IFREG) {
+- /* only struct inode is valid if it's an inline symlink */
+- mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+- }
+- shmem_acl_destroy_inode(inode);
+- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+-}
+-
+-static void init_once(void *foo, struct kmem_cache *cachep,
+- unsigned long flags)
+-{
+- struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
+-
+- inode_init_once(&p->vfs_inode);
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+- p->i_acl = NULL;
+- p->i_default_acl = NULL;
+-#endif
+-}
+-
+-static int init_inodecache(void)
+-{
+- shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
+- sizeof(struct shmem_inode_info),
+- 0, 0, init_once, NULL);
+- if (shmem_inode_cachep == NULL)
+- return -ENOMEM;
+- return 0;
+-}
+-
+-static void destroy_inodecache(void)
+-{
+- kmem_cache_destroy(shmem_inode_cachep);
+-}
+-
+-static const struct address_space_operations shmem_aops = {
+- .writepage = shmem_writepage,
+- .set_page_dirty = __set_page_dirty_no_writeback,
+-#ifdef CONFIG_TMPFS
+- .prepare_write = shmem_prepare_write,
+- .commit_write = simple_commit_write,
+-#endif
+- .migratepage = migrate_page,
+-};
+-
+-static const struct file_operations shmem_file_operations = {
+- .mmap = shmem_mmap,
+-#ifdef CONFIG_TMPFS
+- .llseek = generic_file_llseek,
+- .read = shmem_file_read,
+- .write = shmem_file_write,
+- .fsync = simple_sync_file,
+- .sendfile = shmem_file_sendfile,
+-#endif
+-};
+-
+-static const struct inode_operations shmem_inode_operations = {
+- .truncate = shmem_truncate,
+- .setattr = shmem_notify_change,
+- .truncate_range = shmem_truncate_range,
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+- .setxattr = generic_setxattr,
+- .getxattr = generic_getxattr,
+- .listxattr = generic_listxattr,
+- .removexattr = generic_removexattr,
+- .permission = shmem_permission,
+-#endif
+-
+-};
+-
+-static const struct inode_operations shmem_dir_inode_operations = {
+-#ifdef CONFIG_TMPFS
+- .create = shmem_create,
+- .lookup = simple_lookup,
+- .link = shmem_link,
+- .unlink = shmem_unlink,
+- .symlink = shmem_symlink,
+- .mkdir = shmem_mkdir,
+- .rmdir = shmem_rmdir,
+- .mknod = shmem_mknod,
+- .rename = shmem_rename,
+-#endif
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+- .setattr = shmem_notify_change,
+- .setxattr = generic_setxattr,
+- .getxattr = generic_getxattr,
+- .listxattr = generic_listxattr,
+- .removexattr = generic_removexattr,
+- .permission = shmem_permission,
+-#endif
+-};
+-
+-static const struct inode_operations shmem_special_inode_operations = {
+-#ifdef CONFIG_TMPFS_POSIX_ACL
+- .setattr = shmem_notify_change,
+- .setxattr = generic_setxattr,
+- .getxattr = generic_getxattr,
+- .listxattr = generic_listxattr,
+- .removexattr = generic_removexattr,
+- .permission = shmem_permission,
+-#endif
+-};
+-
+-static const struct super_operations shmem_ops = {
+- .alloc_inode = shmem_alloc_inode,
+- .destroy_inode = shmem_destroy_inode,
+-#ifdef CONFIG_TMPFS
+- .statfs = shmem_statfs,
+- .remount_fs = shmem_remount_fs,
+-#endif
+- .delete_inode = shmem_delete_inode,
+- .drop_inode = generic_delete_inode,
+- .put_super = shmem_put_super,
+-};
+-
+-static struct vm_operations_struct shmem_vm_ops = {
+- .nopage = shmem_nopage,
+- .populate = shmem_populate,
+-#ifdef CONFIG_NUMA
+- .set_policy = shmem_set_policy,
+- .get_policy = shmem_get_policy,
+-#endif
+-};
+-
+-
+-static int shmem_get_sb(struct file_system_type *fs_type,
+- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+-{
+- return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
+-}
+-
+-static struct file_system_type tmpfs_fs_type = {
+- .owner = THIS_MODULE,
+- .name = "tmpfs",
+- .get_sb = shmem_get_sb,
+- .kill_sb = kill_litter_super,
+-};
+-static struct vfsmount *shm_mnt;
+-
+-static int __init init_tmpfs(void)
+-{
+- int error;
+-
+- error = init_inodecache();
+- if (error)
+- goto out3;
+-
+- error = register_filesystem(&tmpfs_fs_type);
+- if (error) {
+- printk(KERN_ERR "Could not register tmpfs\n");
+- goto out2;
+- }
+-
+- shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
+- tmpfs_fs_type.name, NULL);
+- if (IS_ERR(shm_mnt)) {
+- error = PTR_ERR(shm_mnt);
+- printk(KERN_ERR "Could not kern_mount tmpfs\n");
+- goto out1;
+- }
+- return 0;
+-
+-out1:
+- unregister_filesystem(&tmpfs_fs_type);
+-out2:
+- destroy_inodecache();
+-out3:
+- shm_mnt = ERR_PTR(error);
+- return error;
+-}
+-module_init(init_tmpfs)
+-
+-/*
+- * shmem_file_setup - get an unlinked file living in tmpfs
+- *
+- * @name: name for dentry (to be seen in /proc/<pid>/maps)
+- * @size: size to be set for the file
+- *
+- */
+-struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
+-{
+- int error;
+- struct file *file;
+- struct inode *inode;
+- struct dentry *dentry, *root;
+- struct qstr this;
+-
+- if (IS_ERR(shm_mnt))
+- return (void *)shm_mnt;
+-
+- if (size < 0 || size > SHMEM_MAX_BYTES)
+- return ERR_PTR(-EINVAL);
+-
+- if (shmem_acct_size(flags, size))
+- return ERR_PTR(-ENOMEM);
+-
+- error = -ENOMEM;
+- this.name = name;
+- this.len = strlen(name);
+- this.hash = 0; /* will go */
+- root = shm_mnt->mnt_root;
+- dentry = d_alloc(root, &this);
+- if (!dentry)
+- goto put_memory;
+-
+- error = -ENFILE;
+- file = get_empty_filp();
+- if (!file)
+- goto put_dentry;
+-
+- error = -ENOSPC;
+- inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
+- if (!inode)
+- goto close_file;
+-
+- SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+- d_instantiate(dentry, inode);
+- inode->i_size = size;
+- inode->i_nlink = 0; /* It is unlinked */
+- file->f_path.mnt = mntget(shm_mnt);
+- file->f_path.dentry = dentry;
+- file->f_mapping = inode->i_mapping;
+- file->f_op = &shmem_file_operations;
+- file->f_mode = FMODE_WRITE | FMODE_READ;
+- return file;
+-
+-close_file:
+- put_filp(file);
+-put_dentry:
+- dput(dentry);
+-put_memory:
+- shmem_unacct_size(flags, size);
+- return ERR_PTR(error);
+-}
+-
+-/*
+- * shmem_zero_setup - setup a shared anonymous mapping
+- *
+- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
+- */
+-int shmem_zero_setup(struct vm_area_struct *vma)
+-{
+- struct file *file;
+- loff_t size = vma->vm_end - vma->vm_start;
+-
+- file = shmem_file_setup("dev/zero", size, vma->vm_flags);
+- if (IS_ERR(file))
+- return PTR_ERR(file);
+-
+- if (vma->vm_file)
+- fput(vma->vm_file);
+- vma->vm_file = file;
+- vma->vm_ops = &shmem_vm_ops;
+- return 0;
+-}
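
The shmem_parse_options() loop removed above hides one subtlety: tmpfs mount options are comma-separated, but an mpol nodelist such as mpol=bind:0-3,5 legitimately contains commas, so the tokenizer only ends an option at a comma whose next character is not a digit. A minimal userspace sketch of just that tokenizer, compilable as-is; the function name and test string are illustrative, not kernel code:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void parse(char *options)
{
    char *this_char;

    while (options != NULL) {
        this_char = options;
        for (;;) {
            /* NUL-terminate this option, but keep a comma
             * that continues a numeric nodelist */
            options = strchr(options, ',');
            if (options == NULL)
                break;
            options++;
            if (!isdigit((unsigned char)*options)) {
                options[-1] = '\0';
                break;
            }
        }
        if (!*this_char)
            continue;
        printf("option: '%s'\n", this_char);
    }
}

int main(void)
{
    char buf[] = "size=50%,mpol=bind:0-3,5,uid=1000";

    parse(buf);    /* "size=50%", "mpol=bind:0-3,5", "uid=1000" */
    return 0;
}
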
+diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-591/mm/slab.c
+--- linux-2.6.22-570/mm/slab.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/mm/slab.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1639,6 +1639,8 @@
+ #endif
+
+ flags |= cachep->gfpflags;
++ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
++ flags |= __GFP_RECLAIMABLE;
+
+ page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+ if (!page)
+diff -Nurb linux-2.6.22-570/mm/slub.c linux-2.6.22-591/mm/slub.c
+--- linux-2.6.22-570/mm/slub.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/mm/slub.c 2007-12-21 15:36:12.000000000 -0500
+@@ -985,6 +985,9 @@
+ if (s->flags & SLAB_CACHE_DMA)
+ flags |= SLUB_DMA;
+
++ if (s->flags & SLAB_RECLAIM_ACCOUNT)
++ flags |= __GFP_RECLAIMABLE;
++
+ if (node == -1)
+ page = alloc_pages(flags, s->order);
+ else
+@@ -1989,6 +1992,7 @@
+ #ifdef CONFIG_NUMA
+ s->defrag_ratio = 100;
+ #endif
++ raise_kswapd_order(s->order);
+
+ if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+ return 1;
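
Both hunks above fold a cache-creation flag (SLAB_RECLAIM_ACCOUNT) into the page-allocator flags at slab-allocation time, so reclaimable slabs can be grouped separately from unreclaimable ones. A compilable userspace sketch of that translation pattern; the flag values and struct layout are illustrative, not the kernel's:

#include <stdio.h>

#define SLAB_RECLAIM_ACCOUNT    0x01u    /* cache flags (illustrative) */
#define SLAB_CACHE_DMA          0x02u
#define __GFP_DMA               0x10u    /* allocation flags (illustrative) */
#define __GFP_RECLAIMABLE       0x20u

struct kmem_cache {
    const char *name;
    unsigned int flags;        /* set once at cache creation */
};

/* per-allocation flags derived from per-cache properties */
static unsigned int slab_alloc_flags(const struct kmem_cache *s,
                                     unsigned int base)
{
    unsigned int flags = base;

    if (s->flags & SLAB_CACHE_DMA)
        flags |= __GFP_DMA;
    if (s->flags & SLAB_RECLAIM_ACCOUNT)
        flags |= __GFP_RECLAIMABLE;
    return flags;
}

int main(void)
{
    struct kmem_cache dentry = { "dentry", SLAB_RECLAIM_ACCOUNT };
    struct kmem_cache plain  = { "plain", 0 };

    printf("%s: %#x\n", dentry.name, slab_alloc_flags(&dentry, 0));
    printf("%s: %#x\n", plain.name, slab_alloc_flags(&plain, 0));
    return 0;
}
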
+diff -Nurb linux-2.6.22-570/mm/swap_state.c linux-2.6.22-591/mm/swap_state.c
+--- linux-2.6.22-570/mm/swap_state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/swap_state.c 2007-12-21 15:36:12.000000000 -0500
+@@ -334,7 +334,8 @@
+ * Get a new page to read into from swap.
+ */
+ if (!new_page) {
+- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
++ vma, addr);
+ if (!new_page)
+ break; /* Out of memory */
+ }
+diff -Nurb linux-2.6.22-570/mm/truncate.c linux-2.6.22-591/mm/truncate.c
+--- linux-2.6.22-570/mm/truncate.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/truncate.c 2007-12-21 15:36:12.000000000 -0500
+@@ -82,7 +82,7 @@
+ /*
+ * If truncate cannot remove the fs-private metadata from the page, the page
+ * becomes anonymous. It will be left on the LRU and may even be mapped into
+- * user pagetables if we're racing with filemap_nopage().
++ * user pagetables if we're racing with filemap_fault().
+ *
+ * We need to bale out if page->mapping is no longer equal to the original
+ * mapping. This happens a) when the VM reclaimed the page while we waited on
+@@ -192,6 +192,11 @@
+ unlock_page(page);
+ continue;
+ }
++ if (page_mapped(page)) {
++ unmap_mapping_range(mapping,
++ (loff_t)page_index<<PAGE_CACHE_SHIFT,
++ PAGE_CACHE_SIZE, 0);
++ }
+ truncate_complete_page(mapping, page);
+ unlock_page(page);
+ }
+@@ -229,6 +234,11 @@
+ break;
+ lock_page(page);
+ wait_on_page_writeback(page);
++ if (page_mapped(page)) {
++ unmap_mapping_range(mapping,
++ (loff_t)page->index<<PAGE_CACHE_SHIFT,
++ PAGE_CACHE_SIZE, 0);
++ }
+ if (page->index > next)
+ next = page->index;
+ next++;
+@@ -397,7 +407,7 @@
+ break;
+ }
+ wait_on_page_writeback(page);
+- while (page_mapped(page)) {
++ if (page_mapped(page)) {
+ if (!did_range_unmap) {
+ /*
+ * Zap the rest of the file in one hit.
+@@ -417,6 +427,7 @@
+ PAGE_CACHE_SIZE, 0);
+ }
+ }
++ BUG_ON(page_mapped(page));
+ ret = do_launder_page(mapping, page);
+ if (ret == 0 && !invalidate_complete_page2(mapping, page))
+ ret = -EIO;
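
The unmap_mapping_range() calls added above compute the byte offset as (loff_t)page_index << PAGE_CACHE_SHIFT; the widening cast must come before the shift, or a 32-bit page index overflows for offsets at or past 4GB. A standalone demonstration (PAGE_SHIFT value illustrative):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    uint32_t page_index = 0x00200000;    /* page 2^21: byte offset 8GiB */

    uint32_t narrow = page_index << PAGE_SHIFT;           /* wraps mod 2^32 */
    int64_t  wide   = (int64_t)page_index << PAGE_SHIFT;  /* exact */

    printf("narrow: 0x%08x\n", narrow);                   /* 0x00000000 */
    printf("wide:   0x%llx\n", (unsigned long long)wide); /* 0x200000000 */
    return 0;
}
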
+diff -Nurb linux-2.6.22-570/mm/util.c linux-2.6.22-591/mm/util.c
+--- linux-2.6.22-570/mm/util.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/util.c 2007-12-21 15:36:12.000000000 -0500
+@@ -18,9 +18,8 @@
+ }
+ EXPORT_SYMBOL(__kzalloc);
+
+-/*
++/**
+ * kstrdup - allocate space for and copy an existing string
+- *
+ * @s: the string to duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ */
+@@ -41,6 +40,32 @@
+ EXPORT_SYMBOL(kstrdup);
+
+ /**
++ * kstrndup - allocate space for and copy an existing string
++ * @s: the string to duplicate
++ * @max: read at most @max chars from @s
++ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
++ */
++char *kstrndup(const char *s, size_t max, gfp_t gfp)
++{
++ size_t len;
++ char *buf;
++
++ if (!s)
++ return NULL;
++
++ len = strlen(s);
++ if (len > max)
++ len = max;
++ buf = kmalloc_track_caller(len+1, gfp);
++ if (buf) {
++ memcpy(buf, s, len);
++ buf[len] = '\0';
++ }
++ return buf;
++}
++EXPORT_SYMBOL(kstrndup);
++
++/**
+ * kmemdup - duplicate region of memory
+ *
+ * @src: memory region to duplicate
+@@ -60,7 +85,6 @@
+
+ /*
+ * strndup_user - duplicate an existing string from user space
+- *
+ * @s: The string to duplicate
+ * @n: Maximum number of bytes to copy, including the trailing NUL.
+ */
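
kstrndup() added above is the bounded cousin of kstrdup(): copy at most @max characters and always NUL-terminate. A userspace analog with malloc() standing in for kmalloc_track_caller() and no gfp mask; behaviour is otherwise the same:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *kstrndup(const char *s, size_t max)
{
    size_t len;
    char *buf;

    if (!s)
        return NULL;

    len = strlen(s);
    if (len > max)
        len = max;
    buf = malloc(len + 1);
    if (buf) {
        memcpy(buf, s, len);
        buf[len] = '\0';    /* terminated even when truncated */
    }
    return buf;
}

int main(void)
{
    char *s = kstrndup("comma,separated,options", 5);

    printf("%s\n", s);    /* prints "comma" */
    free(s);
    return 0;
}
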
+diff -Nurb linux-2.6.22-570/mm/vmalloc.c linux-2.6.22-591/mm/vmalloc.c
+--- linux-2.6.22-570/mm/vmalloc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/vmalloc.c 2007-12-21 15:36:12.000000000 -0500
+@@ -68,12 +68,12 @@
+ } while (pud++, addr = next, addr != end);
+ }
+
+-void unmap_vm_area(struct vm_struct *area)
++void unmap_kernel_range(unsigned long addr, unsigned long size)
+ {
+ pgd_t *pgd;
+ unsigned long next;
+- unsigned long addr = (unsigned long) area->addr;
+- unsigned long end = addr + area->size;
++ unsigned long start = addr;
++ unsigned long end = addr + size;
+
+ BUG_ON(addr >= end);
+ pgd = pgd_offset_k(addr);
+@@ -84,7 +84,12 @@
+ continue;
+ vunmap_pud_range(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+- flush_tlb_kernel_range((unsigned long) area->addr, end);
++ flush_tlb_kernel_range(start, end);
++}
++
++static void unmap_vm_area(struct vm_struct *area)
++{
++ unmap_kernel_range((unsigned long)area->addr, area->size);
+ }
+
+ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
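
The vmalloc.c change above inverts an interface: unmap_vm_area() used to own the loop, while the new unmap_kernel_range() takes a raw (addr, size) pair and the old entry point survives as a static wrapper, so callers without a struct vm_struct can share the code path. The shape of that refactor, as a compilable userspace sketch (names reused for clarity, bodies illustrative):

#include <stdio.h>

struct vm_struct {
    void *addr;
    unsigned long size;
};

/* generalized worker: operates on a raw range */
static void unmap_kernel_range(unsigned long addr, unsigned long size)
{
    printf("unmapping [%#lx, %#lx)\n", addr, addr + size);
}

/* old API preserved as a thin wrapper, so existing callers don't change */
static void unmap_vm_area(struct vm_struct *area)
{
    unmap_kernel_range((unsigned long)area->addr, area->size);
}

int main(void)
{
    struct vm_struct area = { (void *)0x1000, 0x3000 };

    unmap_vm_area(&area);                  /* legacy caller */
    unmap_kernel_range(0x8000, 0x1000);    /* new direct caller */
    return 0;
}
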
+diff -Nurb linux-2.6.22-570/mm/vmscan.c linux-2.6.22-591/mm/vmscan.c
+--- linux-2.6.22-570/mm/vmscan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/vmscan.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1272,6 +1272,34 @@
+ return nr_reclaimed;
+ }
+
++static unsigned int kswapd_min_order __read_mostly;
++
++static inline int kswapd_order(unsigned int order)
++{
++ return max(kswapd_min_order, order);
++}
++
++/**
++ * raise_kswapd_order - Raise the minimum order that kswapd reclaims
++ * @order: The minimum order kswapd should reclaim at
++ *
++ * kswapd normally reclaims at order 0 unless there is a higher-order
++ * allocation being serviced. This function is used to set the minimum
++ * order that kswapd reclaims at when it is known there will be regular
++ * high-order allocations at a given order.
++ */
++void raise_kswapd_order(unsigned int order)
++{
++ if (order >= MAX_ORDER)
++ return;
++
++ /* Update order if necessary and inform if changed */
++ if (order > kswapd_min_order) {
++ kswapd_min_order = order;
++ printk(KERN_INFO "kswapd reclaim order set to %d\n", order);
++ }
++}
++
+ /*
+ * The background pageout daemon, started as a kernel thread
+ * from the init process.
+@@ -1314,13 +1342,14 @@
+ * trying to free the first piece of memory in the first place).
+ */
+ tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
++ set_freezable();
+
+- order = 0;
++ order = kswapd_order(0);
+ for ( ; ; ) {
+ unsigned long new_order;
+
+ prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+- new_order = pgdat->kswapd_max_order;
++ new_order = kswapd_order(pgdat->kswapd_max_order);
+ pgdat->kswapd_max_order = 0;
+ if (order < new_order) {
+ /*
+@@ -1332,7 +1361,7 @@
+ if (!freezing(current))
+ schedule();
+
+- order = pgdat->kswapd_max_order;
++ order = kswapd_order(pgdat->kswapd_max_order);
+ }
+ finish_wait(&pgdat->kswapd_wait, &wait);
+
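
raise_kswapd_order() above installs a global floor that can only ratchet upward, and kswapd clamps every wakeup order against it via kswapd_order(), which is max(kswapd_min_order, order). A userspace sketch of the ratchet (MAX_ORDER value illustrative):

#include <stdio.h>

#define MAX_ORDER 11

static unsigned int kswapd_min_order;

static unsigned int kswapd_order(unsigned int order)
{
    return order > kswapd_min_order ? order : kswapd_min_order;
}

static void raise_kswapd_order(unsigned int order)
{
    if (order >= MAX_ORDER)
        return;        /* out-of-range request: ignore */

    /* only ever raise the floor, never lower it */
    if (order > kswapd_min_order) {
        kswapd_min_order = order;
        printf("reclaim order raised to %u\n", order);
    }
}

int main(void)
{
    raise_kswapd_order(3);    /* e.g. a slab cache of order 3 */
    raise_kswapd_order(1);    /* lower than the floor: no effect */
    printf("order(0) -> %u\n", kswapd_order(0));    /* 3 */
    printf("order(5) -> %u\n", kswapd_order(5));    /* 5 */
    return 0;
}
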
+diff -Nurb linux-2.6.22-570/net/802/tr.c linux-2.6.22-591/net/802/tr.c
+--- linux-2.6.22-570/net/802/tr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/802/tr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/init.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
+ static void rif_check_expire(unsigned long dummy);
+@@ -532,7 +533,7 @@
+ seq_puts(seq,
+ "if TR address TTL rcf routing segments\n");
+ else {
+- struct net_device *dev = dev_get_by_index(entry->iface);
++ struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
+ long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
+ - (long) jiffies;
+
+@@ -639,7 +640,7 @@
+ rif_timer.function = rif_check_expire;
+ add_timer(&rif_timer);
+
+- proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops);
++ proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
+ return 0;
+ }
+
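
The tr.c hunks are mechanical fallout of network-namespace support: dev_get_by_index() and proc_net_fops_create() now take an explicit struct net, with &init_net supplied by callers that are not namespace-aware yet. The pattern, reduced to a compilable userspace sketch (types and table illustrative):

#include <stdio.h>

struct net_device {
    int ifindex;
    const char *name;
};

struct net {
    struct net_device *devs;
    int ndevs;
};

/* used to consult one implicit global table; now the table is a parameter */
static struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
    for (int i = 0; i < net->ndevs; i++)
        if (net->devs[i].ifindex == ifindex)
            return &net->devs[i];
    return NULL;
}

static struct net_device init_devs[] = { { 1, "lo" }, { 2, "tr0" } };
static struct net init_net = { init_devs, 2 };

int main(void)
{
    struct net_device *dev = dev_get_by_index(&init_net, 2);

    printf("%s\n", dev ? dev->name : "(none)");
    return 0;
}
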
+diff -Nurb linux-2.6.22-570/net/8021q/Makefile linux-2.6.22-591/net/8021q/Makefile
+--- linux-2.6.22-570/net/8021q/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+
+ obj-$(CONFIG_VLAN_8021Q) += 8021q.o
+
+-8021q-objs := vlan.o vlan_dev.o
++8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
+
+ ifeq ($(CONFIG_PROC_FS),y)
+ 8021q-objs += vlanproc.o
+diff -Nurb linux-2.6.22-570/net/8021q/vlan.c linux-2.6.22-591/net/8021q/vlan.c
+--- linux-2.6.22-570/net/8021q/vlan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/vlan.c 2007-12-21 15:36:15.000000000 -0500
+@@ -31,6 +31,7 @@
+ #include <net/arp.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/notifier.h>
++#include <net/net_namespace.h>
+
+ #include <linux/if_vlan.h>
+ #include "vlan.h"
+@@ -50,7 +51,7 @@
+ static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
+
+ static int vlan_device_event(struct notifier_block *, unsigned long, void *);
+-static int vlan_ioctl_handler(void __user *);
++static int vlan_ioctl_handler(struct net *net, void __user *);
+ static int unregister_vlan_dev(struct net_device *, unsigned short );
+
+ static struct notifier_block vlan_notifier_block = {
+@@ -97,15 +98,22 @@
+
+ /* Register us to receive netdevice events */
+ err = register_netdevice_notifier(&vlan_notifier_block);
+- if (err < 0) {
+- dev_remove_pack(&vlan_packet_type);
+- vlan_proc_cleanup();
+- return err;
+- }
++ if (err < 0)
++ goto err1;
+
+- vlan_ioctl_set(vlan_ioctl_handler);
++ err = vlan_netlink_init();
++ if (err < 0)
++ goto err2;
+
++ vlan_ioctl_set(vlan_ioctl_handler);
+ return 0;
++
++err2:
++ unregister_netdevice_notifier(&vlan_notifier_block);
++err1:
++ vlan_proc_cleanup();
++ dev_remove_pack(&vlan_packet_type);
++ return err;
+ }
+
+ /* Cleanup all vlan devices
+@@ -117,7 +125,7 @@
+ struct net_device *dev, *nxt;
+
+ rtnl_lock();
+- for_each_netdev_safe(dev, nxt) {
++ for_each_netdev_safe(&init_net, dev, nxt) {
+ if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+ VLAN_DEV_INFO(dev)->vlan_id);
+@@ -136,6 +144,7 @@
+ {
+ int i;
+
++ vlan_netlink_fini();
+ vlan_ioctl_set(NULL);
+
+ /* Un-register us from receiving netdevice events */
+@@ -197,6 +206,34 @@
+ kfree(grp);
+ }
+
++static struct vlan_group *vlan_group_alloc(int ifindex)
++{
++ struct vlan_group *grp;
++ unsigned int size;
++ unsigned int i;
++
++ grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
++ if (!grp)
++ return NULL;
++
++ size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
++
++ for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
++ grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL);
++ if (!grp->vlan_devices_arrays[i])
++ goto err;
++ }
++
++ grp->real_dev_ifindex = ifindex;
++ hlist_add_head_rcu(&grp->hlist,
++ &vlan_group_hash[vlan_grp_hashfn(ifindex)]);
++ return grp;
++
++err:
++ vlan_group_free(grp);
++ return NULL;
++}
++
+ static void vlan_rcu_free(struct rcu_head *rcu)
+ {
+ vlan_group_free(container_of(rcu, struct vlan_group, rcu));
+@@ -278,47 +315,62 @@
+ return ret;
+ }
+
+-static int unregister_vlan_device(const char *vlan_IF_name)
++int unregister_vlan_device(struct net_device *dev)
+ {
+- struct net_device *dev = NULL;
+ int ret;
+
+-
+- dev = dev_get_by_name(vlan_IF_name);
+- ret = -EINVAL;
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+- rtnl_lock();
+-
+ ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+ VLAN_DEV_INFO(dev)->vlan_id);
+-
+- dev_put(dev);
+ unregister_netdevice(dev);
+
+- rtnl_unlock();
+-
+ if (ret == 1)
+ ret = 0;
++ return ret;
++}
++
++/*
++ * vlan network devices have devices nesting below them, and are a special
++ * "super class" of normal network devices; split their locks off into a
++ * separate class since they always nest.
++ */
++static struct lock_class_key vlan_netdev_xmit_lock_key;
++
++static int vlan_dev_init(struct net_device *dev)
++{
++ struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
++
++ /* IFF_BROADCAST|IFF_MULTICAST; ??? */
++ dev->flags = real_dev->flags & ~IFF_UP;
++ dev->iflink = real_dev->ifindex;
++ dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
++ (1<<__LINK_STATE_DORMANT))) |
++ (1<<__LINK_STATE_PRESENT);
++
++ /* TODO: maybe just assign it to be ETHERNET? */
++ dev->type = real_dev->type;
++
++ memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len);
++ memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
++ dev->addr_len = real_dev->addr_len;
++
++ if (real_dev->features & NETIF_F_HW_VLAN_TX) {
++ dev->hard_header = real_dev->hard_header;
++ dev->hard_header_len = real_dev->hard_header_len;
++ dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
++ dev->rebuild_header = real_dev->rebuild_header;
+ } else {
+- printk(VLAN_ERR
+- "%s: ERROR: Tried to remove a non-vlan device "
+- "with VLAN code, name: %s priv_flags: %hX\n",
+- __FUNCTION__, dev->name, dev->priv_flags);
+- dev_put(dev);
+- ret = -EPERM;
+- }
+- } else {
+-#ifdef VLAN_DEBUG
+- printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__);
+-#endif
+- ret = -EINVAL;
++ dev->hard_header = vlan_dev_hard_header;
++ dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
++ dev->hard_start_xmit = vlan_dev_hard_start_xmit;
++ dev->rebuild_header = vlan_dev_rebuild_header;
+ }
++ dev->hard_header_parse = real_dev->hard_header_parse;
+
+- return ret;
++ lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
++ return 0;
+ }
+
+-static void vlan_setup(struct net_device *new_dev)
++void vlan_setup(struct net_device *new_dev)
+ {
+ SET_MODULE_OWNER(new_dev);
+
+@@ -338,6 +390,7 @@
+
+ /* set up method calls */
+ new_dev->change_mtu = vlan_dev_change_mtu;
++ new_dev->init = vlan_dev_init;
+ new_dev->open = vlan_dev_open;
+ new_dev->stop = vlan_dev_stop;
+ new_dev->set_mac_address = vlan_dev_set_mac_address;
+@@ -366,77 +419,110 @@
+ }
+ }
+
+-/*
+- * vlan network devices have devices nesting below it, and are a special
+- * "super class" of normal network devices; split their locks off into a
+- * separate class since they always nest.
+- */
+-static struct lock_class_key vlan_netdev_xmit_lock_key;
+-
+-
+-/* Attach a VLAN device to a mac address (ie Ethernet Card).
+- * Returns the device that was created, or NULL if there was
+- * an error of some kind.
+- */
+-static struct net_device *register_vlan_device(const char *eth_IF_name,
+- unsigned short VLAN_ID)
++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
+ {
+- struct vlan_group *grp;
+- struct net_device *new_dev;
+- struct net_device *real_dev; /* the ethernet device */
+- char name[IFNAMSIZ];
+- int i;
+-
+-#ifdef VLAN_DEBUG
+- printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n",
+- __FUNCTION__, eth_IF_name, VLAN_ID);
+-#endif
+-
+- if (VLAN_ID >= VLAN_VID_MASK)
+- goto out_ret_null;
+-
+- /* find the device relating to eth_IF_name. */
+- real_dev = dev_get_by_name(eth_IF_name);
+- if (!real_dev)
+- goto out_ret_null;
+-
+ if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
+ printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
+ __FUNCTION__, real_dev->name);
+- goto out_put_dev;
++ return -EOPNOTSUPP;
+ }
+
+ if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
+ !real_dev->vlan_rx_register) {
+ printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+ __FUNCTION__, real_dev->name);
+- goto out_put_dev;
++ return -EOPNOTSUPP;
+ }
+
+ if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
+ (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
+ printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+ __FUNCTION__, real_dev->name);
+- goto out_put_dev;
++ return -EOPNOTSUPP;
+ }
+
+- /* From this point on, all the data structures must remain
+- * consistent.
+- */
+- rtnl_lock();
+-
+ /* The real device must be up and operating in order to
+ * associate a VLAN device with it.
+ */
+ if (!(real_dev->flags & IFF_UP))
+- goto out_unlock;
++ return -ENETDOWN;
+
+- if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
++ if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
+ /* was already registered. */
+ printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+- goto out_unlock;
++ return -EEXIST;
+ }
+
++ return 0;
++}
++
++int register_vlan_dev(struct net_device *dev)
++{
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++ struct net_device *real_dev = vlan->real_dev;
++ unsigned short vlan_id = vlan->vlan_id;
++ struct vlan_group *grp, *ngrp = NULL;
++ int err;
++
++ grp = __vlan_find_group(real_dev->ifindex);
++ if (!grp) {
++ ngrp = grp = vlan_group_alloc(real_dev->ifindex);
++ if (!grp)
++ return -ENOBUFS;
++ }
++
++ err = register_netdevice(dev);
++ if (err < 0)
++ goto out_free_group;
++
++ /* Account for reference in struct vlan_dev_info */
++ dev_hold(real_dev);
++
++ vlan_transfer_operstate(real_dev, dev);
++ linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
++
++ /* So, got the sucker initialized, now let's place
++ * it into our local structure.
++ */
++ vlan_group_set_device(grp, vlan_id, dev);
++ if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
++ real_dev->vlan_rx_register(real_dev, ngrp);
++ if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
++ real_dev->vlan_rx_add_vid(real_dev, vlan_id);
++
++ if (vlan_proc_add_dev(dev) < 0)
++ printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
++ dev->name);
++ return 0;
++
++out_free_group:
++ if (ngrp)
++ vlan_group_free(ngrp);
++ return err;
++}
++
++/* Attach a VLAN device to a mac address (ie Ethernet Card).
++ * Returns 0 if the device was created or a negative error code otherwise.
++ */
++static int register_vlan_device(struct net_device *real_dev,
++ unsigned short VLAN_ID)
++{
++ struct net_device *new_dev;
++ char name[IFNAMSIZ];
++ int err;
++
++#ifdef VLAN_DEBUG
++ printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n",
++ __FUNCTION__, real_dev->name, VLAN_ID);
++#endif
++
++ if (VLAN_ID >= VLAN_VID_MASK)
++ return -ERANGE;
++
++ err = vlan_check_real_dev(real_dev, VLAN_ID);
++ if (err < 0)
++ return err;
++
+ /* Gotta set up the fields for the device. */
+ #ifdef VLAN_DEBUG
+ printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
+@@ -471,138 +557,40 @@
+ vlan_setup);
+
+ if (new_dev == NULL)
+- goto out_unlock;
+-
+-#ifdef VLAN_DEBUG
+- printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+-#endif
+- /* IFF_BROADCAST|IFF_MULTICAST; ??? */
+- new_dev->flags = real_dev->flags;
+- new_dev->flags &= ~IFF_UP;
+-
+- new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+- (1<<__LINK_STATE_DORMANT))) |
+- (1<<__LINK_STATE_PRESENT);
++ return -ENOBUFS;
+
+ /* need 4 bytes for extra VLAN header info,
+ * hope the underlying device can handle it.
+ */
+ new_dev->mtu = real_dev->mtu;
+
+- /* TODO: maybe just assign it to be ETHERNET? */
+- new_dev->type = real_dev->type;
+-
+- new_dev->hard_header_len = real_dev->hard_header_len;
+- if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) {
+- /* Regular ethernet + 4 bytes (18 total). */
+- new_dev->hard_header_len += VLAN_HLEN;
+- }
+-
++#ifdef VLAN_DEBUG
++ printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+ VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n",
+ new_dev->priv,
+ sizeof(struct vlan_dev_info));
+-
+- memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len);
+- memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
+- new_dev->addr_len = real_dev->addr_len;
+-
+- if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+- new_dev->hard_header = real_dev->hard_header;
+- new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
+- new_dev->rebuild_header = real_dev->rebuild_header;
+- } else {
+- new_dev->hard_header = vlan_dev_hard_header;
+- new_dev->hard_start_xmit = vlan_dev_hard_start_xmit;
+- new_dev->rebuild_header = vlan_dev_rebuild_header;
+- }
+- new_dev->hard_header_parse = real_dev->hard_header_parse;
++#endif
+
+ VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+ VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
+ VLAN_DEV_INFO(new_dev)->dent = NULL;
+- VLAN_DEV_INFO(new_dev)->flags = 1;
++ VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+
+-#ifdef VLAN_DEBUG
+- printk(VLAN_DBG "About to go find the group for idx: %i\n",
+- real_dev->ifindex);
+-#endif
+-
+- if (register_netdevice(new_dev))
++ new_dev->rtnl_link_ops = &vlan_link_ops;
++ err = register_vlan_dev(new_dev);
++ if (err < 0)
+ goto out_free_newdev;
+
+- lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+-
+- new_dev->iflink = real_dev->ifindex;
+- vlan_transfer_operstate(real_dev, new_dev);
+- linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
+-
+- /* So, got the sucker initialized, now lets place
+- * it into our local structure.
+- */
+- grp = __vlan_find_group(real_dev->ifindex);
+-
+- /* Note, we are running under the RTNL semaphore
+- * so it cannot "appear" on us.
+- */
+- if (!grp) { /* need to add a new group */
+- grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
+- if (!grp)
+- goto out_free_unregister;
+-
+- for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
+- grp->vlan_devices_arrays[i] = kzalloc(
+- sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN,
+- GFP_KERNEL);
+-
+- if (!grp->vlan_devices_arrays[i])
+- goto out_free_arrays;
+- }
+-
+- /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */
+- grp->real_dev_ifindex = real_dev->ifindex;
+-
+- hlist_add_head_rcu(&grp->hlist,
+- &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
+-
+- if (real_dev->features & NETIF_F_HW_VLAN_RX)
+- real_dev->vlan_rx_register(real_dev, grp);
+- }
+-
+- vlan_group_set_device(grp, VLAN_ID, new_dev);
+-
+- if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */
+- printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
+- new_dev->name);
+-
+- if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+- real_dev->vlan_rx_add_vid(real_dev, VLAN_ID);
+-
+- rtnl_unlock();
+-
+-
++ /* Account for reference in struct vlan_dev_info */
++ dev_hold(real_dev);
+ #ifdef VLAN_DEBUG
+ printk(VLAN_DBG "Allocated new device successfully, returning.\n");
+ #endif
+- return new_dev;
+-
+-out_free_arrays:
+- vlan_group_free(grp);
+-
+-out_free_unregister:
+- unregister_netdev(new_dev);
+- goto out_unlock;
++ return 0;
+
+ out_free_newdev:
+ free_netdev(new_dev);
+-
+-out_unlock:
+- rtnl_unlock();
+-
+-out_put_dev:
+- dev_put(real_dev);
+-
+-out_ret_null:
+- return NULL;
++ return err;
+ }
+
+ static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+@@ -612,6 +600,9 @@
+ int i, flgs;
+ struct net_device *vlandev;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (!grp)
+ goto out;
+
+@@ -691,11 +682,12 @@
+ * o execute requested action or pass command to the device driver
+ * arg is really a struct vlan_ioctl_args __user *.
+ */
+-static int vlan_ioctl_handler(void __user *arg)
++static int vlan_ioctl_handler(struct net *net, void __user *arg)
+ {
+- int err = 0;
++ int err;
+ unsigned short vid = 0;
+ struct vlan_ioctl_args args;
++ struct net_device *dev = NULL;
+
+ if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args)))
+ return -EFAULT;
+@@ -708,35 +700,61 @@
+ printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
+ #endif
+
++ rtnl_lock();
++
+ switch (args.cmd) {
+ case SET_VLAN_INGRESS_PRIORITY_CMD:
++ case SET_VLAN_EGRESS_PRIORITY_CMD:
++ case SET_VLAN_FLAG_CMD:
++ case ADD_VLAN_CMD:
++ case DEL_VLAN_CMD:
++ case GET_VLAN_REALDEV_NAME_CMD:
++ case GET_VLAN_VID_CMD:
++ err = -ENODEV;
++ dev = __dev_get_by_name(&init_net, args.device1);
++ if (!dev)
++ goto out;
++
++ err = -EINVAL;
++ if (args.cmd != ADD_VLAN_CMD &&
++ !(dev->priv_flags & IFF_802_1Q_VLAN))
++ goto out;
++ }
++
++ switch (args.cmd) {
++ case SET_VLAN_INGRESS_PRIORITY_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- err = vlan_dev_set_ingress_priority(args.device1,
++ break;
++ vlan_dev_set_ingress_priority(dev,
+ args.u.skb_priority,
+ args.vlan_qos);
+ break;
+
+ case SET_VLAN_EGRESS_PRIORITY_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- err = vlan_dev_set_egress_priority(args.device1,
++ break;
++ err = vlan_dev_set_egress_priority(dev,
+ args.u.skb_priority,
+ args.vlan_qos);
+ break;
+
+ case SET_VLAN_FLAG_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- err = vlan_dev_set_vlan_flag(args.device1,
++ break;
++ err = vlan_dev_set_vlan_flag(dev,
+ args.u.flag,
+ args.vlan_qos);
+ break;
+
+ case SET_VLAN_NAME_TYPE_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+- if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
++ if ((args.u.name_type >= 0) &&
++ (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
+ vlan_name_type = args.u.name_type;
+ err = 0;
+ } else {
+@@ -745,26 +763,17 @@
+ break;
+
+ case ADD_VLAN_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- /* we have been given the name of the Ethernet Device we want to
+- * talk to: args.dev1 We also have the
+- * VLAN ID: args.u.VID
+- */
+- if (register_vlan_device(args.device1, args.u.VID)) {
+- err = 0;
+- } else {
+- err = -EINVAL;
+- }
++ break;
++ err = register_vlan_device(dev, args.u.VID);
+ break;
+
+ case DEL_VLAN_CMD:
++ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+- /* Here, the args.dev1 is the actual VLAN we want
+- * to get rid of.
+- */
+- err = unregister_vlan_device(args.device1);
++ break;
++ err = unregister_vlan_device(dev);
+ break;
+
+ case GET_VLAN_INGRESS_PRIORITY_CMD:
+@@ -788,9 +797,7 @@
+ err = -EINVAL;
+ break;
+ case GET_VLAN_REALDEV_NAME_CMD:
+- err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+- if (err)
+- goto out;
++ vlan_dev_get_realdev_name(dev, args.u.device2);
+ if (copy_to_user(arg, &args,
+ sizeof(struct vlan_ioctl_args))) {
+ err = -EFAULT;
+@@ -798,9 +805,7 @@
+ break;
+
+ case GET_VLAN_VID_CMD:
+- err = vlan_dev_get_vid(args.device1, &vid);
+- if (err)
+- goto out;
++ vlan_dev_get_vid(dev, &vid);
+ args.u.VID = vid;
+ if (copy_to_user(arg, &args,
+ sizeof(struct vlan_ioctl_args))) {
+@@ -812,9 +817,11 @@
+ /* pass on to underlying device instead?? */
+ printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
+ __FUNCTION__, args.cmd);
+- return -EINVAL;
++ err = -EINVAL;
++ break;
+ }
+ out:
++ rtnl_unlock();
+ return err;
+ }
+
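
The reworked vlan_ioctl_handler() above takes rtnl_lock() once, resolves args.device1 to a device and type-checks it in one place ahead of the command switch, and converts the scattered early returns into err assignments that funnel through a single unlock path. A userspace sketch of that dispatch shape (no real locking or capability checks; names and device table illustrative):

#include <errno.h>
#include <stdio.h>
#include <string.h>

enum cmd { ADD_VLAN_CMD, DEL_VLAN_CMD, GET_VLAN_VID_CMD };

struct dev { const char *name; int is_vlan; int vid; };

static struct dev devices[] = {
    { "eth0", 0, 0 },
    { "eth0.5", 1, 5 },
};

static struct dev *find_dev(const char *name)
{
    for (size_t i = 0; i < sizeof(devices) / sizeof(devices[0]); i++)
        if (!strcmp(devices[i].name, name))
            return &devices[i];
    return NULL;
}

static int vlan_ioctl(enum cmd cmd, const char *name, int admin)
{
    struct dev *dev;
    int err;

    /* lock(); -- one critical section in the real handler */

    /* centralized lookup + type check, instead of per-command copies */
    err = -ENODEV;
    dev = find_dev(name);
    if (!dev)
        goto out;
    err = -EINVAL;
    if (cmd != ADD_VLAN_CMD && !dev->is_vlan)
        goto out;

    switch (cmd) {
    case ADD_VLAN_CMD:
    case DEL_VLAN_CMD:
        err = -EPERM;
        if (!admin)
            break;        /* fall through to unlock, keeping err */
        err = 0;
        break;
    case GET_VLAN_VID_CMD:
        printf("vid=%d\n", dev->vid);
        err = 0;
        break;
    }
out:
    /* unlock(); */
    return err;
}

int main(void)
{
    printf("%d\n", vlan_ioctl(GET_VLAN_VID_CMD, "eth0.5", 0)); /* vid=5, 0 */
    printf("%d\n", vlan_ioctl(DEL_VLAN_CMD, "eth0.5", 0));     /* -EPERM */
    printf("%d\n", vlan_ioctl(DEL_VLAN_CMD, "eth1", 1));       /* -ENODEV */
    return 0;
}
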
+diff -Nurb linux-2.6.22-570/net/8021q/vlan.h linux-2.6.22-591/net/8021q/vlan.h
+--- linux-2.6.22-570/net/8021q/vlan.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/vlan.h 2007-12-21 15:36:12.000000000 -0500
+@@ -62,11 +62,24 @@
+ int vlan_dev_open(struct net_device* dev);
+ int vlan_dev_stop(struct net_device* dev);
+ int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
+-int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+-int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+-int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val);
+-int vlan_dev_get_realdev_name(const char* dev_name, char* result);
+-int vlan_dev_get_vid(const char* dev_name, unsigned short* result);
++void vlan_dev_set_ingress_priority(const struct net_device *dev,
++ u32 skb_prio, short vlan_prio);
++int vlan_dev_set_egress_priority(const struct net_device *dev,
++ u32 skb_prio, short vlan_prio);
++int vlan_dev_set_vlan_flag(const struct net_device *dev,
++ u32 flag, short flag_val);
++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
+ void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
+
++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
++void vlan_setup(struct net_device *dev);
++int register_vlan_dev(struct net_device *dev);
++int unregister_vlan_device(struct net_device *dev);
++
++int vlan_netlink_init(void);
++void vlan_netlink_fini(void);
++
++extern struct rtnl_link_ops vlan_link_ops;
++
+ #endif /* !(__BEN_VLAN_802_1Q_INC__) */
+diff -Nurb linux-2.6.22-570/net/8021q/vlan_dev.c linux-2.6.22-591/net/8021q/vlan_dev.c
+--- linux-2.6.22-570/net/8021q/vlan_dev.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/8021q/vlan_dev.c 2007-12-21 15:36:15.000000000 -0500
+@@ -73,7 +73,7 @@
+
+ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+ {
+- if (VLAN_DEV_INFO(skb->dev)->flags & 1) {
++ if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ if (skb_shared(skb) || skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ kfree_skb(skb);
+@@ -132,6 +132,11 @@
+
+ vhdr = (struct vlan_hdr *)(skb->data);
+
++ if (dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
+ vlan_TCI = ntohs(vhdr->h_vlan_TCI);
+
+@@ -360,7 +365,8 @@
+ * header shuffling in the hard_start_xmit. Users can turn off this
+ * REORDER behaviour with the vconfig tool.
+ */
+- build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0);
++ if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR))
++ build_vlan_header = 1;
+
+ if (build_vlan_header) {
+ vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+@@ -544,136 +550,83 @@
+ return 0;
+ }
+
+-int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
++void vlan_dev_set_ingress_priority(const struct net_device *dev,
++ u32 skb_prio, short vlan_prio)
+ {
+- struct net_device *dev = dev_get_by_name(dev_name);
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+- /* see if a priority mapping exists.. */
+- VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+- dev_put(dev);
+- return 0;
+- }
++ if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
++ vlan->nr_ingress_mappings--;
++ else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio)
++ vlan->nr_ingress_mappings++;
+
+- dev_put(dev);
+- }
+- return -EINVAL;
++ vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+ }
+
+-int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
++int vlan_dev_set_egress_priority(const struct net_device *dev,
++ u32 skb_prio, short vlan_prio)
+ {
+- struct net_device *dev = dev_get_by_name(dev_name);
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+ struct vlan_priority_tci_mapping *mp = NULL;
+ struct vlan_priority_tci_mapping *np;
++ u32 vlan_qos = (vlan_prio << 13) & 0xE000;
+
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ /* See if a priority mapping exists.. */
+- mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
++ mp = vlan->egress_priority_map[skb_prio & 0xF];
+ while (mp) {
+ if (mp->priority == skb_prio) {
+- mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
+- dev_put(dev);
++ if (mp->vlan_qos && !vlan_qos)
++ vlan->nr_egress_mappings--;
++ else if (!mp->vlan_qos && vlan_qos)
++ vlan->nr_egress_mappings++;
++ mp->vlan_qos = vlan_qos;
+ return 0;
+ }
+ mp = mp->next;
+ }
+
+ /* Create a new mapping then. */
+- mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
++ mp = vlan->egress_priority_map[skb_prio & 0xF];
+ np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
+- if (np) {
++ if (!np)
++ return -ENOBUFS;
++
+ np->next = mp;
+ np->priority = skb_prio;
+- np->vlan_qos = ((vlan_prio << 13) & 0xE000);
+- VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
+- dev_put(dev);
++ np->vlan_qos = vlan_qos;
++ vlan->egress_priority_map[skb_prio & 0xF] = np;
++ if (vlan_qos)
++ vlan->nr_egress_mappings++;
+ return 0;
+- } else {
+- dev_put(dev);
+- return -ENOBUFS;
+- }
+- }
+- dev_put(dev);
+- }
+- return -EINVAL;
+ }
+
+-/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */
+-int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val)
++/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
++int vlan_dev_set_vlan_flag(const struct net_device *dev,
++ u32 flag, short flag_val)
+ {
+- struct net_device *dev = dev_get_by_name(dev_name);
+-
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ /* verify flag is supported */
+- if (flag == 1) {
++ if (flag == VLAN_FLAG_REORDER_HDR) {
+ if (flag_val) {
+- VLAN_DEV_INFO(dev)->flags |= 1;
++ VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR;
+ } else {
+- VLAN_DEV_INFO(dev)->flags &= ~1;
++ VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
+ }
+- dev_put(dev);
+ return 0;
+- } else {
+- printk(KERN_ERR "%s: flag %i is not valid.\n",
+- __FUNCTION__, (int)(flag));
+- dev_put(dev);
+- return -EINVAL;
+ }
+- } else {
+- printk(KERN_ERR
+- "%s: %s is not a vlan device, priv_flags: %hX.\n",
+- __FUNCTION__, dev->name, dev->priv_flags);
+- dev_put(dev);
+- }
+- } else {
+- printk(KERN_ERR "%s: Could not find device: %s\n",
+- __FUNCTION__, dev_name);
+- }
+-
++ printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
+ return -EINVAL;
+ }
+
+-
+-int vlan_dev_get_realdev_name(const char *dev_name, char* result)
++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
+ {
+- struct net_device *dev = dev_get_by_name(dev_name);
+- int rv = 0;
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
+- rv = 0;
+- } else {
+- rv = -EINVAL;
+- }
+- dev_put(dev);
+- } else {
+- rv = -ENODEV;
+- }
+- return rv;
+ }
+
+-int vlan_dev_get_vid(const char *dev_name, unsigned short* result)
++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
+ {
+- struct net_device *dev = dev_get_by_name(dev_name);
+- int rv = 0;
+- if (dev) {
+- if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ *result = VLAN_DEV_INFO(dev)->vlan_id;
+- rv = 0;
+- } else {
+- rv = -EINVAL;
+- }
+- dev_put(dev);
+- } else {
+- rv = -ENODEV;
+- }
+- return rv;
+ }
+
+-
+ int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p)
+ {
+ struct sockaddr *addr = (struct sockaddr *)(addr_struct_p);
+@@ -828,7 +781,7 @@
+ break;
+
+ case SIOCETHTOOL:
+- err = dev_ethtool(&ifrr);
++ err = dev_ethtool(real_dev->nd_net, &ifrr);
+ }
+
+ if (!err)
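
Note (illustration only, not part of the patch): the arithmetic in vlan_dev_set_egress_priority() above follows the 802.1Q TCI layout. The 3-bit priority code point (PCP) occupies bits 13-15 of the 16-bit tag control information, which is why the mapping stores `(vlan_prio << 13) & 0xE000` and the netlink dump code below decodes it with `(vlan_qos >> 13) & 0x7`. A standalone sketch of the round trip:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint16_t vlan_prio = 5;	/* PCP value, 0..7 */
		/* encode: PCP into TCI bits 13-15, as in the mapping above */
		uint16_t vlan_qos = (uint16_t)((vlan_prio << 13) & 0xE000);
		/* decode: as done when the mapping is dumped over netlink */
		unsigned decoded = (vlan_qos >> 13) & 0x7;

		printf("vlan_qos=0x%04x decoded=%u\n", vlan_qos, decoded);	/* 0xa000 5 */
		return 0;
	}
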
+diff -Nurb linux-2.6.22-570/net/8021q/vlan_netlink.c linux-2.6.22-591/net/8021q/vlan_netlink.c
+--- linux-2.6.22-570/net/8021q/vlan_netlink.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/net/8021q/vlan_netlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,237 @@
++/*
++ * VLAN netlink control interface
++ *
++ * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * version 2 as published by the Free Software Foundation.
++ */
++
++#include <linux/kernel.h>
++#include <linux/netdevice.h>
++#include <linux/if_vlan.h>
++#include <net/net_namespace.h>
++#include <net/netlink.h>
++#include <net/rtnetlink.h>
++#include "vlan.h"
++
++
++static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
++ [IFLA_VLAN_ID] = { .type = NLA_U16 },
++ [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
++ [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED },
++ [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
++};
++
++static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
++ [IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) },
++};
++
++
++static inline int vlan_validate_qos_map(struct nlattr *attr)
++{
++ if (!attr)
++ return 0;
++ return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy);
++}
++
++static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
++{
++ struct ifla_vlan_flags *flags;
++ u16 id;
++ int err;
++
++ if (!data)
++ return -EINVAL;
++
++ if (data[IFLA_VLAN_ID]) {
++ id = nla_get_u16(data[IFLA_VLAN_ID]);
++ if (id >= VLAN_VID_MASK)
++ return -ERANGE;
++ }
++ if (data[IFLA_VLAN_FLAGS]) {
++ flags = nla_data(data[IFLA_VLAN_FLAGS]);
++ if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR)
++ return -EINVAL;
++ }
++
++ err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]);
++ if (err < 0)
++ return err;
++ err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]);
++ if (err < 0)
++ return err;
++ return 0;
++}
++
++static int vlan_changelink(struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[])
++{
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++ struct ifla_vlan_flags *flags;
++ struct ifla_vlan_qos_mapping *m;
++ struct nlattr *attr;
++ int rem;
++
++ if (data[IFLA_VLAN_FLAGS]) {
++ flags = nla_data(data[IFLA_VLAN_FLAGS]);
++ vlan->flags = (vlan->flags & ~flags->mask) |
++ (flags->flags & flags->mask);
++ }
++ if (data[IFLA_VLAN_INGRESS_QOS]) {
++ nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
++ m = nla_data(attr);
++ vlan_dev_set_ingress_priority(dev, m->to, m->from);
++ }
++ }
++ if (data[IFLA_VLAN_EGRESS_QOS]) {
++ nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
++ m = nla_data(attr);
++ vlan_dev_set_egress_priority(dev, m->from, m->to);
++ }
++ }
++ return 0;
++}
++
++static int vlan_newlink(struct net_device *dev,
++ struct nlattr *tb[], struct nlattr *data[])
++{
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++ struct net_device *real_dev;
++ int err;
++
++ if (!data[IFLA_VLAN_ID])
++ return -EINVAL;
++
++ if (!tb[IFLA_LINK])
++ return -EINVAL;
++ real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK]));
++ if (!real_dev)
++ return -ENODEV;
++
++ vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
++ vlan->real_dev = real_dev;
++ vlan->flags = VLAN_FLAG_REORDER_HDR;
++
++ err = vlan_check_real_dev(real_dev, vlan->vlan_id);
++ if (err < 0)
++ return err;
++
++ if (!tb[IFLA_MTU])
++ dev->mtu = real_dev->mtu;
++ else if (dev->mtu > real_dev->mtu)
++ return -EINVAL;
++
++ err = vlan_changelink(dev, tb, data);
++ if (err < 0)
++ return err;
++
++ return register_vlan_dev(dev);
++}
++
++static void vlan_dellink(struct net_device *dev)
++{
++ unregister_vlan_device(dev);
++}
++
++static inline size_t vlan_qos_map_size(unsigned int n)
++{
++ if (n == 0)
++ return 0;
++ /* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */
++ return nla_total_size(sizeof(struct nlattr)) +
++ nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n;
++}
++
++static size_t vlan_get_size(const struct net_device *dev)
++{
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++
++ return nla_total_size(2) + /* IFLA_VLAN_ID */
++ vlan_qos_map_size(vlan->nr_ingress_mappings) +
++ vlan_qos_map_size(vlan->nr_egress_mappings);
++}
++
++static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
++{
++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++ struct vlan_priority_tci_mapping *pm;
++ struct ifla_vlan_flags f;
++ struct ifla_vlan_qos_mapping m;
++ struct nlattr *nest;
++ unsigned int i;
++
++ NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id);
++ if (vlan->flags) {
++ f.flags = vlan->flags;
++ f.mask = ~0;
++ NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f);
++ }
++ if (vlan->nr_ingress_mappings) {
++ nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
++ if (nest == NULL)
++ goto nla_put_failure;
++
++ for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) {
++ if (!vlan->ingress_priority_map[i])
++ continue;
++
++ m.from = i;
++ m.to = vlan->ingress_priority_map[i];
++ NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
++ sizeof(m), &m);
++ }
++ nla_nest_end(skb, nest);
++ }
++
++ if (vlan->nr_egress_mappings) {
++ nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS);
++ if (nest == NULL)
++ goto nla_put_failure;
++
++ for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
++ for (pm = vlan->egress_priority_map[i]; pm;
++ pm = pm->next) {
++ if (!pm->vlan_qos)
++ continue;
++
++ m.from = pm->priority;
++ m.to = (pm->vlan_qos >> 13) & 0x7;
++ NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
++ sizeof(m), &m);
++ }
++ }
++ nla_nest_end(skb, nest);
++ }
++ return 0;
++
++nla_put_failure:
++ return -EMSGSIZE;
++}
++
++struct rtnl_link_ops vlan_link_ops __read_mostly = {
++ .kind = "vlan",
++ .maxtype = IFLA_VLAN_MAX,
++ .policy = vlan_policy,
++ .priv_size = sizeof(struct vlan_dev_info),
++ .setup = vlan_setup,
++ .validate = vlan_validate,
++ .newlink = vlan_newlink,
++ .changelink = vlan_changelink,
++ .dellink = vlan_dellink,
++ .get_size = vlan_get_size,
++ .fill_info = vlan_fill_info,
++};
++
++int __init vlan_netlink_init(void)
++{
++ return rtnl_link_register(&vlan_link_ops);
++}
++
++void __exit vlan_netlink_fini(void)
++{
++ rtnl_link_unregister(&vlan_link_ops);
++}
++
++MODULE_ALIAS_RTNL_LINK("vlan");
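
For orientation (not part of the patch): once vlan_link_ops is registered, a VLAN-aware iproute2 can drive this interface directly instead of the vconfig ioctl path, for example:

	ip link add link eth0 name eth0.100 type vlan id 100	# vlan_newlink()
	ip -d link show eth0.100	# dumps via vlan_get_size()/vlan_fill_info()
	ip link del eth0.100	# vlan_dellink()
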
+diff -Nurb linux-2.6.22-570/net/8021q/vlanproc.c linux-2.6.22-591/net/8021q/vlanproc.c
+--- linux-2.6.22-570/net/8021q/vlanproc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/vlanproc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/fs.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_vlan.h>
++#include <net/net_namespace.h>
+ #include "vlanproc.h"
+ #include "vlan.h"
+
+@@ -143,7 +144,7 @@
+ remove_proc_entry(name_conf, proc_vlan_dir);
+
+ if (proc_vlan_dir)
+- proc_net_remove(name_root);
++ proc_net_remove(&init_net, name_root);
+
+ /* Dynamically added entries should be cleaned up as their vlan_device
+ * is removed, so we should not have to take care of it here...
+@@ -156,7 +157,7 @@
+
+ int __init vlan_proc_init(void)
+ {
+- proc_vlan_dir = proc_mkdir(name_root, proc_net);
++ proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net);
+ if (proc_vlan_dir) {
+ proc_vlan_conf = create_proc_entry(name_conf,
+ S_IFREG|S_IRUSR|S_IWUSR,
+@@ -253,7 +254,7 @@
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+@@ -272,9 +273,9 @@
+
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+- dev = net_device_entry(&dev_base_head);
++ dev = net_device_entry(&init_net.dev_base_head);
+
+- for_each_netdev_continue(dev) {
++ for_each_netdev_continue(&init_net, dev) {
+ if (!is_vlan_dev(dev))
+ continue;
+
+@@ -342,7 +343,7 @@
+ seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+ /* now show all PRIORITY mappings relating to this VLAN */
+ seq_printf(seq,
+- "\nINGRESS priority mappings: 0:%lu 1:%lu 2:%lu 3:%lu 4:%lu 5:%lu 6:%lu 7:%lu\n",
++ "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n",
+ dev_info->ingress_priority_map[0],
+ dev_info->ingress_priority_map[1],
+ dev_info->ingress_priority_map[2],
+@@ -357,7 +358,7 @@
+ const struct vlan_priority_tci_mapping *mp
+ = dev_info->egress_priority_map[i];
+ while (mp) {
+- seq_printf(seq, "%lu:%hu ",
++ seq_printf(seq, "%u:%hu ",
+ mp->priority, ((mp->vlan_qos >> 13) & 0x7));
+ mp = mp->next;
+ }
+diff -Nurb linux-2.6.22-570/net/Kconfig linux-2.6.22-591/net/Kconfig
+--- linux-2.6.22-570/net/Kconfig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/Kconfig 2007-12-21 15:36:15.000000000 -0500
+@@ -27,6 +27,13 @@
+
+ menu "Networking options"
+
++config NET_NS
++ bool "Network namespace support"
++ depends on EXPERIMENTAL
++ help
++ Support what appears to user space as multiple instances of the
++ network stack.
++
+ source "net/packet/Kconfig"
+ source "net/unix/Kconfig"
+ source "net/xfrm/Kconfig"
+diff -Nurb linux-2.6.22-570/net/Makefile linux-2.6.22-591/net/Makefile
+--- linux-2.6.22-570/net/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/Makefile 2007-12-23 03:20:02.000000000 -0500
+@@ -14,7 +14,7 @@
+
+ # LLC has to be linked before the files in net/802/
+ obj-$(CONFIG_LLC) += llc/
+-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
++obj-$(CONFIG_NET) += ethernet/ sched/ netlink/ 802/
+ obj-$(CONFIG_NETFILTER) += netfilter/
+ obj-$(CONFIG_INET) += ipv4/
+ obj-$(CONFIG_XFRM) += xfrm/
+diff -Nurb linux-2.6.22-570/net/Makefile.orig linux-2.6.22-591/net/Makefile.orig
+--- linux-2.6.22-570/net/Makefile.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/net/Makefile.orig 2007-07-08 19:32:17.000000000 -0400
+@@ -0,0 +1,58 @@
++#
++# Makefile for the linux networking.
++#
++# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
++# Rewritten to use lists instead of if-statements.
++#
++
++obj-y := nonet.o
++
++obj-$(CONFIG_NET) := socket.o core/
++
++tmp-$(CONFIG_COMPAT) := compat.o
++obj-$(CONFIG_NET) += $(tmp-y)
++
++# LLC has to be linked before the files in net/802/
++obj-$(CONFIG_LLC) += llc/
++obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
++obj-$(CONFIG_NETFILTER) += netfilter/
++obj-$(CONFIG_INET) += ipv4/
++obj-$(CONFIG_XFRM) += xfrm/
++obj-$(CONFIG_UNIX) += unix/
++ifneq ($(CONFIG_IPV6),)
++obj-y += ipv6/
++endif
++obj-$(CONFIG_PACKET) += packet/
++obj-$(CONFIG_NET_KEY) += key/
++obj-$(CONFIG_NET_SCHED) += sched/
++obj-$(CONFIG_BRIDGE) += bridge/
++obj-$(CONFIG_IPX) += ipx/
++obj-$(CONFIG_ATALK) += appletalk/
++obj-$(CONFIG_WAN_ROUTER) += wanrouter/
++obj-$(CONFIG_X25) += x25/
++obj-$(CONFIG_LAPB) += lapb/
++obj-$(CONFIG_NETROM) += netrom/
++obj-$(CONFIG_ROSE) += rose/
++obj-$(CONFIG_AX25) += ax25/
++obj-$(CONFIG_IRDA) += irda/
++obj-$(CONFIG_BT) += bluetooth/
++obj-$(CONFIG_SUNRPC) += sunrpc/
++obj-$(CONFIG_RXRPC) += rxrpc/
++obj-$(CONFIG_AF_RXRPC) += rxrpc/
++obj-$(CONFIG_ATM) += atm/
++obj-$(CONFIG_DECNET) += decnet/
++obj-$(CONFIG_ECONET) += econet/
++obj-$(CONFIG_VLAN_8021Q) += 8021q/
++obj-$(CONFIG_IP_DCCP) += dccp/
++obj-$(CONFIG_IP_SCTP) += sctp/
++obj-y += wireless/
++obj-$(CONFIG_MAC80211) += mac80211/
++obj-$(CONFIG_IEEE80211) += ieee80211/
++obj-$(CONFIG_TIPC) += tipc/
++obj-$(CONFIG_NETLABEL) += netlabel/
++obj-$(CONFIG_IUCV) += iucv/
++obj-$(CONFIG_RFKILL) += rfkill/
++
++ifeq ($(CONFIG_NET),y)
++obj-$(CONFIG_SYSCTL) += sysctl_net.o
++endif
+diff -Nurb linux-2.6.22-570/net/appletalk/aarp.c linux-2.6.22-591/net/appletalk/aarp.c
+--- linux-2.6.22-570/net/appletalk/aarp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/appletalk/aarp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -330,15 +330,19 @@
+ static int aarp_device_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+ {
++ struct net_device *dev = ptr;
+ int ct;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_DOWN) {
+ write_lock_bh(&aarp_lock);
+
+ for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+- __aarp_expire_device(&resolved[ct], ptr);
+- __aarp_expire_device(&unresolved[ct], ptr);
+- __aarp_expire_device(&proxies[ct], ptr);
++ __aarp_expire_device(&resolved[ct], dev);
++ __aarp_expire_device(&unresolved[ct], dev);
++ __aarp_expire_device(&proxies[ct], dev);
+ }
+
+ write_unlock_bh(&aarp_lock);
+@@ -712,6 +716,9 @@
+ struct atalk_addr sa, *ma, da;
+ struct atalk_iface *ifa;
+
++ if (dev->nd_net != &init_net)
++ goto out0;
++
+ /* We only do Ethernet SNAP AARP. */
+ if (dev->type != ARPHRD_ETHER)
+ goto out0;
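
aarp_device_event() above shows the conversion idiom this patch applies to every subsystem that is not yet namespace-aware: resolve the notifier payload to a net_device and bail out early for devices that live outside the initial namespace. Schematically (handler and names hypothetical, kernel context assumed):

	static int example_device_event(struct notifier_block *this,
					unsigned long event, void *ptr)
	{
		struct net_device *dev = ptr;

		/* Devices in other namespaces are invisible to this subsystem. */
		if (dev->nd_net != &init_net)
			return NOTIFY_DONE;

		/* ... pre-existing per-event handling ... */
		return NOTIFY_DONE;
	}
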
+diff -Nurb linux-2.6.22-570/net/appletalk/atalk_proc.c linux-2.6.22-591/net/appletalk/atalk_proc.c
+--- linux-2.6.22-570/net/appletalk/atalk_proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/appletalk/atalk_proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
+ #include <linux/atalk.h>
++#include <net/net_namespace.h>
+
+
+ static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
+@@ -271,7 +272,7 @@
+ struct proc_dir_entry *p;
+ int rc = -ENOMEM;
+
+- atalk_proc_dir = proc_mkdir("atalk", proc_net);
++ atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net);
+ if (!atalk_proc_dir)
+ goto out;
+ atalk_proc_dir->owner = THIS_MODULE;
+@@ -306,7 +307,7 @@
+ out_route:
+ remove_proc_entry("interface", atalk_proc_dir);
+ out_interface:
+- remove_proc_entry("atalk", proc_net);
++ remove_proc_entry("atalk", init_net.proc_net);
+ goto out;
+ }
+
+@@ -316,5 +317,5 @@
+ remove_proc_entry("route", atalk_proc_dir);
+ remove_proc_entry("socket", atalk_proc_dir);
+ remove_proc_entry("arp", atalk_proc_dir);
+- remove_proc_entry("atalk", proc_net);
++ remove_proc_entry("atalk", init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-570/net/appletalk/ddp.c linux-2.6.22-591/net/appletalk/ddp.c
+--- linux-2.6.22-570/net/appletalk/ddp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/appletalk/ddp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -647,9 +647,14 @@
+ static int ddp_device_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+ {
++ struct net_device *dev = ptr;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_DOWN)
+ /* Discard any use of this */
+- atalk_dev_down(ptr);
++ atalk_dev_down(dev);
+
+ return NOTIFY_DONE;
+ }
+@@ -672,7 +677,7 @@
+ if (copy_from_user(&atreq, arg, sizeof(atreq)))
+ return -EFAULT;
+
+- dev = __dev_get_by_name(atreq.ifr_name);
++ dev = __dev_get_by_name(&init_net, atreq.ifr_name);
+ if (!dev)
+ return -ENODEV;
+
+@@ -896,7 +901,7 @@
+ if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
+ return -EFAULT;
+ name[IFNAMSIZ-1] = '\0';
+- dev = __dev_get_by_name(name);
++ dev = __dev_get_by_name(&init_net, name);
+ if (!dev)
+ return -ENODEV;
+ }
+@@ -1024,11 +1029,14 @@
+ * Create a socket. Initialise the socket, blank the addresses
+ * set the state.
+ */
+-static int atalk_create(struct socket *sock, int protocol)
++static int atalk_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ int rc = -ESOCKTNOSUPPORT;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ /*
+ * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do
+ * and gives you the full ELAP frame. Should be handy for CAP 8)
+@@ -1036,7 +1044,7 @@
+ if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+ goto out;
+ rc = -ENOMEM;
+- sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1);
++ sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1);
+ if (!sk)
+ goto out;
+ rc = 0;
+@@ -1265,7 +1273,7 @@
+
+ static int handle_ip_over_ddp(struct sk_buff *skb)
+ {
+- struct net_device *dev = __dev_get_by_name("ipddp0");
++ struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0");
+ struct net_device_stats *stats;
+
+ /* This needs to be able to handle ipddp"N" devices */
+@@ -1398,6 +1406,9 @@
+ int origlen;
+ __u16 len_hops;
+
++ if (dev->nd_net != &init_net)
++ goto freeit;
++
+ /* Don't mangle buffer if shared */
+ if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
+ goto out;
+@@ -1483,6 +1494,9 @@
+ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+ {
++ if (dev->nd_net != &init_net)
++ goto freeit;
++
+ /* Expand any short form frames */
+ if (skb_mac_header(skb)[2] == 1) {
+ struct ddpehdr *ddp;
+diff -Nurb linux-2.6.22-570/net/atm/clip.c linux-2.6.22-591/net/atm/clip.c
+--- linux-2.6.22-570/net/atm/clip.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/clip.c 2007-12-21 15:36:15.000000000 -0500
+@@ -293,7 +293,7 @@
+ struct neigh_parms *parms;
+
+ DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry);
+- neigh->type = inet_addr_type(entry->ip);
++ neigh->type = inet_addr_type(&init_net, entry->ip);
+ if (neigh->type != RTN_UNICAST)
+ return -EINVAL;
+
+@@ -525,7 +525,10 @@
+ struct atmarp_entry *entry;
+ int error;
+ struct clip_vcc *clip_vcc;
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} };
++ struct flowi fl = {
++ .fl_net = &init_net,
++ .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}}
++ };
+ struct rtable *rt;
+
+ if (vcc->push != clip_push) {
+@@ -620,6 +623,9 @@
+ {
+ struct net_device *dev = arg;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_UNREGISTER) {
+ neigh_ifdown(&clip_tbl, dev);
+ return NOTIFY_DONE;
+@@ -954,6 +960,7 @@
+
+ seq = file->private_data;
+ seq->private = state;
++ state->ns.net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+
+@@ -962,11 +969,19 @@
+ goto out;
+ }
+
++static int arp_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct clip_seq_state *state = seq->private;
++ put_net(state->ns.net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations arp_seq_fops = {
+ .open = arp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = arp_seq_release,
+ .owner = THIS_MODULE
+ };
+ #endif
+diff -Nurb linux-2.6.22-570/net/atm/common.c linux-2.6.22-591/net/atm/common.c
+--- linux-2.6.22-570/net/atm/common.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/common.c 2007-12-21 15:36:15.000000000 -0500
+@@ -132,7 +132,7 @@
+ .obj_size = sizeof(struct atm_vcc),
+ };
+
+-int vcc_create(struct socket *sock, int protocol, int family)
++int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
+ {
+ struct sock *sk;
+ struct atm_vcc *vcc;
+@@ -140,7 +140,7 @@
+ sock->sk = NULL;
+ if (sock->type == SOCK_STREAM)
+ return -EINVAL;
+- sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1);
++ sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+ sock_init_data(sock, sk);
+diff -Nurb linux-2.6.22-570/net/atm/common.h linux-2.6.22-591/net/atm/common.h
+--- linux-2.6.22-570/net/atm/common.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/common.h 2007-12-21 15:36:15.000000000 -0500
+@@ -10,7 +10,7 @@
+ #include <linux/poll.h> /* for poll_table */
+
+
+-int vcc_create(struct socket *sock, int protocol, int family);
++int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
+ int vcc_release(struct socket *sock);
+ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
+ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+diff -Nurb linux-2.6.22-570/net/atm/mpc.c linux-2.6.22-591/net/atm/mpc.c
+--- linux-2.6.22-570/net/atm/mpc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/mpc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -244,7 +244,7 @@
+ char name[IFNAMSIZ];
+
+ sprintf(name, "lec%d", itf);
+- dev = dev_get_by_name(name);
++ dev = dev_get_by_name(&init_net, name);
+
+ return dev;
+ }
+@@ -956,6 +956,10 @@
+ struct lec_priv *priv;
+
+ dev = (struct net_device *)dev_ptr;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+ return NOTIFY_DONE; /* we are only interested in lec:s */
+
+diff -Nurb linux-2.6.22-570/net/atm/proc.c linux-2.6.22-591/net/atm/proc.c
+--- linux-2.6.22-570/net/atm/proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/atmclip.h>
+ #include <linux/init.h> /* for __init */
++#include <net/net_namespace.h>
+ #include <net/atmclip.h>
+ #include <asm/uaccess.h>
+ #include <asm/atomic.h>
+@@ -475,7 +476,7 @@
+ if (e->dirent)
+ remove_proc_entry(e->name, atm_proc_root);
+ }
+- remove_proc_entry("net/atm", NULL);
++ remove_proc_entry("atm", init_net.proc_net);
+ }
+
+ int __init atm_proc_init(void)
+@@ -483,7 +484,7 @@
+ static struct atm_proc_entry *e;
+ int ret;
+
+- atm_proc_root = proc_mkdir("net/atm",NULL);
++ atm_proc_root = proc_mkdir("atm", init_net.proc_net);
+ if (!atm_proc_root)
+ goto err_out;
+ for (e = atm_proc_ents; e->name; e++) {
+diff -Nurb linux-2.6.22-570/net/atm/pvc.c linux-2.6.22-591/net/atm/pvc.c
+--- linux-2.6.22-570/net/atm/pvc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/pvc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -124,10 +124,13 @@
+ };
+
+
+-static int pvc_create(struct socket *sock,int protocol)
++static int pvc_create(struct net *net, struct socket *sock,int protocol)
+ {
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ sock->ops = &pvc_proto_ops;
+- return vcc_create(sock, protocol, PF_ATMPVC);
++ return vcc_create(net, sock, protocol, PF_ATMPVC);
+ }
+
+
+diff -Nurb linux-2.6.22-570/net/atm/svc.c linux-2.6.22-591/net/atm/svc.c
+--- linux-2.6.22-570/net/atm/svc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/atm/svc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -33,7 +33,7 @@
+ #endif
+
+
+-static int svc_create(struct socket *sock,int protocol);
++static int svc_create(struct net *net, struct socket *sock,int protocol);
+
+
+ /*
+@@ -335,7 +335,7 @@
+
+ lock_sock(sk);
+
+- error = svc_create(newsock,0);
++ error = svc_create(sk->sk_net, newsock,0);
+ if (error)
+ goto out;
+
+@@ -636,12 +636,15 @@
+ };
+
+
+-static int svc_create(struct socket *sock,int protocol)
++static int svc_create(struct net *net, struct socket *sock,int protocol)
+ {
+ int error;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ sock->ops = &svc_proto_ops;
+- error = vcc_create(sock, protocol, AF_ATMSVC);
++ error = vcc_create(net, sock, protocol, AF_ATMSVC);
+ if (error) return error;
+ ATM_SD(sock)->local.sas_family = AF_ATMSVC;
+ ATM_SD(sock)->remote.sas_family = AF_ATMSVC;
+diff -Nurb linux-2.6.22-570/net/ax25/af_ax25.c linux-2.6.22-591/net/ax25/af_ax25.c
+--- linux-2.6.22-570/net/ax25/af_ax25.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ax25/af_ax25.c 2007-12-21 15:36:15.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <net/tcp_states.h>
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+
+
+@@ -103,6 +104,9 @@
+ {
+ struct net_device *dev = (struct net_device *)ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* Reject non AX.25 devices */
+ if (dev->type != ARPHRD_AX25)
+ return NOTIFY_DONE;
+@@ -627,7 +631,7 @@
+ break;
+ }
+
+- dev = dev_get_by_name(devname);
++ dev = dev_get_by_name(&init_net, devname);
+ if (dev == NULL) {
+ res = -ENODEV;
+ break;
+@@ -779,11 +783,14 @@
+ .obj_size = sizeof(struct sock),
+ };
+
+-static int ax25_create(struct socket *sock, int protocol)
++static int ax25_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ ax25_cb *ax25;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ switch (sock->type) {
+ case SOCK_DGRAM:
+ if (protocol == 0 || protocol == PF_AX25)
+@@ -829,7 +836,7 @@
+ return -ESOCKTNOSUPPORT;
+ }
+
+- if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL)
++ if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL)
+ return -ENOMEM;
+
+ ax25 = sk->sk_protinfo = ax25_create_cb();
+@@ -854,7 +861,7 @@
+ struct sock *sk;
+ ax25_cb *ax25, *oax25;
+
+- if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
++ if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+ return NULL;
+
+ if ((ax25 = ax25_create_cb()) == NULL) {
+@@ -1998,9 +2005,9 @@
+ register_netdevice_notifier(&ax25_dev_notifier);
+ ax25_register_sysctl();
+
+- proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops);
+- proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops);
+- proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops);
++ proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops);
++ proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops);
++ proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops);
+ out:
+ return rc;
+ }
+@@ -2014,9 +2021,9 @@
+
+ static void __exit ax25_exit(void)
+ {
+- proc_net_remove("ax25_route");
+- proc_net_remove("ax25");
+- proc_net_remove("ax25_calls");
++ proc_net_remove(&init_net, "ax25_route");
++ proc_net_remove(&init_net, "ax25");
++ proc_net_remove(&init_net, "ax25_calls");
+ ax25_rt_free();
+ ax25_uid_free();
+ ax25_dev_free();
+diff -Nurb linux-2.6.22-570/net/ax25/ax25_in.c linux-2.6.22-591/net/ax25/ax25_in.c
+--- linux-2.6.22-570/net/ax25/ax25_in.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ax25/ax25_in.c 2007-12-21 15:36:15.000000000 -0500
+@@ -451,6 +451,11 @@
+ skb->sk = NULL; /* Initially we don't know who it's for */
+ skb->destructor = NULL; /* Who initializes this, dammit?! */
+
++ if (dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ if ((*skb->data & 0x0F) != 0) {
+ kfree_skb(skb); /* Not a KISS data frame */
+ return 0;
+diff -Nurb linux-2.6.22-570/net/bluetooth/af_bluetooth.c linux-2.6.22-591/net/bluetooth/af_bluetooth.c
+--- linux-2.6.22-570/net/bluetooth/af_bluetooth.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/af_bluetooth.c 2007-12-21 15:36:15.000000000 -0500
+@@ -95,10 +95,13 @@
+ }
+ EXPORT_SYMBOL(bt_sock_unregister);
+
+-static int bt_sock_create(struct socket *sock, int proto)
++static int bt_sock_create(struct net *net, struct socket *sock, int proto)
+ {
+ int err;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (proto < 0 || proto >= BT_MAX_PROTO)
+ return -EINVAL;
+
+@@ -113,7 +116,7 @@
+ read_lock(&bt_proto_lock);
+
+ if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
+- err = bt_proto[proto]->create(sock, proto);
++ err = bt_proto[proto]->create(net, sock, proto);
+ module_put(bt_proto[proto]->owner);
+ }
+
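
bt_sock_create() is one instance of the other recurring change in this patch: each address family's create hook gains a struct net argument, refuses namespaces other than init_net with -EAFNOSUPPORT, and threads the namespace through to sk_alloc(). In outline (PF_EXAMPLE and example_proto are placeholders, kernel context assumed):

	static int example_create(struct net *net, struct socket *sock, int protocol)
	{
		struct sock *sk;

		if (net != &init_net)	/* family not converted yet */
			return -EAFNOSUPPORT;

		sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, 1);
		if (!sk)
			return -ENOMEM;
		sock_init_data(sock, sk);
		return 0;
	}
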
+diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/core.c linux-2.6.22-591/net/bluetooth/bnep/core.c
+--- linux-2.6.22-570/net/bluetooth/bnep/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/bnep/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/signal.h>
+ #include <linux/init.h>
+ #include <linux/wait.h>
++#include <linux/freezer.h>
+ #include <linux/errno.h>
+ #include <linux/net.h>
+ #include <net/sock.h>
+@@ -474,7 +475,6 @@
+
+ daemonize("kbnepd %s", dev->name);
+ set_user_nice(current, -15);
+- current->flags |= PF_NOFREEZE;
+
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(sk->sk_sleep, &wait);
+diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/sock.c linux-2.6.22-591/net/bluetooth/bnep/sock.c
+--- linux-2.6.22-570/net/bluetooth/bnep/sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/bnep/sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -204,7 +204,7 @@
+ .obj_size = sizeof(struct bt_sock)
+ };
+
+-static int bnep_sock_create(struct socket *sock, int protocol)
++static int bnep_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -213,7 +213,7 @@
+ if (sock->type != SOCK_RAW)
+ return -ESOCKTNOSUPPORT;
+
+- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/core.c linux-2.6.22-591/net/bluetooth/cmtp/core.c
+--- linux-2.6.22-570/net/bluetooth/cmtp/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/cmtp/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/fcntl.h>
++#include <linux/freezer.h>
+ #include <linux/skbuff.h>
+ #include <linux/socket.h>
+ #include <linux/ioctl.h>
+@@ -287,7 +288,6 @@
+
+ daemonize("kcmtpd_ctr_%d", session->num);
+ set_user_nice(current, -15);
+- current->flags |= PF_NOFREEZE;
+
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(sk->sk_sleep, &wait);
+diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/sock.c linux-2.6.22-591/net/bluetooth/cmtp/sock.c
+--- linux-2.6.22-570/net/bluetooth/cmtp/sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/cmtp/sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -195,7 +195,7 @@
+ .obj_size = sizeof(struct bt_sock)
+ };
+
+-static int cmtp_sock_create(struct socket *sock, int protocol)
++static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -204,7 +204,7 @@
+ if (sock->type != SOCK_RAW)
+ return -ESOCKTNOSUPPORT;
+
+- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/hci_sock.c linux-2.6.22-591/net/bluetooth/hci_sock.c
+--- linux-2.6.22-570/net/bluetooth/hci_sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/hci_sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -618,7 +618,7 @@
+ .obj_size = sizeof(struct hci_pinfo)
+ };
+
+-static int hci_sock_create(struct socket *sock, int protocol)
++static int hci_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -629,7 +629,7 @@
+
+ sock->ops = &hci_sock_ops;
+
+- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/core.c linux-2.6.22-591/net/bluetooth/hidp/core.c
+--- linux-2.6.22-570/net/bluetooth/hidp/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/hidp/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
++#include <linux/freezer.h>
+ #include <linux/fcntl.h>
+ #include <linux/skbuff.h>
+ #include <linux/socket.h>
+@@ -547,7 +548,6 @@
+
+ daemonize("khidpd_%04x%04x", vendor, product);
+ set_user_nice(current, -15);
+- current->flags |= PF_NOFREEZE;
+
+ init_waitqueue_entry(&ctrl_wait, current);
+ init_waitqueue_entry(&intr_wait, current);
+diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/sock.c linux-2.6.22-591/net/bluetooth/hidp/sock.c
+--- linux-2.6.22-570/net/bluetooth/hidp/sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/hidp/sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -246,7 +246,7 @@
+ .obj_size = sizeof(struct bt_sock)
+ };
+
+-static int hidp_sock_create(struct socket *sock, int protocol)
++static int hidp_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -255,7 +255,7 @@
+ if (sock->type != SOCK_RAW)
+ return -ESOCKTNOSUPPORT;
+
+- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/l2cap.c linux-2.6.22-591/net/bluetooth/l2cap.c
+--- linux-2.6.22-570/net/bluetooth/l2cap.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/l2cap.c 2007-12-21 15:36:15.000000000 -0500
+@@ -518,11 +518,11 @@
+ .obj_size = sizeof(struct l2cap_pinfo)
+ };
+
+-static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ struct sock *sk;
+
+- sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1);
+ if (!sk)
+ return NULL;
+
+@@ -543,7 +543,7 @@
+ return sk;
+ }
+
+-static int l2cap_sock_create(struct socket *sock, int protocol)
++static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -560,7 +560,7 @@
+
+ sock->ops = &l2cap_sock_ops;
+
+- sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC);
++ sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ if (!sk)
+ return -ENOMEM;
+
+@@ -1425,7 +1425,7 @@
+ goto response;
+ }
+
+- sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC);
++ sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC);
+ if (!sk)
+ goto response;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/core.c linux-2.6.22-591/net/bluetooth/rfcomm/core.c
+--- linux-2.6.22-570/net/bluetooth/rfcomm/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/rfcomm/core.c 2007-12-21 15:36:12.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/sched.h>
+ #include <linux/signal.h>
+ #include <linux/init.h>
++#include <linux/freezer.h>
+ #include <linux/wait.h>
+ #include <linux/device.h>
+ #include <linux/net.h>
+@@ -1940,7 +1941,6 @@
+
+ daemonize("krfcommd");
+ set_user_nice(current, -10);
+- current->flags |= PF_NOFREEZE;
+
+ BT_DBG("");
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/sock.c linux-2.6.22-591/net/bluetooth/rfcomm/sock.c
+--- linux-2.6.22-570/net/bluetooth/rfcomm/sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/rfcomm/sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -282,12 +282,12 @@
+ .obj_size = sizeof(struct rfcomm_pinfo)
+ };
+
+-static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ struct rfcomm_dlc *d;
+ struct sock *sk;
+
+- sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1);
+ if (!sk)
+ return NULL;
+
+@@ -323,7 +323,7 @@
+ return sk;
+ }
+
+-static int rfcomm_sock_create(struct socket *sock, int protocol)
++static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -336,7 +336,7 @@
+
+ sock->ops = &rfcomm_sock_ops;
+
+- sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC);
++ sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ if (!sk)
+ return -ENOMEM;
+
+@@ -868,7 +868,7 @@
+ goto done;
+ }
+
+- sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
++ sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+ if (!sk)
+ goto done;
+
+diff -Nurb linux-2.6.22-570/net/bluetooth/sco.c linux-2.6.22-591/net/bluetooth/sco.c
+--- linux-2.6.22-570/net/bluetooth/sco.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bluetooth/sco.c 2007-12-21 15:36:15.000000000 -0500
+@@ -414,11 +414,11 @@
+ .obj_size = sizeof(struct sco_pinfo)
+ };
+
+-static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ struct sock *sk;
+
+- sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1);
++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1);
+ if (!sk)
+ return NULL;
+
+@@ -439,7 +439,7 @@
+ return sk;
+ }
+
+-static int sco_sock_create(struct socket *sock, int protocol)
++static int sco_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
+@@ -452,7 +452,7 @@
+
+ sock->ops = &sco_sock_ops;
+
+- sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC);
++ sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ if (!sk)
+ return -ENOMEM;
+
+@@ -807,7 +807,7 @@
+
+ bh_lock_sock(parent);
+
+- sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC);
++ sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC);
+ if (!sk) {
+ bh_unlock_sock(parent);
+ goto done;
+diff -Nurb linux-2.6.22-570/net/bridge/br_if.c linux-2.6.22-591/net/bridge/br_if.c
+--- linux-2.6.22-570/net/bridge/br_if.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/bridge/br_if.c 2007-12-21 15:36:15.000000000 -0500
+@@ -45,7 +45,7 @@
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+- err = dev_ethtool(&ifr);
++ err = dev_ethtool(dev->nd_net, &ifr);
+ set_fs(old_fs);
+
+ if (!err) {
+@@ -314,7 +314,7 @@
+ int ret = 0;
+
+ rtnl_lock();
+- dev = __dev_get_by_name(name);
++ dev = __dev_get_by_name(&init_net, name);
+ if (dev == NULL)
+ ret = -ENXIO; /* Could not find device */
+
+@@ -455,7 +455,7 @@
+ struct net_device *dev, *nxt;
+
+ rtnl_lock();
+- for_each_netdev_safe(dev, nxt)
++ for_each_netdev_safe(&init_net, dev, nxt)
+ if (dev->priv_flags & IFF_EBRIDGE)
+ del_br(dev->priv);
+ rtnl_unlock();
+diff -Nurb linux-2.6.22-570/net/bridge/br_ioctl.c linux-2.6.22-591/net/bridge/br_ioctl.c
+--- linux-2.6.22-570/net/bridge/br_ioctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_ioctl.c 2007-12-21 15:36:15.000000000 -0500
+@@ -18,6 +18,7 @@
+ #include <linux/if_bridge.h>
+ #include <linux/netdevice.h>
+ #include <linux/times.h>
++#include <net/net_namespace.h>
+ #include <asm/uaccess.h>
+ #include "br_private.h"
+
+@@ -27,7 +28,7 @@
+ struct net_device *dev;
+ int i = 0;
+
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (i >= num)
+ break;
+ if (dev->priv_flags & IFF_EBRIDGE)
+@@ -90,7 +91,7 @@
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(&init_net, ifindex);
+ if (dev == NULL)
+ return -EINVAL;
+
+@@ -364,7 +365,7 @@
+ return -EOPNOTSUPP;
+ }
+
+-int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
++int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+ {
+ switch (cmd) {
+ case SIOCGIFBR:
+diff -Nurb linux-2.6.22-570/net/bridge/br_netfilter.c linux-2.6.22-591/net/bridge/br_netfilter.c
+--- linux-2.6.22-570/net/bridge/br_netfilter.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/bridge/br_netfilter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -310,6 +310,7 @@
+ if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+ struct rtable *rt;
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = iph->daddr,
+@@ -518,6 +519,10 @@
+ if (unlikely(!pskb_may_pull(skb, len)))
+ goto out;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+ IS_PPPOE_IPV6(skb)) {
+ #ifdef CONFIG_SYSCTL
+@@ -591,6 +596,10 @@
+ {
+ struct sk_buff *skb = *pskb;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
+@@ -635,6 +644,10 @@
+ struct net_device *parent;
+ int pf;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (!skb->nf_bridge)
+ return NF_ACCEPT;
+
+@@ -674,6 +687,10 @@
+ struct sk_buff *skb = *pskb;
+ struct net_device **d = (struct net_device **)(skb->cb);
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ #ifdef CONFIG_SYSCTL
+ if (!brnf_call_arptables)
+ return NF_ACCEPT;
+@@ -718,6 +735,10 @@
+ struct sk_buff *skb = *pskb;
+ struct nf_bridge_info *nf_bridge;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (!skb->nf_bridge)
+ return NF_ACCEPT;
+
+@@ -762,6 +783,10 @@
+ struct net_device *realoutdev = bridge_parent(skb->dev);
+ int pf;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ #ifdef CONFIG_NETFILTER_DEBUG
+ /* Be very paranoid. This probably won't happen anymore, but let's
+ * keep the check just to be sure... */
+@@ -833,6 +858,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if ((*pskb)->nf_bridge &&
+ !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ return NF_STOP;
+diff -Nurb linux-2.6.22-570/net/bridge/br_netlink.c linux-2.6.22-591/net/bridge/br_netlink.c
+--- linux-2.6.22-570/net/bridge/br_netlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_netlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -12,6 +12,8 @@
+
+ #include <linux/kernel.h>
+ #include <net/rtnetlink.h>
++#include <net/net_namespace.h>
++#include <net/sock.h>
+ #include "br_private.h"
+
+ static inline size_t br_nlmsg_size(void)
+@@ -95,10 +97,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_LINK, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
+ }
+
+ /*
+@@ -106,11 +108,15 @@
+ */
+ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct net_device *dev;
+ int idx;
+
++ if (net != &init_net)
++ return 0;
++
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ /* not a bridge port */
+ if (dev->br_port == NULL || idx < cb->args[0])
+ goto skip;
+@@ -134,12 +140,16 @@
+ */
+ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifinfomsg *ifm;
+ struct nlattr *protinfo;
+ struct net_device *dev;
+ struct net_bridge_port *p;
+ u8 new_state;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ if (nlmsg_len(nlh) < sizeof(*ifm))
+ return -EINVAL;
+
+@@ -155,7 +165,7 @@
+ if (new_state > BR_STATE_BLOCKING)
+ return -EINVAL;
+
+- dev = __dev_get_by_index(ifm->ifi_index);
++ dev = __dev_get_by_index(&init_net, ifm->ifi_index);
+ if (!dev)
+ return -ENODEV;
+
+diff -Nurb linux-2.6.22-570/net/bridge/br_notify.c linux-2.6.22-591/net/bridge/br_notify.c
+--- linux-2.6.22-570/net/bridge/br_notify.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_notify.c 2007-12-21 15:36:15.000000000 -0500
+@@ -15,6 +15,7 @@
+
+ #include <linux/kernel.h>
+ #include <linux/rtnetlink.h>
++#include <net/net_namespace.h>
+
+ #include "br_private.h"
+
+@@ -36,6 +37,9 @@
+ struct net_bridge_port *p = dev->br_port;
+ struct net_bridge *br;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* not a port of a bridge */
+ if (p == NULL)
+ return NOTIFY_DONE;
+diff -Nurb linux-2.6.22-570/net/bridge/br_private.h linux-2.6.22-591/net/bridge/br_private.h
+--- linux-2.6.22-570/net/bridge/br_private.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_private.h 2007-12-21 15:36:15.000000000 -0500
+@@ -196,7 +196,7 @@
+
+ /* br_ioctl.c */
+ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+-extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg);
++extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg);
+
+ /* br_netfilter.c */
+ #ifdef CONFIG_BRIDGE_NETFILTER
+diff -Nurb linux-2.6.22-570/net/bridge/br_stp_bpdu.c linux-2.6.22-591/net/bridge/br_stp_bpdu.c
+--- linux-2.6.22-570/net/bridge/br_stp_bpdu.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_stp_bpdu.c 2007-12-21 15:36:15.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/netfilter_bridge.h>
+ #include <linux/etherdevice.h>
+ #include <linux/llc.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_pdu.h>
+ #include <asm/unaligned.h>
+@@ -141,6 +142,9 @@
+ struct net_bridge *br;
+ const unsigned char *buf;
+
++ if (dev->nd_net != &init_net)
++ goto err;
++
+ if (!p)
+ goto err;
+
+diff -Nurb linux-2.6.22-570/net/bridge/br_stp_if.c linux-2.6.22-591/net/bridge/br_stp_if.c
+--- linux-2.6.22-570/net/bridge/br_stp_if.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_stp_if.c 2007-12-21 15:36:12.000000000 -0500
+@@ -125,7 +125,7 @@
+ char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ char *envp[] = { NULL };
+
+- r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (r == 0) {
+ br->stp_enabled = BR_USER_STP;
+ printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_br.c linux-2.6.22-591/net/bridge/br_sysfs_br.c
+--- linux-2.6.22-570/net/bridge/br_sysfs_br.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_sysfs_br.c 2007-12-21 15:36:12.000000000 -0500
+@@ -360,8 +360,9 @@
+ *
+ * Returns the number of bytes read.
+ */
+-static ssize_t brforward_read(struct kobject *kobj, char *buf,
+- loff_t off, size_t count)
++static ssize_t brforward_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
++ char *buf, loff_t off, size_t count)
+ {
+ struct device *dev = to_dev(kobj);
+ struct net_bridge *br = to_bridge(dev);
+@@ -383,8 +384,7 @@
+
+ static struct bin_attribute bridge_forward = {
+ .attr = { .name = SYSFS_BRIDGE_FDB,
+- .mode = S_IRUGO,
+- .owner = THIS_MODULE, },
++ .mode = S_IRUGO, },
+ .read = brforward_read,
+ };
+
+diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_if.c linux-2.6.22-591/net/bridge/br_sysfs_if.c
+--- linux-2.6.22-570/net/bridge/br_sysfs_if.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/br_sysfs_if.c 2007-12-21 15:36:12.000000000 -0500
+@@ -29,8 +29,7 @@
+ #define BRPORT_ATTR(_name,_mode,_show,_store) \
+ struct brport_attribute brport_attr_##_name = { \
+ .attr = {.name = __stringify(_name), \
+- .mode = _mode, \
+- .owner = THIS_MODULE, }, \
++ .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ };
+diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c
+--- linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:36:15.000000000 -0500
+@@ -301,8 +301,9 @@
+ spin_lock_init(&ulog_buffers[i].lock);
+ }
+
+- ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
+- NULL, NULL, THIS_MODULE);
++ ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
++ EBT_ULOG_MAXNLGROUPS, NULL, NULL,
++ THIS_MODULE);
+ if (!ebtulognl)
+ ret = -ENOMEM;
+ else if ((ret = ebt_register_watcher(&ulog)))
+diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c
+--- linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -64,6 +64,10 @@
+ ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+ const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ebt_do_table(hook, pskb, in, out, &frame_filter);
+ }
+
+diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c
+--- linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c 2007-12-21 15:36:15.000000000 -0500
+@@ -64,6 +64,10 @@
+ ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ }
+
+@@ -71,6 +75,10 @@
+ ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ }
+
+diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtables.c linux-2.6.22-591/net/bridge/netfilter/ebtables.c
+--- linux-2.6.22-570/net/bridge/netfilter/ebtables.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/bridge/netfilter/ebtables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/smp.h>
+ #include <linux/cpumask.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ /* needed for logical [in,out]-dev filtering */
+ #include "../br_private.h"
+
+@@ -1438,6 +1439,9 @@
+ {
+ int ret;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ switch(cmd) {
+ case EBT_SO_SET_ENTRIES:
+ ret = do_replace(user, len);
+@@ -1457,6 +1461,9 @@
+ struct ebt_replace tmp;
+ struct ebt_table *t;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (copy_from_user(&tmp, user, sizeof(tmp)))
+ return -EFAULT;
+
+diff -Nurb linux-2.6.22-570/net/core/Makefile linux-2.6.22-591/net/core/Makefile
+--- linux-2.6.22-570/net/core/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/Makefile 2007-12-21 15:36:15.000000000 -0500
+@@ -3,7 +3,7 @@
+ #
+
+ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+- gen_stats.o gen_estimator.o
++ gen_stats.o gen_estimator.o net_namespace.o
+
+ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+diff -Nurb linux-2.6.22-570/net/core/dev.c linux-2.6.22-591/net/core/dev.c
+--- linux-2.6.22-570/net/core/dev.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/core/dev.c 2007-12-21 15:36:15.000000000 -0500
+@@ -116,6 +116,7 @@
+ #include <linux/dmaengine.h>
+ #include <linux/err.h>
+ #include <linux/ctype.h>
++#include <net/net_namespace.h>
+ #include <linux/if_arp.h>
+ #include <linux/vs_inet.h>
+
+@@ -152,9 +153,22 @@
+ static struct list_head ptype_all __read_mostly; /* Taps */
+
+ #ifdef CONFIG_NET_DMA
+-static struct dma_client *net_dma_client;
+-static unsigned int net_dma_count;
+-static spinlock_t net_dma_event_lock;
++struct net_dma {
++ struct dma_client client;
++ spinlock_t lock;
++ cpumask_t channel_mask;
++ struct dma_chan *channels[NR_CPUS];
++};
++
++static enum dma_state_client
++netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
++ enum dma_state state);
++
++static struct net_dma net_dma = {
++ .client = {
++ .event_callback = netdev_dma_event,
++ },
++};
+ #endif
+
+ /*
+@@ -176,25 +190,50 @@
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+-LIST_HEAD(dev_base_head);
+ DEFINE_RWLOCK(dev_base_lock);
+
+-EXPORT_SYMBOL(dev_base_head);
+ EXPORT_SYMBOL(dev_base_lock);
+
+ #define NETDEV_HASHBITS 8
+-static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+-static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
++#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
+
+-static inline struct hlist_head *dev_name_hash(const char *name)
++static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
+ {
+ unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+- return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
++ return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
++}
++
++static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
++{
++ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
++}
++
++/* Device list insertion */
++static int list_netdevice(struct net_device *dev)
++{
++ struct net *net = dev->nd_net;
++
++ ASSERT_RTNL();
++
++ write_lock_bh(&dev_base_lock);
++ list_add_tail(&dev->dev_list, &net->dev_base_head);
++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
++ write_unlock_bh(&dev_base_lock);
++ return 0;
+ }
+
+-static inline struct hlist_head *dev_index_hash(int ifindex)
++/* Device list removal */
++static void unlist_netdevice(struct net_device *dev)
+ {
+- return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
++ ASSERT_RTNL();
++
++ /* Unlink dev from the device chain */
++ write_lock_bh(&dev_base_lock);
++ list_del(&dev->dev_list);
++ hlist_del(&dev->name_hlist);
++ hlist_del(&dev->index_hlist);
++ write_unlock_bh(&dev_base_lock);
+ }
+
+ /*
+@@ -477,7 +516,7 @@
+ * If device already registered then return base of 1
+ * to indicate not to probe for this interface
+ */
+- if (__dev_get_by_name(name))
++ if (__dev_get_by_name(&init_net, name))
+ return 1;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+@@ -532,11 +571,11 @@
+ * careful with locks.
+ */
+
+-struct net_device *__dev_get_by_name(const char *name)
++struct net_device *__dev_get_by_name(struct net *net, const char *name)
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_name_hash(name)) {
++ hlist_for_each(p, dev_name_hash(net, name)) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(dev->name, name, IFNAMSIZ))
+@@ -556,12 +595,12 @@
+ * matching device is found.
+ */
+
+-struct net_device *dev_get_by_name(const char *name)
++struct net_device *dev_get_by_name(struct net *net, const char *name)
+ {
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- dev = __dev_get_by_name(name);
++ dev = __dev_get_by_name(net, name);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+@@ -579,11 +618,11 @@
+ * or @dev_base_lock.
+ */
+
+-struct net_device *__dev_get_by_index(int ifindex)
++struct net_device *__dev_get_by_index(struct net *net, int ifindex)
+ {
+ struct hlist_node *p;
+
+- hlist_for_each(p, dev_index_hash(ifindex)) {
++ hlist_for_each(p, dev_index_hash(net, ifindex)) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, index_hlist);
+ if (dev->ifindex == ifindex)
+@@ -603,12 +642,12 @@
+ * dev_put to indicate they have finished with it.
+ */
+
+-struct net_device *dev_get_by_index(int ifindex)
++struct net_device *dev_get_by_index(struct net *net, int ifindex)
+ {
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- dev = __dev_get_by_index(ifindex);
++ dev = __dev_get_by_index(net, ifindex);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+@@ -629,13 +668,13 @@
+ * If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+ {
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+
+- for_each_netdev(dev)
++ for_each_netdev(&init_net, dev)
+ if (dev->type == type &&
+ !memcmp(dev->dev_addr, ha, dev->addr_len))
+ return dev;
+@@ -645,12 +684,12 @@
+
+ EXPORT_SYMBOL(dev_getbyhwaddr);
+
+-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
++struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
+ {
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+- for_each_netdev(dev)
++ for_each_netdev(net, dev)
+ if (dev->type == type)
+ return dev;
+
+@@ -659,12 +698,12 @@
+
+ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
+-struct net_device *dev_getfirstbyhwtype(unsigned short type)
++struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
+ {
+ struct net_device *dev;
+
+ rtnl_lock();
+- dev = __dev_getfirstbyhwtype(type);
++ dev = __dev_getfirstbyhwtype(net, type);
+ if (dev)
+ dev_hold(dev);
+ rtnl_unlock();
+@@ -684,13 +723,13 @@
+ * dev_put to indicate they have finished with it.
+ */
+
+-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
++struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
+ {
+ struct net_device *dev, *ret;
+
+ ret = NULL;
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if (((dev->flags ^ if_flags) & mask) == 0) {
+ dev_hold(dev);
+ ret = dev;
+@@ -727,9 +766,10 @@
+ }
+
+ /**
+- * dev_alloc_name - allocate a name for a device
+- * @dev: device
++ * __dev_alloc_name - allocate a name for a device
++ * @net: network namespace to allocate the device name in
+ * @name: name format string
++ * @buf: scratch buffer and result name string
+ *
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. It scans list of devices to build up a free map, then chooses
+@@ -740,10 +780,9 @@
+ * Returns the number of the unit assigned or a negative errno code.
+ */
+
+-int dev_alloc_name(struct net_device *dev, const char *name)
++static int __dev_alloc_name(struct net *net, const char *name, char *buf)
+ {
+ int i = 0;
+- char buf[IFNAMSIZ];
+ const char *p;
+ const int max_netdevices = 8*PAGE_SIZE;
+ long *inuse;
+@@ -764,14 +803,14 @@
+ if (!inuse)
+ return -ENOMEM;
+
+- for_each_netdev(d) {
++ for_each_netdev(net, d) {
+ if (!sscanf(d->name, name, &i))
+ continue;
+ if (i < 0 || i >= max_netdevices)
+ continue;
+
+ /* avoid cases where sscanf is not exact inverse of printf */
+- snprintf(buf, sizeof(buf), name, i);
++ snprintf(buf, IFNAMSIZ, name, i);
+ if (!strncmp(buf, d->name, IFNAMSIZ))
+ set_bit(i, inuse);
+ }
+@@ -780,11 +819,9 @@
+ free_page((unsigned long) inuse);
+ }
+
+- snprintf(buf, sizeof(buf), name, i);
+- if (!__dev_get_by_name(buf)) {
+- strlcpy(dev->name, buf, IFNAMSIZ);
++ snprintf(buf, IFNAMSIZ, name, i);
++ if (!__dev_get_by_name(net, buf))
+ return i;
+- }
+
+ /* It is possible to run out of possible slots
+ * when the name is long and there isn't enough space left
+@@ -793,6 +830,33 @@
+ return -ENFILE;
+ }
+
++/**
++ * dev_alloc_name - allocate a name for a device
++ * @dev: device
++ * @name: name format string
++ *
++ * Passed a format string - eg "lt%d" it will try and find a suitable
++ * id. It scans list of devices to build up a free map, then chooses
++ * the first empty slot. The caller must hold the dev_base or rtnl lock
++ * while allocating the name and adding the device in order to avoid
++ * duplicates.
++ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
++ * Returns the number of the unit assigned or a negative errno code.
++ */
++
++int dev_alloc_name(struct net_device *dev, const char *name)
++{
++ char buf[IFNAMSIZ];
++ struct net *net;
++ int ret;
++
++ BUG_ON(!dev->nd_net);
++ net = dev->nd_net;
++ ret = __dev_alloc_name(net, name, buf);
++ if (ret >= 0)
++ strlcpy(dev->name, buf, IFNAMSIZ);
++ return ret;
++}
+
+ /**
+ * dev_change_name - change name of a device
+@@ -805,9 +870,12 @@
+ int dev_change_name(struct net_device *dev, char *newname)
+ {
+ int err = 0;
++ struct net *net;
+
+ ASSERT_RTNL();
++ BUG_ON(!dev->nd_net);
+
++ net = dev->nd_net;
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+
+@@ -820,14 +888,18 @@
+ return err;
+ strcpy(newname, dev->name);
+ }
+- else if (__dev_get_by_name(newname))
++ else if (__dev_get_by_name(net, newname))
+ return -EEXIST;
+- else
++ else {
++ if (strncmp(newname, dev->name, IFNAMSIZ))
++ printk(KERN_INFO "%s renamed to %s\n",
++ dev->name, newname);
+ strlcpy(dev->name, newname, IFNAMSIZ);
++ }
+
+ device_rename(&dev->dev, dev->name);
+ hlist_del(&dev->name_hlist);
+- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+
+ return err;
+@@ -871,12 +943,12 @@
+ * available in this kernel then it becomes a nop.
+ */
+
+-void dev_load(const char *name)
++void dev_load(struct net *net, const char *name)
+ {
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- dev = __dev_get_by_name(name);
++ dev = __dev_get_by_name(net, name);
+ read_unlock(&dev_base_lock);
+
+ if (!dev && capable(CAP_SYS_MODULE))
+@@ -1019,6 +1091,8 @@
+ }
+
+
++static int dev_boot_phase = 1;
++
+ /*
+ * Device change register/unregister. These are not inline or static
+ * as we export them to the world.
+@@ -1045,14 +1119,17 @@
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(&netdev_chain, nb);
+- if (!err) {
+- for_each_netdev(dev) {
++ if (!err && !dev_boot_phase) {
++ struct net *net;
++ for_each_net(net) {
++ for_each_netdev(net, dev) {
+ nb->notifier_call(nb, NETDEV_REGISTER, dev);
+
+ if (dev->flags & IFF_UP)
+ nb->notifier_call(nb, NETDEV_UP, dev);
+ }
+ }
++ }
+ rtnl_unlock();
+ return err;
+ }
+@@ -1086,9 +1163,9 @@
+ * are as for raw_notifier_call_chain().
+ */
+
+-int call_netdevice_notifiers(unsigned long val, void *v)
++int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
+ {
+- return raw_notifier_call_chain(&netdev_chain, val, v);
++ return raw_notifier_call_chain(&netdev_chain, val, dev);
+ }
+
+ /* When > 0 there are consumers of rx skb time stamps */
+@@ -1510,9 +1587,11 @@
+ skb_set_transport_header(skb, skb->csum_start -
+ skb_headroom(skb));
+
+- if (!(dev->features & NETIF_F_GEN_CSUM) &&
+- (!(dev->features & NETIF_F_IP_CSUM) ||
+- skb->protocol != htons(ETH_P_IP)))
++	if (!(dev->features & NETIF_F_GEN_CSUM)
++	    && (!(dev->features & NETIF_F_IP_CSUM)
++		|| skb->protocol != htons(ETH_P_IP))
++	    && (!(dev->features & NETIF_F_IPV6_CSUM)
++		|| skb->protocol != htons(ETH_P_IPV6)))
+ if (skb_checksum_help(skb))
+ goto out_kfree_skb;
+ }
+@@ -2016,12 +2095,13 @@
+ * There may not be any more sk_buffs coming right now, so push
+ * any pending DMA copies to hardware
+ */
+- if (net_dma_client) {
+- struct dma_chan *chan;
+- rcu_read_lock();
+- list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
++ if (!cpus_empty(net_dma.channel_mask)) {
++ int chan_idx;
++ for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
++ struct dma_chan *chan = net_dma.channels[chan_idx];
++ if (chan)
+ dma_async_memcpy_issue_pending(chan);
+- rcu_read_unlock();
++ }
+ }
+ #endif
+ return;
+@@ -2063,7 +2143,7 @@
+ * match. --pb
+ */
+
+-static int dev_ifname(struct ifreq __user *arg)
++static int dev_ifname(struct net *net, struct ifreq __user *arg)
+ {
+ struct net_device *dev;
+ struct ifreq ifr;
+@@ -2076,7 +2156,7 @@
+ return -EFAULT;
+
+ read_lock(&dev_base_lock);
+- dev = __dev_get_by_index(ifr.ifr_ifindex);
++ dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+ if (!dev) {
+ read_unlock(&dev_base_lock);
+ return -ENODEV;
+@@ -2096,7 +2176,7 @@
+ * Thus we will need a 'compatibility mode'.
+ */
+
+-static int dev_ifconf(char __user *arg)
++static int dev_ifconf(struct net *net, char __user *arg)
+ {
+ struct ifconf ifc;
+ struct net_device *dev;
+@@ -2120,7 +2200,7 @@
+ */
+
+ total = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if (!nx_dev_visible(current->nx_info, dev))
+ continue;
+ for (i = 0; i < NPROTO; i++) {
+@@ -2156,6 +2236,7 @@
+ */
+ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++ struct net *net = seq->private;
+ loff_t off;
+ struct net_device *dev;
+
+@@ -2164,7 +2245,7 @@
+ return SEQ_START_TOKEN;
+
+ off = 1;
+- for_each_netdev(dev)
++ for_each_netdev(net, dev)
+ if (off++ == *pos)
+ return dev;
+
+@@ -2173,9 +2254,10 @@
+
+ void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++ struct net *net = seq->private;
+ ++*pos;
+ return v == SEQ_START_TOKEN ?
+- first_net_device() : next_net_device((struct net_device *)v);
++ first_net_device(net) : next_net_device((struct net_device *)v);
+ }
+
+ void dev_seq_stop(struct seq_file *seq, void *v)
+@@ -2274,7 +2356,22 @@
+
+ static int dev_seq_open(struct inode *inode, struct file *file)
+ {
+- return seq_open(file, &dev_seq_ops);
++ struct seq_file *seq;
++ int res;
++ res = seq_open(file, &dev_seq_ops);
++ if (!res) {
++ seq = file->private_data;
++ seq->private = get_net(PROC_NET(inode));
++ }
++ return res;
++}
++
++static int dev_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct net *net = seq->private;
++ put_net(net);
++ return seq_release(inode, file);
+ }
+
+ static const struct file_operations dev_seq_fops = {
+@@ -2282,7 +2379,7 @@
+ .open = dev_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = dev_seq_release,
+ };
+
+ static const struct seq_operations softnet_seq_ops = {
+@@ -2434,30 +2531,49 @@
+ };
+
+
+-static int __init dev_proc_init(void)
++static int dev_proc_net_init(struct net *net)
+ {
+ int rc = -ENOMEM;
+
+- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
++ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
+ goto out;
+- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
++ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
+ goto out_dev;
+- if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+- goto out_dev2;
+-
+- if (wext_proc_init())
++ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
+ goto out_softnet;
++
++ if (wext_proc_init(net))
++ goto out_ptype;
+ rc = 0;
+ out:
+ return rc;
++out_ptype:
++ proc_net_remove(net, "ptype");
+ out_softnet:
+- proc_net_remove("ptype");
+-out_dev2:
+- proc_net_remove("softnet_stat");
++ proc_net_remove(net, "softnet_stat");
+ out_dev:
+- proc_net_remove("dev");
++ proc_net_remove(net, "dev");
+ goto out;
+ }
++
++static void dev_proc_net_exit(struct net *net)
++{
++ wext_proc_exit(net);
++
++ proc_net_remove(net, "ptype");
++ proc_net_remove(net, "softnet_stat");
++ proc_net_remove(net, "dev");
++}
++
++static struct pernet_operations dev_proc_ops = {
++ .init = dev_proc_net_init,
++ .exit = dev_proc_net_exit,
++};
++
++static int __init dev_proc_init(void)
++{
++ return register_pernet_subsys(&dev_proc_ops);
++}
+ #else
+ #define dev_proc_init() 0
+ #endif /* CONFIG_PROC_FS */
+@@ -2691,10 +2807,10 @@
+ /*
+ * Perform the SIOCxIFxxx calls.
+ */
+-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
++static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+ {
+ int err;
+- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+
+ if (!dev)
+ return -ENODEV;
+@@ -2847,7 +2963,7 @@
+ * positive or a negative errno code on error.
+ */
+
+-int dev_ioctl(unsigned int cmd, void __user *arg)
++int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ struct ifreq ifr;
+ int ret;
+@@ -2860,12 +2976,12 @@
+
+ if (cmd == SIOCGIFCONF) {
+ rtnl_lock();
+- ret = dev_ifconf((char __user *) arg);
++ ret = dev_ifconf(net, (char __user *) arg);
+ rtnl_unlock();
+ return ret;
+ }
+ if (cmd == SIOCGIFNAME)
+- return dev_ifname((struct ifreq __user *)arg);
++ return dev_ifname(net, (struct ifreq __user *)arg);
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+@@ -2895,9 +3011,9 @@
+ case SIOCGIFMAP:
+ case SIOCGIFINDEX:
+ case SIOCGIFTXQLEN:
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ read_lock(&dev_base_lock);
+- ret = dev_ifsioc(&ifr, cmd);
++ ret = dev_ifsioc(net, &ifr, cmd);
+ read_unlock(&dev_base_lock);
+ if (!ret) {
+ if (colon)
+@@ -2909,9 +3025,9 @@
+ return ret;
+
+ case SIOCETHTOOL:
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ rtnl_lock();
+- ret = dev_ethtool(&ifr);
++ ret = dev_ethtool(net, &ifr);
+ rtnl_unlock();
+ if (!ret) {
+ if (colon)
+@@ -2933,9 +3049,9 @@
+ case SIOCSIFNAME:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ rtnl_lock();
+- ret = dev_ifsioc(&ifr, cmd);
++ ret = dev_ifsioc(net, &ifr, cmd);
+ rtnl_unlock();
+ if (!ret) {
+ if (colon)
+@@ -2974,9 +3090,9 @@
+ /* fall through */
+ case SIOCBONDSLAVEINFOQUERY:
+ case SIOCBONDINFOQUERY:
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ rtnl_lock();
+- ret = dev_ifsioc(&ifr, cmd);
++ ret = dev_ifsioc(net, &ifr, cmd);
+ rtnl_unlock();
+ return ret;
+
+@@ -2996,9 +3112,9 @@
+ if (cmd == SIOCWANDEV ||
+ (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15)) {
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ rtnl_lock();
+- ret = dev_ifsioc(&ifr, cmd);
++ ret = dev_ifsioc(net, &ifr, cmd);
+ rtnl_unlock();
+ if (!ret && copy_to_user(arg, &ifr,
+ sizeof(struct ifreq)))
+@@ -3007,7 +3123,7 @@
+ }
+ /* Take care of Wireless Extensions */
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+- return wext_handle_ioctl(&ifr, cmd, arg);
++ return wext_handle_ioctl(net, &ifr, cmd, arg);
+ return -EINVAL;
+ }
+ }
+@@ -3020,19 +3136,17 @@
+ * number. The caller must hold the rtnl semaphore or the
+ * dev_base_lock to be sure it remains unique.
+ */
+-static int dev_new_index(void)
++static int dev_new_index(struct net *net)
+ {
+ static int ifindex;
+ for (;;) {
+ if (++ifindex <= 0)
+ ifindex = 1;
+- if (!__dev_get_by_index(ifindex))
++ if (!__dev_get_by_index(net, ifindex))
+ return ifindex;
+ }
+ }
+
+-static int dev_boot_phase = 1;
+-
+ /* Delayed registration/unregisteration */
+ static DEFINE_SPINLOCK(net_todo_list_lock);
+ static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+@@ -3066,6 +3180,7 @@
+ struct hlist_head *head;
+ struct hlist_node *p;
+ int ret;
++ struct net *net;
+
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+@@ -3074,6 +3189,8 @@
+
+ /* When net_device's are persistent, this will be fatal. */
+ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
++ BUG_ON(!dev->nd_net);
++ net = dev->nd_net;
+
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->_xmit_lock);
+@@ -3098,12 +3215,12 @@
+ goto out;
+ }
+
+- dev->ifindex = dev_new_index();
++ dev->ifindex = dev_new_index(net);
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
+ /* Check for existence of name */
+- head = dev_name_hash(dev->name);
++ head = dev_name_hash(net, dev->name);
+ hlist_for_each(p, head) {
+ struct net_device *d
+ = hlist_entry(p, struct net_device, name_hlist);
+@@ -3113,6 +3230,21 @@
+ }
+ }
+
++ /* Fix illegal checksum combinations */
++ if ((dev->features & NETIF_F_HW_CSUM) &&
++ (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++ printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
++ dev->name);
++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
++ }
++
++ if ((dev->features & NETIF_F_NO_CSUM) &&
++ (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++ printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
++ dev->name);
++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
++ }
++
+ /* Fix illegal SG+CSUM combinations. */
+ if ((dev->features & NETIF_F_SG) &&
+ !(dev->features & NETIF_F_ALL_CSUM)) {
+@@ -3164,12 +3297,8 @@
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+ dev_init_scheduler(dev);
+- write_lock_bh(&dev_base_lock);
+- list_add_tail(&dev->dev_list, &dev_base_head);
+- hlist_add_head(&dev->name_hlist, head);
+- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+ dev_hold(dev);
+- write_unlock_bh(&dev_base_lock);
++ list_netdevice(dev);
+
+ /* Notify protocols, that a new device appeared. */
+ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+@@ -3379,6 +3508,7 @@
+ dev = (struct net_device *)
+ (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+ dev->padded = (char *)dev - (char *)p;
++ dev->nd_net = &init_net;
+
+ if (sizeof_priv)
+ dev->priv = netdev_priv(dev);
+@@ -3457,11 +3587,7 @@
+ dev_close(dev);
+
+ /* And unlink it from device chain. */
+- write_lock_bh(&dev_base_lock);
+- list_del(&dev->dev_list);
+- hlist_del(&dev->name_hlist);
+- hlist_del(&dev->index_hlist);
+- write_unlock_bh(&dev_base_lock);
++ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+
+@@ -3519,6 +3645,122 @@
+
+ EXPORT_SYMBOL(unregister_netdev);
+
++/**
++ * dev_change_net_namespace - move device to different network namespace
++ * @dev: device
++ * @net: network namespace
++ * @pat: If not NULL name pattern to try if the current device name
++ * is already taken in the destination network namespace.
++ *
++ * This function shuts down a device interface and moves it
++ * to a new network namespace. On success 0 is returned, on
++ * a failure a negative errno code is returned.
++ *
++ * Callers must hold the rtnl semaphore.
++ */
++
++int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
++{
++ char buf[IFNAMSIZ];
++ const char *destname;
++ int err;
++
++ ASSERT_RTNL();
++
++ /* Don't allow namespace local devices to be moved. */
++ err = -EINVAL;
++ if (dev->features & NETIF_F_NETNS_LOCAL)
++ goto out;
++
++	/* Ensure the device has been registered */
++ err = -EINVAL;
++ if (dev->reg_state != NETREG_REGISTERED)
++ goto out;
++
++	/* Get out if there is nothing to do */
++ err = 0;
++ if (dev->nd_net == net)
++ goto out;
++
++ /* Pick the destination device name, and ensure
++ * we can use it in the destination network namespace.
++ */
++ err = -EEXIST;
++ destname = dev->name;
++ if (__dev_get_by_name(net, destname)) {
++ /* We get here if we can't use the current device name */
++ if (!pat)
++ goto out;
++ if (!dev_valid_name(pat))
++ goto out;
++ if (strchr(pat, '%')) {
++ if (__dev_alloc_name(net, pat, buf) < 0)
++ goto out;
++ destname = buf;
++ } else
++ destname = pat;
++ if (__dev_get_by_name(net, destname))
++ goto out;
++ }
++
++ /*
++	 * And now a mini version of register_netdevice and unregister_netdevice.
++ */
++
++ /* If device is running close it first. */
++ if (dev->flags & IFF_UP)
++ dev_close(dev);
++
++ /* And unlink it from device chain */
++ err = -ENODEV;
++ unlist_netdevice(dev);
++
++ synchronize_net();
++
++ /* Shutdown queueing discipline. */
++ dev_shutdown(dev);
++
++	/* Notify protocols that we are about to destroy
++	   this device. They should clean up all their state.
++	*/
++ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++
++ /*
++ * Flush the multicast chain
++ */
++ dev_mc_discard(dev);
++
++ /* Actually switch the network namespace */
++ dev->nd_net = net;
++
++ /* Assign the new device name */
++ if (destname != dev->name)
++ strcpy(dev->name, destname);
++
++ /* If there is an ifindex conflict assign a new one */
++ if (__dev_get_by_index(net, dev->ifindex)) {
++ int iflink = (dev->iflink == dev->ifindex);
++ dev->ifindex = dev_new_index(net);
++ if (iflink)
++ dev->iflink = dev->ifindex;
++ }
++
++ /* Fixup sysfs */
++ err = device_rename(&dev->dev, dev->name);
++ BUG_ON(err);
++
++ /* Add the device back in the hashes */
++ list_netdevice(dev);
++
++	/* Notify protocols that a new device appeared. */
++ call_netdevice_notifiers(NETDEV_REGISTER, dev);
++
++ synchronize_net();
++ err = 0;
++out:
++ return err;
++}
++
+ static int dev_cpu_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *ocpu)
+@@ -3569,12 +3811,13 @@
+ * This is called when the number of channels allocated to the net_dma_client
+ * changes. The net_dma_client tries to have one DMA channel per CPU.
+ */
+-static void net_dma_rebalance(void)
++
++static void net_dma_rebalance(struct net_dma *net_dma)
+ {
+- unsigned int cpu, i, n;
++ unsigned int cpu, i, n, chan_idx;
+ struct dma_chan *chan;
+
+- if (net_dma_count == 0) {
++ if (cpus_empty(net_dma->channel_mask)) {
+ for_each_online_cpu(cpu)
+ rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
+ return;
+@@ -3583,10 +3826,12 @@
+ i = 0;
+ cpu = first_cpu(cpu_online_map);
+
+- rcu_read_lock();
+- list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+- n = ((num_online_cpus() / net_dma_count)
+- + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
++ for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
++ chan = net_dma->channels[chan_idx];
++
++ n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
++ + (i < (num_online_cpus() %
++ cpus_weight(net_dma->channel_mask)) ? 1 : 0));
+
+ while(n) {
+ per_cpu(softnet_data, cpu).net_dma = chan;
+@@ -3595,7 +3840,6 @@
+ }
+ i++;
+ }
+- rcu_read_unlock();
+ }
+
+ /**
+@@ -3604,23 +3848,53 @@
+ * @chan: DMA channel for the event
+ * @event: event type
+ */
+-static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+- enum dma_event event)
+-{
+- spin_lock(&net_dma_event_lock);
+- switch (event) {
+- case DMA_RESOURCE_ADDED:
+- net_dma_count++;
+- net_dma_rebalance();
++static enum dma_state_client
++netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
++ enum dma_state state)
++{
++ int i, found = 0, pos = -1;
++ struct net_dma *net_dma =
++ container_of(client, struct net_dma, client);
++ enum dma_state_client ack = DMA_DUP; /* default: take no action */
++
++ spin_lock(&net_dma->lock);
++ switch (state) {
++ case DMA_RESOURCE_AVAILABLE:
++ for (i = 0; i < NR_CPUS; i++)
++ if (net_dma->channels[i] == chan) {
++ found = 1;
++ break;
++ } else if (net_dma->channels[i] == NULL && pos < 0)
++ pos = i;
++
++ if (!found && pos >= 0) {
++ ack = DMA_ACK;
++ net_dma->channels[pos] = chan;
++ cpu_set(pos, net_dma->channel_mask);
++ net_dma_rebalance(net_dma);
++ }
+ break;
+ case DMA_RESOURCE_REMOVED:
+- net_dma_count--;
+- net_dma_rebalance();
++ for (i = 0; i < NR_CPUS; i++)
++ if (net_dma->channels[i] == chan) {
++ found = 1;
++ pos = i;
++ break;
++ }
++
++ if (found) {
++ ack = DMA_ACK;
++ cpu_clear(pos, net_dma->channel_mask);
++ net_dma->channels[i] = NULL;
++ net_dma_rebalance(net_dma);
++ }
+ break;
+ default:
+ break;
+ }
+- spin_unlock(&net_dma_event_lock);
++ spin_unlock(&net_dma->lock);
++
++ return ack;
+ }
+
+ /**
+@@ -3628,12 +3902,10 @@
+ */
+ static int __init netdev_dma_register(void)
+ {
+- spin_lock_init(&net_dma_event_lock);
+- net_dma_client = dma_async_client_register(netdev_dma_event);
+- if (net_dma_client == NULL)
+- return -ENOMEM;
+-
+- dma_async_client_chan_request(net_dma_client, num_online_cpus());
++ spin_lock_init(&net_dma.lock);
++ dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
++ dma_async_client_register(&net_dma.client);
++ dma_async_client_chan_request(&net_dma.client);
+ return 0;
+ }
+
+@@ -3679,6 +3951,74 @@
+ }
+ EXPORT_SYMBOL(netdev_compute_features);
+
++/* Initialize per network namespace state */
++static int netdev_init(struct net *net)
++{
++ int i;
++ INIT_LIST_HEAD(&net->dev_base_head);
++
++ net->dev_name_head = kmalloc(
++ sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
++ if (!net->dev_name_head)
++ return -ENOMEM;
++
++ net->dev_index_head = kmalloc(
++ sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
++ if (!net->dev_index_head) {
++ kfree(net->dev_name_head);
++ return -ENOMEM;
++ }
++
++ for (i = 0; i < NETDEV_HASHENTRIES; i++)
++ INIT_HLIST_HEAD(&net->dev_name_head[i]);
++
++ for (i = 0; i < NETDEV_HASHENTRIES; i++)
++ INIT_HLIST_HEAD(&net->dev_index_head[i]);
++
++ return 0;
++}
++
++static void netdev_exit(struct net *net)
++{
++ kfree(net->dev_name_head);
++ kfree(net->dev_index_head);
++}
++
++static struct pernet_operations netdev_net_ops = {
++ .init = netdev_init,
++ .exit = netdev_exit,
++};
++
++static void default_device_exit(struct net *net)
++{
++ struct net_device *dev, *next;
++ /*
++	 * Push all migratable network devices back to the
++ * initial network namespace
++ */
++ rtnl_lock();
++ for_each_netdev_safe(net, dev, next) {
++ int err;
++
++ /* Ignore unmoveable devices (i.e. loopback) */
++ if (dev->features & NETIF_F_NETNS_LOCAL)
++ continue;
++
++		/* Push remaining network devices to init_net */
++ err = dev_change_net_namespace(dev, &init_net, "dev%d");
++ if (err) {
++ printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
++ __func__, dev->name, err);
++ unregister_netdevice(dev);
++ }
++ }
++ rtnl_unlock();
++}
++
++static struct pernet_operations default_device_ops = {
++ .exit = default_device_exit,
++};
++
+ /*
+ * Initialize the DEV module. At boot time this walks the device list and
+ * unhooks any devices that fail to initialise (normally hardware not
+@@ -3706,11 +4047,11 @@
+ for (i = 0; i < 16; i++)
+ INIT_LIST_HEAD(&ptype_base[i]);
+
+- for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
+- INIT_HLIST_HEAD(&dev_name_head[i]);
++ if (register_pernet_subsys(&netdev_net_ops))
++ goto out;
+
+- for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
+- INIT_HLIST_HEAD(&dev_index_head[i]);
++ if (register_pernet_device(&default_device_ops))
++ goto out;
+
+ /*
+ * Initialise the packet receive queues.
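
/*
 * The dev.c changes above convert boot-time global setup into
 * pernet_operations that run per network namespace.  A minimal sketch
 * of the pattern, assuming the register_pernet_subsys() API this
 * series introduces; the foo_* names are hypothetical.
 */
#include <linux/init.h>
#include <net/net_namespace.h>

static int foo_net_init(struct net *net)
{
	/* Allocate this subsystem's per-namespace state; runs for the
	 * initial namespace and for every namespace created later. */
	return 0;
}

static void foo_net_exit(struct net *net)
{
	/* Undo foo_net_init() when the namespace is torn down. */
}

static struct pernet_operations foo_net_ops = {
	.init = foo_net_init,
	.exit = foo_net_exit,
};

static int __init foo_init(void)
{
	return register_pernet_subsys(&foo_net_ops);
}
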
+diff -Nurb linux-2.6.22-570/net/core/dev_mcast.c linux-2.6.22-591/net/core/dev_mcast.c
+--- linux-2.6.22-570/net/core/dev_mcast.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/dev_mcast.c 2007-12-21 15:36:15.000000000 -0500
+@@ -46,6 +46,7 @@
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+
+ /*
+@@ -219,11 +220,12 @@
+ #ifdef CONFIG_PROC_FS
+ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++ struct net *net = seq->private;
+ struct net_device *dev;
+ loff_t off = 0;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if (off++ == *pos)
+ return dev;
+ }
+@@ -272,7 +274,22 @@
+
+ static int dev_mc_seq_open(struct inode *inode, struct file *file)
+ {
+- return seq_open(file, &dev_mc_seq_ops);
++ struct seq_file *seq;
++ int res;
++ res = seq_open(file, &dev_mc_seq_ops);
++ if (!res) {
++ seq = file->private_data;
++ seq->private = get_net(PROC_NET(inode));
++ }
++ return res;
++}
++
++static int dev_mc_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct net *net = seq->private;
++ put_net(net);
++ return seq_release(inode, file);
+ }
+
+ static const struct file_operations dev_mc_seq_fops = {
+@@ -280,14 +297,31 @@
+ .open = dev_mc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = dev_mc_seq_release,
+ };
+
+ #endif
+
++static int dev_mc_net_init(struct net *net)
++{
++ if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
++ return -ENOMEM;
++ return 0;
++}
++
++static void dev_mc_net_exit(struct net *net)
++{
++ proc_net_remove(net, "dev_mcast");
++}
++
++static struct pernet_operations dev_mc_net_ops = {
++ .init = dev_mc_net_init,
++ .exit = dev_mc_net_exit,
++};
++
+ void __init dev_mcast_init(void)
+ {
+- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
++ register_pernet_subsys(&dev_mc_net_ops);
+ }
+
+ EXPORT_SYMBOL(dev_mc_add);
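
/*
 * dev_mc_seq_open()/dev_mc_seq_release() above use the same per-net
 * /proc idiom as dev.c: pin the namespace owning the /proc inode at
 * open time and drop the reference at release time.  A condensed
 * sketch; foo_seq_ops stands in for an ordinary seq_operations table.
 */
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>

static const struct seq_operations foo_seq_ops; /* start/next/stop/show elided */

static int foo_seq_open(struct inode *inode, struct file *file)
{
	int res = seq_open(file, &foo_seq_ops);

	if (!res) {
		struct seq_file *seq = file->private_data;
		/* PROC_NET() resolves the owning namespace; hold it for
		 * the lifetime of the open file. */
		seq->private = get_net(PROC_NET(inode));
	}
	return res;
}

static int foo_seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;

	put_net(seq->private);
	return seq_release(inode, file);
}
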
+diff -Nurb linux-2.6.22-570/net/core/dst.c linux-2.6.22-591/net/core/dst.c
+--- linux-2.6.22-570/net/core/dst.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/dst.c 2007-12-21 15:36:15.000000000 -0500
+@@ -15,7 +15,8 @@
+ #include <linux/skbuff.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+
++#include <net/net_namespace.h>
+ #include <net/dst.h>
+
+ /* Locking strategy:
+@@ -236,13 +238,14 @@
+ if (!unregister) {
+ dst->input = dst->output = dst_discard;
+ } else {
+- dst->dev = &loopback_dev;
+- dev_hold(&loopback_dev);
++ struct net *net = dev->nd_net;
++ dst->dev = &net->loopback_dev;
++ dev_hold(dst->dev);
+ dev_put(dev);
+ if (dst->neighbour && dst->neighbour->dev == dev) {
+- dst->neighbour->dev = &loopback_dev;
++ dst->neighbour->dev = &net->loopback_dev;
+ dev_put(dev);
+- dev_hold(&loopback_dev);
++ dev_hold(dst->neighbour->dev);
+ }
+ }
+ }
+@@ -252,6 +255,9 @@
+ struct net_device *dev = ptr;
+ struct dst_entry *dst;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ case NETDEV_DOWN:
+diff -Nurb linux-2.6.22-570/net/core/ethtool.c linux-2.6.22-591/net/core/ethtool.c
+--- linux-2.6.22-570/net/core/ethtool.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/ethtool.c 2007-12-21 15:36:15.000000000 -0500
+@@ -798,9 +798,9 @@
+
+ /* The main entry point in this file. Called from net/core/dev.c */
+
+-int dev_ethtool(struct ifreq *ifr)
++int dev_ethtool(struct net *net, struct ifreq *ifr)
+ {
+- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ void __user *useraddr = ifr->ifr_data;
+ u32 ethcmd;
+ int rc;
+diff -Nurb linux-2.6.22-570/net/core/fib_rules.c linux-2.6.22-591/net/core/fib_rules.c
+--- linux-2.6.22-570/net/core/fib_rules.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/fib_rules.c 2007-12-21 15:36:15.000000000 -0500
+@@ -11,21 +11,20 @@
+ #include <linux/types.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
++#include <net/net_namespace.h>
++#include <net/sock.h>
+ #include <net/fib_rules.h>
+
+-static LIST_HEAD(rules_ops);
+-static DEFINE_SPINLOCK(rules_mod_lock);
+-
+-static void notify_rule_change(int event, struct fib_rule *rule,
++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
+ struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ u32 pid);
+
+-static struct fib_rules_ops *lookup_rules_ops(int family)
++static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
+ {
+ struct fib_rules_ops *ops;
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(ops, &rules_ops, list) {
++ list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+ if (ops->family == family) {
+ if (!try_module_get(ops->owner))
+ ops = NULL;
+@@ -47,10 +46,10 @@
+ static void flush_route_cache(struct fib_rules_ops *ops)
+ {
+ if (ops->flush_cache)
+- ops->flush_cache();
++ ops->flush_cache(ops);
+ }
+
+-int fib_rules_register(struct fib_rules_ops *ops)
++int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
+ {
+ int err = -EEXIST;
+ struct fib_rules_ops *o;
+@@ -63,15 +62,16 @@
+ ops->action == NULL)
+ return -EINVAL;
+
+- spin_lock(&rules_mod_lock);
+- list_for_each_entry(o, &rules_ops, list)
++ spin_lock(&net->rules_mod_lock);
++ list_for_each_entry(o, &net->rules_ops, list)
+ if (ops->family == o->family)
+ goto errout;
+
+- list_add_tail_rcu(&ops->list, &rules_ops);
++ hold_net(net);
++ list_add_tail_rcu(&ops->list, &net->rules_ops);
+ err = 0;
+ errout:
+- spin_unlock(&rules_mod_lock);
++ spin_unlock(&net->rules_mod_lock);
+
+ return err;
+ }
+@@ -88,13 +88,13 @@
+ }
+ }
+
+-int fib_rules_unregister(struct fib_rules_ops *ops)
++int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
+ {
+ int err = 0;
+ struct fib_rules_ops *o;
+
+- spin_lock(&rules_mod_lock);
+- list_for_each_entry(o, &rules_ops, list) {
++ spin_lock(&net->rules_mod_lock);
++ list_for_each_entry(o, &net->rules_ops, list) {
+ if (o == ops) {
+ list_del_rcu(&o->list);
+ cleanup_ops(ops);
+@@ -104,9 +104,11 @@
+
+ err = -ENOENT;
+ out:
+- spin_unlock(&rules_mod_lock);
++ spin_unlock(&net->rules_mod_lock);
+
+ synchronize_rcu();
++ if (!err)
++ release_net(net);
+
+ return err;
+ }
+@@ -197,6 +199,7 @@
+
+ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule, *r, *last = NULL;
+@@ -206,7 +209,7 @@
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ goto errout;
+
+- ops = lookup_rules_ops(frh->family);
++ ops = lookup_rules_ops(net, frh->family);
+ if (ops == NULL) {
+ err = EAFNOSUPPORT;
+ goto errout;
+@@ -234,7 +237,7 @@
+
+ rule->ifindex = -1;
+ nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+- dev = __dev_get_by_name(rule->ifname);
++ dev = __dev_get_by_name(net, rule->ifname);
+ if (dev)
+ rule->ifindex = dev->ifindex;
+ }
+@@ -256,7 +259,7 @@
+ rule->table = frh_get_table(frh, tb);
+
+ if (!rule->pref && ops->default_pref)
+- rule->pref = ops->default_pref();
++ rule->pref = ops->default_pref(ops);
+
+ err = -EINVAL;
+ if (tb[FRA_GOTO]) {
+@@ -319,7 +322,7 @@
+ else
+ list_add_rcu(&rule->list, ops->rules_list);
+
+- notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
++ notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ flush_route_cache(ops);
+ rules_ops_put(ops);
+ return 0;
+@@ -333,6 +336,7 @@
+
+ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rules_ops *ops = NULL;
+ struct fib_rule *rule, *tmp;
+@@ -342,7 +346,7 @@
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ goto errout;
+
+- ops = lookup_rules_ops(frh->family);
++ ops = lookup_rules_ops(net, frh->family);
+ if (ops == NULL) {
+ err = EAFNOSUPPORT;
+ goto errout;
+@@ -408,7 +412,7 @@
+ }
+
+ synchronize_rcu();
+- notify_rule_change(RTM_DELRULE, rule, ops, nlh,
++ notify_rule_change(net, RTM_DELRULE, rule, ops, nlh,
+ NETLINK_CB(skb).pid);
+ fib_rule_put(rule);
+ flush_route_cache(ops);
+@@ -514,13 +518,17 @@
+
+ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib_rules_ops *ops;
+ int idx = 0, family;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ family = rtnl_msg_family(cb->nlh);
+ if (family != AF_UNSPEC) {
+ /* Protocol specific dump request */
+- ops = lookup_rules_ops(family);
++ ops = lookup_rules_ops(net, family);
+ if (ops == NULL)
+ return -EAFNOSUPPORT;
+
+@@ -528,7 +536,7 @@
+ }
+
+ rcu_read_lock();
+- list_for_each_entry_rcu(ops, &rules_ops, list) {
++ list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+ if (idx < cb->args[0] || !try_module_get(ops->owner))
+ goto skip;
+
+@@ -545,7 +553,7 @@
+ return skb->len;
+ }
+
+-static void notify_rule_change(int event, struct fib_rule *rule,
++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
+ struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ u32 pid)
+ {
+@@ -563,10 +571,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
++ err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(ops->nlgroup, err);
++ rtnl_set_sk_err(net, ops->nlgroup, err);
+ }
+
+ static void attach_rules(struct list_head *rules, struct net_device *dev)
+@@ -594,19 +602,23 @@
+ void *ptr)
+ {
+ struct net_device *dev = ptr;
++ struct net *net = dev->nd_net;
+ struct fib_rules_ops *ops;
+
++	if (net != &init_net)
++		return NOTIFY_DONE;
++
+ ASSERT_RTNL();
+ rcu_read_lock();
+
+ switch (event) {
+ case NETDEV_REGISTER:
+- list_for_each_entry(ops, &rules_ops, list)
++ list_for_each_entry(ops, &net->rules_ops, list)
+ attach_rules(ops->rules_list, dev);
+ break;
+
+ case NETDEV_UNREGISTER:
+- list_for_each_entry(ops, &rules_ops, list)
++ list_for_each_entry(ops, &net->rules_ops, list)
+ detach_rules(ops->rules_list, dev);
+ break;
+ }
+@@ -620,13 +632,28 @@
+ .notifier_call = fib_rules_event,
+ };
+
++static int fib_rules_net_init(struct net *net)
++{
++ INIT_LIST_HEAD(&net->rules_ops);
++ spin_lock_init(&net->rules_mod_lock);
++ return 0;
++}
++
++static struct pernet_operations fib_rules_net_ops = {
++ .init = fib_rules_net_init,
++};
++
+ static int __init fib_rules_init(void)
+ {
++ int ret;
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+
+- return register_netdevice_notifier(&fib_rules_notifier);
++ ret = register_pernet_subsys(&fib_rules_net_ops);
++ if (!ret)
++ ret = register_netdevice_notifier(&fib_rules_notifier);
++ return ret;
+ }
+
+ subsys_initcall(fib_rules_init);
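
/*
 * With rules_ops and rules_mod_lock moved into struct net above, rules
 * clients register against an explicit namespace.  A sketch of the new
 * call sites, matching the patched fib_rules_register()/unregister()
 * signatures; bar_rules_ops stands in for a fully populated
 * struct fib_rules_ops (match/configure/action callbacks omitted), and
 * the init/exit pair would be wired up through pernet_operations.
 */
#include <net/net_namespace.h>
#include <net/fib_rules.h>

static struct fib_rules_ops bar_rules_ops;

static int bar_net_init(struct net *net)
{
	return fib_rules_register(net, &bar_rules_ops);
}

static void bar_net_exit(struct net *net)
{
	fib_rules_unregister(net, &bar_rules_ops);
}
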
+diff -Nurb linux-2.6.22-570/net/core/neighbour.c linux-2.6.22-591/net/core/neighbour.c
+--- linux-2.6.22-570/net/core/neighbour.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/neighbour.c 2007-12-21 15:36:15.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/random.h>
+ #include <linux/string.h>
++#include <net/net_namespace.h>
+
+ #define NEIGH_DEBUG 1
+
+@@ -361,7 +362,7 @@
+ return n;
+ }
+
+-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
++struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey)
+ {
+ struct neighbour *n;
+ int key_len = tbl->key_len;
+@@ -371,7 +372,8 @@
+
+ read_lock_bh(&tbl->lock);
+ for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+- if (!memcmp(n->primary_key, pkey, key_len)) {
++ if (!memcmp(n->primary_key, pkey, key_len) &&
++ (net == n->dev->nd_net)) {
+ neigh_hold(n);
+ NEIGH_CACHE_STAT_INC(tbl, hits);
+ break;
+@@ -449,7 +451,8 @@
+ goto out;
+ }
+
+-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
++struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
++				    struct net *net, const void *pkey,
+ struct net_device *dev, int creat)
+ {
+ struct pneigh_entry *n;
+@@ -465,6 +468,7 @@
+
+ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ if (!memcmp(n->key, pkey, key_len) &&
++ (n->net == net) &&
+ (n->dev == dev || !n->dev)) {
+ read_unlock_bh(&tbl->lock);
+ goto out;
+@@ -479,6 +483,7 @@
+ if (!n)
+ goto out;
+
++ n->net = hold_net(net);
+ memcpy(n->key, pkey, key_len);
+ n->dev = dev;
+ if (dev)
+@@ -501,7 +506,7 @@
+ }
+
+
+-int pneigh_delete(struct neigh_table *tbl, const void *pkey,
++int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
+ struct net_device *dev)
+ {
+ struct pneigh_entry *n, **np;
+@@ -516,13 +521,15 @@
+ write_lock_bh(&tbl->lock);
+ for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ np = &n->next) {
+- if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
++ if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
++ (n->net == net)) {
+ *np = n->next;
+ write_unlock_bh(&tbl->lock);
+ if (tbl->pdestructor)
+ tbl->pdestructor(n);
+ if (n->dev)
+ dev_put(n->dev);
++ release_net(n->net);
+ kfree(n);
+ return 0;
+ }
+@@ -545,6 +552,7 @@
+ tbl->pdestructor(n);
+ if (n->dev)
+ dev_put(n->dev);
++ release_net(n->net);
+ kfree(n);
+ continue;
+ }
+@@ -1266,12 +1274,37 @@
+ spin_unlock(&tbl->proxy_queue.lock);
+ }
+
++static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
++ struct net * net, int ifindex)
++						      struct net *net, int ifindex)
++ struct neigh_parms *p;
++
++ for (p = &tbl->parms; p; p = p->next) {
++ if (p->net != net)
++ continue;
++ if ((p->dev && p->dev->ifindex == ifindex) ||
++ (!p->dev && !ifindex))
++ return p;
++ }
++
++ return NULL;
++}
+
+ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
+ struct neigh_table *tbl)
+ {
+- struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
++ struct neigh_parms *p, *ref;
++	struct net *net;
++
++ net = &init_net;
++ if (dev)
++ net = dev->nd_net;
++
++ ref = lookup_neigh_params(tbl, net, 0);
++ if (!ref)
++ return NULL;
+
++ p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
+ if (p) {
+ p->tbl = tbl;
+ atomic_set(&p->refcnt, 1);
+@@ -1287,6 +1320,7 @@
+ dev_hold(dev);
+ p->dev = dev;
+ }
++ p->net = hold_net(net);
+ p->sysctl_table = NULL;
+ write_lock_bh(&tbl->lock);
+ p->next = tbl->parms.next;
+@@ -1296,6 +1330,21 @@
+ return p;
+ }
+
++struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl,
++ struct net *net)
++{
++ struct neigh_parms *parms;
++	if (net != &init_net) {
++		parms = neigh_parms_alloc(NULL, tbl);
++		if (parms) {
++			release_net(parms->net);
++			parms->net = hold_net(net);
++		}
++	} else
++		parms = neigh_parms_clone(&tbl->parms);
++ return parms;
++}
++
+ static void neigh_rcu_free_parms(struct rcu_head *head)
+ {
+ struct neigh_parms *parms =
+@@ -1328,6 +1376,7 @@
+
+ void neigh_parms_destroy(struct neigh_parms *parms)
+ {
++ release_net(parms->net);
+ kfree(parms);
+ }
+
+@@ -1338,6 +1387,7 @@
+ unsigned long now = jiffies;
+ unsigned long phsize;
+
++ tbl->parms.net = &init_net;
+ atomic_set(&tbl->parms.refcnt, 1);
+ INIT_RCU_HEAD(&tbl->parms.rcu_head);
+ tbl->parms.reachable_time =
+@@ -1353,7 +1403,7 @@
+ panic("cannot create neighbour cache statistics");
+
+ #ifdef CONFIG_PROC_FS
+- tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
++ tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
+ if (!tbl->pde)
+ panic("cannot create neighbour proc dir entry");
+ tbl->pde->proc_fops = &neigh_stat_seq_fops;
+@@ -1443,6 +1493,7 @@
+
+ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ndmsg *ndm;
+ struct nlattr *dst_attr;
+ struct neigh_table *tbl;
+@@ -1458,7 +1509,7 @@
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_ifindex) {
+- dev = dev_get_by_index(ndm->ndm_ifindex);
++ dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto out;
+@@ -1477,7 +1528,7 @@
+ goto out_dev_put;
+
+ if (ndm->ndm_flags & NTF_PROXY) {
+- err = pneigh_delete(tbl, nla_data(dst_attr), dev);
++ err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
+ goto out_dev_put;
+ }
+
+@@ -1508,6 +1559,7 @@
+
+ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX+1];
+ struct neigh_table *tbl;
+@@ -1524,7 +1576,7 @@
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_ifindex) {
+- dev = dev_get_by_index(ndm->ndm_ifindex);
++ dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto out;
+@@ -1553,7 +1605,7 @@
+ struct pneigh_entry *pn;
+
+ err = -ENOBUFS;
+- pn = pneigh_lookup(tbl, dst, dev, 1);
++ pn = pneigh_lookup(tbl, net, dst, dev, 1);
+ if (pn) {
+ pn->flags = ndm->ndm_flags;
+ err = 0;
+@@ -1748,19 +1800,6 @@
+ return -EMSGSIZE;
+ }
+
+-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+- int ifindex)
+-{
+- struct neigh_parms *p;
+-
+- for (p = &tbl->parms; p; p = p->next)
+- if ((p->dev && p->dev->ifindex == ifindex) ||
+- (!p->dev && !ifindex))
+- return p;
+-
+- return NULL;
+-}
+-
+ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
+ [NDTA_NAME] = { .type = NLA_STRING },
+ [NDTA_THRESH1] = { .type = NLA_U32 },
+@@ -1788,6 +1827,7 @@
+
+ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct neigh_table *tbl;
+ struct ndtmsg *ndtmsg;
+ struct nlattr *tb[NDTA_MAX+1];
+@@ -1837,7 +1877,7 @@
+ if (tbp[NDTPA_IFINDEX])
+ ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
+
+- p = lookup_neigh_params(tbl, ifindex);
++ p = lookup_neigh_params(tbl, net, ifindex);
+ if (p == NULL) {
+ err = -ENOENT;
+ goto errout_tbl_lock;
+@@ -1912,6 +1952,7 @@
+
+ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int family, tidx, nidx = 0;
+ int tbl_skip = cb->args[0];
+ int neigh_skip = cb->args[1];
+@@ -1931,8 +1972,11 @@
+ NLM_F_MULTI) <= 0)
+ break;
+
+- for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+- if (nidx < neigh_skip)
++ for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
++ if (net != p->net)
++ continue;
++
++ if (nidx++ < neigh_skip)
+ continue;
+
+ if (neightbl_fill_param_info(skb, tbl, p,
+@@ -2003,6 +2047,7 @@
+ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+ struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ struct neighbour *n;
+ int rc, h, s_h = cb->args[1];
+ int idx, s_idx = idx = cb->args[2];
+@@ -2013,8 +2058,12 @@
+ continue;
+ if (h > s_h)
+ s_idx = 0;
+- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
+- if (idx < s_idx)
++ for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
++ int lidx;
++ if (n->dev->nd_net != net)
++ continue;
++ lidx = idx++;
++ if (lidx < s_idx)
+ continue;
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+@@ -2109,6 +2158,7 @@
+ static struct neighbour *neigh_get_first(struct seq_file *seq)
+ {
+ struct neigh_seq_state *state = seq->private;
++	struct net *net = state->net;
+ struct neigh_table *tbl = state->tbl;
+ struct neighbour *n = NULL;
+ int bucket = state->bucket;
+@@ -2118,6 +2168,8 @@
+ n = tbl->hash_buckets[bucket];
+
+ while (n) {
++ if (n->dev->nd_net != net)
++ goto next;
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+@@ -2147,6 +2199,7 @@
+ loff_t *pos)
+ {
+ struct neigh_seq_state *state = seq->private;
++	struct net *net = state->net;
+ struct neigh_table *tbl = state->tbl;
+
+ if (state->neigh_sub_iter) {
+@@ -2158,6 +2211,8 @@
+
+ while (1) {
+ while (n) {
++ if (n->dev->nd_net != net)
++ goto next;
+ if (state->neigh_sub_iter) {
+ void *v = state->neigh_sub_iter(state, n, pos);
+ if (v)
+@@ -2204,6 +2259,7 @@
+ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+ {
+ struct neigh_seq_state *state = seq->private;
++	struct net *net = state->net;
+ struct neigh_table *tbl = state->tbl;
+ struct pneigh_entry *pn = NULL;
+ int bucket = state->bucket;
+@@ -2211,6 +2267,8 @@
+ state->flags |= NEIGH_SEQ_IS_PNEIGH;
+ for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+ pn = tbl->phash_buckets[bucket];
++ while (pn && (pn->net != net))
++ pn = pn->next;
+ if (pn)
+ break;
+ }
+@@ -2224,6 +2282,7 @@
+ loff_t *pos)
+ {
+ struct neigh_seq_state *state = seq->private;
++	struct net *net = state->net;
+ struct neigh_table *tbl = state->tbl;
+
+ pn = pn->next;
+@@ -2231,6 +2290,8 @@
+ if (++state->bucket > PNEIGH_HASHMASK)
+ break;
+ pn = tbl->phash_buckets[state->bucket];
++ while (pn && (pn->net != net))
++ pn = pn->next;
+ if (pn)
+ break;
+ }
+@@ -2433,6 +2494,7 @@
+
+ static void __neigh_notify(struct neighbour *n, int type, int flags)
+ {
++	struct net *net = n->dev->nd_net;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+@@ -2447,10 +2509,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
++ err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_NEIGH, err);
++ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ }
+
+ void neigh_app_ns(struct neighbour *n)
+@@ -2716,7 +2779,7 @@
+ t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
+ t->neigh_root_dir[0].child = t->neigh_proto_dir;
+
+- t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
++ t->sysctl_header = register_net_sysctl_table(p->net, t->neigh_root_dir);
+ if (!t->sysctl_header) {
+ err = -ENOBUFS;
+ goto free_procname;
+@@ -2738,7 +2801,7 @@
+ if (p->sysctl_table) {
+ struct neigh_sysctl_table *t = p->sysctl_table;
+ p->sysctl_table = NULL;
+- unregister_sysctl_table(t->sysctl_header);
++ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t->neigh_dev[0].procname);
+ kfree(t);
+ }
+@@ -2771,6 +2834,7 @@
+ EXPORT_SYMBOL(neigh_lookup);
+ EXPORT_SYMBOL(neigh_lookup_nodev);
+ EXPORT_SYMBOL(neigh_parms_alloc);
++EXPORT_SYMBOL(neigh_parms_alloc_default);
+ EXPORT_SYMBOL(neigh_parms_release);
+ EXPORT_SYMBOL(neigh_rand_reach_time);
+ EXPORT_SYMBOL(neigh_resolve_output);
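
/*
 * Proxy-neighbour entries are now keyed by (namespace, address, device),
 * so callers of pneigh_lookup()/pneigh_delete() above pass the owning
 * net explicitly.  A hedged sketch of a creating lookup; the table and
 * address arguments are illustrative.
 */
#include <net/neighbour.h>

static int example_add_proxy(struct neigh_table *tbl,
			     struct net_device *dev, const void *addr)
{
	struct pneigh_entry *pn;

	/* final argument 1 == create the entry if it does not exist */
	pn = pneigh_lookup(tbl, dev->nd_net, addr, dev, 1);
	if (pn == NULL)
		return -ENOBUFS;
	pn->flags = 0;
	return 0;
}
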
+diff -Nurb linux-2.6.22-570/net/core/net-sysfs.c linux-2.6.22-591/net/core/net-sysfs.c
+--- linux-2.6.22-570/net/core/net-sysfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/net-sysfs.c 2007-12-21 15:36:15.000000000 -0500
+@@ -13,7 +13,9 @@
+ #include <linux/kernel.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
++#include <linux/nsproxy.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/wireless.h>
+ #include <net/iw_handler.h>
+@@ -29,16 +31,16 @@
+ }
+
+ /* use same locking rules as GIF* ioctl's */
+-static ssize_t netdev_show(const struct device *dev,
++static ssize_t netdev_show(const struct device *device,
+ struct device_attribute *attr, char *buf,
+ ssize_t (*format)(const struct net_device *, char *))
+ {
+- struct net_device *net = to_net_dev(dev);
++ struct net_device *dev = to_net_dev(device);
+ ssize_t ret = -EINVAL;
+
+ read_lock(&dev_base_lock);
+- if (dev_isalive(net))
+- ret = (*format)(net, buf);
++ if (dev_isalive(dev))
++ ret = (*format)(dev, buf);
+ read_unlock(&dev_base_lock);
+
+ return ret;
+@@ -46,9 +48,9 @@
+
+ /* generate a show function for simple field */
+ #define NETDEVICE_SHOW(field, format_string) \
+-static ssize_t format_##field(const struct net_device *net, char *buf) \
++static ssize_t format_##field(const struct net_device *dev, char *buf) \
+ { \
+- return sprintf(buf, format_string, net->field); \
++ return sprintf(buf, format_string, dev->field); \
+ } \
+ static ssize_t show_##field(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+@@ -58,11 +60,11 @@
+
+
+ /* use same locking and permission rules as SIF* ioctl's */
+-static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
++static ssize_t netdev_store(struct device *device, struct device_attribute *attr,
+ const char *buf, size_t len,
+ int (*set)(struct net_device *, unsigned long))
+ {
+- struct net_device *net = to_net_dev(dev);
++ struct net_device *dev = to_net_dev(device);
+ char *endp;
+ unsigned long new;
+ int ret = -EINVAL;
+@@ -75,8 +77,8 @@
+ goto err;
+
+ rtnl_lock();
+- if (dev_isalive(net)) {
+- if ((ret = (*set)(net, new)) == 0)
++ if (dev_isalive(dev)) {
++ if ((ret = (*set)(dev, new)) == 0)
+ ret = len;
+ }
+ rtnl_unlock();
+@@ -103,45 +105,45 @@
+ return cp - buf;
+ }
+
+-static ssize_t show_address(struct device *dev, struct device_attribute *attr,
++static ssize_t show_address(struct device *device, struct device_attribute *attr,
+ char *buf)
+ {
+- struct net_device *net = to_net_dev(dev);
++ struct net_device *dev = to_net_dev(device);
+ ssize_t ret = -EINVAL;
+
+ read_lock(&dev_base_lock);
+- if (dev_isalive(net))
+- ret = format_addr(buf, net->dev_addr, net->addr_len);
++ if (dev_isalive(dev))
++ ret = format_addr(buf, dev->dev_addr, dev->addr_len);
+ read_unlock(&dev_base_lock);
+ return ret;
+ }
+
+-static ssize_t show_broadcast(struct device *dev,
++static ssize_t show_broadcast(struct device *device,
+ struct device_attribute *attr, char *buf)
+ {
+- struct net_device *net = to_net_dev(dev);
+- if (dev_isalive(net))
+- return format_addr(buf, net->broadcast, net->addr_len);
++ struct net_device *dev = to_net_dev(device);
++ if (dev_isalive(dev))
++ return format_addr(buf, dev->broadcast, dev->addr_len);
+ return -EINVAL;
+ }
+
+-static ssize_t show_carrier(struct device *dev,
++static ssize_t show_carrier(struct device *device,
+ struct device_attribute *attr, char *buf)
+ {
+- struct net_device *netdev = to_net_dev(dev);
+- if (netif_running(netdev)) {
+- return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
++ struct net_device *dev = to_net_dev(device);
++ if (netif_running(dev)) {
++ return sprintf(buf, fmt_dec, !!netif_carrier_ok(dev));
+ }
+ return -EINVAL;
+ }
+
+-static ssize_t show_dormant(struct device *dev,
++static ssize_t show_dormant(struct device *device,
+ struct device_attribute *attr, char *buf)
+ {
+- struct net_device *netdev = to_net_dev(dev);
++ struct net_device *dev = to_net_dev(device);
+
+- if (netif_running(netdev))
+- return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
++ if (netif_running(dev))
++ return sprintf(buf, fmt_dec, !!netif_dormant(dev));
+
+ return -EINVAL;
+ }
+@@ -156,15 +158,15 @@
+ "up"
+ };
+
+-static ssize_t show_operstate(struct device *dev,
++static ssize_t show_operstate(struct device *device,
+ struct device_attribute *attr, char *buf)
+ {
+- const struct net_device *netdev = to_net_dev(dev);
++ const struct net_device *dev = to_net_dev(device);
+ unsigned char operstate;
+
+ read_lock(&dev_base_lock);
+- operstate = netdev->operstate;
+- if (!netif_running(netdev))
++ operstate = dev->operstate;
++ if (!netif_running(dev))
+ operstate = IF_OPER_DOWN;
+ read_unlock(&dev_base_lock);
+
+@@ -177,57 +179,57 @@
+ /* read-write attributes */
+ NETDEVICE_SHOW(mtu, fmt_dec);
+
+-static int change_mtu(struct net_device *net, unsigned long new_mtu)
++static int change_mtu(struct net_device *dev, unsigned long new_mtu)
+ {
+- return dev_set_mtu(net, (int) new_mtu);
++ return dev_set_mtu(dev, (int) new_mtu);
+ }
+
+-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
++static ssize_t store_mtu(struct device *device, struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
+- return netdev_store(dev, attr, buf, len, change_mtu);
++ return netdev_store(device, attr, buf, len, change_mtu);
+ }
+
+ NETDEVICE_SHOW(flags, fmt_hex);
+
+-static int change_flags(struct net_device *net, unsigned long new_flags)
++static int change_flags(struct net_device *dev, unsigned long new_flags)
+ {
+- return dev_change_flags(net, (unsigned) new_flags);
++ return dev_change_flags(dev, (unsigned) new_flags);
+ }
+
+-static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
++static ssize_t store_flags(struct device *device, struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
+- return netdev_store(dev, attr, buf, len, change_flags);
++ return netdev_store(device, attr, buf, len, change_flags);
+ }
+
+ NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+
+-static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
++static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
+ {
+- net->tx_queue_len = new_len;
++ dev->tx_queue_len = new_len;
+ return 0;
+ }
+
+-static ssize_t store_tx_queue_len(struct device *dev,
++static ssize_t store_tx_queue_len(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
+- return netdev_store(dev, attr, buf, len, change_tx_queue_len);
++ return netdev_store(device, attr, buf, len, change_tx_queue_len);
+ }
+
+ NETDEVICE_SHOW(weight, fmt_dec);
+
+-static int change_weight(struct net_device *net, unsigned long new_weight)
++static int change_weight(struct net_device *dev, unsigned long new_weight)
+ {
+- net->weight = new_weight;
++ dev->weight = new_weight;
+ return 0;
+ }
+
+-static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
++static ssize_t store_weight(struct device *device, struct device_attribute *attr,
+ const char *buf, size_t len)
+ {
+- return netdev_store(dev, attr, buf, len, change_weight);
++ return netdev_store(device, attr, buf, len, change_weight);
+ }
+
+ static struct device_attribute net_class_attributes[] = {
+@@ -447,6 +449,23 @@
+ kfree((char *)dev - dev->padded);
+ }
+
++static const void *net_current_tag(void)
++{
++ return current->nsproxy->net_ns;
++}
++
++static const void *net_kobject_tag(struct kobject *kobj)
++{
++ struct net_device *dev;
++ dev = container_of(kobj, struct net_device, dev.kobj);
++ return dev->nd_net;
++}
++
++static const struct shadow_dir_operations net_shadow_dir_operations = {
++ .current_tag = net_current_tag,
++ .kobject_tag = net_kobject_tag,
++};
++
+ static struct class net_class = {
+ .name = "net",
+ .dev_release = netdev_release,
+@@ -454,42 +473,43 @@
+ #ifdef CONFIG_HOTPLUG
+ .dev_uevent = netdev_uevent,
+ #endif
++ .shadow_ops = &net_shadow_dir_operations,
+ };
+
+ /* Delete sysfs entries but hold kobject reference until after all
+ * netdev references are gone.
+ */
+-void netdev_unregister_sysfs(struct net_device * net)
++void netdev_unregister_sysfs(struct net_device * dev)
+ {
+- struct device *dev = &(net->dev);
++ struct device *device = &(dev->dev);
+
+- kobject_get(&dev->kobj);
+- device_del(dev);
++ kobject_get(&device->kobj);
++ device_del(device);
+ }
+
+ /* Create sysfs entries for network device. */
+-int netdev_register_sysfs(struct net_device *net)
++int netdev_register_sysfs(struct net_device *dev)
+ {
+- struct device *dev = &(net->dev);
+- struct attribute_group **groups = net->sysfs_groups;
++ struct device *device = &(dev->dev);
++ struct attribute_group **groups = dev->sysfs_groups;
+
+- device_initialize(dev);
+- dev->class = &net_class;
+- dev->platform_data = net;
+- dev->groups = groups;
++ device_initialize(device);
++ device->class = &net_class;
++ device->platform_data = dev;
++ device->groups = groups;
+
+ BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
+- strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
++ strlcpy(device->bus_id, dev->name, BUS_ID_SIZE);
+
+- if (net->get_stats)
++ if (dev->get_stats)
+ *groups++ = &netstat_group;
+
+ #ifdef CONFIG_WIRELESS_EXT
+- if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
++ if (dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats)
+ *groups++ = &wireless_group;
+ #endif
+
+- return device_add(dev);
++ return device_add(device);
+ }
+
+ int netdev_sysfs_init(void)
+diff -Nurb linux-2.6.22-570/net/core/net_namespace.c linux-2.6.22-591/net/core/net_namespace.c
+--- linux-2.6.22-570/net/core/net_namespace.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/net/core/net_namespace.c 2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,332 @@
++#include <linux/workqueue.h>
++#include <linux/rtnetlink.h>
++#include <linux/cache.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/delay.h>
++#include <net/net_namespace.h>
++
++/*
++ * Our network namespace constructor/destructor lists
++ */
++
++static LIST_HEAD(pernet_list);
++static struct list_head *first_device = &pernet_list;
++static DEFINE_MUTEX(net_mutex);
++
++static DEFINE_MUTEX(net_list_mutex);
++LIST_HEAD(net_namespace_list);
++
++static struct kmem_cache *net_cachep;
++
++struct net init_net;
++EXPORT_SYMBOL_GPL(init_net);
++
++void net_lock(void)
++{
++ mutex_lock(&net_list_mutex);
++}
++
++void net_unlock(void)
++{
++ mutex_unlock(&net_list_mutex);
++}
++
++static struct net *net_alloc(void)
++{
++ return kmem_cache_alloc(net_cachep, GFP_KERNEL);
++}
++
++static void net_free(struct net *net)
++{
++ if (!net)
++ return;
++
++ if (unlikely(atomic_read(&net->use_count) != 0)) {
++ printk(KERN_EMERG "network namespace not free! Usage: %d\n",
++ atomic_read(&net->use_count));
++ return;
++ }
++
++ kmem_cache_free(net_cachep, net);
++}
++
++static void cleanup_net(struct work_struct *work)
++{
++ struct pernet_operations *ops;
++ struct list_head *ptr;
++ struct net *net;
++
++ net = container_of(work, struct net, work);
++
++ mutex_lock(&net_mutex);
++
++ /* Don't let anyone else find us. */
++ net_lock();
++ list_del(&net->list);
++ net_unlock();
++
++ /* Run all of the network namespace exit methods */
++ list_for_each_prev(ptr, &pernet_list) {
++ ops = list_entry(ptr, struct pernet_operations, list);
++ if (ops->exit)
++ ops->exit(net);
++ }
++
++ mutex_unlock(&net_mutex);
++
++ /* Ensure there are no outstanding rcu callbacks using this
++ * network namespace.
++ */
++ rcu_barrier();
++
++ /* Finally it is safe to free my network namespace structure */
++ net_free(net);
++}
++
++
++void __put_net(struct net *net)
++{
++ /* Cleanup the network namespace in process context */
++ INIT_WORK(&net->work, cleanup_net);
++ schedule_work(&net->work);
++}
++EXPORT_SYMBOL_GPL(__put_net);
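++
++/*
++ * Sketch of the intended reference discipline (hypothetical caller;
++ * not part of this patch): anything that stores a struct net pointer
++ * pins it with get_net() and releases it with put_net(); the final
++ * put_net() lands in __put_net(), which defers teardown to a
++ * workqueue because cleanup_net() sleeps (it takes net_mutex and
++ * calls rcu_barrier()):
++ *
++ * struct net *net = get_net(sk->sk_net); // pin while in use
++ * ...
++ * put_net(net); // last ref schedules cleanup_net()
++ */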
++
++/*
++ * setup_net runs the initializers for the network namespace object.
++ */
++static int setup_net(struct net *net)
++{
++ /* Must be called with net_mutex held */
++ struct pernet_operations *ops;
++ struct list_head *ptr;
++ int error;
++
++ memset(net, 0, sizeof(struct net));
++ atomic_set(&net->count, 1);
++ atomic_set(&net->use_count, 0);
++
++ error = 0;
++ list_for_each(ptr, &pernet_list) {
++ ops = list_entry(ptr, struct pernet_operations, list);
++ if (ops->init) {
++ error = ops->init(net);
++ if (error < 0)
++ goto out_undo;
++ }
++ }
++out:
++ return error;
++out_undo:
++ /* Walk through the list backwards, calling the exit functions
++ * for the pernet modules whose init functions did not fail.
++ */
++ for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) {
++ ops = list_entry(ptr, struct pernet_operations, list);
++ if (ops->exit)
++ ops->exit(net);
++ }
++ goto out;
++}
++
++struct net *copy_net_ns(unsigned long flags, struct net *old_net)
++{
++ struct net *new_net = NULL;
++ int err;
++
++ get_net(old_net);
++
++ if (!(flags & CLONE_NEWNET))
++ return old_net;
++
++ err = -EPERM;
++ if (!capable(CAP_SYS_ADMIN))
++ goto out;
++
++ err = -ENOMEM;
++ new_net = net_alloc();
++ if (!new_net)
++ goto out;
++
++ mutex_lock(&net_mutex);
++ err = setup_net(new_net);
++ if (err)
++ goto out_unlock;
++
++ net_lock();
++ list_add_tail(&new_net->list, &net_namespace_list);
++ net_unlock();
++
++
++out_unlock:
++ mutex_unlock(&net_mutex);
++out:
++ put_net(old_net);
++ if (err) {
++ net_free(new_net);
++ new_net = ERR_PTR(err);
++ }
++ return new_net;
++}
++
++static int __init net_ns_init(void)
++{
++ int err;
++
++ printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
++ net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
++ SMP_CACHE_BYTES,
++ SLAB_PANIC, NULL, NULL);
++ mutex_lock(&net_mutex);
++ err = setup_net(&init_net);
++
++ net_lock();
++ list_add_tail(&init_net.list, &net_namespace_list);
++ net_unlock();
++
++ mutex_unlock(&net_mutex);
++ if (err)
++ panic("Could not setup the initial network namespace");
++
++ return 0;
++}
++
++pure_initcall(net_ns_init);
++
++static int register_pernet_operations(struct list_head *list,
++ struct pernet_operations *ops)
++{
++ struct net *net, *undo_net;
++ int error;
++
++ error = 0;
++ list_add_tail(&ops->list, list);
++ for_each_net(net) {
++ if (ops->init) {
++ error = ops->init(net);
++ if (error)
++ goto out_undo;
++ }
++ }
++out:
++ return error;
++
++out_undo:
++ /* If I have an error, clean up all namespaces I initialized */
++ list_del(&ops->list);
++ for_each_net(undo_net) {
++ if (undo_net == net)
++ goto undone;
++ if (ops->exit)
++ ops->exit(undo_net);
++ }
++undone:
++ goto out;
++}
++
++static void unregister_pernet_operations(struct pernet_operations *ops)
++{
++ struct net *net;
++
++ list_del(&ops->list);
++ for_each_net(net)
++ if (ops->exit)
++ ops->exit(net);
++}
++
++/**
++ * register_pernet_subsys - register a network namespace subsystem
++ * @ops: pernet operations structure for the subsystem
++ *
++ * Register a subsystem which has init and exit functions
++ * that are called when network namespaces are created and
++ * destroyed respectively.
++ *
++ * When registered, all network namespace init functions are
++ * called for every existing network namespace, allowing kernel
++ * modules to have a race-free view of the set of network namespaces.
++ *
++ * When a new network namespace is created, all of the init
++ * methods are called in the order in which they were registered.
++ *
++ * When a network namespace is destroyed, all of the exit methods
++ * are called in the reverse of the order in which they were
++ * registered.
++ */
++int register_pernet_subsys(struct pernet_operations *ops)
++{
++ int error;
++ mutex_lock(&net_mutex);
++ error = register_pernet_operations(first_device, ops);
++ mutex_unlock(&net_mutex);
++ return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_subsys);
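++
++/*
++ * Usage sketch (hypothetical example; the "foo" symbols below are not
++ * defined anywhere in this patch). A subsystem supplies init/exit
++ * hooks that run once per network namespace:
++ *
++ * static int foo_net_init(struct net *net)
++ * {
++ * return foo_alloc_state(net); // per-namespace setup
++ * }
++ *
++ * static void foo_net_exit(struct net *net)
++ * {
++ * foo_free_state(net); // per-namespace teardown
++ * }
++ *
++ * static struct pernet_operations foo_net_ops = {
++ * .init = foo_net_init,
++ * .exit = foo_net_exit,
++ * };
++ *
++ * A module would call register_pernet_subsys(&foo_net_ops) from its
++ * module_init() and unregister_pernet_subsys(&foo_net_ops) from its
++ * module_exit().
++ */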
++
++/**
++ * unregister_pernet_subsys - unregister a network namespace subsystem
++ * @ops: pernet operations structure to manipulate
++ *
++ * Remove the pernet operations structure from the list to be
++ * used when network namespaces are created or destroyed. In
++ * addition, run the exit method for all existing network
++ * namespaces.
++ */
++void unregister_pernet_subsys(struct pernet_operations *module)
++{
++ mutex_lock(&net_mutex);
++ unregister_pernet_operations(module);
++ mutex_unlock(&net_mutex);
++}
++EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
++
++/**
++ * register_pernet_device - register a network namespace device
++ * @ops: pernet operations structure for the subsystem
++ *
++ * Register a device which has init and exit functions
++ * that are called when network namespaces are created and
++ * destroyed respectively.
++ *
++ * When registered, all network namespace init functions are
++ * called for every existing network namespace, allowing kernel
++ * modules to have a race-free view of the set of network namespaces.
++ *
++ * When a new network namespace is created, all of the init
++ * methods are called in the order in which they were registered.
++ *
++ * When a network namespace is destroyed, all of the exit methods
++ * are called in the reverse of the order in which they were
++ * registered.
++ */
++int register_pernet_device(struct pernet_operations *ops)
++{
++ int error;
++ mutex_lock(&net_mutex);
++ error = register_pernet_operations(&pernet_list, ops);
++ if (!error && (first_device == &pernet_list))
++ first_device = &ops->list;
++ mutex_unlock(&net_mutex);
++ return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_device);
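++
++/*
++ * Usage sketch (hypothetical; "bar" is not defined in this patch).
++ * Registration looks the same as for a subsystem:
++ *
++ * static struct pernet_operations bar_net_ops = {
++ * .init = bar_net_init,
++ * .exit = bar_net_exit,
++ * };
++ *
++ * err = register_pernet_device(&bar_net_ops);
++ *
++ * Because device entries are appended after first_device, their init
++ * methods run after every subsystem init method when a namespace is
++ * created, and their exit methods run first when it is destroyed.
++ */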
++
++/**
++ * unregister_pernet_device - unregister a network namespace device
++ * @ops: pernet operations structure to manipulate
++ *
++ * Remove the pernet operations structure from the list to be
++ * used when network namespaces are created or destroyed. In
++ * addition, run the exit method for all existing network
++ * namespaces.
++ */
++void unregister_pernet_device(struct pernet_operations *ops)
++{
++ mutex_lock(&net_mutex);
++ if (&ops->list == first_device)
++ first_device = first_device->next;
++ unregister_pernet_operations(ops);
++ mutex_unlock(&net_mutex);
++}
++EXPORT_SYMBOL_GPL(unregister_pernet_device);
+diff -Nurb linux-2.6.22-570/net/core/netpoll.c linux-2.6.22-591/net/core/netpoll.c
+--- linux-2.6.22-570/net/core/netpoll.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/core/netpoll.c 2007-12-21 15:36:15.000000000 -0500
+@@ -503,7 +503,8 @@
+
+ np->rx_hook(np, ntohs(uh->source),
+ (char *)(uh+1),
+- ulen - sizeof(struct udphdr));
++ ulen - sizeof(struct udphdr),
++ skb);
+
+ kfree_skb(skb);
+ return 1;
+@@ -633,7 +634,7 @@
+ int err;
+
+ if (np->dev_name)
+- ndev = dev_get_by_name(np->dev_name);
++ ndev = dev_get_by_name(&init_net, np->dev_name);
+ if (!ndev) {
+ printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+ np->name, np->dev_name);
+diff -Nurb linux-2.6.22-570/net/core/pktgen.c linux-2.6.22-591/net/core/pktgen.c
+--- linux-2.6.22-570/net/core/pktgen.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/core/pktgen.c 2007-12-21 15:36:15.000000000 -0500
+@@ -155,6 +155,7 @@
+ #include <net/checksum.h>
+ #include <net/ipv6.h>
+ #include <net/addrconf.h>
++#include <net/net_namespace.h>
+ #include <asm/byteorder.h>
+ #include <linux/rcupdate.h>
+ #include <asm/bitops.h>
+@@ -1903,6 +1904,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* It is OK that we do not hold the group lock right now,
+ * as we run under the RTNL lock.
+ */
+@@ -1933,7 +1937,7 @@
+ pkt_dev->odev = NULL;
+ }
+
+- odev = dev_get_by_name(ifname);
++ odev = dev_get_by_name(&init_net, ifname);
+ if (!odev) {
+ printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+ return -ENODEV;
+@@ -3284,6 +3288,8 @@
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
++ set_freezable();
++
+ while (!kthread_should_stop()) {
+ pkt_dev = next_to_run(t);
+
+@@ -3568,7 +3574,7 @@
+
+ printk(version);
+
+- pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
++ pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
+ if (!pg_proc_dir)
+ return -ENODEV;
+ pg_proc_dir->owner = THIS_MODULE;
+@@ -3577,7 +3583,7 @@
+ if (pe == NULL) {
+ printk("pktgen: ERROR: cannot create %s procfs entry.\n",
+ PGCTRL);
+- proc_net_remove(PG_PROC_DIR);
++ proc_net_remove(&init_net, PG_PROC_DIR);
+ return -EINVAL;
+ }
+
+@@ -3600,7 +3606,7 @@
+ printk("pktgen: ERROR: Initialization failed for all threads\n");
+ unregister_netdevice_notifier(&pktgen_notifier_block);
+ remove_proc_entry(PGCTRL, pg_proc_dir);
+- proc_net_remove(PG_PROC_DIR);
++ proc_net_remove(&init_net, PG_PROC_DIR);
+ return -ENODEV;
+ }
+
+@@ -3627,7 +3633,7 @@
+
+ /* Clean up proc file system */
+ remove_proc_entry(PGCTRL, pg_proc_dir);
+- proc_net_remove(PG_PROC_DIR);
++ proc_net_remove(&init_net, PG_PROC_DIR);
+ }
+
+ module_init(pg_init);
+diff -Nurb linux-2.6.22-570/net/core/rtnetlink.c linux-2.6.22-591/net/core/rtnetlink.c
+--- linux-2.6.22-570/net/core/rtnetlink.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/core/rtnetlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -59,7 +59,6 @@
+ };
+
+ static DEFINE_MUTEX(rtnl_mutex);
+-static struct sock *rtnl;
+
+ void rtnl_lock(void)
+ {
+@@ -73,9 +72,17 @@
+
+ void rtnl_unlock(void)
+ {
++ struct net *net;
+ mutex_unlock(&rtnl_mutex);
++
++ net_lock();
++ for_each_net(net) {
++ struct sock *rtnl = net->rtnl;
+ if (rtnl && rtnl->sk_receive_queue.qlen)
+ rtnl->sk_data_ready(rtnl, 0);
++ }
++ net_unlock();
++
+ netdev_run_todo();
+ }
+
+@@ -97,6 +104,19 @@
+ return 0;
+ }
+
++int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
++ struct rtattr *rta, int len)
++{
++ if (RTA_PAYLOAD(rta) < len)
++ return -1;
++ if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) {
++ rta = RTA_DATA(rta) + RTA_ALIGN(len);
++ return rtattr_parse_nested(tb, maxattr, rta);
++ }
++ memset(tb, 0, sizeof(struct rtattr *) * maxattr);
++ return 0;
++}
++
+ static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+ static inline int rtm_msgindex(int msgtype)
+@@ -243,6 +263,143 @@
+
+ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
+
++static LIST_HEAD(link_ops);
++
++/**
++ * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
++ * @ops: struct rtnl_link_ops * to register
++ *
++ * The caller must hold the rtnl_mutex. This function should be used
++ * by drivers that create devices during module initialization. It
++ * must be called before registering the devices.
++ *
++ * Returns 0 on success or a negative error code.
++ */
++int __rtnl_link_register(struct rtnl_link_ops *ops)
++{
++ list_add_tail(&ops->list, &link_ops);
++ return 0;
++}
++
++EXPORT_SYMBOL_GPL(__rtnl_link_register);
++
++/**
++ * rtnl_link_register - Register rtnl_link_ops with rtnetlink.
++ * @ops: struct rtnl_link_ops * to register
++ *
++ * Returns 0 on success or a negative error code.
++ */
++int rtnl_link_register(struct rtnl_link_ops *ops)
++{
++ int err;
++
++ rtnl_lock();
++ err = __rtnl_link_register(ops);
++ rtnl_unlock();
++ return err;
++}
++
++EXPORT_SYMBOL_GPL(rtnl_link_register);
++
++/**
++ * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
++ * @ops: struct rtnl_link_ops * to unregister
++ *
++ * The caller must hold the rtnl_mutex. This function should be used
++ * by drivers that unregister devices during module unloading. It must
++ * be called after unregistering the devices.
++ */
++void __rtnl_link_unregister(struct rtnl_link_ops *ops)
++{
++ list_del(&ops->list);
++}
++
++EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
++
++/**
++ * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
++ * @ops: struct rtnl_link_ops * to unregister
++ */
++void rtnl_link_unregister(struct rtnl_link_ops *ops)
++{
++ rtnl_lock();
++ __rtnl_link_unregister(ops);
++ rtnl_unlock();
++}
++
++EXPORT_SYMBOL_GPL(rtnl_link_unregister);
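++
++/*
++ * Usage sketch (hypothetical; "myvlan" and its callbacks are not part
++ * of this patch). A driver describes its link type once, and the
++ * RTM_NEWLINK/RTM_DELLINK handlers below use it to create and destroy
++ * devices of that kind:
++ *
++ * static struct rtnl_link_ops myvlan_link_ops = {
++ * .kind = "myvlan",
++ * .priv_size = sizeof(struct myvlan_priv),
++ * .setup = myvlan_setup,
++ * .newlink = myvlan_newlink,
++ * .dellink = myvlan_dellink,
++ * };
++ *
++ * A module calls rtnl_link_register(&myvlan_link_ops) from its
++ * module_init() and rtnl_link_unregister(&myvlan_link_ops) from its
++ * module_exit().
++ */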
++
++static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
++{
++ const struct rtnl_link_ops *ops;
++
++ list_for_each_entry(ops, &link_ops, list) {
++ if (!strcmp(ops->kind, kind))
++ return ops;
++ }
++ return NULL;
++}
++
++static size_t rtnl_link_get_size(const struct net_device *dev)
++{
++ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
++ size_t size;
++
++ if (!ops)
++ return 0;
++
++ size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
++ nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
++
++ if (ops->get_size)
++ /* IFLA_INFO_DATA + nested data */
++ size += nlmsg_total_size(sizeof(struct nlattr)) +
++ ops->get_size(dev);
++
++ if (ops->get_xstats_size)
++ size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
++
++ return size;
++}
++
++static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
++{
++ const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
++ struct nlattr *linkinfo, *data;
++ int err = -EMSGSIZE;
++
++ linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
++ if (linkinfo == NULL)
++ goto out;
++
++ if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
++ goto err_cancel_link;
++ if (ops->fill_xstats) {
++ err = ops->fill_xstats(skb, dev);
++ if (err < 0)
++ goto err_cancel_link;
++ }
++ if (ops->fill_info) {
++ data = nla_nest_start(skb, IFLA_INFO_DATA);
++ if (data == NULL)
++ goto err_cancel_link;
++ err = ops->fill_info(skb, dev);
++ if (err < 0)
++ goto err_cancel_data;
++ nla_nest_end(skb, data);
++ }
++
++ nla_nest_end(skb, linkinfo);
++ return 0;
++
++err_cancel_data:
++ nla_nest_cancel(skb, data);
++err_cancel_link:
++ nla_nest_cancel(skb, linkinfo);
++out:
++ return err;
++}
++
+ static const int rtm_min[RTM_NR_FAMILIES] =
+ {
+ [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+@@ -296,8 +453,9 @@
+ return ret;
+ }
+
+-int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
++int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
+ {
++ struct sock *rtnl = net->rtnl;
+ int err = 0;
+
+ NETLINK_CB(skb).dst_group = group;
+@@ -309,14 +467,17 @@
+ return err;
+ }
+
+-int rtnl_unicast(struct sk_buff *skb, u32 pid)
++int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
+ {
++ struct sock *rtnl = net->rtnl;
++
+ return nlmsg_unicast(rtnl, skb, pid);
+ }
+
+-int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
++int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ struct nlmsghdr *nlh, gfp_t flags)
+ {
++ struct sock *rtnl = net->rtnl;
+ int report = 0;
+
+ if (nlh)
+@@ -325,8 +486,10 @@
+ return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+ }
+
+-void rtnl_set_sk_err(u32 group, int error)
++void rtnl_set_sk_err(struct net *net, u32 group, int error)
+ {
++ struct sock *rtnl = net->rtnl;
++
+ netlink_set_err(rtnl, 0, group, error);
+ }
+
+@@ -437,7 +600,7 @@
+ a->tx_compressed = b->tx_compressed;
+ };
+
+-static inline size_t if_nlmsg_size(void)
++static inline size_t if_nlmsg_size(const struct net_device *dev)
+ {
+ return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+@@ -452,7 +615,8 @@
+ + nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(4) /* IFLA_MASTER */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+- + nla_total_size(1); /* IFLA_LINKMODE */
++ + nla_total_size(1) /* IFLA_LINKMODE */
++ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ }
+
+ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+@@ -522,6 +686,11 @@
+ }
+ }
+
++ if (dev->rtnl_link_ops) {
++ if (rtnl_link_fill(skb, dev) < 0)
++ goto nla_put_failure;
++ }
++
+ return nlmsg_end(skb, nlh);
+
+ nla_put_failure:
+@@ -531,12 +700,13 @@
+
+ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx;
+ int s_idx = cb->args[0];
+ struct net_device *dev;
+
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
+ continue;
+ if (idx < s_idx)
+@@ -555,6 +725,8 @@
+
+ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
++ [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
++ [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
+ [IFLA_MTU] = { .type = NLA_U32 },
+ [IFLA_TXQLEN] = { .type = NLA_U32 },
+@@ -563,44 +735,16 @@
+ [IFLA_LINKMODE] = { .type = NLA_U8 },
+ };
+
+-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+-{
+- struct ifinfomsg *ifm;
+- struct net_device *dev;
+- int err, send_addr_notify = 0, modified = 0;
+- struct nlattr *tb[IFLA_MAX+1];
+- char ifname[IFNAMSIZ];
+-
+- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+- if (err < 0)
+- goto errout;
+-
+- if (tb[IFLA_IFNAME])
+- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+- else
+- ifname[0] = '\0';
+-
+- err = -EINVAL;
+- ifm = nlmsg_data(nlh);
+- if (ifm->ifi_index > 0)
+- dev = dev_get_by_index(ifm->ifi_index);
+- else if (tb[IFLA_IFNAME])
+- dev = dev_get_by_name(ifname);
+- else
+- goto errout;
+-
+- if (dev == NULL) {
+- err = -ENODEV;
+- goto errout;
+- }
+-
+- if (tb[IFLA_ADDRESS] &&
+- nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+- goto errout_dev;
++static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
++ [IFLA_INFO_KIND] = { .type = NLA_STRING },
++ [IFLA_INFO_DATA] = { .type = NLA_NESTED },
++};
+
+- if (tb[IFLA_BROADCAST] &&
+- nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+- goto errout_dev;
++static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
++ struct nlattr **tb, char *ifname, int modified)
++{
++ int send_addr_notify = 0;
++ int err;
+
+ if (tb[IFLA_MAP]) {
+ struct rtnl_link_ifmap *u_map;
+@@ -608,12 +752,12 @@
+
+ if (!dev->set_config) {
+ err = -EOPNOTSUPP;
+- goto errout_dev;
++ goto errout;
+ }
+
+ if (!netif_device_present(dev)) {
+ err = -ENODEV;
+- goto errout_dev;
++ goto errout;
+ }
+
+ u_map = nla_data(tb[IFLA_MAP]);
+@@ -626,7 +770,7 @@
+
+ err = dev->set_config(dev, &k_map);
+ if (err < 0)
+- goto errout_dev;
++ goto errout;
+
+ modified = 1;
+ }
+@@ -637,19 +781,19 @@
+
+ if (!dev->set_mac_address) {
+ err = -EOPNOTSUPP;
+- goto errout_dev;
++ goto errout;
+ }
+
+ if (!netif_device_present(dev)) {
+ err = -ENODEV;
+- goto errout_dev;
++ goto errout;
+ }
+
+ len = sizeof(sa_family_t) + dev->addr_len;
+ sa = kmalloc(len, GFP_KERNEL);
+ if (!sa) {
+ err = -ENOMEM;
+- goto errout_dev;
++ goto errout;
+ }
+ sa->sa_family = dev->type;
+ memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+@@ -657,7 +801,7 @@
+ err = dev->set_mac_address(dev, sa);
+ kfree(sa);
+ if (err)
+- goto errout_dev;
++ goto errout;
+ send_addr_notify = 1;
+ modified = 1;
+ }
+@@ -665,7 +809,7 @@
+ if (tb[IFLA_MTU]) {
+ err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+ if (err < 0)
+- goto errout_dev;
++ goto errout;
+ modified = 1;
+ }
+
+@@ -677,7 +821,7 @@
+ if (ifm->ifi_index > 0 && ifname[0]) {
+ err = dev_change_name(dev, ifname);
+ if (err < 0)
+- goto errout_dev;
++ goto errout;
+ modified = 1;
+ }
+
+@@ -686,7 +830,6 @@
+ send_addr_notify = 1;
+ }
+
+-
+ if (ifm->ifi_flags || ifm->ifi_change) {
+ unsigned int flags = ifm->ifi_flags;
+
+@@ -714,7 +857,7 @@
+
+ err = 0;
+
+-errout_dev:
++errout:
+ if (err < 0 && modified && net_ratelimit())
+ printk(KERN_WARNING "A link change request failed with "
+ "some changes comitted already. Interface %s may "
+@@ -723,14 +866,237 @@
+
+ if (send_addr_notify)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
++ return err;
++}
++
++static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ struct ifinfomsg *ifm;
++ struct net_device *dev;
++ int err;
++ struct nlattr *tb[IFLA_MAX+1];
++ char ifname[IFNAMSIZ];
+
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ goto errout;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++ else
++ ifname[0] = '\0';
++
++ err = -EINVAL;
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = dev_get_by_index(net, ifm->ifi_index);
++ else if (tb[IFLA_IFNAME])
++ dev = dev_get_by_name(net, ifname);
++ else
++ goto errout;
++
++ if (dev == NULL) {
++ err = -ENODEV;
++ goto errout;
++ }
++
++ if (tb[IFLA_ADDRESS] &&
++ nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
++ goto errout_dev;
++
++ if (tb[IFLA_BROADCAST] &&
++ nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
++ goto errout_dev;
++
++ err = do_setlink(dev, ifm, tb, ifname, 0);
++errout_dev:
+ dev_put(dev);
+ errout:
+ return err;
+ }
+
++static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ const struct rtnl_link_ops *ops;
++ struct net_device *dev;
++ struct ifinfomsg *ifm;
++ char ifname[IFNAMSIZ];
++ struct nlattr *tb[IFLA_MAX+1];
++ int err;
++
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ return err;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = __dev_get_by_index(net, ifm->ifi_index);
++ else if (tb[IFLA_IFNAME])
++ dev = __dev_get_by_name(net, ifname);
++ else
++ return -EINVAL;
++
++ if (!dev)
++ return -ENODEV;
++
++ ops = dev->rtnl_link_ops;
++ if (!ops)
++ return -EOPNOTSUPP;
++
++ ops->dellink(dev);
++ return 0;
++}
++
++static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ const struct rtnl_link_ops *ops;
++ struct net_device *dev;
++ struct ifinfomsg *ifm;
++ char kind[MODULE_NAME_LEN];
++ char ifname[IFNAMSIZ];
++ struct nlattr *tb[IFLA_MAX+1];
++ struct nlattr *linkinfo[IFLA_INFO_MAX+1];
++ int err;
++
++replay:
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ return err;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++ else
++ ifname[0] = '\0';
++
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = __dev_get_by_index(net, ifm->ifi_index);
++ else if (ifname[0])
++ dev = __dev_get_by_name(net, ifname);
++ else
++ dev = NULL;
++
++ if (tb[IFLA_LINKINFO]) {
++ err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
++ tb[IFLA_LINKINFO], ifla_info_policy);
++ if (err < 0)
++ return err;
++ } else
++ memset(linkinfo, 0, sizeof(linkinfo));
++
++ if (linkinfo[IFLA_INFO_KIND]) {
++ nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
++ ops = rtnl_link_ops_get(kind);
++ } else {
++ kind[0] = '\0';
++ ops = NULL;
++ }
++
++ if (1) {
++ struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
++
++ if (ops) {
++ if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
++ err = nla_parse_nested(attr, ops->maxtype,
++ linkinfo[IFLA_INFO_DATA],
++ ops->policy);
++ if (err < 0)
++ return err;
++ data = attr;
++ }
++ if (ops->validate) {
++ err = ops->validate(tb, data);
++ if (err < 0)
++ return err;
++ }
++ }
++
++ if (dev) {
++ int modified = 0;
++
++ if (nlh->nlmsg_flags & NLM_F_EXCL)
++ return -EEXIST;
++ if (nlh->nlmsg_flags & NLM_F_REPLACE)
++ return -EOPNOTSUPP;
++
++ if (linkinfo[IFLA_INFO_DATA]) {
++ if (!ops || ops != dev->rtnl_link_ops ||
++ !ops->changelink)
++ return -EOPNOTSUPP;
++
++ err = ops->changelink(dev, tb, data);
++ if (err < 0)
++ return err;
++ modified = 1;
++ }
++
++ return do_setlink(dev, ifm, tb, ifname, modified);
++ }
++
++ if (!(nlh->nlmsg_flags & NLM_F_CREATE))
++ return -ENODEV;
++
++ if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change)
++ return -EOPNOTSUPP;
++ if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] ||
++ tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
++ return -EOPNOTSUPP;
++
++ if (!ops) {
++#ifdef CONFIG_KMOD
++ if (kind[0]) {
++ __rtnl_unlock();
++ request_module("rtnl-link-%s", kind);
++ rtnl_lock();
++ ops = rtnl_link_ops_get(kind);
++ if (ops)
++ goto replay;
++ }
++#endif
++ return -EOPNOTSUPP;
++ }
++
++ if (!ifname[0])
++ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
++ dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
++ if (!dev)
++ return -ENOMEM;
++
++ if (strchr(dev->name, '%')) {
++ err = dev_alloc_name(dev, dev->name);
++ if (err < 0)
++ goto err_free;
++ }
++ dev->rtnl_link_ops = ops;
++
++ if (tb[IFLA_MTU])
++ dev->mtu = nla_get_u32(tb[IFLA_MTU]);
++ if (tb[IFLA_TXQLEN])
++ dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
++ if (tb[IFLA_WEIGHT])
++ dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
++ if (tb[IFLA_OPERSTATE])
++ set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
++ if (tb[IFLA_LINKMODE])
++ dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
++
++ err = ops->newlink(dev, tb, data);
++err_free:
++ if (err < 0)
++ free_netdev(dev);
++ return err;
++ }
++}
++
+ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifinfomsg *ifm;
+ struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *dev = NULL;
+@@ -743,13 +1109,13 @@
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_index > 0) {
+- dev = dev_get_by_index(ifm->ifi_index);
++ dev = dev_get_by_index(net, ifm->ifi_index);
+ if (dev == NULL)
+ return -ENODEV;
+ } else
+ return -EINVAL;
+
+- nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
++ nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ if (nskb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+@@ -763,7 +1129,7 @@
+ kfree_skb(nskb);
+ goto errout;
+ }
+- err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
++ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+ errout:
+ dev_put(dev);
+
+@@ -796,13 +1162,14 @@
+
+ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+ {
++ struct net *net = dev->nd_net;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ if (!nx_dev_visible(current->nx_info, dev))
+ return;
+
+- skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
++ skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ if (skb == NULL)
+ goto errout;
+
+@@ -813,10 +1180,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
++ err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_LINK, err);
++ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ }
+
+/* Protected by RTNL semaphore. */
+@@ -827,6 +1194,7 @@
+
+ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ {
++ struct net *net = skb->sk->sk_net;
+ rtnl_doit_func doit;
+ int sz_idx, kind;
+ int min_len;
+@@ -855,6 +1223,7 @@
+ return -EPERM;
+
+ if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
++ struct sock *rtnl;
+ rtnl_dumpit_func dumpit;
+
+ dumpit = rtnl_get_dumpit(family, type);
+@@ -862,6 +1231,7 @@
+ return -EOPNOTSUPP;
+
+ __rtnl_unlock();
++ rtnl = net->rtnl;
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ rtnl_lock();
+ return err;
+@@ -911,6 +1281,10 @@
+ static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+ struct net_device *dev = ptr;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+@@ -936,6 +1310,36 @@
+ .notifier_call = rtnetlink_event,
+ };
+
++
++static int rtnetlink_net_init(struct net *net)
++{
++ struct sock *sk;
++ sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
++ rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
++ if (!sk)
++ return -ENOMEM;
++
++ /* Don't hold an extra reference on the namespace */
++ put_net(sk->sk_net);
++ net->rtnl = sk;
++ return 0;
++}
++
++static void rtnetlink_net_exit(struct net *net)
++{
++ /* At the last minute lie and say this is a socket for the
++ * initial network namespace, so the socket will be safe to
++ * free.
++ */
++ net->rtnl->sk_net = get_net(&init_net);
++ sock_put(net->rtnl);
++}
++
++static struct pernet_operations rtnetlink_net_ops = {
++ .init = rtnetlink_net_init,
++ .exit = rtnetlink_net_exit,
++};
++
+ void __init rtnetlink_init(void)
+ {
+ int i;
+@@ -948,15 +1352,16 @@
+ if (!rta_buf)
+ panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+- rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+- &rtnl_mutex, THIS_MODULE);
+- if (rtnl == NULL)
++ if (register_pernet_subsys(&rtnetlink_net_ops))
+ panic("rtnetlink_init: cannot initialize rtnetlink\n");
++
+ netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+ register_netdevice_notifier(&rtnetlink_dev_notifier);
+
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
++ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
++ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+@@ -965,6 +1370,7 @@
+ EXPORT_SYMBOL(__rta_fill);
+ EXPORT_SYMBOL(rtattr_strlcpy);
+ EXPORT_SYMBOL(rtattr_parse);
++EXPORT_SYMBOL(__rtattr_parse_nested_compat);
+ EXPORT_SYMBOL(rtnetlink_put_metrics);
+ EXPORT_SYMBOL(rtnl_lock);
+ EXPORT_SYMBOL(rtnl_trylock);
+diff -Nurb linux-2.6.22-570/net/core/skbuff.c linux-2.6.22-591/net/core/skbuff.c
+--- linux-2.6.22-570/net/core/skbuff.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/core/skbuff.c 2007-12-21 15:36:12.000000000 -0500
+@@ -417,6 +417,7 @@
+ C(csum);
+ C(local_df);
+ n->cloned = 1;
++ n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+ n->nohdr = 0;
+ C(pkt_type);
+ C(ip_summed);
+@@ -681,6 +682,7 @@
+ skb->network_header += off;
+ skb->mac_header += off;
+ skb->cloned = 0;
++ skb->hdr_len = 0;
+ skb->nohdr = 0;
+ atomic_set(&skb_shinfo(skb)->dataref, 1);
+ return 0;
+@@ -2012,13 +2014,13 @@
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY,
+ NULL, NULL);
+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ (2*sizeof(struct sk_buff)) +
+ sizeof(atomic_t),
+ 0,
+- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY,
+ NULL, NULL);
+ }
+
+diff -Nurb linux-2.6.22-570/net/core/sock.c linux-2.6.22-591/net/core/sock.c
+--- linux-2.6.22-570/net/core/sock.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/core/sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -123,6 +123,7 @@
+ #include <net/sock.h>
+ #include <net/xfrm.h>
+ #include <linux/ipsec.h>
++#include <net/net_namespace.h>
+
+ #include <linux/filter.h>
+ #include <linux/vs_socket.h>
+@@ -360,6 +361,7 @@
+ char __user *optval, int optlen)
+ {
+ struct sock *sk=sock->sk;
++ struct net *net = sk->sk_net;
+ struct sk_filter *filter;
+ int val;
+ int valbool;
+@@ -614,7 +616,7 @@
+ if (devname[0] == '\0') {
+ sk->sk_bound_dev_if = 0;
+ } else {
+- struct net_device *dev = dev_get_by_name(devname);
++ struct net_device *dev = dev_get_by_name(net, devname);
+ if (!dev) {
+ ret = -ENODEV;
+ break;
+@@ -867,7 +869,7 @@
+ * @prot: struct proto associated with this new sock instance
+ * @zero_it: if we should zero the newly allocated sock
+ */
+-struct sock *sk_alloc(int family, gfp_t priority,
++struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+ struct proto *prot, int zero_it)
+ {
+ struct sock *sk = NULL;
+@@ -888,6 +890,7 @@
+ */
+ sk->sk_prot = sk->sk_prot_creator = prot;
+ sock_lock_init(sk);
++ sk->sk_net = get_net(net);
+ }
+ sock_vx_init(sk);
+ sock_nx_init(sk);
+@@ -929,6 +932,7 @@
+ __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+
+ security_sk_free(sk);
++ put_net(sk->sk_net);
+ vx_sock_dec(sk);
+ clr_vx_info(&sk->sk_vx_info);
+ sk->sk_xid = -1;
+@@ -943,7 +947,7 @@
+
+ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+ {
+- struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
++ struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
+
+ if (newsk != NULL) {
+ struct sk_filter *filter;
+@@ -2017,7 +2021,7 @@
+ static int __init proto_init(void)
+ {
+ /* register /proc/net/protocols */
+- return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
++ return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ }
+
+ subsys_initcall(proto_init);
+diff -Nurb linux-2.6.22-570/net/core/sysctl_net_core.c linux-2.6.22-591/net/core/sysctl_net_core.c
+--- linux-2.6.22-570/net/core/sysctl_net_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/core/sysctl_net_core.c 2007-12-21 15:36:15.000000000 -0500
+@@ -9,25 +9,10 @@
+ #include <linux/sysctl.h>
+ #include <linux/module.h>
+ #include <linux/socket.h>
++#include <linux/netdevice.h>
++#include <net/xfrm.h>
+ #include <net/sock.h>
+
+-#ifdef CONFIG_SYSCTL
+-
+-extern int netdev_max_backlog;
+-extern int weight_p;
+-
+-extern __u32 sysctl_wmem_max;
+-extern __u32 sysctl_rmem_max;
+-
+-extern int sysctl_core_destroy_delay;
+-
+-#ifdef CONFIG_XFRM
+-extern u32 sysctl_xfrm_aevent_etime;
+-extern u32 sysctl_xfrm_aevent_rseqth;
+-extern int sysctl_xfrm_larval_drop;
+-extern u32 sysctl_xfrm_acq_expires;
+-#endif
+-
+ ctl_table core_table[] = {
+ #ifdef CONFIG_NET
+ {
+@@ -103,11 +88,32 @@
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
++#endif /* CONFIG_NET */
++ {
++ .ctl_name = NET_CORE_BUDGET,
++ .procname = "netdev_budget",
++ .data = &netdev_budget,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_CORE_WARNINGS,
++ .procname = "warnings",
++ .data = &net_msg_warn,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ { .ctl_name = 0 }
++};
++
++struct ctl_table multi_core_table[] = {
+ #ifdef CONFIG_XFRM
+ {
+ .ctl_name = NET_CORE_AEVENT_ETIME,
+ .procname = "xfrm_aevent_etime",
+- .data = &sysctl_xfrm_aevent_etime,
++ .data = &init_net.sysctl_xfrm_aevent_etime,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+@@ -115,7 +121,7 @@
+ {
+ .ctl_name = NET_CORE_AEVENT_RSEQTH,
+ .procname = "xfrm_aevent_rseqth",
+- .data = &sysctl_xfrm_aevent_rseqth,
++ .data = &init_net.sysctl_xfrm_aevent_rseqth,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+@@ -123,7 +129,7 @@
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "xfrm_larval_drop",
+- .data = &sysctl_xfrm_larval_drop,
++ .data = &init_net.sysctl_xfrm_larval_drop,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+@@ -131,38 +137,19 @@
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "xfrm_acq_expires",
+- .data = &sysctl_xfrm_acq_expires,
++ .data = &init_net.sysctl_xfrm_acq_expires,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ #endif /* CONFIG_XFRM */
+-#endif /* CONFIG_NET */
+ {
+ .ctl_name = NET_CORE_SOMAXCONN,
+ .procname = "somaxconn",
+- .data = &sysctl_somaxconn,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_CORE_BUDGET,
+- .procname = "netdev_budget",
+- .data = &netdev_budget,
++ .data = &init_net.sysctl_somaxconn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+- {
+- .ctl_name = NET_CORE_WARNINGS,
+- .procname = "warnings",
+- .data = &net_msg_warn,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- { .ctl_name = 0 }
++ {}
+ };
+-
+-#endif
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.c linux-2.6.22-591/net/dccp/ccids/ccid3.c
+--- linux-2.6.22-570/net/dccp/ccids/ccid3.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ccids/ccid3.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/dccp/ccids/ccid3.c
+ *
+- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ *
+ * An implementation of the DCCP protocol
+ *
+@@ -49,7 +49,6 @@
+
+ static struct dccp_tx_hist *ccid3_tx_hist;
+ static struct dccp_rx_hist *ccid3_rx_hist;
+-static struct dccp_li_hist *ccid3_li_hist;
+
+ /*
+ * Transmitter Half-Connection Routines
+@@ -194,25 +193,20 @@
+ * The algorithm is not applicable if RTT < 4 microseconds.
+ */
+ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
+- struct timeval *now)
++ ktime_t now)
+ {
+- suseconds_t delta;
+ u32 quarter_rtts;
+
+ if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */
+ return;
+
+- delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
+- DCCP_BUG_ON(delta < 0);
+-
+- quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
++ quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
++ quarter_rtts /= hctx->ccid3hctx_rtt / 4;
+
+ if (quarter_rtts > 0) {
+- hctx->ccid3hctx_t_last_win_count = *now;
++ hctx->ccid3hctx_t_last_win_count = now;
+ hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5);
+ hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */
+-
+- ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
+ }
+ }
+
+@@ -312,8 +306,8 @@
+ {
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+- struct timeval now;
+- suseconds_t delay;
++ ktime_t now = ktime_get_real();
++ s64 delay;
+
+ BUG_ON(hctx == NULL);
+
+@@ -325,8 +319,6 @@
+ if (unlikely(skb->len == 0))
+ return -EBADMSG;
+
+- dccp_timestamp(sk, &now);
+-
+ switch (hctx->ccid3hctx_state) {
+ case TFRC_SSTATE_NO_SENT:
+ sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+@@ -349,7 +341,7 @@
+ ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+ hctx->ccid3hctx_rtt = dp->dccps_syn_rtt;
+ hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
+- hctx->ccid3hctx_t_ld = now;
++ hctx->ccid3hctx_t_ld = ktime_to_timeval(now);
+ } else {
+ /* Sender does not have RTT sample: X = MSS/second */
+ hctx->ccid3hctx_x = dp->dccps_mss_cache;
+@@ -361,7 +353,7 @@
+ break;
+ case TFRC_SSTATE_NO_FBACK:
+ case TFRC_SSTATE_FBACK:
+- delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
++ delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now);
+ ccid3_pr_debug("delay=%ld\n", (long)delay);
+ /*
+ * Scheduling of packet transmissions [RFC 3448, 4.6]
+@@ -371,10 +363,10 @@
+ * else
+ * // send the packet in (t_nom - t_now) milliseconds.
+ */
+- if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
+- return delay / 1000L;
++ if (delay - (s64)hctx->ccid3hctx_delta >= 1000)
++ return (u32)delay / 1000L;
+
+- ccid3_hc_tx_update_win_count(hctx, &now);
++ ccid3_hc_tx_update_win_count(hctx, now);
+ break;
+ case TFRC_SSTATE_TERM:
+ DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+@@ -387,8 +379,8 @@
+ hctx->ccid3hctx_idle = 0;
+
+ /* set the nominal send time for the next following packet */
+- timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+-
++ hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
++ hctx->ccid3hctx_t_ipi);
+ return 0;
+ }
+
+@@ -819,154 +811,6 @@
+ return 0;
+ }
+
+-/* calculate first loss interval
+- *
+- * returns estimated loss interval in usecs */
+-
+-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+-{
+- struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+- struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+- u32 x_recv, p;
+- suseconds_t rtt, delta;
+- struct timeval tstamp = { 0, };
+- int interval = 0;
+- int win_count = 0;
+- int step = 0;
+- u64 fval;
+-
+- list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
+- dccphrx_node) {
+- if (dccp_rx_hist_entry_data_packet(entry)) {
+- tail = entry;
+-
+- switch (step) {
+- case 0:
+- tstamp = entry->dccphrx_tstamp;
+- win_count = entry->dccphrx_ccval;
+- step = 1;
+- break;
+- case 1:
+- interval = win_count - entry->dccphrx_ccval;
+- if (interval < 0)
+- interval += TFRC_WIN_COUNT_LIMIT;
+- if (interval > 4)
+- goto found;
+- break;
+- }
+- }
+- }
+-
+- if (unlikely(step == 0)) {
+- DCCP_WARN("%s(%p), packet history has no data packets!\n",
+- dccp_role(sk), sk);
+- return ~0;
+- }
+-
+- if (unlikely(interval == 0)) {
+- DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
+- "Defaulting to 1\n", dccp_role(sk), sk);
+- interval = 1;
+- }
+-found:
+- if (!tail) {
+- DCCP_CRIT("tail is null\n");
+- return ~0;
+- }
+-
+- delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+- DCCP_BUG_ON(delta < 0);
+-
+- rtt = delta * 4 / interval;
+- ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
+- dccp_role(sk), sk, (int)rtt);
+-
+- /*
+- * Determine the length of the first loss interval via inverse lookup.
+- * Assume that X_recv can be computed by the throughput equation
+- * s
+- * X_recv = --------
+- * R * fval
+- * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
+- */
+- if (rtt == 0) { /* would result in divide-by-zero */
+- DCCP_WARN("RTT==0\n");
+- return ~0;
+- }
+-
+- dccp_timestamp(sk, &tstamp);
+- delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+- DCCP_BUG_ON(delta <= 0);
+-
+- x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+- if (x_recv == 0) { /* would also trigger divide-by-zero */
+- DCCP_WARN("X_recv==0\n");
+- if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+- DCCP_BUG("stored value of X_recv is zero");
+- return ~0;
+- }
+- }
+-
+- fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
+- fval = scaled_div32(fval, x_recv);
+- p = tfrc_calc_x_reverse_lookup(fval);
+-
+- ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
+- "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+-
+- if (p == 0)
+- return ~0;
+- else
+- return 1000000 / p;
+-}
+-
+-static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+-{
+- struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+- struct dccp_li_hist_entry *head;
+- u64 seq_temp;
+-
+- if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+- if (!dccp_li_hist_interval_new(ccid3_li_hist,
+- &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+- return;
+-
+- head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+- struct dccp_li_hist_entry, dccplih_node);
+- head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+- } else {
+- struct dccp_li_hist_entry *entry;
+- struct list_head *tail;
+-
+- head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+- struct dccp_li_hist_entry, dccplih_node);
+- /* FIXME win count check removed as was wrong */
+- /* should make this check with receive history */
+- /* and compare there as per section 10.2 of RFC4342 */
+-
+- /* new loss event detected */
+- /* calculate last interval length */
+- seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+- entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);
+-
+- if (entry == NULL) {
+- DCCP_BUG("out of memory - can not allocate entry");
+- return;
+- }
+-
+- list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+-
+- tail = hcrx->ccid3hcrx_li_hist.prev;
+- list_del(tail);
+- kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+-
+- /* Create the newest interval */
+- entry->dccplih_seqno = seq_loss;
+- entry->dccplih_interval = seq_temp;
+- entry->dccplih_win_count = win_loss;
+- }
+-}
+-
+ static int ccid3_hc_rx_detect_loss(struct sock *sk,
+ struct dccp_rx_hist_entry *packet)
+ {
+@@ -992,7 +836,14 @@
+ while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+ > TFRC_RECV_NUM_LATE_LOSS) {
+ loss = 1;
+- ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
++ dccp_li_update_li(sk,
++ &hcrx->ccid3hcrx_li_hist,
++ &hcrx->ccid3hcrx_hist,
++ &hcrx->ccid3hcrx_tstamp_last_feedback,
++ hcrx->ccid3hcrx_s,
++ hcrx->ccid3hcrx_bytes_recv,
++ hcrx->ccid3hcrx_x_recv,
++ hcrx->ccid3hcrx_seqno_nonloss,
+ hcrx->ccid3hcrx_ccval_nonloss);
+ tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+ dccp_inc_seqno(&tmp_seqno);
+@@ -1152,7 +1003,7 @@
+ dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
+
+ /* Empty loss interval history */
+- dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
++ dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
+ }
+
+ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
+@@ -1236,19 +1087,12 @@
+ if (ccid3_tx_hist == NULL)
+ goto out_free_rx;
+
+- ccid3_li_hist = dccp_li_hist_new("ccid3");
+- if (ccid3_li_hist == NULL)
+- goto out_free_tx;
+-
+ rc = ccid_register(&ccid3);
+ if (rc != 0)
+- goto out_free_loss_interval_history;
++ goto out_free_tx;
+ out:
+ return rc;
+
+-out_free_loss_interval_history:
+- dccp_li_hist_delete(ccid3_li_hist);
+- ccid3_li_hist = NULL;
+ out_free_tx:
+ dccp_tx_hist_delete(ccid3_tx_hist);
+ ccid3_tx_hist = NULL;
+@@ -1271,10 +1115,6 @@
+ dccp_rx_hist_delete(ccid3_rx_hist);
+ ccid3_rx_hist = NULL;
+ }
+- if (ccid3_li_hist != NULL) {
+- dccp_li_hist_delete(ccid3_li_hist);
+- ccid3_li_hist = NULL;
+- }
+ }
+ module_exit(ccid3_module_exit);
+
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.h linux-2.6.22-591/net/dccp/ccids/ccid3.h
+--- linux-2.6.22-570/net/dccp/ccids/ccid3.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ccids/ccid3.h 2007-12-21 15:36:12.000000000 -0500
+@@ -36,6 +36,7 @@
+ #ifndef _DCCP_CCID3_H_
+ #define _DCCP_CCID3_H_
+
++#include <linux/ktime.h>
+ #include <linux/list.h>
+ #include <linux/time.h>
+ #include <linux/types.h>
+@@ -108,10 +109,10 @@
+ enum ccid3_hc_tx_states ccid3hctx_state:8;
+ u8 ccid3hctx_last_win_count;
+ u8 ccid3hctx_idle;
+- struct timeval ccid3hctx_t_last_win_count;
++ ktime_t ccid3hctx_t_last_win_count;
+ struct timer_list ccid3hctx_no_feedback_timer;
+ struct timeval ccid3hctx_t_ld;
+- struct timeval ccid3hctx_t_nom;
++ ktime_t ccid3hctx_t_nom;
+ u32 ccid3hctx_delta;
+ struct list_head ccid3hctx_hist;
+ struct ccid3_options_received ccid3hctx_options_received;
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c
+--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/dccp/ccids/lib/loss_interval.c
+ *
+- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+@@ -15,58 +15,38 @@
+ #include <net/sock.h>
+ #include "../../dccp.h"
+ #include "loss_interval.h"
++#include "packet_history.h"
++#include "tfrc.h"
+
+-struct dccp_li_hist *dccp_li_hist_new(const char *name)
+-{
+- struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+- static const char dccp_li_hist_mask[] = "li_hist_%s";
+- char *slab_name;
+-
+- if (hist == NULL)
+- goto out;
+-
+- slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
+- GFP_ATOMIC);
+- if (slab_name == NULL)
+- goto out_free_hist;
++#define DCCP_LI_HIST_IVAL_F_LENGTH 8
+
+- sprintf(slab_name, dccp_li_hist_mask, name);
+- hist->dccplih_slab = kmem_cache_create(slab_name,
+- sizeof(struct dccp_li_hist_entry),
+- 0, SLAB_HWCACHE_ALIGN,
+- NULL, NULL);
+- if (hist->dccplih_slab == NULL)
+- goto out_free_slab_name;
+-out:
+- return hist;
+-out_free_slab_name:
+- kfree(slab_name);
+-out_free_hist:
+- kfree(hist);
+- hist = NULL;
+- goto out;
+-}
++struct dccp_li_hist_entry {
++ struct list_head dccplih_node;
++ u64 dccplih_seqno:48,
++ dccplih_win_count:4;
++ u32 dccplih_interval;
++};
+
+-EXPORT_SYMBOL_GPL(dccp_li_hist_new);
++struct kmem_cache *dccp_li_cachep __read_mostly;
+
+-void dccp_li_hist_delete(struct dccp_li_hist *hist)
++static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
+ {
+- const char* name = kmem_cache_name(hist->dccplih_slab);
+-
+- kmem_cache_destroy(hist->dccplih_slab);
+- kfree(name);
+- kfree(hist);
++ return kmem_cache_alloc(dccp_li_cachep, prio);
+ }
+
+-EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
++static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry)
++{
++ if (entry != NULL)
++ kmem_cache_free(dccp_li_cachep, entry);
++}
+
+-void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
++void dccp_li_hist_purge(struct list_head *list)
+ {
+ struct dccp_li_hist_entry *entry, *next;
+
+ list_for_each_entry_safe(entry, next, list, dccplih_node) {
+ list_del_init(&entry->dccplih_node);
+- kmem_cache_free(hist->dccplih_slab, entry);
++ kmem_cache_free(dccp_li_cachep, entry);
+ }
+ }
+
+@@ -118,16 +98,16 @@
+
+ EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
+
+-int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+- struct list_head *list, const u64 seq_loss, const u8 win_loss)
++static int dccp_li_hist_interval_new(struct list_head *list,
++ const u64 seq_loss, const u8 win_loss)
+ {
+ struct dccp_li_hist_entry *entry;
+ int i;
+
+ for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
+- entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
++ entry = dccp_li_hist_entry_new(GFP_ATOMIC);
+ if (entry == NULL) {
+- dccp_li_hist_purge(hist, list);
++ dccp_li_hist_purge(list);
+ DCCP_BUG("loss interval list entry is NULL");
+ return 0;
+ }
+@@ -140,4 +120,176 @@
+ return 1;
+ }
+
+-EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
++/* calculate first loss interval
++ *
++ * returns estimated loss interval in usecs */
++static u32 dccp_li_calc_first_li(struct sock *sk,
++ struct list_head *hist_list,
++ struct timeval *last_feedback,
++ u16 s, u32 bytes_recv,
++ u32 previous_x_recv)
++{
++ struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
++ u32 x_recv, p;
++ suseconds_t rtt, delta;
++ struct timeval tstamp = { 0, 0 };
++ int interval = 0;
++ int win_count = 0;
++ int step = 0;
++ u64 fval;
++
++ list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
++ if (dccp_rx_hist_entry_data_packet(entry)) {
++ tail = entry;
++
++ switch (step) {
++ case 0:
++ tstamp = entry->dccphrx_tstamp;
++ win_count = entry->dccphrx_ccval;
++ step = 1;
++ break;
++ case 1:
++ interval = win_count - entry->dccphrx_ccval;
++ if (interval < 0)
++ interval += TFRC_WIN_COUNT_LIMIT;
++ if (interval > 4)
++ goto found;
++ break;
++ }
++ }
++ }
++
++ if (unlikely(step == 0)) {
++ DCCP_WARN("%s(%p), packet history has no data packets!\n",
++ dccp_role(sk), sk);
++ return ~0;
++ }
++
++ if (unlikely(interval == 0)) {
++ DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
++ "Defaulting to 1\n", dccp_role(sk), sk);
++ interval = 1;
++ }
++found:
++ if (!tail) {
++ DCCP_CRIT("tail is null\n");
++ return ~0;
++ }
++
++ delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
++ DCCP_BUG_ON(delta < 0);
++
++ rtt = delta * 4 / interval;
++ dccp_pr_debug("%s(%p), approximated RTT to %dus\n",
++ dccp_role(sk), sk, (int)rtt);
++
++ /*
++ * Determine the length of the first loss interval via inverse lookup.
++ * Assume that X_recv can be computed by the throughput equation
++ * s
++ * X_recv = --------
++ * R * fval
++ * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
++ */
++ if (rtt == 0) { /* would result in divide-by-zero */
++ DCCP_WARN("RTT==0\n");
++ return ~0;
++ }
++
++ dccp_timestamp(sk, &tstamp);
++ delta = timeval_delta(&tstamp, last_feedback);
++ DCCP_BUG_ON(delta <= 0);
++
++ x_recv = scaled_div32(bytes_recv, delta);
++ if (x_recv == 0) { /* would also trigger divide-by-zero */
++ DCCP_WARN("X_recv==0\n");
++ if (previous_x_recv == 0) {
++ DCCP_BUG("stored value of X_recv is zero");
++ return ~0;
++ }
++ x_recv = previous_x_recv;
++ }
++
++ fval = scaled_div(s, rtt);
++ fval = scaled_div32(fval, x_recv);
++ p = tfrc_calc_x_reverse_lookup(fval);
++
++ dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
++ "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
++
++ if (p == 0)
++ return ~0;
++ else
++ return 1000000 / p;
++}
++
++void dccp_li_update_li(struct sock *sk,
++ struct list_head *li_hist_list,
++ struct list_head *hist_list,
++ struct timeval *last_feedback, u16 s, u32 bytes_recv,
++ u32 previous_x_recv, u64 seq_loss, u8 win_loss)
++{
++ struct dccp_li_hist_entry *head;
++ u64 seq_temp;
++
++ if (list_empty(li_hist_list)) {
++ if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
++ win_loss))
++ return;
++
++ head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
++ dccplih_node);
++ head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
++ last_feedback,
++ s, bytes_recv,
++ previous_x_recv);
++ } else {
++ struct dccp_li_hist_entry *entry;
++ struct list_head *tail;
++
++ head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
++ dccplih_node);
++ /* FIXME: win count check removed as it was wrong; the check should
++ * be made against the receive history and compared there, as per
++ * section 10.2 of RFC 4342. */
++
++ /* new loss event detected */
++ /* calculate last interval length */
++ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
++ entry = dccp_li_hist_entry_new(GFP_ATOMIC);
++
++ if (entry == NULL) {
++ DCCP_BUG("out of memory - can not allocate entry");
++ return;
++ }
++
++ list_add(&entry->dccplih_node, li_hist_list);
++
++ tail = li_hist_list->prev;
++ list_del(tail);
++ kmem_cache_free(dccp_li_cachep, tail);
++
++ /* Create the newest interval */
++ entry->dccplih_seqno = seq_loss;
++ entry->dccplih_interval = seq_temp;
++ entry->dccplih_win_count = win_loss;
++ }
++}
++
++EXPORT_SYMBOL_GPL(dccp_li_update_li);
++
++static __init int dccp_li_init(void)
++{
++ dccp_li_cachep = kmem_cache_create("dccp_li_hist",
++ sizeof(struct dccp_li_hist_entry),
++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++ return dccp_li_cachep == NULL ? -ENOBUFS : 0;
++}
++
++static __exit void dccp_li_exit(void)
++{
++ kmem_cache_destroy(dccp_li_cachep);
++}
++
++module_init(dccp_li_init);
++module_exit(dccp_li_exit);
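+
+dccp_li_calc_first_li() above derives the first loss interval by
+inverting the TFRC throughput equation: it computes
+fval = s / (R * X_recv), finds p with f(p) = fval, and returns 1/p.
+A minimal user-space illustration of that inverse lookup, assuming the
+standard RFC 3448 loss-event rate function
+f(p) = sqrt(2p/3) + 12*sqrt(3p/8)*p*(1 + 32p^2) and plain
+floating-point bisection where the kernel uses the fixed-point table
+behind tfrc_calc_x_reverse_lookup(); build with "cc tfrc_inv.c -lm":
+
+ #include <math.h>
+ #include <stdio.h>
+
+ static double tfrc_f(double p)
+ {
+ 	return sqrt(2.0 * p / 3.0) +
+ 	       12.0 * sqrt(3.0 * p / 8.0) * p * (1.0 + 32.0 * p * p);
+ }
+
+ /* f() is monotonically increasing on (0,1], so bisection suffices */
+ static double tfrc_inverse(double fval)
+ {
+ 	double lo = 1e-9, hi = 1.0, mid;
+ 	int i;
+
+ 	for (i = 0; i < 64; i++) {
+ 		mid = (lo + hi) / 2.0;
+ 		if (tfrc_f(mid) < fval)
+ 			lo = mid;
+ 		else
+ 			hi = mid;
+ 	}
+ 	return (lo + hi) / 2.0;
+ }
+
+ int main(void)
+ {
+ 	double s = 1460.0;	/* packet size in bytes (example value) */
+ 	double rtt = 0.1;	/* RTT in seconds (example value) */
+ 	double x_recv = 1.0e5;	/* receive rate in bytes/s (example) */
+ 	double fval = s / (rtt * x_recv);
+ 	double p = tfrc_inverse(fval);
+
+ 	printf("p = %g, first loss interval = %g packets\n", p, 1.0 / p);
+ 	return 0;
+ }
+
+The kernel's return value of 1000000 / p corresponds to 1.0 / p here,
+on the assumption that its lookup table stores p scaled by 1e6.
+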
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h
+--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h 2007-12-21 15:36:12.000000000 -0500
+@@ -3,8 +3,8 @@
+ /*
+ * net/dccp/ccids/lib/loss_interval.h
+ *
+- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+@@ -14,44 +14,16 @@
+ */
+
+ #include <linux/list.h>
+-#include <linux/slab.h>
+ #include <linux/time.h>
+
+-#define DCCP_LI_HIST_IVAL_F_LENGTH 8
+-
+-struct dccp_li_hist {
+- struct kmem_cache *dccplih_slab;
+-};
+-
+-extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
+-extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
+-
+-struct dccp_li_hist_entry {
+- struct list_head dccplih_node;
+- u64 dccplih_seqno:48,
+- dccplih_win_count:4;
+- u32 dccplih_interval;
+-};
+-
+-static inline struct dccp_li_hist_entry *
+- dccp_li_hist_entry_new(struct dccp_li_hist *hist,
+- const gfp_t prio)
+-{
+- return kmem_cache_alloc(hist->dccplih_slab, prio);
+-}
+-
+-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
+- struct dccp_li_hist_entry *entry)
+-{
+- if (entry != NULL)
+- kmem_cache_free(hist->dccplih_slab, entry);
+-}
+-
+-extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
+- struct list_head *list);
++extern void dccp_li_hist_purge(struct list_head *list);
+
+ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+
+-extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+- struct list_head *list, const u64 seq_loss, const u8 win_loss);
++extern void dccp_li_update_li(struct sock *sk,
++ struct list_head *li_hist_list,
++ struct list_head *hist_list,
++ struct timeval *last_feedback, u16 s,
++ u32 bytes_recv, u32 previous_x_recv,
++ u64 seq_loss, u8 win_loss);
+ #endif /* _DCCP_LI_HIST_ */
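+
+dccp_li_update_li(), declared above, folds the loss bookkeeping into
+one entry point: on a new loss event it stores the distance from the
+previous loss in a fresh head entry and retires the oldest one, keeping
+the history at a fixed length. A compact user-space model of that
+bookkeeping, assuming a plain 8-slot array (mirroring the removed
+DCCP_LI_HIST_IVAL_F_LENGTH) in place of the kernel's list_head and
+kmem_cache machinery, and ignoring the 48-bit sequence wrap that
+dccp_delta_seqno() handles:
+
+ #include <stdint.h>
+ #include <stdio.h>
+
+ #define LI_HIST_LEN 8	/* mirrors DCCP_LI_HIST_IVAL_F_LENGTH */
+
+ struct li_entry {
+ 	uint64_t seqno;		/* seqno that opened the interval */
+ 	uint32_t interval;	/* closed interval length, packets */
+ };
+
+ static struct li_entry hist[LI_HIST_LEN];
+
+ static void li_update(uint64_t seq_loss)
+ {
+ 	int i;
+
+ 	/* drop the oldest slot, shift the rest down one place */
+ 	for (i = LI_HIST_LEN - 1; i > 0; i--)
+ 		hist[i] = hist[i - 1];
+
+ 	/* new head: the interval this loss closes, plus its seqno */
+ 	hist[0].interval = (uint32_t)(seq_loss - hist[1].seqno);
+ 	hist[0].seqno = seq_loss;
+ }
+
+ int main(void)
+ {
+ 	uint64_t losses[] = { 100, 350, 410, 900 };
+ 	int i;
+
+ 	hist[0].seqno = losses[0];	/* first interval opened */
+ 	for (i = 1; i < 4; i++)
+ 		li_update(losses[i]);
+ 	for (i = 0; i < 3; i++)
+ 		printf("interval[%d] = %u packets\n", i, hist[i].interval);
+ 	return 0;
+ }
+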
+diff -Nurb linux-2.6.22-570/net/dccp/dccp.h linux-2.6.22-591/net/dccp/dccp.h
+--- linux-2.6.22-570/net/dccp/dccp.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/dccp.h 2007-12-21 15:36:12.000000000 -0500
+@@ -184,7 +184,7 @@
+ /*
+ * Checksumming routines
+ */
+-static inline int dccp_csum_coverage(const struct sk_buff *skb)
++static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
+ {
+ const struct dccp_hdr* dh = dccp_hdr(skb);
+
+@@ -195,7 +195,7 @@
+
+ static inline void dccp_csum_outgoing(struct sk_buff *skb)
+ {
+- int cov = dccp_csum_coverage(skb);
++ unsigned int cov = dccp_csum_coverage(skb);
+
+ if (cov >= skb->len)
+ dccp_hdr(skb)->dccph_cscov = 0;
+diff -Nurb linux-2.6.22-570/net/dccp/ipv4.c linux-2.6.22-591/net/dccp/ipv4.c
+--- linux-2.6.22-570/net/dccp/ipv4.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ipv4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -202,6 +202,7 @@
+ */
+ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+ {
++ struct net *net = skb->dev->nd_net;
+ const struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
+ (iph->ihl << 2));
+@@ -213,13 +214,16 @@
+ __u64 seq;
+ int err;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (skb->len < (iph->ihl << 2) + 8) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
+ }
+
+ sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
+- iph->saddr, dh->dccph_sport, inet_iif(skb));
++ iph->saddr, dh->dccph_sport, inet_iif(skb), net);
+ if (sk == NULL) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
+@@ -441,7 +445,7 @@
+ nsk = inet_lookup_established(&dccp_hashinfo,
+ iph->saddr, dh->dccph_sport,
+ iph->daddr, dh->dccph_dport,
+- inet_iif(skb));
++ inet_iif(skb), sk->sk_net);
+ if (nsk != NULL) {
+ if (nsk->sk_state != DCCP_TIME_WAIT) {
+ bh_lock_sock(nsk);
+@@ -458,7 +462,8 @@
+ struct sk_buff *skb)
+ {
+ struct rtable *rt;
+- struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = ((struct rtable *)skb->dst)->rt_iif,
+ .nl_u = { .ip4_u =
+ { .daddr = ip_hdr(skb)->saddr,
+ .saddr = ip_hdr(skb)->daddr,
+@@ -809,11 +814,16 @@
+ /* this is called when real data arrives */
+ static int dccp_v4_rcv(struct sk_buff *skb)
+ {
++ struct net *net = skb->dev->nd_net;
+ const struct dccp_hdr *dh;
+ const struct iphdr *iph;
+ struct sock *sk;
+ int min_cov;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ /* Step 1: Check header basics */
+
+ if (dccp_invalid_packet(skb))
+@@ -852,7 +862,7 @@
+ * Look up flow ID in table and get corresponding socket */
+ sk = __inet_lookup(&dccp_hashinfo,
+ iph->saddr, dh->dccph_sport,
+- iph->daddr, dh->dccph_dport, inet_iif(skb));
++ iph->daddr, dh->dccph_dport, inet_iif(skb), net);
+ /*
+ * Step 2:
+ * If no socket ...
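+
+The two additions above show the transitional pattern used throughout
+this patch: capture the receive device's namespace, but bail out for
+anything other than the initial one, so existing protocols keep working
+unchanged while the stack grows namespace support underneath them. The
+guard in sketch form (foo_rcv is a placeholder name; this assumes the
+patched tree's skb->dev->nd_net field and is not a standalone program):
+
+ static int foo_rcv(struct sk_buff *skb)
+ {
+ 	/* serve only the initial namespace until foo is netns-aware */
+ 	if (skb->dev->nd_net != &init_net) {
+ 		kfree_skb(skb);		/* consume without processing */
+ 		return 0;
+ 	}
+ 	/* ... normal protocol input path ... */
+ 	return 0;
+ }
+
+Handlers that must not consume the packet use the same test but return
+NF_ACCEPT (netfilter hooks) or NOTIFY_DONE (netdevice notifiers)
+instead, as the decnet and econet hunks below do.
+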
+diff -Nurb linux-2.6.22-570/net/dccp/ipv6.c linux-2.6.22-591/net/dccp/ipv6.c
+--- linux-2.6.22-570/net/dccp/ipv6.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/ipv6.c 2007-12-21 15:36:15.000000000 -0500
+@@ -94,6 +94,7 @@
+ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __be32 info)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
+ const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ struct ipv6_pinfo *np;
+@@ -102,7 +103,7 @@
+ __u64 seq;
+
+ sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
+- &hdr->saddr, dh->dccph_sport, inet6_iif(skb));
++ &hdr->saddr, dh->dccph_sport, inet6_iif(skb), net);
+
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+@@ -142,6 +143,7 @@
+ for now.
+ */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_DCCP;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+@@ -242,6 +244,7 @@
+ int err = -1;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net,
+ fl.proto = IPPROTO_DCCP;
+ ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+@@ -358,6 +361,7 @@
+ &rxip6h->daddr);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
+
+@@ -407,7 +411,7 @@
+ nsk = __inet6_lookup_established(&dccp_hashinfo,
+ &iph->saddr, dh->dccph_sport,
+ &iph->daddr, ntohs(dh->dccph_dport),
+- inet6_iif(skb));
++ inet6_iif(skb), sk->sk_net);
+ if (nsk != NULL) {
+ if (nsk->sk_state != DCCP_TIME_WAIT) {
+ bh_lock_sock(nsk);
+@@ -584,6 +588,7 @@
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_DCCP;
+ ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ if (opt != NULL && opt->srcrt != NULL) {
+@@ -819,6 +824,7 @@
+ {
+ const struct dccp_hdr *dh;
+ struct sk_buff *skb = *pskb;
++ struct net *net = skb->dev->nd_net;
+ struct sock *sk;
+ int min_cov;
+
+@@ -849,7 +855,7 @@
+ sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
+ dh->dccph_sport,
+ &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
+- inet6_iif(skb));
++ inet6_iif(skb), net);
+ /*
+ * Step 2:
+ * If no socket ...
+@@ -937,6 +943,7 @@
+ return -EAFNOSUPPORT;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+diff -Nurb linux-2.6.22-570/net/dccp/probe.c linux-2.6.22-591/net/dccp/probe.c
+--- linux-2.6.22-570/net/dccp/probe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/dccp/probe.c 2007-12-21 15:36:15.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/module.h>
+ #include <linux/kfifo.h>
+ #include <linux/vmalloc.h>
++#include <net/net_namespace.h>
+
+ #include "dccp.h"
+ #include "ccid.h"
+@@ -168,7 +169,7 @@
+ if (IS_ERR(dccpw.fifo))
+ return PTR_ERR(dccpw.fifo);
+
+- if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
++ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
+ goto err0;
+
+ ret = register_jprobe(&dccp_send_probe);
+@@ -178,7 +179,7 @@
+ pr_info("DCCP watch registered (port=%d)\n", port);
+ return 0;
+ err1:
+- proc_net_remove(procname);
++ proc_net_remove(&init_net, procname);
+ err0:
+ kfifo_free(dccpw.fifo);
+ return ret;
+@@ -188,7 +189,7 @@
+ static __exit void dccpprobe_exit(void)
+ {
+ kfifo_free(dccpw.fifo);
+- proc_net_remove(procname);
++ proc_net_remove(&init_net, procname);
+ unregister_jprobe(&dccp_send_probe);
+
+ }
+diff -Nurb linux-2.6.22-570/net/decnet/af_decnet.c linux-2.6.22-591/net/decnet/af_decnet.c
+--- linux-2.6.22-570/net/decnet/af_decnet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/af_decnet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -131,6 +131,7 @@
+ #include <net/neighbour.h>
+ #include <net/dst.h>
+ #include <net/fib_rules.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_nsp.h>
+ #include <net/dn_dev.h>
+@@ -470,10 +471,10 @@
+ .obj_size = sizeof(struct dn_sock),
+ };
+
+-static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
++static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
+ {
+ struct dn_scp *scp;
+- struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1);
++ struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1);
+
+ if (!sk)
+ goto out;
+@@ -674,10 +675,13 @@
+
+
+
+-static int dn_create(struct socket *sock, int protocol)
++static int dn_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ switch(sock->type) {
+ case SOCK_SEQPACKET:
+ if (protocol != DNPROTO_NSP)
+@@ -690,7 +694,7 @@
+ }
+
+
+- if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL)
++ if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL)
+ return -ENOBUFS;
+
+ sk->sk_protocol = protocol;
+@@ -747,7 +751,7 @@
+ if (dn_ntohs(saddr->sdn_nodeaddrl)) {
+ read_lock(&dev_base_lock);
+ ldev = NULL;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (!dev->dn_ptr)
+ continue;
+ if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
+@@ -943,6 +947,7 @@
+
+ err = -EHOSTUNREACH;
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fld_dst = dn_saddr2dn(&scp->peer);
+ fl.fld_src = dn_saddr2dn(&scp->addr);
+@@ -1090,7 +1095,7 @@
+
+ cb = DN_SKB_CB(skb);
+ sk->sk_ack_backlog--;
+- newsk = dn_alloc_sock(newsock, sk->sk_allocation);
++ newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation);
+ if (newsk == NULL) {
+ release_sock(sk);
+ kfree_skb(skb);
+@@ -2085,6 +2090,9 @@
+ {
+ struct net_device *dev = (struct net_device *)ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch(event) {
+ case NETDEV_UP:
+ dn_dev_up(dev);
+@@ -2399,7 +2407,7 @@
+ dev_add_pack(&dn_dix_packet_type);
+ register_netdevice_notifier(&dn_dev_notifier);
+
+- proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops);
++ proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops);
+ dn_register_sysctl();
+ out:
+ return rc;
+@@ -2428,7 +2436,7 @@
+ dn_neigh_cleanup();
+ dn_fib_cleanup();
+
+- proc_net_remove("decnet");
++ proc_net_remove(&init_net, "decnet");
+
+ proto_unregister(&dn_proto);
+ }
+diff -Nurb linux-2.6.22-570/net/decnet/dn_dev.c linux-2.6.22-591/net/decnet/dn_dev.c
+--- linux-2.6.22-570/net/decnet/dn_dev.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/decnet/dn_dev.c 2007-12-21 15:36:15.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <net/flow.h>
+ #include <net/fib_rules.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_dev.h>
+ #include <net/dn_route.h>
+@@ -513,7 +514,7 @@
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+
+ #ifdef CONFIG_KMOD
+- dev_load(ifr->ifr_name);
++ dev_load(&init_net, ifr->ifr_name);
+ #endif
+
+ switch(cmd) {
+@@ -531,7 +532,7 @@
+
+ rtnl_lock();
+
+- if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) {
++ if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
+ ret = -ENODEV;
+ goto done;
+ }
+@@ -629,7 +630,7 @@
+ {
+ struct net_device *dev;
+ struct dn_dev *dn_dev = NULL;
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(&init_net, ifindex);
+ if (dev) {
+ dn_dev = dev->dn_ptr;
+ dev_put(dev);
+@@ -647,12 +648,16 @@
+
+ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct nlattr *tb[IFA_MAX+1];
+ struct dn_dev *dn_db;
+ struct ifaddrmsg *ifm;
+ struct dn_ifaddr *ifa, **ifap;
+ int err = -EADDRNOTAVAIL;
+
++ if (net != &init_net)
++ goto errout;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ if (err < 0)
+ goto errout;
+@@ -679,6 +684,7 @@
+
+ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct nlattr *tb[IFA_MAX+1];
+ struct net_device *dev;
+ struct dn_dev *dn_db;
+@@ -686,6 +692,9 @@
+ struct dn_ifaddr *ifa;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ if (err < 0)
+ return err;
+@@ -694,7 +703,7 @@
+ return -EINVAL;
+
+ ifm = nlmsg_data(nlh);
+- if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
+ return -ENODEV;
+
+ if ((dn_db = dev->dn_ptr) == NULL) {
+@@ -783,24 +792,28 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
+ }
+
+ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx, dn_idx = 0, skip_ndevs, skip_naddr;
+ struct net_device *dev;
+ struct dn_dev *dn_db;
+ struct dn_ifaddr *ifa;
+
++ if (net != &init_net)
++ return 0;
++
+ skip_ndevs = cb->args[0];
+ skip_naddr = cb->args[1];
+
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (idx < skip_ndevs)
+ goto cont;
+ else if (idx > skip_ndevs) {
+@@ -869,10 +882,10 @@
+ rv = dn_dev_get_first(dev, addr);
+ read_unlock(&dev_base_lock);
+ dev_put(dev);
+- if (rv == 0 || dev == &loopback_dev)
++ if (rv == 0 || dev == &init_net.loopback_dev)
+ return rv;
+ }
+- dev = &loopback_dev;
++ dev = &init_net.loopback_dev;
+ dev_hold(dev);
+ goto last_chance;
+ }
+@@ -1299,7 +1312,7 @@
+ struct net_device *dev;
+
+ rtnl_lock();
+- for_each_netdev(dev)
++ for_each_netdev(&init_net, dev)
+ dn_dev_down(dev);
+ rtnl_unlock();
+
+@@ -1310,7 +1323,7 @@
+ struct net_device *dev;
+
+ rtnl_lock();
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (dev->flags & IFF_UP)
+ dn_dev_up(dev);
+ }
+@@ -1344,7 +1357,7 @@
+ return SEQ_START_TOKEN;
+
+ i = 1;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+@@ -1363,9 +1376,9 @@
+
+ dev = (struct net_device *)v;
+ if (v == SEQ_START_TOKEN)
+- dev = net_device_entry(&dev_base_head);
++ dev = net_device_entry(&init_net.dev_base_head);
+
+- for_each_netdev_continue(dev) {
++ for_each_netdev_continue(&init_net, dev) {
+ if (!is_dn_dev(dev))
+ continue;
+
+@@ -1465,7 +1478,7 @@
+ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+ rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
+
+- proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
++ proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops);
+
+ #ifdef CONFIG_SYSCTL
+ {
+@@ -1486,7 +1499,7 @@
+ }
+ #endif /* CONFIG_SYSCTL */
+
+- proc_net_remove("decnet_dev");
++ proc_net_remove(&init_net, "decnet_dev");
+
+ dn_dev_devices_off();
+ }
+diff -Nurb linux-2.6.22-570/net/decnet/dn_fib.c linux-2.6.22-591/net/decnet/dn_fib.c
+--- linux-2.6.22-570/net/decnet/dn_fib.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_fib.c 2007-12-21 15:36:15.000000000 -0500
+@@ -203,8 +203,6 @@
+ struct flowi fl;
+ struct dn_fib_res res;
+
+- memset(&fl, 0, sizeof(fl));
+-
+ if (nh->nh_flags&RTNH_F_ONLINK) {
+ struct net_device *dev;
+
+@@ -212,7 +210,7 @@
+ return -EINVAL;
+ if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
+ return -EINVAL;
+- if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
+ return -ENODEV;
+ if (!(dev->flags&IFF_UP))
+ return -ENETDOWN;
+@@ -223,6 +221,7 @@
+ }
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.fld_dst = nh->nh_gw;
+ fl.oif = nh->nh_oif;
+ fl.fld_scope = r->rtm_scope + 1;
+@@ -255,7 +254,7 @@
+ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ return -EINVAL;
+
+- dev = __dev_get_by_index(nh->nh_oif);
++ dev = __dev_get_by_index(&init_net, nh->nh_oif);
+ if (dev == NULL || dev->dn_ptr == NULL)
+ return -ENODEV;
+ if (!(dev->flags&IFF_UP))
+@@ -355,7 +354,7 @@
+ if (nhs != 1 || nh->nh_gw)
+ goto err_inval;
+ nh->nh_scope = RT_SCOPE_NOWHERE;
+- nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
++ nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
+ err = -ENODEV;
+ if (nh->nh_dev == NULL)
+ goto failure;
+@@ -506,10 +505,14 @@
+
+ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct dn_fib_table *tb;
+ struct rtattr **rta = arg;
+ struct rtmsg *r = NLMSG_DATA(nlh);
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ if (dn_fib_check_attr(r, rta))
+ return -EINVAL;
+
+@@ -522,10 +525,14 @@
+
+ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct dn_fib_table *tb;
+ struct rtattr **rta = arg;
+ struct rtmsg *r = NLMSG_DATA(nlh);
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ if (dn_fib_check_attr(r, rta))
+ return -EINVAL;
+
+@@ -602,7 +609,7 @@
+
+ /* Scan device list */
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ dn_db = dev->dn_ptr;
+ if (dn_db == NULL)
+ continue;
+diff -Nurb linux-2.6.22-570/net/decnet/dn_neigh.c linux-2.6.22-591/net/decnet/dn_neigh.c
+--- linux-2.6.22-570/net/decnet/dn_neigh.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_neigh.c 2007-12-21 15:36:15.000000000 -0500
+@@ -38,6 +38,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/jhash.h>
+ #include <asm/atomic.h>
++#include <net/net_namespace.h>
+ #include <net/neighbour.h>
+ #include <net/dst.h>
+ #include <net/flow.h>
+@@ -591,6 +592,7 @@
+
+ seq = file->private_data;
+ seq->private = s;
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -598,12 +600,20 @@
+ goto out;
+ }
+
++static int dn_neigh_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct neigh_seq_state *state = seq->private;
++ put_net(state->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations dn_neigh_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = dn_neigh_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = dn_neigh_seq_release,
+ };
+
+ #endif
+@@ -611,11 +621,11 @@
+ void __init dn_neigh_init(void)
+ {
+ neigh_table_init(&dn_neigh_table);
+- proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops);
++ proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops);
+ }
+
+ void __exit dn_neigh_cleanup(void)
+ {
+- proc_net_remove("decnet_neigh");
++ proc_net_remove(&init_net, "decnet_neigh");
+ neigh_table_clear(&dn_neigh_table);
+ }
+diff -Nurb linux-2.6.22-570/net/decnet/dn_nsp_out.c linux-2.6.22-591/net/decnet/dn_nsp_out.c
+--- linux-2.6.22-570/net/decnet/dn_nsp_out.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_nsp_out.c 2007-12-21 15:36:15.000000000 -0500
+@@ -91,6 +91,7 @@
+ }
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fld_src = dn_saddr2dn(&scp->addr);
+ fl.fld_dst = dn_saddr2dn(&scp->peer);
+diff -Nurb linux-2.6.22-570/net/decnet/dn_route.c linux-2.6.22-591/net/decnet/dn_route.c
+--- linux-2.6.22-570/net/decnet/dn_route.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -82,6 +82,7 @@
+ #include <net/dst.h>
+ #include <net/flow.h>
+ #include <net/fib_rules.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_dev.h>
+ #include <net/dn_nsp.h>
+@@ -583,6 +584,9 @@
+ struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+ unsigned char padlen = 0;
+
++ if (dev->nd_net != &init_net)
++ goto dump_it;
++
+ if (dn == NULL)
+ goto dump_it;
+
+@@ -877,13 +881,14 @@
+
+ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
+ {
+- struct flowi fl = { .nl_u = { .dn_u =
++ struct flowi fl = { .fl_net = &init_net,
++ .nl_u = { .dn_u =
+ { .daddr = oldflp->fld_dst,
+ .saddr = oldflp->fld_src,
+ .scope = RT_SCOPE_UNIVERSE,
+ } },
+ .mark = oldflp->mark,
+- .iif = loopback_dev.ifindex,
++ .iif = init_net.loopback_dev.ifindex,
+ .oif = oldflp->oif };
+ struct dn_route *rt = NULL;
+ struct net_device *dev_out = NULL, *dev;
+@@ -900,11 +905,11 @@
+ "dn_route_output_slow: dst=%04x src=%04x mark=%d"
+ " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst),
+ dn_ntohs(oldflp->fld_src),
+- oldflp->mark, loopback_dev.ifindex, oldflp->oif);
++ oldflp->mark, init_net.loopback_dev.ifindex, oldflp->oif);
+
+ /* If we have an output interface, verify it's a DECnet device */
+ if (oldflp->oif) {
+- dev_out = dev_get_by_index(oldflp->oif);
++ dev_out = dev_get_by_index(&init_net, oldflp->oif);
+ err = -ENODEV;
+ if (dev_out && dev_out->dn_ptr == NULL) {
+ dev_put(dev_out);
+@@ -925,7 +930,7 @@
+ goto out;
+ }
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (!dev->dn_ptr)
+ continue;
+ if (!dn_dev_islocal(dev, oldflp->fld_src))
+@@ -953,7 +958,7 @@
+ err = -EADDRNOTAVAIL;
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &init_net.loopback_dev;
+ dev_hold(dev_out);
+ if (!fl.fld_dst) {
+ fl.fld_dst =
+@@ -962,7 +967,7 @@
+ if (!fl.fld_dst)
+ goto out;
+ }
+- fl.oif = loopback_dev.ifindex;
++ fl.oif = init_net.loopback_dev.ifindex;
+ res.type = RTN_LOCAL;
+ goto make_route;
+ }
+@@ -995,7 +1000,7 @@
+ * here
+ */
+ if (!try_hard) {
+- neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
++ neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
+ if (neigh) {
+ if ((oldflp->oif &&
+ (neigh->dev->ifindex != oldflp->oif)) ||
+@@ -1008,7 +1013,7 @@
+ if (dev_out)
+ dev_put(dev_out);
+ if (dn_dev_islocal(neigh->dev, fl.fld_dst)) {
+- dev_out = &loopback_dev;
++ dev_out = &init_net.loopback_dev;
+ res.type = RTN_LOCAL;
+ } else {
+ dev_out = neigh->dev;
+@@ -1029,7 +1034,7 @@
+ /* Possible improvement - check all devices for local addr */
+ if (dn_dev_islocal(dev_out, fl.fld_dst)) {
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &init_net.loopback_dev;
+ dev_hold(dev_out);
+ res.type = RTN_LOCAL;
+ goto select_source;
+@@ -1065,7 +1070,7 @@
+ fl.fld_src = fl.fld_dst;
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &init_net.loopback_dev;
+ dev_hold(dev_out);
+ fl.oif = dev_out->ifindex;
+ if (res.fi)
+@@ -1103,6 +1108,7 @@
+ atomic_set(&rt->u.dst.__refcnt, 1);
+ rt->u.dst.flags = DST_HOST;
+
++ rt->fl.fl_net = &init_net;
+ rt->fl.fld_src = oldflp->fld_src;
+ rt->fl.fld_dst = oldflp->fld_dst;
+ rt->fl.oif = oldflp->oif;
+@@ -1226,7 +1232,8 @@
+ int flags = 0;
+ __le16 gateway = 0;
+ __le16 local_src = 0;
+- struct flowi fl = { .nl_u = { .dn_u =
++ struct flowi fl = { .fl_net = &init_net,
++ .nl_u = { .dn_u =
+ { .daddr = cb->dst,
+ .saddr = cb->src,
+ .scope = RT_SCOPE_UNIVERSE,
+@@ -1374,6 +1381,7 @@
+ rt->rt_dst_map = fl.fld_dst;
+ rt->rt_src_map = fl.fld_src;
+
++ rt->fl.fl_net = &init_net;
+ rt->fl.fld_src = cb->src;
+ rt->fl.fld_dst = cb->dst;
+ rt->fl.oif = 0;
+@@ -1526,6 +1534,7 @@
+ */
+ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ struct rtattr **rta = arg;
+ struct rtmsg *rtm = NLMSG_DATA(nlh);
+ struct dn_route *rt = NULL;
+@@ -1534,7 +1543,11 @@
+ struct sk_buff *skb;
+ struct flowi fl;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = DNPROTO_NSP;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+@@ -1552,7 +1565,7 @@
+
+ if (fl.iif) {
+ struct net_device *dev;
+- if ((dev = dev_get_by_index(fl.iif)) == NULL) {
++ if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+@@ -1598,7 +1611,7 @@
+ goto out_free;
+ }
+
+- return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++ return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+
+ out_free:
+ kfree_skb(skb);
+@@ -1611,10 +1624,14 @@
+ */
+ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct dn_route *rt;
+ int h, s_h;
+ int idx, s_idx;
+
++ if (net != &init_net)
++ return 0;
++
+ if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
+ return -EINVAL;
+ if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
+@@ -1814,7 +1831,7 @@
+
+ dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
+
+- proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
++ proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+ #ifdef CONFIG_DECNET_ROUTER
+ rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+@@ -1829,6 +1846,6 @@
+ del_timer(&dn_route_timer);
+ dn_run_flush(0);
+
+- proc_net_remove("decnet_cache");
++ proc_net_remove(&init_net, "decnet_cache");
+ }
+
+diff -Nurb linux-2.6.22-570/net/decnet/dn_rules.c linux-2.6.22-591/net/decnet/dn_rules.c
+--- linux-2.6.22-570/net/decnet/dn_rules.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_rules.c 2007-12-21 15:36:15.000000000 -0500
+@@ -186,7 +186,10 @@
+
+ unsigned dnet_addr_type(__le16 addr)
+ {
+- struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
++ struct flowi fl = {
++ .fl_net = &init_net,
++ .nl_u = { .dn_u = { .daddr = addr } }
++ };
+ struct dn_fib_res res;
+ unsigned ret = RTN_UNICAST;
+ struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
+@@ -223,7 +226,7 @@
+ return -ENOBUFS;
+ }
+
+-static u32 dn_fib_rule_default_pref(void)
++static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
+ {
+ struct list_head *pos;
+ struct fib_rule *rule;
+@@ -240,7 +243,7 @@
+ return 0;
+ }
+
+-static void dn_fib_rule_flush_cache(void)
++static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
+ {
+ dn_rt_cache_flush(-1);
+ }
+@@ -265,12 +268,12 @@
+ void __init dn_fib_rules_init(void)
+ {
+ list_add_tail(&default_rule.common.list, &dn_fib_rules);
+- fib_rules_register(&dn_fib_rules_ops);
++ fib_rules_register(&init_net, &dn_fib_rules_ops);
+ }
+
+ void __exit dn_fib_rules_cleanup(void)
+ {
+- fib_rules_unregister(&dn_fib_rules_ops);
++ fib_rules_unregister(&init_net, &dn_fib_rules_ops);
+ }
+
+
+diff -Nurb linux-2.6.22-570/net/decnet/dn_table.c linux-2.6.22-591/net/decnet/dn_table.c
+--- linux-2.6.22-570/net/decnet/dn_table.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/dn_table.c 2007-12-21 15:36:15.000000000 -0500
+@@ -375,10 +375,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
++ err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
+ }
+
+ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
+@@ -463,12 +463,16 @@
+
+ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct dn_fib_table *tb;
+ struct hlist_node *node;
+ int dumped = 0;
+
++ if (net != &init_net)
++ return 0;
++
+ if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+ return dn_cache_dump(skb, cb);
+diff -Nurb linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c
+--- linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c 2007-12-21 15:36:15.000000000 -0500
+@@ -93,6 +93,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ dnrmg_send_peer(*pskb);
+ return NF_ACCEPT;
+ }
+@@ -137,7 +141,8 @@
+ {
+ int rv = 0;
+
+- dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
++ dnrmg = netlink_kernel_create(&init_net,
++ NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
+ dnrmg_receive_user_sk, NULL, THIS_MODULE);
+ if (dnrmg == NULL) {
+ printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
+diff -Nurb linux-2.6.22-570/net/decnet/sysctl_net_decnet.c linux-2.6.22-591/net/decnet/sysctl_net_decnet.c
+--- linux-2.6.22-570/net/decnet/sysctl_net_decnet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/decnet/sysctl_net_decnet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -259,7 +259,7 @@
+
+ devname[newlen] = 0;
+
+- dev = dev_get_by_name(devname);
++ dev = dev_get_by_name(&init_net, devname);
+ if (dev == NULL)
+ return -ENODEV;
+
+@@ -299,7 +299,7 @@
+ devname[*lenp] = 0;
+ strip_it(devname);
+
+- dev = dev_get_by_name(devname);
++ dev = dev_get_by_name(&init_net, devname);
+ if (dev == NULL)
+ return -ENODEV;
+
+diff -Nurb linux-2.6.22-570/net/econet/af_econet.c linux-2.6.22-591/net/econet/af_econet.c
+--- linux-2.6.22-570/net/econet/af_econet.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/econet/af_econet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -608,12 +608,15 @@
+ * Create an Econet socket
+ */
+
+-static int econet_create(struct socket *sock, int protocol)
++static int econet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct econet_sock *eo;
+ int err;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ /* Econet only provides datagram services. */
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+@@ -621,7 +624,7 @@
+ sock->state = SS_UNCONNECTED;
+
+ err = -ENOBUFS;
+- sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1);
++ sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1);
+ if (sk == NULL)
+ goto out;
+
+@@ -659,7 +662,7 @@
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+- if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL)
++ if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
+ return -ENODEV;
+
+ sec = (struct sockaddr_ec *)&ifr.ifr_addr;
+@@ -1062,6 +1065,9 @@
+ struct sock *sk;
+ struct ec_device *edev = dev->ec_ptr;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
+
+@@ -1116,6 +1122,9 @@
+ struct net_device *dev = (struct net_device *)data;
+ struct ec_device *edev;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ /* A device has gone down - kill any data we hold for it. */
+diff -Nurb linux-2.6.22-570/net/ieee80211/ieee80211_module.c linux-2.6.22-591/net/ieee80211/ieee80211_module.c
+--- linux-2.6.22-570/net/ieee80211/ieee80211_module.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ieee80211/ieee80211_module.c 2007-12-21 15:36:15.000000000 -0500
+@@ -264,7 +264,7 @@
+ struct proc_dir_entry *e;
+
+ ieee80211_debug_level = debug;
+- ieee80211_proc = proc_mkdir(DRV_NAME, proc_net);
++ ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net);
+ if (ieee80211_proc == NULL) {
+ IEEE80211_ERROR("Unable to create " DRV_NAME
+ " proc directory\n");
+@@ -273,7 +273,7 @@
+ e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR,
+ ieee80211_proc);
+ if (!e) {
+- remove_proc_entry(DRV_NAME, proc_net);
++ remove_proc_entry(DRV_NAME, init_net.proc_net);
+ ieee80211_proc = NULL;
+ return -EIO;
+ }
+@@ -293,7 +293,7 @@
+ #ifdef CONFIG_IEEE80211_DEBUG
+ if (ieee80211_proc) {
+ remove_proc_entry("debug_level", ieee80211_proc);
+- remove_proc_entry(DRV_NAME, proc_net);
++ remove_proc_entry(DRV_NAME, init_net.proc_net);
+ ieee80211_proc = NULL;
+ }
+ #endif /* CONFIG_IEEE80211_DEBUG */
+diff -Nurb linux-2.6.22-570/net/ipv4/Kconfig linux-2.6.22-591/net/ipv4/Kconfig
+--- linux-2.6.22-570/net/ipv4/Kconfig 2007-12-21 15:36:02.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -116,48 +116,6 @@
+ equal "cost" and chooses one of them in a non-deterministic fashion
+ if a matching packet arrives.
+
+-config IP_ROUTE_MULTIPATH_CACHED
+- bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
+- depends on IP_ROUTE_MULTIPATH
+- help
+- Normally, equal cost multipath routing is not supported by the
+- routing cache. If you say Y here, alternative routes are cached
+- and on cache lookup a route is chosen in a configurable fashion.
+-
+- If unsure, say N.
+-
+-config IP_ROUTE_MULTIPATH_RR
+- tristate "MULTIPATH: round robin algorithm"
+- depends on IP_ROUTE_MULTIPATH_CACHED
+- help
+- Multipath routes are chosen according to Round Robin
+-
+-config IP_ROUTE_MULTIPATH_RANDOM
+- tristate "MULTIPATH: random algorithm"
+- depends on IP_ROUTE_MULTIPATH_CACHED
+- help
+- Multipath routes are chosen in a random fashion. Actually,
+- there is no weight for a route. The advantage of this policy
+- is that it is implemented stateless and therefore introduces only
+- a very small delay.
+-
+-config IP_ROUTE_MULTIPATH_WRANDOM
+- tristate "MULTIPATH: weighted random algorithm"
+- depends on IP_ROUTE_MULTIPATH_CACHED
+- help
+- Multipath routes are chosen in a weighted random fashion.
+- The per route weights are the weights visible via ip route 2. As the
+- corresponding state management introduces some overhead routing delay
+- is increased.
+-
+-config IP_ROUTE_MULTIPATH_DRR
+- tristate "MULTIPATH: interface round robin algorithm"
+- depends on IP_ROUTE_MULTIPATH_CACHED
+- help
+- Connections are distributed in a round robin fashion over the
+- available interfaces. This policy makes sense if the connections
+- should be primarily distributed on interfaces and not on routes.
+-
+ config IP_ROUTE_VERBOSE
+ bool "IP: verbose route monitoring"
+ depends on IP_ADVANCED_ROUTER
+diff -Nurb linux-2.6.22-570/net/ipv4/Makefile linux-2.6.22-591/net/ipv4/Makefile
+--- linux-2.6.22-570/net/ipv4/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -29,14 +29,9 @@
+ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
+ obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
+ obj-$(CONFIG_IP_PNP) += ipconfig.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
+ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
+ obj-$(CONFIG_IP_VS) += ipvs/
+ obj-$(CONFIG_INET_DIAG) += inet_diag.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
+ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
+ obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
+ obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+diff -Nurb linux-2.6.22-570/net/ipv4/af_inet.c linux-2.6.22-591/net/ipv4/af_inet.c
+--- linux-2.6.22-570/net/ipv4/af_inet.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/af_inet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -244,7 +244,7 @@
+ * Create an inet socket.
+ */
+
+-static int inet_create(struct socket *sock, int protocol)
++static int inet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct list_head *p;
+@@ -310,6 +310,10 @@
+ goto out_rcu_unlock;
+ }
+
++ err = -EPROTONOSUPPORT;
++ if (!(answer->flags & INET_PROTOSW_NETNS) && (net != &init_net))
++ goto out_rcu_unlock;
++
+ err = -EPERM;
+ if ((protocol == IPPROTO_ICMP) &&
+ nx_capable(answer->capability, NXC_RAW_ICMP))
+@@ -326,7 +330,7 @@
+ BUG_TRAP(answer_prot->slab != NULL);
+
+ err = -ENOBUFS;
+- sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1);
++ sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1);
+ if (sk == NULL)
+ goto out;
+
+@@ -344,7 +348,7 @@
+ inet->hdrincl = 1;
+ }
+
+- if (ipv4_config.no_pmtu_disc)
++ if (net->sysctl_ipv4_no_pmtu_disc)
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -423,12 +427,12 @@
+ }
+
+ /* It is off by default, see below. */
+-int sysctl_ip_nonlocal_bind __read_mostly;
+
+ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ {
+ struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct inet_sock *inet = inet_sk(sk);
+ struct nx_v4_sock_addr nsa;
+ unsigned short snum;
+@@ -448,7 +452,7 @@
+ if (err)
+ goto out;
+
+- chk_addr_ret = inet_addr_type(nsa.saddr);
++ chk_addr_ret = inet_addr_type(net, nsa.saddr);
+
+ /* Not specified by any standard per-se, however it breaks too
+ * many applications when removed. It is unfortunate since
+@@ -458,7 +462,7 @@
+ * is temporarily down)
+ */
+ err = -EADDRNOTAVAIL;
+- if (!sysctl_ip_nonlocal_bind &&
++ if (!net->sysctl_ip_nonlocal_bind &&
+ !inet->freebind &&
+ nsa.saddr != INADDR_ANY &&
+ chk_addr_ret != RTN_LOCAL &&
+@@ -787,6 +791,7 @@
+ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ int err = 0;
+
+ switch (cmd) {
+@@ -799,12 +804,12 @@
+ case SIOCADDRT:
+ case SIOCDELRT:
+ case SIOCRTMSG:
+- err = ip_rt_ioctl(cmd, (void __user *)arg);
++ err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCDARP:
+ case SIOCGARP:
+ case SIOCSARP:
+- err = arp_ioctl(cmd, (void __user *)arg);
++ err = arp_ioctl(net, cmd, (void __user *)arg);
+ break;
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+@@ -817,7 +822,7 @@
+ case SIOCSIFPFLAGS:
+ case SIOCGIFPFLAGS:
+ case SIOCSIFFLAGS:
+- err = devinet_ioctl(cmd, (void __user *)arg);
++ err = devinet_ioctl(net, cmd, (void __user *)arg);
+ break;
+ default:
+ if (sk->sk_prot->ioctl)
+@@ -927,7 +932,8 @@
+ .capability = -1,
+ .no_check = 0,
+ .flags = INET_PROTOSW_PERMANENT |
+- INET_PROTOSW_ICSK,
++ INET_PROTOSW_ICSK |
++ INET_PROTOSW_NETNS,
+ },
+
+ {
+@@ -937,7 +943,8 @@
+ .ops = &inet_dgram_ops,
+ .capability = -1,
+ .no_check = UDP_CSUM_DEFAULT,
+- .flags = INET_PROTOSW_PERMANENT,
++ .flags = INET_PROTOSW_PERMANENT |
++ INET_PROTOSW_NETNS,
+ },
+
+
+@@ -948,7 +955,8 @@
+ .ops = &inet_sockraw_ops,
+ .capability = CAP_NET_RAW,
+ .no_check = UDP_CSUM_DEFAULT,
+- .flags = INET_PROTOSW_REUSE,
++ .flags = INET_PROTOSW_REUSE |
++ INET_PROTOSW_NETNS,
+ }
+ };
+
+@@ -1029,8 +1037,6 @@
+ * Shall we try to damage output packets if routing dev changes?
+ */
+
+-int sysctl_ip_dynaddr __read_mostly;
+-
+ static int inet_sk_reselect_saddr(struct sock *sk)
+ {
+ struct inet_sock *inet = inet_sk(sk);
+@@ -1059,7 +1065,7 @@
+ if (new_saddr == old_saddr)
+ return 0;
+
+- if (sysctl_ip_dynaddr > 1) {
++ if (sk->sk_net->sysctl_ip_dynaddr > 1) {
+ printk(KERN_INFO "%s(): shifting inet->"
+ "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
+ __FUNCTION__,
+@@ -1098,6 +1104,7 @@
+ daddr = inet->opt->faddr;
+ {
+ struct flowi fl = {
++ .fl_net = sk->sk_net,
+ .oif = sk->sk_bound_dev_if,
+ .nl_u = {
+ .ip4_u = {
+@@ -1127,7 +1134,7 @@
+ * Other protocols have to map its equivalent state to TCP_SYN_SENT.
+ * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
+ */
+- if (!sysctl_ip_dynaddr ||
++ if (!sk->sk_net->sysctl_ip_dynaddr ||
+ sk->sk_state != TCP_SYN_SENT ||
+ (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
+ (err = inet_sk_reselect_saddr(sk)) != 0)
+@@ -1183,6 +1190,9 @@
+ int ihl;
+ int id;
+
++ if (!(features & NETIF_F_V4_CSUM))
++ features &= ~NETIF_F_SG;
++
+ if (unlikely(skb_shinfo(skb)->gso_type &
+ ~(SKB_GSO_TCPV4 |
+ SKB_GSO_UDP |
+@@ -1353,6 +1363,24 @@
+ .gso_segment = inet_gso_segment,
+ };
+
++
++static int inet_net_init(struct net *net)
++{
++ net->sysctl_ip_default_ttl = IPDEFTTL;
++ net->sysctl_ip_dynaddr = 0;
++
++ return 0;
++}
++
++static void inet_net_exit(struct net *net)
++{
++}
++
++static struct pernet_operations inet_net_ops = {
++ .init = inet_net_init,
++ .exit = inet_net_exit,
++};
++
+ static int __init inet_init(void)
+ {
+ struct sk_buff *dummy_skb;
+@@ -1374,6 +1402,10 @@
+ if (rc)
+ goto out_unregister_udp_proto;
+
++ rc = register_pernet_subsys(&inet_net_ops);
++ if (rc)
++ goto out_unregister_raw_proto;
++
+ /*
+ * Tell SOCKET that we are alive...
+ */
+@@ -1450,6 +1482,8 @@
+ rc = 0;
+ out:
+ return rc;
++out_unregister_raw_proto:
++ proto_unregister(&raw_prot);
+ out_unregister_udp_proto:
+ proto_unregister(&udp_prot);
+ out_unregister_tcp_proto:
+@@ -1472,15 +1506,11 @@
+ goto out_tcp;
+ if (udp4_proc_init())
+ goto out_udp;
+- if (fib_proc_init())
+- goto out_fib;
+ if (ip_misc_proc_init())
+ goto out_misc;
+ out:
+ return rc;
+ out_misc:
+- fib_proc_exit();
+-out_fib:
+ udp4_proc_exit();
+ out_udp:
+ tcp4_proc_exit();
+@@ -1516,4 +1546,3 @@
+ EXPORT_SYMBOL(inet_stream_ops);
+ EXPORT_SYMBOL(inet_unregister_protosw);
+ EXPORT_SYMBOL(net_statistics);
+-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
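+
+inet_net_init() and inet_net_ops above are the per-namespace
+replacement for the deleted global sysctls: register_pernet_subsys()
+runs .init for every namespace as it is created and .exit as it is torn
+down. af_inet itself never unloads, so the patch needs no unregister; a
+module that did unload would pair the calls, roughly as below (foo_*
+are placeholder names, and this assumes the tree exports
+unregister_pernet_subsys() alongside register_pernet_subsys()):
+
+ static int foo_net_init(struct net *net)
+ {
+ 	/* allocate or seed this subsystem's per-net state here */
+ 	return 0;
+ }
+
+ static void foo_net_exit(struct net *net)
+ {
+ 	/* undo whatever foo_net_init() set up */
+ }
+
+ static struct pernet_operations foo_net_ops = {
+ 	.init = foo_net_init,
+ 	.exit = foo_net_exit,
+ };
+
+ static int __init foo_init(void)
+ {
+ 	return register_pernet_subsys(&foo_net_ops);
+ }
+
+ static void __exit foo_exit(void)
+ {
+ 	unregister_pernet_subsys(&foo_net_ops);
+ }
+
+ module_init(foo_init);
+ module_exit(foo_exit);
+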
+diff -Nurb linux-2.6.22-570/net/ipv4/ah4.c linux-2.6.22-591/net/ipv4/ah4.c
+--- linux-2.6.22-570/net/ipv4/ah4.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/ah4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -198,6 +198,9 @@
+ struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
+ struct xfrm_state *x;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+@@ -339,3 +342,4 @@
+ module_init(ah4_init);
+ module_exit(ah4_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
+diff -Nurb linux-2.6.22-570/net/ipv4/arp.c linux-2.6.22-591/net/ipv4/arp.c
+--- linux-2.6.22-570/net/ipv4/arp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/arp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -109,6 +109,7 @@
+ #include <net/protocol.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/arp.h>
+ #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+ #include <net/ax25.h>
+@@ -235,10 +236,11 @@
+ {
+ __be32 addr = *(__be32*)neigh->primary_key;
+ struct net_device *dev = neigh->dev;
++ struct net *net = dev->nd_net;
+ struct in_device *in_dev;
+ struct neigh_parms *parms;
+
+- neigh->type = inet_addr_type(addr);
++ neigh->type = inet_addr_type(net, addr);
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+@@ -332,6 +334,7 @@
+ __be32 saddr = 0;
+ u8 *dst_ha = NULL;
+ struct net_device *dev = neigh->dev;
++ struct net *net = dev->nd_net;
+ __be32 target = *(__be32*)neigh->primary_key;
+ int probes = atomic_read(&neigh->probes);
+ struct in_device *in_dev = in_dev_get(dev);
+@@ -342,14 +345,14 @@
+ switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
+ default:
+ case 0: /* By default announce any local IP */
+- if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
++ if (skb && inet_addr_type(net, ip_hdr(skb)->saddr) == RTN_LOCAL)
+ saddr = ip_hdr(skb)->saddr;
+ break;
+ case 1: /* Restrict announcements of saddr in same subnet */
+ if (!skb)
+ break;
+ saddr = ip_hdr(skb)->saddr;
+- if (inet_addr_type(saddr) == RTN_LOCAL) {
++ if (inet_addr_type(net, saddr) == RTN_LOCAL) {
+ /* saddr should be known to target */
+ if (inet_addr_onlink(in_dev, target, saddr))
+ break;
+@@ -386,6 +389,7 @@
+ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
+ __be32 sip, __be32 tip)
+ {
++ struct net *net = dev->nd_net;
+ int scope;
+
+ switch (IN_DEV_ARP_IGNORE(in_dev)) {
+@@ -416,13 +420,15 @@
+ default:
+ return 0;
+ }
+- return !inet_confirm_addr(dev, sip, tip, scope);
++ return !inet_confirm_addr(net, dev, sip, tip, scope);
+ }
+
+ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
+ {
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
+- .saddr = tip } } };
++ struct flowi fl = {
++ .fl_net = dev->nd_net,
++ .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } }
++ };
+ struct rtable *rt;
+ int flag = 0;
+ /*unsigned long now; */
+@@ -469,6 +475,7 @@
+ int arp_find(unsigned char *haddr, struct sk_buff *skb)
+ {
+ struct net_device *dev = skb->dev;
++ struct net *net = dev->nd_net;
+ __be32 paddr;
+ struct neighbour *n;
+
+@@ -480,7 +487,7 @@
+
+ paddr = ((struct rtable*)skb->dst)->rt_gateway;
+
+- if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
++ if (arp_set_predefined(inet_addr_type(net, paddr), haddr, paddr, dev))
+ return 0;
+
+ n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
+@@ -704,6 +711,7 @@
+ static int arp_process(struct sk_buff *skb)
+ {
+ struct net_device *dev = skb->dev;
++ struct net *net = dev->nd_net;
+ struct in_device *in_dev = in_dev_get(dev);
+ struct arphdr *arp;
+ unsigned char *arp_ptr;
+@@ -824,7 +832,7 @@
+ /* Special case: IPv4 duplicate address detection packet (RFC2131) */
+ if (sip == 0) {
+ if (arp->ar_op == htons(ARPOP_REQUEST) &&
+- inet_addr_type(tip) == RTN_LOCAL &&
++ inet_addr_type(net, tip) == RTN_LOCAL &&
+ !arp_ignore(in_dev,dev,sip,tip))
+ arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr);
+ goto out;
+@@ -854,7 +862,7 @@
+ } else if (IN_DEV_FORWARD(in_dev)) {
+ if ((rt->rt_flags&RTCF_DNAT) ||
+ (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
+- (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
++ (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
+ n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+ if (n)
+ neigh_release(n);
+@@ -877,14 +885,14 @@
+
+ n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
+
+- if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
++ if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) {
+ /* Unsolicited ARP is not accepted by default.
+ It is possible, that this option should be enabled for some
+ devices (strip is candidate)
+ */
+ if (n == NULL &&
+ arp->ar_op == htons(ARPOP_REPLY) &&
+- inet_addr_type(sip) == RTN_UNICAST)
++ inet_addr_type(net, sip) == RTN_UNICAST)
+ n = __neigh_lookup(&arp_tbl, &sip, dev, -1);
+ }
+
+@@ -966,7 +974,7 @@
+ * Set (create) an ARP cache entry.
+ */
+
+-static int arp_req_set(struct arpreq *r, struct net_device * dev)
++static int arp_req_set(struct net *net, struct arpreq *r, struct net_device * dev)
+ {
+ __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ struct neighbour *neigh;
+@@ -977,17 +985,17 @@
+ if (mask && mask != htonl(0xFFFFFFFF))
+ return -EINVAL;
+ if (!dev && (r->arp_flags & ATF_COM)) {
+- dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data);
++ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data);
+ if (!dev)
+ return -ENODEV;
+ }
+ if (mask) {
+- if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
++ if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL)
+ return -ENOBUFS;
+ return 0;
+ }
+ if (dev == NULL) {
+- IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
++ IPV4_DEVCONF_ALL(net, PROXY_ARP) = 1;
+ return 0;
+ }
+ if (__in_dev_get_rtnl(dev)) {
+@@ -1000,8 +1008,10 @@
+ if (r->arp_flags & ATF_PERM)
+ r->arp_flags |= ATF_COM;
+ if (dev == NULL) {
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+- .tos = RTO_ONLINK } } };
++ struct flowi fl = {
++ .fl_net = net,
++ .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } }
++ };
+ struct rtable * rt;
+ if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ return err;
+@@ -1080,7 +1090,7 @@
+ return err;
+ }
+
+-static int arp_req_delete(struct arpreq *r, struct net_device * dev)
++static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev)
+ {
+ int err;
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+@@ -1090,10 +1100,10 @@
+ __be32 mask =
+ ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+ if (mask == htonl(0xFFFFFFFF))
+- return pneigh_delete(&arp_tbl, &ip, dev);
++ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask == 0) {
+ if (dev == NULL) {
+- IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
++ IPV4_DEVCONF_ALL(net, PROXY_ARP) = 0;
+ return 0;
+ }
+ if (__in_dev_get_rtnl(dev)) {
+@@ -1107,8 +1117,10 @@
+ }
+
+ if (dev == NULL) {
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+- .tos = RTO_ONLINK } } };
++ struct flowi fl = {
++ .fl_net = net,
++ .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } }
++ };
+ struct rtable * rt;
+ if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ return err;
+@@ -1133,7 +1145,7 @@
+ * Handle an ARP layer I/O control request.
+ */
+
+-int arp_ioctl(unsigned int cmd, void __user *arg)
++int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ int err;
+ struct arpreq r;
+@@ -1165,7 +1177,7 @@
+ rtnl_lock();
+ if (r.arp_dev[0]) {
+ err = -ENODEV;
+- if ((dev = __dev_get_by_name(r.arp_dev)) == NULL)
++ if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
+ goto out;
+
+ /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
+@@ -1181,10 +1193,10 @@
+
+ switch (cmd) {
+ case SIOCDARP:
+- err = arp_req_delete(&r, dev);
++ err = arp_req_delete(net, &r, dev);
+ break;
+ case SIOCSARP:
+- err = arp_req_set(&r, dev);
++ err = arp_req_set(net, &r, dev);
+ break;
+ case SIOCGARP:
+ err = arp_req_get(&r, dev);
+@@ -1201,6 +1213,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_CHANGEADDR:
+ neigh_changeaddr(&arp_tbl, dev);
+@@ -1227,6 +1242,54 @@
+ }
+
+
++static int arp_proc_init(struct net *net);
++static void arp_proc_exit(struct net *net);
++
++
++static int arp_net_init(struct net *net)
++{
++ int error;
++ if ((error = arp_proc_init(net)))
++ goto out_proc;
++
++ error = -ENOMEM;
++ net->arp_neigh_parms_default = neigh_parms_alloc_default(&arp_tbl, net);
++ if (!net->arp_neigh_parms_default)
++ goto out_parm;
++
++#ifdef CONFIG_SYSCTL
++ if ((error = neigh_sysctl_register(
++ NULL, net->arp_neigh_parms_default,
++ NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL)))
++ goto out_sysctl;
++#endif
++
++out:
++ return error;
++
++#ifdef CONFIG_SYSCTL
++out_sysctl:
++ neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default);
++#endif
++out_parm:
++ arp_proc_exit(net);
++out_proc:
++ goto out;
++}
++
++static void arp_net_exit(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++ neigh_sysctl_unregister(net->arp_neigh_parms_default);
++#endif
++ neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default);
++ arp_proc_exit(net);
++}
++
++static struct pernet_operations arp_net_ops = {
++ .init = arp_net_init,
++ .exit = arp_net_exit,
++};
+ /*
+ * Called once on startup.
+ */
+@@ -1236,18 +1299,12 @@
+ .func = arp_rcv,
+ };
+
+-static int arp_proc_init(void);
+-
+ void __init arp_init(void)
+ {
+ neigh_table_init(&arp_tbl);
+
+ dev_add_pack(&arp_packet_type);
+- arp_proc_init();
+-#ifdef CONFIG_SYSCTL
+- neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+- NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+-#endif
++ register_pernet_subsys(&arp_net_ops);
+ register_netdevice_notifier(&arp_netdev_notifier);
+ }
+
+@@ -1383,6 +1440,8 @@
+
+ seq = file->private_data;
+ seq->private = s;
++ s->net = get_net(PROC_NET(inode));
++
+ out:
+ return rc;
+ out_kfree:
+@@ -1390,28 +1449,46 @@
+ goto out;
+ }
+
++static int arp_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct neigh_seq_state *state = seq->private;
++ put_net(state->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations arp_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = arp_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = arp_seq_release,
+ };
+
+-static int __init arp_proc_init(void)
++static int arp_proc_init(struct net *net)
+ {
+- if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
++ if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
++static void arp_proc_exit(struct net *net)
++{
++ proc_net_remove(net, "arp");
++}
++
+ #else /* CONFIG_PROC_FS */
+
+-static int __init arp_proc_init(void)
++static int arp_proc_init(struct net *net)
+ {
+ return 0;
+ }
+
++static void arp_proc_exit(struct net *net)
++{
++ return;
++}
++
+ #endif /* CONFIG_PROC_FS */
+
+ EXPORT_SYMBOL(arp_broken_ops);
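+
+The proc changes in this file and in dn_neigh.c share one lifetime
+rule: ->open pins the owning namespace with get_net() and stashes it in
+the seq_file private state, and the matching ->release drops the pin
+before seq_release_private(), so the namespace cannot disappear under
+an open /proc file. Both halves in sketch form (bar_seq_* are
+placeholder names; PROC_NET() is this tree's inode-to-namespace
+accessor, and the seq_open_private() boilerplate is elided):
+
+ static int bar_seq_open(struct inode *inode, struct file *file)
+ {
+ 	struct neigh_seq_state *s;
+
+ 	/* ... allocate s and seq_open() as in arp_seq_open() ... */
+ 	s = ((struct seq_file *)file->private_data)->private;
+ 	s->net = get_net(PROC_NET(inode));	/* pin the namespace */
+ 	return 0;
+ }
+
+ static int bar_seq_release(struct inode *inode, struct file *file)
+ {
+ 	struct seq_file *seq = file->private_data;
+ 	struct neigh_seq_state *state = seq->private;
+
+ 	put_net(state->net);			/* drop the pin */
+ 	return seq_release_private(inode, file);
+ }
+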
+diff -Nurb linux-2.6.22-570/net/ipv4/devinet.c linux-2.6.22-591/net/ipv4/devinet.c
+--- linux-2.6.22-570/net/ipv4/devinet.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/devinet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -63,7 +63,7 @@
+ #include <net/ip_fib.h>
+ #include <net/rtnetlink.h>
+
+-struct ipv4_devconf ipv4_devconf = {
++static struct ipv4_devconf ipv4_devconf_template = {
+ .data = {
+ [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+@@ -72,7 +72,7 @@
+ },
+ };
+
+-static struct ipv4_devconf ipv4_devconf_dflt = {
++static struct ipv4_devconf ipv4_devconf_dflt_template = {
+ .data = {
+ [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+@@ -82,7 +82,7 @@
+ },
+ };
+
+-#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
++#define IPV4_DEVCONF_DFLT(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf_dflt), attr)
+
+ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
+ [IFA_LOCAL] = { .type = NLA_U32 },
+@@ -98,7 +98,7 @@
+ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ int destroy);
+ #ifdef CONFIG_SYSCTL
+-static void devinet_sysctl_register(struct in_device *in_dev,
++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev,
+ struct ipv4_devconf *p);
+ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+ #endif
+@@ -149,6 +149,7 @@
+
+ static struct in_device *inetdev_init(struct net_device *dev)
+ {
++ struct net *net = dev->nd_net;
+ struct in_device *in_dev;
+
+ ASSERT_RTNL();
+@@ -157,7 +158,7 @@
+ if (!in_dev)
+ goto out;
+ INIT_RCU_HEAD(&in_dev->rcu_head);
+- memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
++ memcpy(&in_dev->cnf, net->ipv4_devconf_dflt, sizeof(in_dev->cnf));
+ in_dev->cnf.sysctl = NULL;
+ in_dev->dev = dev;
+ if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
+@@ -173,7 +174,7 @@
+ in_dev_hold(in_dev);
+
+ #ifdef CONFIG_SYSCTL
+- devinet_sysctl_register(in_dev, &in_dev->cnf);
++ devinet_sysctl_register(net, in_dev, &in_dev->cnf);
+ #endif
+ ip_mc_init_dev(in_dev);
+ if (dev->flags & IFF_UP)
+@@ -203,8 +204,6 @@
+ ASSERT_RTNL();
+
+ dev = in_dev->dev;
+- if (dev == &loopback_dev)
+- return;
+
+ in_dev->dead = 1;
+
+@@ -415,12 +414,12 @@
+ return inet_insert_ifa(ifa);
+ }
+
+-struct in_device *inetdev_by_index(int ifindex)
++struct in_device *inetdev_by_index(struct net *net, int ifindex)
+ {
+ struct net_device *dev;
+ struct in_device *in_dev = NULL;
+ read_lock(&dev_base_lock);
+- dev = __dev_get_by_index(ifindex);
++ dev = __dev_get_by_index(net, ifindex);
+ if (dev)
+ in_dev = in_dev_get(dev);
+ read_unlock(&dev_base_lock);
+@@ -444,6 +443,7 @@
+
+ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in_device *in_dev;
+ struct ifaddrmsg *ifm;
+@@ -457,7 +457,7 @@
+ goto errout;
+
+ ifm = nlmsg_data(nlh);
+- in_dev = inetdev_by_index(ifm->ifa_index);
++ in_dev = inetdev_by_index(net, ifm->ifa_index);
+ if (in_dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+@@ -488,7 +488,7 @@
+ return err;
+ }
+
+-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
++static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+ {
+ struct nlattr *tb[IFA_MAX+1];
+ struct in_ifaddr *ifa;
+@@ -507,7 +507,7 @@
+ goto errout;
+ }
+
+- dev = __dev_get_by_index(ifm->ifa_index);
++ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout;
+@@ -564,11 +564,12 @@
+
+ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct in_ifaddr *ifa;
+
+ ASSERT_RTNL();
+
+- ifa = rtm_to_ifaddr(nlh);
++ ifa = rtm_to_ifaddr(net, nlh);
+ if (IS_ERR(ifa))
+ return PTR_ERR(ifa);
+
+@@ -600,7 +601,7 @@
+ }
+
+
+-int devinet_ioctl(unsigned int cmd, void __user *arg)
++int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ struct ifreq ifr;
+ struct sockaddr_in sin_orig;
+@@ -629,7 +630,7 @@
+ *colon = 0;
+
+ #ifdef CONFIG_KMOD
+- dev_load(ifr.ifr_name);
++ dev_load(net, ifr.ifr_name);
+ #endif
+
+ switch (cmd) {
+@@ -670,7 +671,7 @@
+ rtnl_lock();
+
+ ret = -ENODEV;
+- if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
++ if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
+ goto done;
+
+ if (colon)
+@@ -889,6 +890,7 @@
+
+ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
+ {
++ struct net *net = dev->nd_net;
+ __be32 addr = 0;
+ struct in_device *in_dev;
+
+@@ -919,7 +921,7 @@
+ */
+ read_lock(&dev_base_lock);
+ rcu_read_lock();
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
+ continue;
+
+@@ -982,7 +984,7 @@
+ * - local: address, 0=autoselect the local address
+ * - scope: maximum allowed scope value for the local address
+ */
+-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
++__be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope)
+ {
+ __be32 addr = 0;
+ struct in_device *in_dev;
+@@ -998,7 +1000,7 @@
+
+ read_lock(&dev_base_lock);
+ rcu_read_lock();
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if ((in_dev = __in_dev_get_rcu(dev))) {
+ addr = confirm_addr_indev(in_dev, dst, local, scope);
+ if (addr)
+@@ -1059,6 +1061,7 @@
+ void *ptr)
+ {
+ struct net_device *dev = ptr;
++ struct net *net = dev->nd_net;
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
+
+ ASSERT_RTNL();
+@@ -1066,7 +1069,7 @@
+ if (!in_dev) {
+ if (event == NETDEV_REGISTER) {
+ in_dev = inetdev_init(dev);
+- if (dev == &loopback_dev) {
++ if (dev == &net->loopback_dev) {
+ if (!in_dev)
+ panic("devinet: "
+ "Failed to create loopback\n");
+@@ -1085,7 +1088,7 @@
+ case NETDEV_UP:
+ if (dev->mtu < 68)
+ break;
+- if (dev == &loopback_dev) {
++ if (dev == &net->loopback_dev) {
+ struct in_ifaddr *ifa;
+ if ((ifa = inet_alloc_ifa()) != NULL) {
+ ifa->ifa_local =
+@@ -1122,7 +1125,7 @@
+ neigh_sysctl_unregister(in_dev->arp_parms);
+ neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
+ NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+- devinet_sysctl_register(in_dev, &in_dev->cnf);
++ devinet_sysctl_register(net, in_dev, &in_dev->cnf);
+ #endif
+ break;
+ }
+@@ -1185,6 +1188,7 @@
+
+ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx, ip_idx;
+ struct net_device *dev;
+ struct in_device *in_dev;
+@@ -1194,7 +1198,7 @@
+
+ s_ip_idx = ip_idx = cb->args[1];
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+@@ -1228,6 +1232,7 @@
+ u32 pid)
+ {
+ struct sk_buff *skb;
++ struct net *net = ifa->ifa_dev->dev->nd_net;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ int err = -ENOBUFS;
+
+@@ -1242,25 +1247,25 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
++ err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
++ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
+ }
+
+ #ifdef CONFIG_SYSCTL
+
+-static void devinet_copy_dflt_conf(int i)
++static void devinet_copy_dflt_conf(struct net *net, int i)
+ {
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ struct in_device *in_dev;
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev && !test_bit(i, in_dev->cnf.state))
+- in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
++ in_dev->cnf.data[i] = net->ipv4_devconf_dflt->data[i];
+ rcu_read_unlock();
+ }
+ read_unlock(&dev_base_lock);
+@@ -1274,12 +1279,13 @@
+
+ if (write) {
+ struct ipv4_devconf *cnf = ctl->extra1;
++ struct net *net = ctl->extra2;
+ int i = (int *)ctl->data - cnf->data;
+
+ set_bit(i, cnf->state);
+
+- if (cnf == &ipv4_devconf_dflt)
+- devinet_copy_dflt_conf(i);
++ if (cnf == net->ipv4_devconf_dflt)
++ devinet_copy_dflt_conf(net, i);
+ }
+
+ return ret;
+@@ -1291,6 +1297,7 @@
+ {
+ struct ipv4_devconf *cnf;
+ int *valp = table->data;
++ struct net *net;
+ int new;
+ int i;
+
+@@ -1325,26 +1332,27 @@
+ *valp = new;
+
+ cnf = table->extra1;
++ net = table->extra2;
+ i = (int *)table->data - cnf->data;
+
+ set_bit(i, cnf->state);
+
+- if (cnf == &ipv4_devconf_dflt)
+- devinet_copy_dflt_conf(i);
++ if (cnf == net->ipv4_devconf_dflt)
++ devinet_copy_dflt_conf(net, i);
+
+ return 1;
+ }
+
+-void inet_forward_change(void)
++void inet_forward_change(struct net *net)
+ {
+ struct net_device *dev;
+- int on = IPV4_DEVCONF_ALL(FORWARDING);
++ int on = IPV4_DEVCONF_ALL(net, FORWARDING);
+
+- IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+- IPV4_DEVCONF_DFLT(FORWARDING) = on;
++ IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
++ IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(net, dev) {
+ struct in_device *in_dev;
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dev);
+@@ -1364,11 +1372,12 @@
+ int *valp = ctl->data;
+ int val = *valp;
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
++ struct net *net = ctl->extra2;
+
+ if (write && *valp != val) {
+- if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
+- inet_forward_change();
+- else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
++ if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
++ inet_forward_change(net);
++ else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
+ rt_cache_flush(0);
+ }
+
+@@ -1407,13 +1416,14 @@
+ { \
+ .ctl_name = NET_IPV4_CONF_ ## attr, \
+ .procname = name, \
+- .data = ipv4_devconf.data + \
++ .data = ipv4_devconf_template.data + \
+ NET_IPV4_CONF_ ## attr - 1, \
+ .maxlen = sizeof(int), \
+ .mode = mval, \
+ .proc_handler = proc, \
+ .strategy = sysctl, \
+- .extra1 = &ipv4_devconf, \
++ .extra1 = &ipv4_devconf_template, \
++ .extra2 = &init_net, \
+ }
+
+ #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
+@@ -1503,25 +1513,29 @@
+ },
+ };
+
+-static void devinet_sysctl_register(struct in_device *in_dev,
++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev,
+ struct ipv4_devconf *p)
+ {
+ int i;
+ struct net_device *dev = in_dev ? in_dev->dev : NULL;
+- struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
+- GFP_KERNEL);
++ struct devinet_sysctl_table *t;
+ char *dev_name = NULL;
+
++ t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return;
+ for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+- t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
++ t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf_template;
+ t->devinet_vars[i].extra1 = p;
++ t->devinet_vars[i].extra2 = net;
+ }
+
+ if (dev) {
+ dev_name = dev->name;
+ t->devinet_dev[0].ctl_name = dev->ifindex;
++ } else if (p == net->ipv4_devconf) {
++ dev_name = "all";
++ t->devinet_dev[0].ctl_name = NET_PROTO_CONF_ALL;
+ } else {
+ dev_name = "default";
+ t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+@@ -1542,7 +1556,7 @@
+ t->devinet_proto_dir[0].child = t->devinet_conf_dir;
+ t->devinet_root_dir[0].child = t->devinet_proto_dir;
+
+- t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
++ t->sysctl_header = register_net_sysctl_table(net, t->devinet_root_dir);
+ if (!t->sysctl_header)
+ goto free_procname;
+
+@@ -1562,26 +1576,59 @@
+ if (p->sysctl) {
+ struct devinet_sysctl_table *t = p->sysctl;
+ p->sysctl = NULL;
+- unregister_sysctl_table(t->sysctl_header);
++ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t->devinet_dev[0].procname);
+ kfree(t);
+ }
+ }
+ #endif
+
++static int devinet_net_init(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++ net->ipv4_devconf = kmemdup(&ipv4_devconf_template,
++ sizeof(ipv4_devconf_template), GFP_KERNEL);
++ if (!net->ipv4_devconf)
++ return -ENOMEM;
++
++ net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template,
++ sizeof(ipv4_devconf_dflt_template),
++ GFP_KERNEL);
++ if (!net->ipv4_devconf_dflt) {
++ kfree(net->ipv4_devconf);
++ return -ENOMEM;
++ }
++
++ devinet_sysctl_register(net, NULL, net->ipv4_devconf);
++ devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt);
++
++ multi_ipv4_table[0].data = &IPV4_DEVCONF_ALL(net, FORWARDING);
++#endif
++ return 0;
++}
++
++static void devinet_net_exit(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++ devinet_sysctl_unregister(net->ipv4_devconf_dflt);
++ devinet_sysctl_unregister(net->ipv4_devconf);
++#endif
++}
++
++static struct pernet_operations devinet_net_ops = {
++ .init = devinet_net_init,
++ .exit = devinet_net_exit,
++};
++
+ void __init devinet_init(void)
+ {
++ register_pernet_subsys(&devinet_net_ops);
+ register_gifconf(PF_INET, inet_gifconf);
+ register_netdevice_notifier(&ip_netdev_notifier);
+
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+-#ifdef CONFIG_SYSCTL
+- devinet_sysctl.sysctl_header =
+- register_sysctl_table(devinet_sysctl.devinet_root_dir);
+- devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+-#endif
+ }
+
+ EXPORT_SYMBOL(in_dev_finish_destroy);
+diff -Nurb linux-2.6.22-570/net/ipv4/esp4.c linux-2.6.22-591/net/ipv4/esp4.c
+--- linux-2.6.22-570/net/ipv4/esp4.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/esp4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -307,6 +307,9 @@
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
+ struct xfrm_state *x;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+@@ -481,3 +484,4 @@
+ module_init(esp4_init);
+ module_exit(esp4_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_frontend.c linux-2.6.22-591/net/ipv4/fib_frontend.c
+--- linux-2.6.22-570/net/ipv4/fib_frontend.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/fib_frontend.c 2007-12-21 15:36:15.000000000 -0500
+@@ -51,38 +51,34 @@
+
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+
+-struct fib_table *ip_fib_local_table;
+-struct fib_table *ip_fib_main_table;
+-
+ #define FIB_TABLE_HASHSZ 1
+-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+ #else
+
+ #define FIB_TABLE_HASHSZ 256
+-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+-struct fib_table *fib_new_table(u32 id)
++struct fib_table *fib_new_table(struct net *net, u32 id)
+ {
+ struct fib_table *tb;
+ unsigned int h;
+
+ if (id == 0)
+ id = RT_TABLE_MAIN;
+- tb = fib_get_table(id);
++ tb = fib_get_table(net, id);
+ if (tb)
+ return tb;
+ tb = fib_hash_init(id);
+ if (!tb)
+ return NULL;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+- hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
++ hlist_add_head_rcu(&tb->tb_hlist, &net->ip_fib_table_hash[h]);
+ return tb;
+ }
+
+-struct fib_table *fib_get_table(u32 id)
++struct fib_table *fib_get_table(struct net *net, u32 id)
+ {
+ struct fib_table *tb;
++ struct hlist_head *head;
+ struct hlist_node *node;
+ unsigned int h;
+
+@@ -90,7 +86,8 @@
+ id = RT_TABLE_MAIN;
+ h = id & (FIB_TABLE_HASHSZ - 1);
+ rcu_read_lock();
+- hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
++ head = &net->ip_fib_table_hash[h];
++ hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+ if (tb->tb_id == id) {
+ rcu_read_unlock();
+ return tb;
+@@ -99,9 +96,10 @@
+ rcu_read_unlock();
+ return NULL;
+ }
++
+ #endif /* CONFIG_IP_MULTIPLE_TABLES */
+
+-static void fib_flush(void)
++static void fib_flush(struct net *net)
+ {
+ int flushed = 0;
+ struct fib_table *tb;
+@@ -109,7 +107,8 @@
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+- hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
++ struct hlist_head *head = &net->ip_fib_table_hash[h];
++ hlist_for_each_entry(tb, node, head, tb_hlist)
+ flushed += tb->tb_flush(tb);
+ }
+
+@@ -121,18 +120,23 @@
+ * Find the first device with a given source address.
+ */
+
+-struct net_device * ip_dev_find(__be32 addr)
++struct net_device * ip_dev_find(struct net *net, __be32 addr)
+ {
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
++ struct flowi fl = {
++ .fl_net = net,
++ .nl_u = { .ip4_u = { .daddr = addr } }
++ };
+ struct fib_result res;
+ struct net_device *dev = NULL;
++ struct fib_table *local_table;
+
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ res.r = NULL;
+ #endif
+
+- if (!ip_fib_local_table ||
+- ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
++ local_table = fib_get_table(net, RT_TABLE_LOCAL);
++ if (!local_table ||
++ local_table->tb_lookup(local_table, &fl, &res))
+ return NULL;
+ if (res.type != RTN_LOCAL)
+ goto out;
+@@ -145,11 +149,15 @@
+ return dev;
+ }
+
+-unsigned inet_addr_type(__be32 addr)
++unsigned inet_addr_type(struct net *net, __be32 addr)
+ {
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
++ struct flowi fl = {
++ .fl_net = net,
++ .nl_u = { .ip4_u = { .daddr = addr } }
++ };
+ struct fib_result res;
+ unsigned ret = RTN_BROADCAST;
++ struct fib_table *local_table;
+
+ if (ZERONET(addr) || BADCLASS(addr))
+ return RTN_BROADCAST;
+@@ -160,10 +168,10 @@
+ res.r = NULL;
+ #endif
+
+- if (ip_fib_local_table) {
++ local_table = fib_get_table(net, RT_TABLE_LOCAL);
++ if (local_table) {
+ ret = RTN_UNICAST;
+- if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
+- &fl, &res)) {
++ if (!local_table->tb_lookup(local_table, &fl, &res)) {
+ ret = res.type;
+ fib_res_put(&res);
+ }
+@@ -183,7 +191,8 @@
+ struct net_device *dev, __be32 *spec_dst, u32 *itag)
+ {
+ struct in_device *in_dev;
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = dev->nd_net,
++ .nl_u = { .ip4_u =
+ { .daddr = src,
+ .saddr = dst,
+ .tos = tos } },
+@@ -267,13 +276,16 @@
+ return len + nla_total_size(4);
+ }
+
+-static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
++static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
+ struct fib_config *cfg)
+ {
+ __be32 addr;
+ int plen;
+
+ memset(cfg, 0, sizeof(*cfg));
++ cfg->fc_nlinfo.pid = 0;
++ cfg->fc_nlinfo.nlh = NULL;
++ cfg->fc_nlinfo.net = net;
+
+ if (rt->rt_dst.sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+@@ -334,7 +346,7 @@
+ colon = strchr(devname, ':');
+ if (colon)
+ *colon = 0;
+- dev = __dev_get_by_name(devname);
++ dev = __dev_get_by_name(net, devname);
+ if (!dev)
+ return -ENODEV;
+ cfg->fc_oif = dev->ifindex;
+@@ -357,7 +369,7 @@
+ if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ cfg->fc_gw = addr;
+ if (rt->rt_flags & RTF_GATEWAY &&
+- inet_addr_type(addr) == RTN_UNICAST)
++ inet_addr_type(net, addr) == RTN_UNICAST)
+ cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ }
+
+@@ -398,7 +410,7 @@
+ * Handle IP routing ioctl calls. These are used to manipulate the routing tables
+ */
+
+-int ip_rt_ioctl(unsigned int cmd, void __user *arg)
++int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ struct fib_config cfg;
+ struct rtentry rt;
+@@ -414,18 +426,18 @@
+ return -EFAULT;
+
+ rtnl_lock();
+- err = rtentry_to_fib_config(cmd, &rt, &cfg);
++ err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ if (err == 0) {
+ struct fib_table *tb;
+
+ if (cmd == SIOCDELRT) {
+- tb = fib_get_table(cfg.fc_table);
++ tb = fib_get_table(net, cfg.fc_table);
+ if (tb)
+ err = tb->tb_delete(tb, &cfg);
+ else
+ err = -ESRCH;
+ } else {
+- tb = fib_new_table(cfg.fc_table);
++ tb = fib_new_table(net, cfg.fc_table);
+ if (tb)
+ err = tb->tb_insert(tb, &cfg);
+ else
+@@ -453,7 +465,6 @@
+ [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+ [RTA_PROTOINFO] = { .type = NLA_U32 },
+ [RTA_FLOW] = { .type = NLA_U32 },
+- [RTA_MP_ALGO] = { .type = NLA_U32 },
+ };
+
+ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+@@ -481,6 +492,7 @@
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
++ cfg->fc_nlinfo.net = skb->sk->sk_net;
+
+ if (cfg->fc_type > RTN_MAX) {
+ err = -EINVAL;
+@@ -515,9 +527,6 @@
+ case RTA_FLOW:
+ cfg->fc_flow = nla_get_u32(attr);
+ break;
+- case RTA_MP_ALGO:
+- cfg->fc_mp_alg = nla_get_u32(attr);
+- break;
+ case RTA_TABLE:
+ cfg->fc_table = nla_get_u32(attr);
+ break;
+@@ -531,6 +540,7 @@
+
+ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
+@@ -539,7 +549,7 @@
+ if (err < 0)
+ goto errout;
+
+- tb = fib_get_table(cfg.fc_table);
++ tb = fib_get_table(net, cfg.fc_table);
+ if (tb == NULL) {
+ err = -ESRCH;
+ goto errout;
+@@ -552,6 +562,7 @@
+
+ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib_config cfg;
+ struct fib_table *tb;
+ int err;
+@@ -560,7 +571,7 @@
+ if (err < 0)
+ goto errout;
+
+- tb = fib_new_table(cfg.fc_table);
++ tb = fib_new_table(net, cfg.fc_table);
+ if (tb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+@@ -573,6 +584,7 @@
+
+ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct fib_table *tb;
+@@ -587,8 +599,9 @@
+ s_e = cb->args[1];
+
+ for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
++ struct hlist_head *head = &net->ip_fib_table_hash[h];
+ e = 0;
+- hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
++ hlist_for_each_entry(tb, node, head, tb_hlist) {
+ if (e < s_e)
+ goto next;
+ if (dumped)
+@@ -617,6 +630,7 @@
+
+ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+ {
++ struct net *net = ifa->ifa_dev->dev->nd_net;
+ struct fib_table *tb;
+ struct fib_config cfg = {
+ .fc_protocol = RTPROT_KERNEL,
+@@ -626,12 +640,13 @@
+ .fc_prefsrc = ifa->ifa_local,
+ .fc_oif = ifa->ifa_dev->dev->ifindex,
+ .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
++ .fc_nlinfo.net = net,
+ };
+
+ if (type == RTN_UNICAST)
+- tb = fib_new_table(RT_TABLE_MAIN);
++ tb = fib_new_table(net, RT_TABLE_MAIN);
+ else
+- tb = fib_new_table(RT_TABLE_LOCAL);
++ tb = fib_new_table(net, RT_TABLE_LOCAL);
+
+ if (tb == NULL)
+ return;
+@@ -692,6 +707,7 @@
+ {
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct net_device *dev = in_dev->dev;
++ struct net *net = dev->nd_net;
+ struct in_ifaddr *ifa1;
+ struct in_ifaddr *prim = ifa;
+ __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
+@@ -740,15 +756,15 @@
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+
+ /* Check, that this local address finally disappeared. */
+- if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
++ if (inet_addr_type(net, ifa->ifa_local) != RTN_LOCAL) {
+ /* And the last, but not the least thing.
+ We must flush stray FIB entries.
+
+ First of all, we scan fib_info list searching
+ for stray nexthop entries, then ignite fib_flush.
+ */
+- if (fib_sync_down(ifa->ifa_local, NULL, 0))
+- fib_flush();
++ if (fib_sync_down(net, ifa->ifa_local, NULL, 0))
++ fib_flush(net);
+ }
+ }
+ #undef LOCAL_OK
+@@ -757,11 +773,12 @@
+ #undef BRD1_OK
+ }
+
+-static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
++static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn, struct fib_table *tb )
+ {
+
+ struct fib_result res;
+- struct flowi fl = { .mark = frn->fl_mark,
++ struct flowi fl = { .fl_net = net,
++ .mark = frn->fl_mark,
+ .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
+ .tos = frn->fl_tos,
+ .scope = frn->fl_scope } } };
+@@ -790,6 +807,7 @@
+
+ static void nl_fib_input(struct sock *sk, int len)
+ {
++ struct net *net = sk->sk_net;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh = NULL;
+ struct fib_result_nl *frn;
+@@ -808,9 +826,9 @@
+ }
+
+ frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
+- tb = fib_get_table(frn->tb_id_in);
++ tb = fib_get_table(net, frn->tb_id_in);
+
+- nl_fib_lookup(frn, tb);
++ nl_fib_lookup(net, frn, tb);
+
+ pid = NETLINK_CB(skb).pid; /* pid of sending process */
+ NETLINK_CB(skb).pid = 0; /* from kernel */
+@@ -818,16 +836,36 @@
+ netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
+ }
+
+-static void nl_fib_lookup_init(void)
++static int nl_fib_lookup_init(struct net *net)
+ {
+- netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+- THIS_MODULE);
++ int error = -ENOMEM;
++ struct sock *sk;
++ sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, nl_fib_input,
++ NULL, THIS_MODULE);
++ if (sk) {
++ /* Don't hold an extra reference on the namespace */
++ put_net(sk->sk_net);
++ net->nlfl = sk;
++ error = 0;
++ }
++ return error;
++}
++
++static void nl_fib_lookup_exit(struct net *net)
++{
++ /* At the last minute, pretend the socket belongs to the
++ * initial network namespace so that it can be freed
++ * safely.
++ */
++ net->nlfl->sk_net = get_net(&init_net);
++ sock_put(net->nlfl);
+ }
+
+ static void fib_disable_ip(struct net_device *dev, int force)
+ {
+- if (fib_sync_down(0, dev, force))
+- fib_flush();
++ struct net *net = dev->nd_net;
++ if (fib_sync_down(net, 0, dev, force))
++ fib_flush(net);
+ rt_cache_flush(0);
+ arp_ifdown(dev);
+ }
+@@ -864,6 +902,9 @@
+ struct net_device *dev = ptr;
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_UNREGISTER) {
+ fib_disable_ip(dev, 2);
+ return NOTIFY_DONE;
+@@ -893,6 +934,85 @@
+ return NOTIFY_DONE;
+ }
+
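++/* Allocate this namespace's FIB table hash and seed it with the
++ * local and main tables (or with the fib rules when multiple
++ * tables are configured).
++ */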
++static int ip_fib_net_init(struct net *net)
++{
++ unsigned int i;
++
++ net->ip_fib_table_hash = kzalloc(
++ sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
++ if (!net->ip_fib_table_hash)
++ return -ENOMEM;
++
++ for (i = 0; i < FIB_TABLE_HASHSZ; i++)
++ INIT_HLIST_HEAD(&net->ip_fib_table_hash[i]);
++#ifndef CONFIG_IP_MULTIPLE_TABLES
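++ /* fib_hash_init() may return NULL on allocation failure; the
++ * hlist adds below assume it succeeded.
++ */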
++ net->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
++ hlist_add_head_rcu(&net->ip_fib_local_table->tb_hlist,
++ &net->ip_fib_table_hash[0]);
++ net->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
++ hlist_add_head_rcu(&net->ip_fib_main_table->tb_hlist,
++ &net->ip_fib_table_hash[0]);
++#else
++ fib4_rules_init(net);
++#endif
++ return 0;
++}
++
++static void ip_fib_net_exit(struct net *net)
++{
++ unsigned int i;
++
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++ fib4_rules_exit(net);
++#endif
++
++ synchronize_rcu(); /* needed? */
++ for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++ struct fib_table *tb;
++ struct hlist_head *head;
++ struct hlist_node *node, *tmp;
++
++ head = &net->ip_fib_table_hash[i];
++ hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
++ hlist_del(node);
++ fib_hash_exit(tb);
++ }
++ }
++ kfree(net->ip_fib_table_hash);
++}
++
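++/* Bring up all per-namespace IPv4 FIB state: tables first, then
++ * the fib_info hashes, the netlink lookup socket and finally
++ * the /proc entries.
++ */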
++static int fib_net_init(struct net *net)
++{
++ int error;
++
++ if ((error = ip_fib_net_init(net)))
++ goto out;
++ if ((error = fib_info_init(net)))
++ goto out_info;
++ if ((error = nl_fib_lookup_init(net)))
++ goto out_nlfl;
++ if ((error = fib_proc_init(net)))
++ goto out_proc;
++out:
++ return error;
++out_proc:
++ nl_fib_lookup_exit(net);
++out_nlfl:
++ fib_info_exit(net);
++out_info:
++ ip_fib_net_exit(net);
++ goto out;
++}
++
++static void fib_net_exit(struct net *net)
++{
++ fib_proc_exit(net);
++ nl_fib_lookup_exit(net);
++ fib_info_exit(net);
++ ip_fib_net_exit(net);
++}
++
+ static struct notifier_block fib_inetaddr_notifier = {
+ .notifier_call =fib_inetaddr_event,
+ };
+@@ -901,28 +1021,20 @@
+ .notifier_call =fib_netdev_event,
+ };
+
++static struct pernet_operations fib_net_ops = {
++ .init = fib_net_init,
++ .exit = fib_net_exit,
++};
++
+ void __init ip_fib_init(void)
+ {
+- unsigned int i;
+-
+- for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+- INIT_HLIST_HEAD(&fib_table_hash[i]);
+-#ifndef CONFIG_IP_MULTIPLE_TABLES
+- ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+- hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
+- ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
+- hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
+-#else
+- fib4_rules_init();
+-#endif
+-
+- register_netdevice_notifier(&fib_netdev_notifier);
+- register_inetaddr_notifier(&fib_inetaddr_notifier);
+- nl_fib_lookup_init();
+-
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
++
++ register_pernet_subsys(&fib_net_ops);
++ register_netdevice_notifier(&fib_netdev_notifier);
++ register_inetaddr_notifier(&fib_inetaddr_notifier);
+ }
+
+ EXPORT_SYMBOL(inet_addr_type);
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_hash.c linux-2.6.22-591/net/ipv4/fib_hash.c
+--- linux-2.6.22-570/net/ipv4/fib_hash.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/fib_hash.c 2007-12-21 15:36:15.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <net/route.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/ip_fib.h>
+
+ #include "fib_lookup.h"
+@@ -274,11 +275,10 @@
+ return err;
+ }
+
+-static int fn_hash_last_dflt=-1;
+-
+ static void
+ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
++ struct net *net = flp->fl_net;
+ int order, last_idx;
+ struct hlist_node *node;
+ struct fib_node *f;
+@@ -316,12 +316,12 @@
+ if (next_fi != res->fi)
+ break;
+ } else if (!fib_detect_death(fi, order, &last_resort,
+- &last_idx, &fn_hash_last_dflt)) {
++ &last_idx, &net->fn_hash_last_dflt)) {
+ if (res->fi)
+ fib_info_put(res->fi);
+ res->fi = fi;
+ atomic_inc(&fi->fib_clntref);
+- fn_hash_last_dflt = order;
++ net->fn_hash_last_dflt = order;
+ goto out;
+ }
+ fi = next_fi;
+@@ -330,16 +330,16 @@
+ }
+
+ if (order <= 0 || fi == NULL) {
+- fn_hash_last_dflt = -1;
++ net->fn_hash_last_dflt = -1;
+ goto out;
+ }
+
+- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
++ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) {
+ if (res->fi)
+ fib_info_put(res->fi);
+ res->fi = fi;
+ atomic_inc(&fi->fib_clntref);
+- fn_hash_last_dflt = order;
++ net->fn_hash_last_dflt = order;
+ goto out;
+ }
+
+@@ -350,7 +350,7 @@
+ if (last_resort)
+ atomic_inc(&last_resort->fib_clntref);
+ }
+- fn_hash_last_dflt = last_idx;
++ net->fn_hash_last_dflt = last_idx;
+ out:
+ read_unlock(&fib_hash_lock);
+ }
+@@ -759,11 +759,15 @@
+ return skb->len;
+ }
+
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
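++/* Undo fib_hash_init(): flush every entry, then free the table. */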
++void fib_hash_exit(struct fib_table *tb)
++{
++ if (!tb)
++ return;
++ fn_hash_flush(tb);
++ kfree(tb);
++}
++
+ struct fib_table * fib_hash_init(u32 id)
+-#else
+-struct fib_table * __init fib_hash_init(u32 id)
+-#endif
+ {
+ struct fib_table *tb;
+
+@@ -799,6 +803,7 @@
+ #ifdef CONFIG_PROC_FS
+
+ struct fib_iter_state {
++ struct net *net;
+ struct fn_zone *zone;
+ int bucket;
+ struct hlist_head *hash_head;
+@@ -812,7 +817,8 @@
+ static struct fib_alias *fib_get_first(struct seq_file *seq)
+ {
+ struct fib_iter_state *iter = seq->private;
+- struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;
++ struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN);
++ struct fn_hash *table = (struct fn_hash *) main_table->tb_data;
+
+ iter->bucket = 0;
+ iter->hash_head = NULL;
+@@ -948,10 +954,11 @@
+
+ static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++ struct fib_iter_state *iter = seq->private;
+ void *v = NULL;
+
+ read_lock(&fib_hash_lock);
+- if (ip_fib_main_table)
++ if (fib_get_table(iter->net, RT_TABLE_MAIN))
+ v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ return v;
+ }
+@@ -1051,6 +1058,7 @@
+
+ seq = file->private_data;
+ seq->private = s;
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -1058,23 +1066,32 @@
+ goto out;
+ }
+
++static int fib_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct fib_iter_state *iter = seq->private;
++ put_net(iter->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations fib_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = fib_seq_release,
+ };
+
+-int __init fib_proc_init(void)
++int fib_proc_init(struct net *net)
+ {
+- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
++ net->fn_hash_last_dflt = -1;
++ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+-void __init fib_proc_exit(void)
++void fib_proc_exit(struct net *net)
+ {
+- proc_net_remove("route");
++ proc_net_remove(net, "route");
+ }
+ #endif /* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_rules.c linux-2.6.22-591/net/ipv4/fib_rules.c
+--- linux-2.6.22-570/net/ipv4/fib_rules.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/fib_rules.c 2007-12-21 15:36:15.000000000 -0500
+@@ -32,8 +32,6 @@
+ #include <net/ip_fib.h>
+ #include <net/fib_rules.h>
+
+-static struct fib_rules_ops fib4_rules_ops;
+-
+ struct fib4_rule
+ {
+ struct fib_rule common;
+@@ -49,35 +47,14 @@
+ #endif
+ };
+
+-static struct fib4_rule default_rule = {
+- .common = {
+- .refcnt = ATOMIC_INIT(2),
+- .pref = 0x7FFF,
+- .table = RT_TABLE_DEFAULT,
+- .action = FR_ACT_TO_TBL,
+- },
++struct fib4_rule_table {
++ struct list_head fib4_rules;
++ struct fib4_rule default_rule;
++ struct fib4_rule main_rule;
++ struct fib4_rule local_rule;
++ struct fib_rules_ops fib4_rules_ops;
+ };
+
+-static struct fib4_rule main_rule = {
+- .common = {
+- .refcnt = ATOMIC_INIT(2),
+- .pref = 0x7FFE,
+- .table = RT_TABLE_MAIN,
+- .action = FR_ACT_TO_TBL,
+- },
+-};
+-
+-static struct fib4_rule local_rule = {
+- .common = {
+- .refcnt = ATOMIC_INIT(2),
+- .table = RT_TABLE_LOCAL,
+- .action = FR_ACT_TO_TBL,
+- .flags = FIB_RULE_PERMANENT,
+- },
+-};
+-
+-static LIST_HEAD(fib4_rules);
+-
+ #ifdef CONFIG_NET_CLS_ROUTE
+ u32 fib_rules_tclass(struct fib_result *res)
+ {
+@@ -87,12 +64,14 @@
+
+ int fib_lookup(struct flowi *flp, struct fib_result *res)
+ {
++ struct net *net = flp->fl_net;
++ struct fib4_rule_table *table = net->fib4_table;
+ struct fib_lookup_arg arg = {
+ .result = res,
+ };
+ int err;
+
+- err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
++ err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg);
+ res->r = arg.rule;
+
+ return err;
+@@ -122,7 +101,7 @@
+ goto errout;
+ }
+
+- if ((tbl = fib_get_table(rule->table)) == NULL)
++ if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL)
+ goto errout;
+
+ err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+@@ -138,7 +117,7 @@
+ if (res->r && res->r->action == FR_ACT_TO_TBL &&
+ FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+ struct fib_table *tb;
+- if ((tb = fib_get_table(res->r->table)) != NULL)
++ if ((tb = fib_get_table(flp->fl_net, res->r->table)) != NULL)
+ tb->tb_select_default(tb, flp, res);
+ }
+ }
+@@ -159,13 +138,13 @@
+ return 1;
+ }
+
+-static struct fib_table *fib_empty_table(void)
++static struct fib_table *fib_empty_table(struct net *net)
+ {
+ u32 id;
+
+ for (id = 1; id <= RT_TABLE_MAX; id++)
+- if (fib_get_table(id) == NULL)
+- return fib_new_table(id);
++ if (fib_get_table(net, id) == NULL)
++ return fib_new_table(net, id);
+ return NULL;
+ }
+
+@@ -178,6 +157,7 @@
+ struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct nlattr **tb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int err = -EINVAL;
+ struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+@@ -188,7 +168,7 @@
+ if (rule->action == FR_ACT_TO_TBL) {
+ struct fib_table *table;
+
+- table = fib_empty_table();
++ table = fib_empty_table(net);
+ if (table == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+@@ -274,14 +254,15 @@
+ return -ENOBUFS;
+ }
+
+-static u32 fib4_rule_default_pref(void)
++static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
+ {
+- struct list_head *pos;
++ struct list_head *list, *pos;
+ struct fib_rule *rule;
+
+- if (!list_empty(&fib4_rules)) {
+- pos = fib4_rules.next;
+- if (pos->next != &fib4_rules) {
++ list = ops->rules_list;
++ if (!list_empty(list)) {
++ pos = list->next;
++ if (pos->next != list) {
+ rule = list_entry(pos->next, struct fib_rule, list);
+ if (rule->pref)
+ return rule->pref - 1;
+@@ -298,12 +279,37 @@
+ + nla_total_size(4); /* flow */
+ }
+
+-static void fib4_rule_flush_cache(void)
++static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
+ {
+ rt_cache_flush(-1);
+ }
+
+-static struct fib_rules_ops fib4_rules_ops = {
++static struct fib4_rule_table fib4_rule_table = {
++ .default_rule = {
++ .common = {
++ .refcnt = ATOMIC_INIT(2),
++ .pref = 0x7FFF,
++ .table = RT_TABLE_DEFAULT,
++ .action = FR_ACT_TO_TBL,
++ },
++ },
++ .main_rule = {
++ .common = {
++ .refcnt = ATOMIC_INIT(2),
++ .pref = 0x7FFE,
++ .table = RT_TABLE_MAIN,
++ .action = FR_ACT_TO_TBL,
++ },
++ },
++ .local_rule = {
++ .common = {
++ .refcnt = ATOMIC_INIT(2),
++ .table = RT_TABLE_LOCAL,
++ .action = FR_ACT_TO_TBL,
++ .flags = FIB_RULE_PERMANENT,
++ },
++ },
++ .fib4_rules_ops = {
+ .family = AF_INET,
+ .rule_size = sizeof(struct fib4_rule),
+ .addr_size = sizeof(u32),
+@@ -317,15 +323,34 @@
+ .flush_cache = fib4_rule_flush_cache,
+ .nlgroup = RTNLGRP_IPV4_RULE,
+ .policy = fib4_rule_policy,
+- .rules_list = &fib4_rules,
++ .rules_list = &fib4_rule_table.fib4_rules,
+ .owner = THIS_MODULE,
++ },
+ };
+
+-void __init fib4_rules_init(void)
++
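++/* Clone the static rule table for this namespace and register
++ * its ops; on allocation or registration failure net->fib4_table
++ * is left unset.
++ */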
++void fib4_rules_init(struct net *net)
+ {
+- list_add_tail(&local_rule.common.list, &fib4_rules);
+- list_add_tail(&main_rule.common.list, &fib4_rules);
+- list_add_tail(&default_rule.common.list, &fib4_rules);
++ struct fib4_rule_table *table;
++ table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL);
++ if (!table)
++ return;
++ INIT_LIST_HEAD(&table->fib4_rules);
++ list_add_tail(&table->local_rule.common.list, &table->fib4_rules);
++ list_add_tail(&table->main_rule.common.list, &table->fib4_rules);
++ list_add_tail(&table->default_rule.common.list, &table->fib4_rules);
++ table->fib4_rules_ops.rules_list = &table->fib4_rules;
++ if (fib_rules_register(net, &table->fib4_rules_ops)) {
++ kfree(table);
++ return;
++ }
++ net->fib4_table = table;
++}
+
+- fib_rules_register(&fib4_rules_ops);
++void fib4_rules_exit(struct net *net)
++{
++ struct fib4_rule_table *table = net->fib4_table;
++ if (table)
++ fib_rules_unregister(net, &table->fib4_rules_ops);
++ kfree(table);
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_semantics.c linux-2.6.22-591/net/ipv4/fib_semantics.c
+--- linux-2.6.22-570/net/ipv4/fib_semantics.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/fib_semantics.c 2007-12-21 15:36:15.000000000 -0500
+@@ -42,7 +42,6 @@
+ #include <net/tcp.h>
+ #include <net/sock.h>
+ #include <net/ip_fib.h>
+-#include <net/ip_mp_alg.h>
+ #include <net/netlink.h>
+ #include <net/nexthop.h>
+
+@@ -51,14 +50,9 @@
+ #define FSprintk(a...)
+
+ static DEFINE_SPINLOCK(fib_info_lock);
+-static struct hlist_head *fib_info_hash;
+-static struct hlist_head *fib_info_laddrhash;
+-static unsigned int fib_hash_size;
+-static unsigned int fib_info_cnt;
+
+ #define DEVINDEX_HASHBITS 8
+ #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
+-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+@@ -154,7 +148,8 @@
+ dev_put(nh->nh_dev);
+ nh->nh_dev = NULL;
+ } endfor_nexthops(fi);
+- fib_info_cnt--;
++ fi->fib_net->fib_info_cnt--;
++ release_net(fi->fib_net);
+ kfree(fi);
+ }
+
+@@ -197,9 +192,9 @@
+ return 0;
+ }
+
+-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
++static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi)
+ {
+- unsigned int mask = (fib_hash_size - 1);
++ unsigned int mask = net->fib_info_hash_size - 1;
+ unsigned int val = fi->fib_nhs;
+
+ val ^= fi->fib_protocol;
+@@ -209,15 +204,15 @@
+ return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ }
+
+-static struct fib_info *fib_find_info(const struct fib_info *nfi)
++static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi)
+ {
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct fib_info *fi;
+ unsigned int hash;
+
+- hash = fib_info_hashfn(nfi);
+- head = &fib_info_hash[hash];
++ hash = fib_info_hashfn(net, nfi);
++ head = &net->fib_info_hash[hash];
+
+ hlist_for_each_entry(fi, node, head, fib_hash) {
+ if (fi->fib_nhs != nfi->fib_nhs)
+@@ -250,6 +245,7 @@
+
+ int ip_fib_check_default(__be32 gw, struct net_device *dev)
+ {
++ struct net *net = dev->nd_net;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct fib_nh *nh;
+@@ -258,7 +254,7 @@
+ spin_lock(&fib_info_lock);
+
+ hash = fib_devindex_hashfn(dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = &net->fib_info_devhash[hash];
+ hlist_for_each_entry(nh, node, head, nh_hash) {
+ if (nh->nh_dev == dev &&
+ nh->nh_gw == gw &&
+@@ -321,11 +317,11 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
++ err = rtnl_notify(skb, info->net, info->pid, RTNLGRP_IPV4_ROUTE,
+ info->nlh, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
++ rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err);
+ }
+
+ /* Return the first fib alias matching TOS with
+@@ -518,6 +514,7 @@
+ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+ struct fib_nh *nh)
+ {
++ struct net *net = cfg->fc_nlinfo.net;
+ int err;
+
+ if (nh->nh_gw) {
+@@ -532,9 +529,9 @@
+
+ if (cfg->fc_scope >= RT_SCOPE_LINK)
+ return -EINVAL;
+- if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
++ if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
+ return -EINVAL;
+- if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
++ if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
+ return -ENODEV;
+ if (!(dev->flags&IFF_UP))
+ return -ENETDOWN;
+@@ -545,6 +542,7 @@
+ }
+ {
+ struct flowi fl = {
++ .fl_net = net,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = nh->nh_gw,
+@@ -581,7 +579,7 @@
+ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ return -EINVAL;
+
+- in_dev = inetdev_by_index(nh->nh_oif);
++ in_dev = inetdev_by_index(net, nh->nh_oif);
+ if (in_dev == NULL)
+ return -ENODEV;
+ if (!(in_dev->dev->flags&IFF_UP)) {
+@@ -596,9 +594,9 @@
+ return 0;
+ }
+
+-static inline unsigned int fib_laddr_hashfn(__be32 val)
++static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val)
+ {
+- unsigned int mask = (fib_hash_size - 1);
++ unsigned int mask = net->fib_info_hash_size - 1;
+
+ return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
+ }
+@@ -623,21 +621,22 @@
+ free_pages((unsigned long) hash, get_order(bytes));
+ }
+
+-static void fib_hash_move(struct hlist_head *new_info_hash,
++static void fib_hash_move(struct net *net,
++ struct hlist_head *new_info_hash,
+ struct hlist_head *new_laddrhash,
+ unsigned int new_size)
+ {
+ struct hlist_head *old_info_hash, *old_laddrhash;
+- unsigned int old_size = fib_hash_size;
++ unsigned int old_size = net->fib_info_hash_size;
+ unsigned int i, bytes;
+
+ spin_lock_bh(&fib_info_lock);
+- old_info_hash = fib_info_hash;
+- old_laddrhash = fib_info_laddrhash;
+- fib_hash_size = new_size;
++ old_info_hash = net->fib_info_hash;
++ old_laddrhash = net->fib_info_laddrhash;
++ net->fib_info_hash_size = new_size;
+
+ for (i = 0; i < old_size; i++) {
+- struct hlist_head *head = &fib_info_hash[i];
++ struct hlist_head *head = &net->fib_info_hash[i];
+ struct hlist_node *node, *n;
+ struct fib_info *fi;
+
+@@ -647,15 +646,15 @@
+
+ hlist_del(&fi->fib_hash);
+
+- new_hash = fib_info_hashfn(fi);
++ new_hash = fib_info_hashfn(net, fi);
+ dest = &new_info_hash[new_hash];
+ hlist_add_head(&fi->fib_hash, dest);
+ }
+ }
+- fib_info_hash = new_info_hash;
++ net->fib_info_hash = new_info_hash;
+
+ for (i = 0; i < old_size; i++) {
+- struct hlist_head *lhead = &fib_info_laddrhash[i];
++ struct hlist_head *lhead = &net->fib_info_laddrhash[i];
+ struct hlist_node *node, *n;
+ struct fib_info *fi;
+
+@@ -665,12 +664,12 @@
+
+ hlist_del(&fi->fib_lhash);
+
+- new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
++ new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
+ ldest = &new_laddrhash[new_hash];
+ hlist_add_head(&fi->fib_lhash, ldest);
+ }
+ }
+- fib_info_laddrhash = new_laddrhash;
++ net->fib_info_laddrhash = new_laddrhash;
+
+ spin_unlock_bh(&fib_info_lock);
+
+@@ -681,6 +680,7 @@
+
+ struct fib_info *fib_create_info(struct fib_config *cfg)
+ {
++ struct net *net = cfg->fc_nlinfo.net;
+ int err;
+ struct fib_info *fi = NULL;
+ struct fib_info *ofi;
+@@ -697,17 +697,10 @@
+ goto err_inval;
+ }
+ #endif
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- if (cfg->fc_mp_alg) {
+- if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+- cfg->fc_mp_alg > IP_MP_ALG_MAX)
+- goto err_inval;
+- }
+-#endif
+
+ err = -ENOBUFS;
+- if (fib_info_cnt >= fib_hash_size) {
+- unsigned int new_size = fib_hash_size << 1;
++ if (net->fib_info_cnt >= net->fib_info_hash_size) {
++ unsigned int new_size = net->fib_info_hash_size << 1;
+ struct hlist_head *new_info_hash;
+ struct hlist_head *new_laddrhash;
+ unsigned int bytes;
+@@ -724,18 +717,19 @@
+ memset(new_info_hash, 0, bytes);
+ memset(new_laddrhash, 0, bytes);
+
+- fib_hash_move(new_info_hash, new_laddrhash, new_size);
++ fib_hash_move(net, new_info_hash, new_laddrhash, new_size);
+ }
+
+- if (!fib_hash_size)
++ if (!net->fib_info_hash_size)
+ goto failure;
+ }
+
+ fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ if (fi == NULL)
+ goto failure;
+- fib_info_cnt++;
++ net->fib_info_cnt++;
+
++ fi->fib_net = hold_net(net);
+ fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_flags = cfg->fc_flags;
+ fi->fib_priority = cfg->fc_priority;
+@@ -791,10 +785,6 @@
+ #endif
+ }
+
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- fi->fib_mp_alg = cfg->fc_mp_alg;
+-#endif
+-
+ if (fib_props[cfg->fc_type].error) {
+ if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+ goto err_inval;
+@@ -811,7 +801,7 @@
+ if (nhs != 1 || nh->nh_gw)
+ goto err_inval;
+ nh->nh_scope = RT_SCOPE_NOWHERE;
+- nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
++ nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
+ err = -ENODEV;
+ if (nh->nh_dev == NULL)
+ goto failure;
+@@ -825,12 +815,12 @@
+ if (fi->fib_prefsrc) {
+ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ fi->fib_prefsrc != cfg->fc_dst)
+- if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
++ if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
+ goto err_inval;
+ }
+
+ link_it:
+- if ((ofi = fib_find_info(fi)) != NULL) {
++ if ((ofi = fib_find_info(net, fi)) != NULL) {
+ fi->fib_dead = 1;
+ free_fib_info(fi);
+ ofi->fib_treeref++;
+@@ -841,11 +831,13 @@
+ atomic_inc(&fi->fib_clntref);
+ spin_lock_bh(&fib_info_lock);
+ hlist_add_head(&fi->fib_hash,
+- &fib_info_hash[fib_info_hashfn(fi)]);
++ &net->fib_info_hash[fib_info_hashfn(net, fi)]);
+ if (fi->fib_prefsrc) {
+ struct hlist_head *head;
++ unsigned int hash;
+
+- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
++ hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
++ head = &net->fib_info_laddrhash[hash];
+ hlist_add_head(&fi->fib_lhash, head);
+ }
+ change_nexthops(fi) {
+@@ -855,7 +847,7 @@
+ if (!nh->nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = &net->fib_info_devhash[hash];
+ hlist_add_head(&nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ spin_unlock_bh(&fib_info_lock);
+@@ -940,10 +932,6 @@
+ res->type = fa->fa_type;
+ res->scope = fa->fa_scope;
+ res->fi = fa->fa_info;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- res->netmask = mask;
+- res->network = zone & inet_make_mask(prefixlen);
+-#endif
+ atomic_inc(&res->fi->fib_clntref);
+ return 0;
+ }
+@@ -1046,7 +1034,7 @@
+ - device went down -> we must shutdown all nexthops going via it.
+ */
+
+-int fib_sync_down(__be32 local, struct net_device *dev, int force)
++int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force)
+ {
+ int ret = 0;
+ int scope = RT_SCOPE_NOWHERE;
+@@ -1054,9 +1042,9 @@
+ if (force)
+ scope = -1;
+
+- if (local && fib_info_laddrhash) {
+- unsigned int hash = fib_laddr_hashfn(local);
+- struct hlist_head *head = &fib_info_laddrhash[hash];
++ if (local && net->fib_info_laddrhash) {
++ unsigned int hash = fib_laddr_hashfn(net, local);
++ struct hlist_head *head = &net->fib_info_laddrhash[hash];
+ struct hlist_node *node;
+ struct fib_info *fi;
+
+@@ -1071,7 +1059,7 @@
+ if (dev) {
+ struct fib_info *prev_fi = NULL;
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+- struct hlist_head *head = &fib_info_devhash[hash];
++ struct hlist_head *head = &net->fib_info_devhash[hash];
+ struct hlist_node *node;
+ struct fib_nh *nh;
+
+@@ -1124,6 +1112,7 @@
+
+ int fib_sync_up(struct net_device *dev)
+ {
++ struct net *net = dev->nd_net;
+ struct fib_info *prev_fi;
+ unsigned int hash;
+ struct hlist_head *head;
+@@ -1136,7 +1125,7 @@
+
+ prev_fi = NULL;
+ hash = fib_devindex_hashfn(dev->ifindex);
+- head = &fib_info_devhash[hash];
++ head = &net->fib_info_devhash[hash];
+ ret = 0;
+
+ hlist_for_each_entry(nh, node, head, nh_hash) {
+@@ -1226,3 +1215,17 @@
+ spin_unlock_bh(&fib_multipath_lock);
+ }
+ #endif
++
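++/* Per-namespace hash of nexthops keyed by device index, used by
++ * ip_fib_check_default(), fib_sync_down() and fib_sync_up().
++ */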
++int fib_info_init(struct net *net)
++{
++ net->fib_info_devhash = kzalloc(
++ sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL);
++ if (!net->fib_info_devhash)
++ return -ENOMEM;
++ return 0;
++}
++
++void fib_info_exit(struct net *net)
++{
++ kfree(net->fib_info_devhash);
++}
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_trie.c linux-2.6.22-591/net/ipv4/fib_trie.c
+--- linux-2.6.22-570/net/ipv4/fib_trie.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/fib_trie.c 2007-12-21 15:36:15.000000000 -0500
+@@ -78,6 +78,7 @@
+ #include <net/route.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/ip_fib.h>
+ #include "fib_lookup.h"
+
+@@ -172,7 +173,6 @@
+ static void tnode_free(struct tnode *tn);
+
+ static struct kmem_cache *fn_alias_kmem __read_mostly;
+-static struct trie *trie_local = NULL, *trie_main = NULL;
+
+
+ /* rcu_read_lock needs to be hold by caller from readside */
+@@ -290,11 +290,10 @@
+ WARN_ON(tn && tn->pos+tn->bits > 32);
+ }
+
+-static int halve_threshold = 25;
+-static int inflate_threshold = 50;
+-static int halve_threshold_root = 8;
+-static int inflate_threshold_root = 15;
+-
++static const int halve_threshold = 25;
++static const int inflate_threshold = 50;
++static const int halve_threshold_root = 15;
++static const int inflate_threshold_root = 25;
+
+ static void __alias_free_mem(struct rcu_head *head)
+ {
+@@ -1771,11 +1770,10 @@
+ return found;
+ }
+
+-static int trie_last_dflt = -1;
+-
+ static void
+ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
++ struct net *net = flp->fl_net;
+ struct trie *t = (struct trie *) tb->tb_data;
+ int order, last_idx;
+ struct fib_info *fi = NULL;
+@@ -1819,28 +1817,28 @@
+ if (next_fi != res->fi)
+ break;
+ } else if (!fib_detect_death(fi, order, &last_resort,
+- &last_idx, &trie_last_dflt)) {
++ &last_idx, &net->trie_last_dflt)) {
+ if (res->fi)
+ fib_info_put(res->fi);
+ res->fi = fi;
+ atomic_inc(&fi->fib_clntref);
+- trie_last_dflt = order;
++ net->trie_last_dflt = order;
+ goto out;
+ }
+ fi = next_fi;
+ order++;
+ }
+ if (order <= 0 || fi == NULL) {
+- trie_last_dflt = -1;
++ net->trie_last_dflt = -1;
+ goto out;
+ }
+
+- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
++ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) {
+ if (res->fi)
+ fib_info_put(res->fi);
+ res->fi = fi;
+ atomic_inc(&fi->fib_clntref);
+- trie_last_dflt = order;
++ net->trie_last_dflt = order;
+ goto out;
+ }
+ if (last_idx >= 0) {
+@@ -1850,7 +1848,7 @@
+ if (last_resort)
+ atomic_inc(&last_resort->fib_clntref);
+ }
+- trie_last_dflt = last_idx;
++ net->trie_last_dflt = last_idx;
+ out:;
+ rcu_read_unlock();
+ }
+@@ -1957,11 +1955,15 @@
+
+ /* Fix more generic FIB names for init later */
+
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
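++/* Trie counterpart of the table teardown: flush all entries,
++ * then free the table.
++ */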
++void fib_hash_exit(struct fib_table *tb)
++{
++ if (!tb)
++ return;
++ fn_trie_flush(tb);
++ kfree(tb);
++}
++
+ struct fib_table * fib_hash_init(u32 id)
+-#else
+-struct fib_table * __init fib_hash_init(u32 id)
+-#endif
+ {
+ struct fib_table *tb;
+ struct trie *t;
+@@ -1991,11 +1993,6 @@
+ trie_init(t);
+
+ if (id == RT_TABLE_LOCAL)
+- trie_local = t;
+- else if (id == RT_TABLE_MAIN)
+- trie_main = t;
+-
+- if (id == RT_TABLE_LOCAL)
+ printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
+
+ return tb;
+@@ -2004,6 +2001,8 @@
+ #ifdef CONFIG_PROC_FS
+ /* Depth first Trie walk iterator */
+ struct fib_trie_iter {
++ struct net *net;
++ struct trie *trie_local, *trie_main;
+ struct tnode *tnode;
+ struct trie *trie;
+ unsigned index;
+@@ -2170,7 +2169,21 @@
+
+ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
+ {
++ struct net *net = seq->private;
++ struct trie *trie_local, *trie_main;
+ struct trie_stat *stat;
++ struct fib_table *tb;
++
++ trie_local = NULL;
++ tb = fib_get_table(net, RT_TABLE_LOCAL);
++ if (tb)
++ trie_local = (struct trie *) tb->tb_data;
++
++ trie_main = NULL;
++ tb = fib_get_table(net, RT_TABLE_MAIN);
++ if (tb)
++ trie_main = (struct trie *) tb->tb_data;
+
+ stat = kmalloc(sizeof(*stat), GFP_KERNEL);
+ if (!stat)
+@@ -2197,7 +2210,15 @@
+
+ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
+ {
+- return single_open(file, fib_triestat_seq_show, NULL);
++ return single_open(file, fib_triestat_seq_show,
++ get_net(PROC_NET(inode)));
++}
++
++static int fib_triestat_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ put_net(seq->private);
++ return single_release(inode, file);
+ }
+
+ static const struct file_operations fib_triestat_fops = {
+@@ -2205,7 +2226,7 @@
+ .open = fib_triestat_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = single_release,
++ .release = fib_triestat_seq_release,
+ };
+
+ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
+@@ -2214,13 +2235,13 @@
+ loff_t idx = 0;
+ struct node *n;
+
+- for (n = fib_trie_get_first(iter, trie_local);
++ for (n = fib_trie_get_first(iter, iter->trie_local);
+ n; ++idx, n = fib_trie_get_next(iter)) {
+ if (pos == idx)
+ return n;
+ }
+
+- for (n = fib_trie_get_first(iter, trie_main);
++ for (n = fib_trie_get_first(iter, iter->trie_main);
+ n; ++idx, n = fib_trie_get_next(iter)) {
+ if (pos == idx)
+ return n;
+@@ -2230,10 +2251,23 @@
+
+ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++ struct fib_trie_iter *iter = seq->private;
++ struct fib_table *tb;
++
++ if (!iter->trie_local) {
++ tb = fib_get_table(iter->net, RT_TABLE_LOCAL);
++ if (tb)
++ iter->trie_local = (struct trie *) tb->tb_data;
++ }
++ if (!iter->trie_main) {
++ tb = fib_get_table(iter->net, RT_TABLE_MAIN);
++ if (tb)
++ iter->trie_main = (struct trie *) tb->tb_data;
++ }
+ rcu_read_lock();
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+- return fib_trie_get_idx(seq->private, *pos - 1);
++ return fib_trie_get_idx(iter, *pos - 1);
+ }
+
+ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+@@ -2251,8 +2285,8 @@
+ return v;
+
+ /* continue scan in next trie */
+- if (iter->trie == trie_local)
+- return fib_trie_get_first(iter, trie_main);
++ if (iter->trie == iter->trie_local)
++ return fib_trie_get_first(iter, iter->trie_main);
+
+ return NULL;
+ }
+@@ -2318,7 +2352,7 @@
+ return 0;
+
+ if (!NODE_PARENT(n)) {
+- if (iter->trie == trie_local)
++ if (iter->trie == iter->trie_local)
+ seq_puts(seq, "<local>:\n");
+ else
+ seq_puts(seq, "<main>:\n");
+@@ -2384,6 +2418,7 @@
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -2391,12 +2426,20 @@
+ goto out;
+ }
+
++static int fib_trie_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct fib_trie_iter *iter = seq->private;
++ put_net(iter->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations fib_trie_fops = {
+ .owner = THIS_MODULE,
+ .open = fib_trie_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = fib_trie_seq_release,
+ };
+
+ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+@@ -2434,7 +2477,7 @@
+ return 0;
+ }
+
+- if (iter->trie == trie_local)
++ if (iter->trie == iter->trie_local)
+ return 0;
+ if (IS_TNODE(l))
+ return 0;
+@@ -2505,6 +2548,7 @@
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -2517,35 +2561,37 @@
+ .open = fib_route_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = fib_trie_seq_release,
+ };
+
+-int __init fib_proc_init(void)
++int fib_proc_init(struct net *net)
+ {
+- if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
++ net->trie_last_dflt = -1;
++
++ if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
+ goto out1;
+
+- if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
++ if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops))
+ goto out2;
+
+- if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
++ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
+ goto out3;
+
+ return 0;
+
+ out3:
+- proc_net_remove("fib_triestat");
++ proc_net_remove(net, "fib_triestat");
+ out2:
+- proc_net_remove("fib_trie");
++ proc_net_remove(net, "fib_trie");
+ out1:
+ return -ENOMEM;
+ }
+
+-void __init fib_proc_exit(void)
++void fib_proc_exit(struct net *net)
+ {
+- proc_net_remove("fib_trie");
+- proc_net_remove("fib_triestat");
+- proc_net_remove("route");
++ proc_net_remove(net, "fib_trie");
++ proc_net_remove(net, "fib_triestat");
++ proc_net_remove(net, "route");
+ }
+
+ #endif /* CONFIG_PROC_FS */
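
The fib_trie hunks above follow a shape that recurs throughout this patch: per-namespace /proc files registered from a pernet init hook and removed from the matching exit hook, using the proc_net_fops_create()/proc_net_remove() signatures introduced here. A minimal sketch of that shape; the "example" file name and its fops are placeholders, not part of the patch:

#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <net/net_namespace.h>

static const struct file_operations example_fops = {
	.owner = THIS_MODULE,	/* placeholder; a real file wires up seq_file handlers */
};

static int example_net_init(struct net *net)
{
	/* One "example" entry per namespace instead of a single global file. */
	if (!proc_net_fops_create(net, "example", S_IRUGO, &example_fops))
		return -ENOMEM;
	return 0;
}

static void example_net_exit(struct net *net)
{
	proc_net_remove(net, "example");
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};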
+diff -Nurb linux-2.6.22-570/net/ipv4/icmp.c linux-2.6.22-591/net/ipv4/icmp.c
+--- linux-2.6.22-570/net/ipv4/icmp.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/icmp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -229,14 +229,13 @@
+ *
+ * On SMP we have one ICMP socket per-cpu.
+ */
+-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
+-#define icmp_socket __get_cpu_var(__icmp_socket)
++#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id()))
+
+-static __inline__ int icmp_xmit_lock(void)
++static __inline__ int icmp_xmit_lock(struct net *net)
+ {
+ local_bh_disable();
+
+- if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
++ if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) {
+ /* This can happen if the output path signals a
+ * dst_link_failure() for an outgoing ICMP packet.
+ */
+@@ -246,9 +245,9 @@
+ return 0;
+ }
+
+-static void icmp_xmit_unlock(void)
++static void icmp_xmit_unlock(struct net *net)
+ {
+- spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
++ spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock);
+ }
+
+ /*
+@@ -347,19 +346,20 @@
+ static void icmp_push_reply(struct icmp_bxm *icmp_param,
+ struct ipcm_cookie *ipc, struct rtable *rt)
+ {
++ struct net *net = icmp_param->skb->dev->nd_net;
+ struct sk_buff *skb;
+
+- if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
++ if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param,
+ icmp_param->data_len+icmp_param->head_len,
+ icmp_param->head_len,
+ ipc, rt, MSG_DONTWAIT) < 0)
+- ip_flush_pending_frames(icmp_socket->sk);
+- else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
++ ip_flush_pending_frames(icmp_socket(net)->sk);
++ else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) {
+ struct icmphdr *icmph = icmp_hdr(skb);
+ __wsum csum = 0;
+ struct sk_buff *skb1;
+
+- skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
++ skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) {
+ csum = csum_add(csum, skb1->csum);
+ }
+ csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
+@@ -367,7 +367,7 @@
+ icmp_param->head_len, csum);
+ icmph->checksum = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+- ip_push_pending_frames(icmp_socket->sk);
++ ip_push_pending_frames(icmp_socket(net)->sk);
+ }
+ }
+
+@@ -377,7 +377,8 @@
+
+ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
+ {
+- struct sock *sk = icmp_socket->sk;
++ struct net *net = icmp_param->skb->dev->nd_net;
++ struct sock *sk = icmp_socket(net)->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipcm_cookie ipc;
+ struct rtable *rt = (struct rtable *)skb->dst;
+@@ -386,7 +387,7 @@
+ if (ip_options_echo(&icmp_param->replyopts, skb))
+ return;
+
+- if (icmp_xmit_lock())
++ if (icmp_xmit_lock(net))
+ return;
+
+ icmp_param->data.icmph.checksum = 0;
+@@ -401,7 +402,8 @@
+ daddr = icmp_param->replyopts.faddr;
+ }
+ {
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = net,
++ .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = rt->rt_spec_dst,
+ .tos = RT_TOS(ip_hdr(skb)->tos) } },
+@@ -415,7 +417,7 @@
+ icmp_push_reply(icmp_param, &ipc, rt);
+ ip_rt_put(rt);
+ out_unlock:
+- icmp_xmit_unlock();
++ icmp_xmit_unlock(net);
+ }
+
+
+@@ -436,6 +438,7 @@
+ int room;
+ struct icmp_bxm icmp_param;
+ struct rtable *rt = (struct rtable *)skb_in->dst;
++ struct net *net;
+ struct ipcm_cookie ipc;
+ __be32 saddr;
+ u8 tos;
+@@ -443,6 +446,7 @@
+ if (!rt)
+ goto out;
+
++ net = rt->fl.fl_net;
+ /*
+ * Find the original header. It is expected to be valid, of course.
+ * Check this, icmp_send is called from the most obscure devices
+@@ -505,7 +509,7 @@
+ }
+ }
+
+- if (icmp_xmit_lock())
++ if (icmp_xmit_lock(net))
+ return;
+
+ /*
+@@ -517,7 +521,7 @@
+ struct net_device *dev = NULL;
+
+ if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
+- dev = dev_get_by_index(rt->fl.iif);
++ dev = dev_get_by_index(&init_net, rt->fl.iif);
+
+ if (dev) {
+ saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+@@ -545,12 +549,13 @@
+ icmp_param.skb = skb_in;
+ icmp_param.offset = skb_network_offset(skb_in);
+ icmp_out_count(icmp_param.data.icmph.type);
+- inet_sk(icmp_socket->sk)->tos = tos;
++ inet_sk(icmp_socket(net)->sk)->tos = tos;
+ ipc.addr = iph->saddr;
+ ipc.opt = &icmp_param.replyopts;
+
+ {
+ struct flowi fl = {
++ .fl_net = net,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = icmp_param.replyopts.srr ?
+@@ -593,7 +598,7 @@
+ ende:
+ ip_rt_put(rt);
+ out_unlock:
+- icmp_xmit_unlock();
++ icmp_xmit_unlock(net);
+ out:;
+ }
+
+@@ -604,6 +609,7 @@
+
+ static void icmp_unreach(struct sk_buff *skb)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct iphdr *iph;
+ struct icmphdr *icmph;
+ int hash, protocol;
+@@ -634,7 +640,7 @@
+ case ICMP_PORT_UNREACH:
+ break;
+ case ICMP_FRAG_NEEDED:
+- if (ipv4_config.no_pmtu_disc) {
++ if (net->sysctl_ipv4_no_pmtu_disc) {
+ LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
+ "fragmentation needed "
+ "and DF set.\n",
+@@ -678,7 +684,7 @@
+ */
+
+ if (!sysctl_icmp_ignore_bogus_error_responses &&
+- inet_addr_type(iph->daddr) == RTN_BROADCAST) {
++ inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
+ "type %u, code %u "
+@@ -707,7 +713,7 @@
+ hash = protocol & (MAX_INET_PROTOS - 1);
+ read_lock(&raw_v4_lock);
+ if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
+- while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
++ while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr,
+ iph->saddr,
+ skb->dev->ifindex, skb->skb_tag)) != NULL) {
+ raw_err(raw_sk, skb, info);
+@@ -1179,29 +1185,54 @@
+ },
+ };
+
+-void __init icmp_init(struct net_proto_family *ops)
++static void icmp_net_exit(struct net *net)
+ {
+- struct inet_sock *inet;
++ struct socket **sock;
+ int i;
+
+ for_each_possible_cpu(i) {
++ sock = percpu_ptr(net->__icmp_socket, i);
++ if (!*sock)
++ continue;
++ /* At the last minute lie and say this is a socket for
++ * the initial network namespace. So the socket will
++ * be safe to free.
++ */
++ (*sock)->sk->sk_net = get_net(&init_net);
++ sock_release(*sock);
++ *sock = NULL;
++ }
++ percpu_free(net->__icmp_socket);
++}
++
++static int icmp_net_init(struct net *net)
++{
++ struct socket **sock;
++ struct inet_sock *inet;
+ int err;
++ int i;
++
++ net->__icmp_socket = alloc_percpu(struct socket *);
++ if (!net->__icmp_socket)
++ return -ENOMEM;
++
++ for_each_possible_cpu(i) {
+
+- err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
+- &per_cpu(__icmp_socket, i));
++ sock = percpu_ptr(net->__icmp_socket, i);
+
++ err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock);
+ if (err < 0)
+- panic("Failed to create the ICMP control socket.\n");
++ goto fail;
+
+- per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
++ (*sock)->sk->sk_allocation = GFP_ATOMIC;
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff struct overhead.
+ */
+- per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
++ (*sock)->sk->sk_sndbuf =
+ (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+
+- inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
++ inet = inet_sk((*sock)->sk);
+ inet->uc_ttl = -1;
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+
+@@ -1209,8 +1240,27 @@
+ * see it, we do not wish this socket to see incoming
+ * packets.
+ */
+- per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
++ (*sock)->sk->sk_prot->unhash((*sock)->sk);
++
++ /* Don't hold an extra reference on the namespace */
++ put_net((*sock)->sk->sk_net);
+ }
++ return 0;
++fail:
++ icmp_net_exit(net);
++ return err;
++
++}
++
++static struct pernet_operations icmp_net_ops = {
++ .init = icmp_net_init,
++ .exit = icmp_net_exit,
++};
++
++void __init icmp_init(struct net_proto_family *ops)
++{
++ if (register_pernet_subsys(&icmp_net_ops))
++ panic("Failed to create the ICMP control socket.\n");
+ }
+
+ EXPORT_SYMBOL(icmp_err_convert);
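
The icmp_net_init()/icmp_net_exit() pair above replaces a file-scope DEFINE_PER_CPU socket with a per-namespace percpu pointer. A condensed sketch of the allocation side, assuming the 2.6.22-era alloc_percpu()/percpu_ptr()/sock_create_kern() calls this hunk uses; the error unwinding and init_net reference juggling shown above are elided, and example_socket()/example_net_init() are placeholder names:

#include <linux/percpu.h>
#include <linux/smp.h>
#include <net/sock.h>
#include <net/net_namespace.h>

/* Current CPU's control socket for this namespace, like icmp_socket(NET) above. */
#define example_socket(net) \
	(*per_cpu_ptr((net)->__icmp_socket, smp_processor_id()))

static int example_net_init(struct net *net)
{
	struct socket **sock;
	int i, err;

	net->__icmp_socket = alloc_percpu(struct socket *);
	if (!net->__icmp_socket)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		sock = percpu_ptr(net->__icmp_socket, i);
		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock);
		if (err < 0)
			return err;	/* the real hunk unwinds via icmp_net_exit() */
		(*sock)->sk->sk_allocation = GFP_ATOMIC;
	}
	return 0;
}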
+diff -Nurb linux-2.6.22-570/net/ipv4/igmp.c linux-2.6.22-591/net/ipv4/igmp.c
+--- linux-2.6.22-570/net/ipv4/igmp.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/igmp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -97,6 +97,7 @@
+ #include <net/route.h>
+ #include <net/sock.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ #include <linux/netfilter_ipv4.h>
+ #ifdef CONFIG_IP_MROUTE
+ #include <linux/mroute.h>
+@@ -129,12 +130,12 @@
+ */
+
+ #define IGMP_V1_SEEN(in_dev) \
+- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
+ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
+ ((in_dev)->mr_v1_seen && \
+ time_before(jiffies, (in_dev)->mr_v1_seen)))
+ #define IGMP_V2_SEEN(in_dev) \
+- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
+ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
+ ((in_dev)->mr_v2_seen && \
+ time_before(jiffies, (in_dev)->mr_v2_seen)))
+@@ -296,7 +297,8 @@
+ return NULL;
+
+ {
+- struct flowi fl = { .oif = dev->ifindex,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = dev->ifindex,
+ .nl_u = { .ip4_u = {
+ .daddr = IGMPV3_ALL_MCR } },
+ .proto = IPPROTO_IGMP };
+@@ -646,7 +648,8 @@
+ dst = group;
+
+ {
+- struct flowi fl = { .oif = dev->ifindex,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = dev->ifindex,
+ .nl_u = { .ip4_u = { .daddr = dst } },
+ .proto = IPPROTO_IGMP };
+ if (ip_route_output_key(&rt, &fl))
+@@ -929,6 +932,11 @@
+ struct in_device *in_dev = in_dev_get(skb->dev);
+ int len = skb->len;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ if (in_dev==NULL) {
+ kfree_skb(skb);
+ return 0;
+@@ -1393,20 +1401,22 @@
+
+ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+ {
+- struct flowi fl = { .nl_u = { .ip4_u =
+- { .daddr = imr->imr_multiaddr.s_addr } } };
++ struct flowi fl = {
++ .fl_net = &init_net,
++ .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } }
++ };
+ struct rtable *rt;
+ struct net_device *dev = NULL;
+ struct in_device *idev = NULL;
+
+ if (imr->imr_ifindex) {
+- idev = inetdev_by_index(imr->imr_ifindex);
++ idev = inetdev_by_index(&init_net, imr->imr_ifindex);
+ if (idev)
+ __in_dev_put(idev);
+ return idev;
+ }
+ if (imr->imr_address.s_addr) {
+- dev = ip_dev_find(imr->imr_address.s_addr);
++ dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
+ if (!dev)
+ return NULL;
+ dev_put(dev);
+@@ -2234,7 +2244,7 @@
+ struct in_device *in_dev;
+ inet->mc_list = iml->next;
+
+- in_dev = inetdev_by_index(iml->multi.imr_ifindex);
++ in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
+ (void) ip_mc_leave_src(sk, iml, in_dev);
+ if (in_dev != NULL) {
+ ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
+@@ -2291,7 +2301,7 @@
+ struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+
+ state->in_dev = NULL;
+- for_each_netdev(state->dev) {
++ for_each_netdev(&init_net, state->dev) {
+ struct in_device *in_dev;
+ in_dev = in_dev_get(state->dev);
+ if (!in_dev)
+@@ -2453,7 +2463,7 @@
+
+ state->idev = NULL;
+ state->im = NULL;
+- for_each_netdev(state->dev) {
++ for_each_netdev(&init_net, state->dev) {
+ struct in_device *idev;
+ idev = in_dev_get(state->dev);
+ if (unlikely(idev == NULL))
+@@ -2613,8 +2623,8 @@
+
+ int __init igmp_mc_proc_init(void)
+ {
+- proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
+- proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
++ proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
++ proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+ return 0;
+ }
+ #endif
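
A pattern repeated across the igmp, ip_gre, ipip, and ipmr hunks: every struct flowi now names its namespace through the fl_net member this patch adds, so route lookups resolve against the correct per-net tables. A sketch of a lookup pinned to init_net, matching the flowi initializers above; example_route(), dev, and daddr stand in for caller-supplied values:

#include <net/route.h>
#include <net/net_namespace.h>

static int example_route(struct net_device *dev, __be32 daddr)
{
	struct rtable *rt;
	struct flowi fl = {
		.fl_net = &init_net,		/* namespace for the lookup */
		.oif    = dev->ifindex,
		.nl_u   = { .ip4_u = { .daddr = daddr } },
		.proto  = IPPROTO_IGMP,
	};

	if (ip_route_output_key(&rt, &fl))
		return -EHOSTUNREACH;
	ip_rt_put(rt);
	return 0;
}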
+diff -Nurb linux-2.6.22-570/net/ipv4/inet_connection_sock.c linux-2.6.22-591/net/ipv4/inet_connection_sock.c
+--- linux-2.6.22-570/net/ipv4/inet_connection_sock.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/inet_connection_sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -32,7 +32,7 @@
+ /*
+ * This array holds the first and last local port number.
+ */
+-int sysctl_local_port_range[2] = { 32768, 61000 };
++//int sysctl_local_port_range[2] = { 32768, 61000 };
+
+ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+ {
+@@ -74,6 +74,7 @@
+
+ sk_for_each_bound(sk2, node, &tb->owners) {
+ if (sk != sk2 &&
++ (sk->sk_net == sk2->sk_net) &&
+ !inet_v6_ipv6only(sk2) &&
+ (!sk->sk_bound_dev_if ||
+ !sk2->sk_bound_dev_if ||
+@@ -98,6 +99,7 @@
+ int (*bind_conflict)(const struct sock *sk,
+ const struct inet_bind_bucket *tb))
+ {
++ struct net *net = sk->sk_net;
+ struct inet_bind_hashbucket *head;
+ struct hlist_node *node;
+ struct inet_bind_bucket *tb;
+@@ -105,16 +107,16 @@
+
+ local_bh_disable();
+ if (!snum) {
+- int low = sysctl_local_port_range[0];
+- int high = sysctl_local_port_range[1];
++ int low = net->sysctl_local_port_range[0];
++ int high = net->sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+ int rover = net_random() % (high - low) + low;
+
+ do {
+- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
++ head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)];
+ spin_lock(&head->lock);
+ inet_bind_bucket_for_each(tb, node, &head->chain)
+- if (tb->port == rover)
++ if ((tb->port == rover) && (tb->net == net))
+ goto next;
+ break;
+ next:
+@@ -138,10 +140,10 @@
+ */
+ snum = rover;
+ } else {
+- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
++ head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)];
+ spin_lock(&head->lock);
+ inet_bind_bucket_for_each(tb, node, &head->chain)
+- if (tb->port == snum)
++ if ((tb->port == snum) && (tb->net == net))
+ goto tb_found;
+ }
+ tb = NULL;
+@@ -161,7 +163,7 @@
+ }
+ tb_not_found:
+ ret = 1;
+- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
++ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL)
+ goto fail_unlock;
+ if (hlist_empty(&tb->owners)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+@@ -341,7 +343,8 @@
+ struct rtable *rt;
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct ip_options *opt = inet_rsk(req)->opt;
+- struct flowi fl = { .oif = sk->sk_bound_dev_if,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = sk->sk_bound_dev_if,
+ .nl_u = { .ip4_u =
+ { .daddr = ((opt && opt->srr) ?
+ opt->faddr :
+diff -Nurb linux-2.6.22-570/net/ipv4/inet_diag.c linux-2.6.22-591/net/ipv4/inet_diag.c
+--- linux-2.6.22-570/net/ipv4/inet_diag.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/inet_diag.c 2007-12-21 15:36:15.000000000 -0500
+@@ -227,6 +227,7 @@
+ static int inet_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ int err;
+ struct sock *sk;
+ struct inet_diag_req *req = NLMSG_DATA(nlh);
+@@ -242,7 +243,7 @@
+ /* TODO: lback */
+ sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
+ req->id.idiag_dport, req->id.idiag_src[0],
+- req->id.idiag_sport, req->id.idiag_if);
++ req->id.idiag_sport, req->id.idiag_if, net);
+ }
+ #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ else if (req->idiag_family == AF_INET6) {
+@@ -251,7 +252,7 @@
+ req->id.idiag_dport,
+ (struct in6_addr *)req->id.idiag_src,
+ req->id.idiag_sport,
+- req->id.idiag_if);
++ req->id.idiag_if, net);
+ }
+ #endif
+ else {
+@@ -906,8 +907,8 @@
+ if (!inet_diag_table)
+ goto out;
+
+- idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
+- NULL, THIS_MODULE);
++ idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
++ inet_diag_rcv, NULL, THIS_MODULE);
+ if (idiagnl == NULL)
+ goto out_free_table;
+ err = 0;
+diff -Nurb linux-2.6.22-570/net/ipv4/inet_hashtables.c linux-2.6.22-591/net/ipv4/inet_hashtables.c
+--- linux-2.6.22-570/net/ipv4/inet_hashtables.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/inet_hashtables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -29,11 +29,13 @@
+ */
+ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
+ struct inet_bind_hashbucket *head,
++ struct net *net,
+ const unsigned short snum)
+ {
+ struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+
+ if (tb != NULL) {
++ tb->net = net;
+ tb->port = snum;
+ tb->fastreuse = 0;
+ INIT_HLIST_HEAD(&tb->owners);
+@@ -66,7 +68,7 @@
+ */
+ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
+ {
+- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
++ const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size);
+ struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+ struct inet_bind_bucket *tb;
+
+@@ -127,7 +129,7 @@
+ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+ const __be32 daddr,
+ const unsigned short hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *result = NULL, *sk;
+ const struct hlist_node *node;
+@@ -149,6 +151,8 @@
+ continue;
+ score += 2;
+ }
++ if (sk->sk_net != net)
++ continue;
+ if (score == 5)
+ return sk;
+ if (score > hiscore) {
+@@ -163,22 +167,22 @@
+ /* Optimize the common listener case. */
+ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ const __be32 daddr, const unsigned short hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk = NULL;
+ const struct hlist_head *head;
+
+ read_lock(&hashinfo->lhash_lock);
+- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
++ head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
+ if (!hlist_empty(head)) {
+ const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
+ v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+- !sk->sk_bound_dev_if)
++ !sk->sk_bound_dev_if && (sk->sk_net == net))
+ goto sherry_cache;
+- sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
++ sk = inet_lookup_listener_slow(head, daddr, hnum, dif, net);
+ }
+ if (sk) {
+ sherry_cache:
+@@ -196,12 +200,13 @@
+ {
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_sock *inet = inet_sk(sk);
++ struct net *net = sk->sk_net;
+ __be32 daddr = inet->rcv_saddr;
+ __be32 saddr = inet->daddr;
+ int dif = sk->sk_bound_dev_if;
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
++ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ struct sock *sk2;
+ const struct hlist_node *node;
+@@ -214,7 +219,7 @@
+ sk_for_each(sk2, node, &head->twchain) {
+ tw = inet_twsk(sk2);
+
+- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
++ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) {
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+@@ -225,7 +230,7 @@
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net))
+ goto not_unique;
+ }
+
+@@ -271,6 +276,7 @@
+ int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+ {
++ struct net *net = sk->sk_net;
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+@@ -278,8 +284,8 @@
+ int ret;
+
+ if (!snum) {
+- int low = sysctl_local_port_range[0];
+- int high = sysctl_local_port_range[1];
++ int low = net->sysctl_local_port_range[0];
++ int high = net->sysctl_local_port_range[1];
+ int range = high - low;
+ int i;
+ int port;
+@@ -291,7 +297,7 @@
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
++ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -299,7 +305,7 @@
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+- if (tb->port == port) {
++ if ((tb->port == port) && (tb->net == net)) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+@@ -311,7 +317,7 @@
+ }
+ }
+
+- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
++ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+@@ -346,7 +352,7 @@
+ goto out;
+ }
+
+- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
++ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+ if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+diff -Nurb linux-2.6.22-570/net/ipv4/inet_timewait_sock.c linux-2.6.22-591/net/ipv4/inet_timewait_sock.c
+--- linux-2.6.22-570/net/ipv4/inet_timewait_sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/inet_timewait_sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -31,7 +31,7 @@
+ write_unlock(&ehead->lock);
+
+ /* Disassociate with bind bucket. */
+- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
++ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)];
+ spin_lock(&bhead->lock);
+ tb = tw->tw_tb;
+ __hlist_del(&tw->tw_bind_node);
+@@ -65,7 +65,7 @@
+ Note, that any socket with inet->num != 0 MUST be bound in
+ binding cache, even if it is closed.
+ */
+- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
++ bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)];
+ spin_lock(&bhead->lock);
+ tw->tw_tb = icsk->icsk_bind_hash;
+ BUG_TRAP(icsk->icsk_bind_hash);
+diff -Nurb linux-2.6.22-570/net/ipv4/inetpeer.c linux-2.6.22-591/net/ipv4/inetpeer.c
+--- linux-2.6.22-570/net/ipv4/inetpeer.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/inetpeer.c 2007-12-21 15:36:15.000000000 -0500
+@@ -81,71 +81,94 @@
+ .avl_height = 0
+ };
+ #define peer_avl_empty (&peer_fake_node)
+-static struct inet_peer *peer_root = peer_avl_empty;
+ static DEFINE_RWLOCK(peer_pool_lock);
+ #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+
+-static int peer_total;
+-/* Exported for sysctl_net_ipv4. */
+-int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
+- * aggressively at this stage */
+-int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
+-int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
+-int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+-int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
+-
+-static struct inet_peer *inet_peer_unused_head;
+-static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
+ static DEFINE_SPINLOCK(inet_peer_unused_lock);
+
+ static void peer_check_expire(unsigned long dummy);
+-static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
+
++static int inet_peers_net_init(struct net *net);
++static void inet_peers_net_exit(struct net *net);
++static struct pernet_operations inet_peers_net_ops = {
++ .init = inet_peers_net_init,
++ .exit = inet_peers_net_exit,
++};
+
+ /* Called from ip_output.c:ip_init */
+ void __init inet_initpeers(void)
+ {
++ peer_cachep = kmem_cache_create("inet_peer_cache",
++ sizeof(struct inet_peer),
++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++ NULL, NULL);
++
++ register_pernet_subsys(&inet_peers_net_ops);
++}
++
++static int inet_peers_net_init(struct net *net)
++{
+ struct sysinfo si;
+
++ net->peer_root = peer_avl_empty;
++ net->inet_peer_unused_tailp = &net->inet_peer_unused_head;
++
++ net->inet_peer_threshold = 65536 + 128; /* start to throw entries more
++ * aggressively at this stage */
++ net->inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */
++ net->inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */
++ net->inet_peer_gc_mintime = 10 * HZ;
++ net->inet_peer_gc_maxtime = 120 * HZ;
++
+ /* Use the straight interface to information about memory. */
+ si_meminfo(&si);
++
+ /* The values below were suggested by Alexey Kuznetsov
+ * <kuznet@ms2.inr.ac.ru>. I don't have any opinion about the values
+ * myself. --SAW
+ */
+ if (si.totalram <= (32768*1024)/PAGE_SIZE)
+- inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
++ net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
+ if (si.totalram <= (16384*1024)/PAGE_SIZE)
+- inet_peer_threshold >>= 1; /* about 512KB */
++ net->inet_peer_threshold >>= 1; /* about 512KB */
+ if (si.totalram <= (8192*1024)/PAGE_SIZE)
+- inet_peer_threshold >>= 2; /* about 128KB */
++ net->inet_peer_threshold >>= 2; /* about 128KB */
+
+- peer_cachep = kmem_cache_create("inet_peer_cache",
+- sizeof(struct inet_peer),
+- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+- NULL, NULL);
+
++ init_timer(&net->peer_periodic_timer);
++ net->peer_periodic_timer.function = peer_check_expire;
+ /* All the timers, started at system startup tend
+ to synchronize. Perturb it a bit.
+ */
+- peer_periodic_timer.expires = jiffies
+- + net_random() % inet_peer_gc_maxtime
+- + inet_peer_gc_maxtime;
+- add_timer(&peer_periodic_timer);
++ net->peer_periodic_timer.expires = jiffies
++ + net_random() % net->inet_peer_gc_maxtime
++ + net->inet_peer_gc_maxtime;
++ /* Remember our namespace */
++ net->peer_periodic_timer.data = (unsigned long)net;
++ add_timer(&net->peer_periodic_timer);
++
++ return 0;
++}
++
++static void inet_peers_net_exit(struct net *net)
++{
++ del_timer(&net->peer_periodic_timer);
++ /* CHECKME do I need to do something to release all of the peers */
+ }
+
+ /* Called with or without local BH being disabled. */
+-static void unlink_from_unused(struct inet_peer *p)
++static void unlink_from_unused(struct net *net, struct inet_peer *p)
+ {
+ spin_lock_bh(&inet_peer_unused_lock);
+ if (p->unused_prevp != NULL) {
+ /* On unused list. */
+- *p->unused_prevp = p->unused_next;
+- if (p->unused_next != NULL)
+- p->unused_next->unused_prevp = p->unused_prevp;
++ *p->unused_prevp = p->u.unused_next;
++ if (p->u.unused_next != NULL)
++ p->u.unused_next->unused_prevp = p->unused_prevp;
+ else
+- inet_peer_unused_tailp = p->unused_prevp;
++ net->inet_peer_unused_tailp = p->unused_prevp;
+ p->unused_prevp = NULL; /* mark it as removed */
++ p->u.net = hold_net(net); /* Remember the net */
+ }
+ spin_unlock_bh(&inet_peer_unused_lock);
+ }
+@@ -160,9 +183,9 @@
+ struct inet_peer *u, **v; \
+ if (_stack) { \
+ stackptr = _stack; \
+- *stackptr++ = &peer_root; \
++ *stackptr++ = &net->peer_root; \
+ } \
+- for (u = peer_root; u != peer_avl_empty; ) { \
++ for (u = net->peer_root; u != peer_avl_empty; ) { \
+ if (_daddr == u->v4daddr) \
+ break; \
+ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
+@@ -279,7 +302,7 @@
+ } while(0)
+
+ /* May be called with local BH enabled. */
+-static void unlink_from_pool(struct inet_peer *p)
++static void unlink_from_pool(struct net *net, struct inet_peer *p)
+ {
+ int do_free;
+
+@@ -317,7 +340,7 @@
+ delp[1] = &t->avl_left; /* was &p->avl_left */
+ }
+ peer_avl_rebalance(stack, stackptr);
+- peer_total--;
++ net->peer_total--;
+ do_free = 1;
+ }
+ write_unlock_bh(&peer_pool_lock);
+@@ -335,13 +358,13 @@
+ }
+
+ /* May be called with local BH enabled. */
+-static int cleanup_once(unsigned long ttl)
++static int cleanup_once(struct net *net, unsigned long ttl)
+ {
+ struct inet_peer *p;
+
+ /* Remove the first entry from the list of unused nodes. */
+ spin_lock_bh(&inet_peer_unused_lock);
+- p = inet_peer_unused_head;
++ p = net->inet_peer_unused_head;
+ if (p != NULL) {
+ __u32 delta = (__u32)jiffies - p->dtime;
+ if (delta < ttl) {
+@@ -349,12 +372,13 @@
+ spin_unlock_bh(&inet_peer_unused_lock);
+ return -1;
+ }
+- inet_peer_unused_head = p->unused_next;
+- if (p->unused_next != NULL)
+- p->unused_next->unused_prevp = p->unused_prevp;
++ net->inet_peer_unused_head = p->u.unused_next;
++ if (p->u.unused_next != NULL)
++ p->u.unused_next->unused_prevp = p->unused_prevp;
+ else
+- inet_peer_unused_tailp = p->unused_prevp;
++ net->inet_peer_unused_tailp = p->unused_prevp;
+ p->unused_prevp = NULL; /* mark as not on the list */
++ p->u.net = hold_net(net);
+ /* Grab an extra reference to prevent node disappearing
+ * before unlink_from_pool() call. */
+ atomic_inc(&p->refcnt);
+@@ -367,12 +391,12 @@
+ * happen because of entry limits in route cache. */
+ return -1;
+
+- unlink_from_pool(p);
++ unlink_from_pool(net, p);
+ return 0;
+ }
+
+ /* Called with or without local BH being disabled. */
+-struct inet_peer *inet_getpeer(__be32 daddr, int create)
++struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create)
+ {
+ struct inet_peer *p, *n;
+ struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+@@ -387,7 +411,7 @@
+ if (p != peer_avl_empty) {
+ /* The existing node has been found. */
+ /* Remove the entry from unused list if it was there. */
+- unlink_from_unused(p);
++ unlink_from_unused(net, p);
+ return p;
+ }
+
+@@ -413,13 +437,13 @@
+ /* Link the node. */
+ link_to_pool(n);
+ n->unused_prevp = NULL; /* not on the list */
+- peer_total++;
++ n->u.net = hold_net(net); /* Remember the net */
++ net->peer_total++;
+ write_unlock_bh(&peer_pool_lock);
+
+- if (peer_total >= inet_peer_threshold)
++ if (net->peer_total >= net->inet_peer_threshold)
+ /* Remove one less-recently-used entry. */
+- cleanup_once(0);
+-
++ cleanup_once(net, 0);
+ return n;
+
+ out_free:
+@@ -427,25 +451,26 @@
+ atomic_inc(&p->refcnt);
+ write_unlock_bh(&peer_pool_lock);
+ /* Remove the entry from unused list if it was there. */
+- unlink_from_unused(p);
++ unlink_from_unused(net, p);
+ /* Free the preallocated node. */
+ kmem_cache_free(peer_cachep, n);
+ return p;
+ }
+
+ /* Called with local BH disabled. */
+-static void peer_check_expire(unsigned long dummy)
++static void peer_check_expire(unsigned long arg)
+ {
++ struct net *net = (void *)arg;
+ unsigned long now = jiffies;
+ int ttl;
+
+- if (peer_total >= inet_peer_threshold)
+- ttl = inet_peer_minttl;
++ if (net->peer_total >= net->inet_peer_threshold)
++ ttl = net->inet_peer_minttl;
+ else
+- ttl = inet_peer_maxttl
+- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
+- peer_total / inet_peer_threshold * HZ;
+- while (!cleanup_once(ttl)) {
++ ttl = net->inet_peer_maxttl
++ - (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ *
++ net->peer_total / net->inet_peer_threshold * HZ;
++ while (!cleanup_once(net, ttl)) {
+ if (jiffies != now)
+ break;
+ }
+@@ -453,25 +478,30 @@
+ /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
+ * interval depending on the total number of entries (more entries,
+ * less interval). */
+- if (peer_total >= inet_peer_threshold)
+- peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
++ if (net->peer_total >= net->inet_peer_threshold)
++ net->peer_periodic_timer.expires = jiffies
++ + net->inet_peer_gc_mintime;
+ else
+- peer_periodic_timer.expires = jiffies
+- + inet_peer_gc_maxtime
+- - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+- peer_total / inet_peer_threshold * HZ;
+- add_timer(&peer_periodic_timer);
++ net->peer_periodic_timer.expires = jiffies
++ + net->inet_peer_gc_maxtime
++ - (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ *
++ net->peer_total / net->inet_peer_threshold * HZ;
++ add_timer(&net->peer_periodic_timer);
+ }
+
+ void inet_putpeer(struct inet_peer *p)
+ {
+ spin_lock_bh(&inet_peer_unused_lock);
+ if (atomic_dec_and_test(&p->refcnt)) {
+- p->unused_prevp = inet_peer_unused_tailp;
+- p->unused_next = NULL;
+- *inet_peer_unused_tailp = p;
+- inet_peer_unused_tailp = &p->unused_next;
++ struct net *net = p->u.net;
++
++ p->unused_prevp = net->inet_peer_unused_tailp;
++ p->u.unused_next = NULL;
++ *net->inet_peer_unused_tailp = p;
++ net->inet_peer_unused_tailp = &p->u.unused_next;
+ p->dtime = (__u32)jiffies;
++
++ release_net(net);
+ }
+ spin_unlock_bh(&inet_peer_unused_lock);
+ }
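
inet_peers_net_init() above shows how this patch carries the namespace through an old-style timer: the struct net pointer is stashed in timer_list.data (an unsigned long on these kernels) and recovered in the handler. A stripped-down sketch of the idiom, reusing the peer_periodic_timer field the patch adds to struct net; example_expire()/example_timer_start() and the 10 * HZ interval are illustrative only:

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <net/net_namespace.h>

static void example_expire(unsigned long arg)
{
	struct net *net = (struct net *)arg;	/* recover the namespace */

	/* ... per-net expiry work ... */

	mod_timer(&net->peer_periodic_timer, jiffies + 10 * HZ);
}

static void example_timer_start(struct net *net)
{
	init_timer(&net->peer_periodic_timer);
	net->peer_periodic_timer.function = example_expire;
	net->peer_periodic_timer.data = (unsigned long)net;	/* remember our net */
	net->peer_periodic_timer.expires = jiffies + 10 * HZ;
	add_timer(&net->peer_periodic_timer);
}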
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_fragment.c linux-2.6.22-591/net/ipv4/ip_fragment.c
+--- linux-2.6.22-570/net/ipv4/ip_fragment.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_fragment.c 2007-12-21 15:36:15.000000000 -0500
+@@ -49,21 +49,6 @@
+ * as well. Or notify me, at least. --ANK
+ */
+
+-/* Fragment cache limits. We will commit 256K at one time. Should we
+- * cross that limit we will prune down to 192K. This should cope with
+- * even the most extreme cases without allowing an attacker to measurably
+- * harm machine performance.
+- */
+-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
+-
+-int sysctl_ipfrag_max_dist __read_mostly = 64;
+-
+-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
+- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
+- */
+-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
+-
+ struct ipfrag_skb_cb
+ {
+ struct inet_skb_parm h;
+@@ -96,6 +81,7 @@
+ int iif;
+ unsigned int rid;
+ struct inet_peer *peer;
++ struct net *net;
+ };
+
+ /* Hash table. */
+@@ -103,17 +89,13 @@
+ #define IPQ_HASHSZ 64
+
+ /* Per-bucket lock is easy to add now. */
+-static struct hlist_head ipq_hash[IPQ_HASHSZ];
+ static DEFINE_RWLOCK(ipfrag_lock);
+-static u32 ipfrag_hash_rnd;
+-static LIST_HEAD(ipq_lru_list);
+-int ip_frag_nqueues = 0;
+
+ static __inline__ void __ipq_unlink(struct ipq *qp)
+ {
+ hlist_del(&qp->list);
+ list_del(&qp->lru_list);
+- ip_frag_nqueues--;
++ qp->net->ip_frag_nqueues--;
+ }
+
+ static __inline__ void ipq_unlink(struct ipq *ipq)
+@@ -123,70 +105,71 @@
+ write_unlock(&ipfrag_lock);
+ }
+
+-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
++static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot)
+ {
+ return jhash_3words((__force u32)id << 16 | prot,
+ (__force u32)saddr, (__force u32)daddr,
+- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
++ net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ }
+
+-static struct timer_list ipfrag_secret_timer;
+-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
+-
+-static void ipfrag_secret_rebuild(unsigned long dummy)
++static void ipfrag_secret_rebuild(unsigned long arg)
+ {
++ struct net *net = (void *)arg;
+ unsigned long now = jiffies;
+ int i;
+
+ write_lock(&ipfrag_lock);
+- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
++ get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32));
+ for (i = 0; i < IPQ_HASHSZ; i++) {
+ struct ipq *q;
++ struct hlist_head *head;
+ struct hlist_node *p, *n;
+
+- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
+- unsigned int hval = ipqhashfn(q->id, q->saddr,
++ head = &net->ipq_hash[i];
++ hlist_for_each_entry_safe(q, p, n, head, list) {
++ unsigned int hval = ipqhashfn(net, q->id, q->saddr,
+ q->daddr, q->protocol);
+
+ if (hval != i) {
+ hlist_del(&q->list);
+
+ /* Relink to new hash chain. */
+- hlist_add_head(&q->list, &ipq_hash[hval]);
++ hlist_add_head(&q->list, &net->ipq_hash[hval]);
+ }
+ }
+ }
+ write_unlock(&ipfrag_lock);
+
+- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
++ mod_timer(&net->ipfrag_secret_timer,
++ now + net->sysctl_ipfrag_secret_interval);
+ }
+
+-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
+-
+ /* Memory Tracking Functions. */
+-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
++static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work)
+ {
+ if (work)
+ *work -= skb->truesize;
+- atomic_sub(skb->truesize, &ip_frag_mem);
++ atomic_sub(skb->truesize, &net->ip_frag_mem);
+ kfree_skb(skb);
+ }
+
+ static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+ {
++ struct net *net = qp->net;
+ if (work)
+ *work -= sizeof(struct ipq);
+- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
++ atomic_sub(sizeof(struct ipq), &net->ip_frag_mem);
++ release_net(net);
+ kfree(qp);
+ }
+
+-static __inline__ struct ipq *frag_alloc_queue(void)
++static __inline__ struct ipq *frag_alloc_queue(struct net *net)
+ {
+ struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+
+ if (!qp)
+ return NULL;
+- atomic_add(sizeof(struct ipq), &ip_frag_mem);
++ atomic_add(sizeof(struct ipq), &net->ip_frag_mem);
+ return qp;
+ }
+
+@@ -209,7 +192,7 @@
+ while (fp) {
+ struct sk_buff *xp = fp->next;
+
+- frag_kfree_skb(fp, work);
++ frag_kfree_skb(qp->net, fp, work);
+ fp = xp;
+ }
+
+@@ -241,23 +224,23 @@
+ /* Memory limiting on fragments. Evictor trashes the oldest
+ * fragment queue until we are back under the threshold.
+ */
+-static void ip_evictor(void)
++static void ip_evictor(struct net *net)
+ {
+ struct ipq *qp;
+ struct list_head *tmp;
+ int work;
+
+- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
++ work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh;
+ if (work <= 0)
+ return;
+
+ while (work > 0) {
+ read_lock(&ipfrag_lock);
+- if (list_empty(&ipq_lru_list)) {
++ if (list_empty(&net->ipq_lru_list)) {
+ read_unlock(&ipfrag_lock);
+ return;
+ }
+- tmp = ipq_lru_list.next;
++ tmp = net->ipq_lru_list.next;
+ qp = list_entry(tmp, struct ipq, lru_list);
+ atomic_inc(&qp->refcnt);
+ read_unlock(&ipfrag_lock);
+@@ -292,7 +275,7 @@
+ if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
+ struct sk_buff *head = qp->fragments;
+ /* Send an ICMP "Fragment Reassembly Timeout" message. */
+- if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
++ if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) {
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ dev_put(head->dev);
+ }
+@@ -304,7 +287,7 @@
+
+ /* Creation primitives. */
+
+-static struct ipq *ip_frag_intern(struct ipq *qp_in)
++static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in)
+ {
+ struct ipq *qp;
+ #ifdef CONFIG_SMP
+@@ -313,14 +296,14 @@
+ unsigned int hash;
+
+ write_lock(&ipfrag_lock);
+- hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
++ hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr,
+ qp_in->protocol);
+ #ifdef CONFIG_SMP
+ /* With SMP race we have to recheck hash table, because
+ * such entry could be created on other cpu, while we
+ * promoted read lock to write lock.
+ */
+- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
++ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
+ if (qp->id == qp_in->id &&
+ qp->saddr == qp_in->saddr &&
+ qp->daddr == qp_in->daddr &&
+@@ -336,26 +319,27 @@
+ #endif
+ qp = qp_in;
+
+- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
++ if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time))
+ atomic_inc(&qp->refcnt);
+
+ atomic_inc(&qp->refcnt);
+- hlist_add_head(&qp->list, &ipq_hash[hash]);
++ hlist_add_head(&qp->list, &net->ipq_hash[hash]);
+ INIT_LIST_HEAD(&qp->lru_list);
+- list_add_tail(&qp->lru_list, &ipq_lru_list);
+- ip_frag_nqueues++;
++ list_add_tail(&qp->lru_list, &net->ipq_lru_list);
++ net->ip_frag_nqueues++;
+ write_unlock(&ipfrag_lock);
+ return qp;
+ }
+
+ /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
+-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
++static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user)
+ {
+ struct ipq *qp;
+
+- if ((qp = frag_alloc_queue()) == NULL)
++ if ((qp = frag_alloc_queue(net)) == NULL)
+ goto out_nomem;
+
++ qp->net = hold_net(net);
+ qp->protocol = iph->protocol;
+ qp->last_in = 0;
+ qp->id = iph->id;
+@@ -366,7 +350,8 @@
+ qp->meat = 0;
+ qp->fragments = NULL;
+ qp->iif = 0;
+- qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
++ qp->peer = net->sysctl_ipfrag_max_dist ?
++ inet_getpeer(net, iph->saddr, 1) : NULL;
+
+ /* Initialize a timer for this entry. */
+ init_timer(&qp->timer);
+@@ -375,7 +360,7 @@
+ spin_lock_init(&qp->lock);
+ atomic_set(&qp->refcnt, 1);
+
+- return ip_frag_intern(qp);
++ return ip_frag_intern(net, qp);
+
+ out_nomem:
+ LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
+@@ -385,7 +370,7 @@
+ /* Find the correct entry in the "incomplete datagrams" queue for
+ * this IP datagram, and create new one, if nothing is found.
+ */
+-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
++static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
+ {
+ __be16 id = iph->id;
+ __be32 saddr = iph->saddr;
+@@ -396,8 +381,8 @@
+ struct hlist_node *n;
+
+ read_lock(&ipfrag_lock);
+- hash = ipqhashfn(id, saddr, daddr, protocol);
+- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
++ hash = ipqhashfn(net, id, saddr, daddr, protocol);
++ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
+ if (qp->id == id &&
+ qp->saddr == saddr &&
+ qp->daddr == daddr &&
+@@ -410,14 +395,14 @@
+ }
+ read_unlock(&ipfrag_lock);
+
+- return ip_frag_create(iph, user);
++ return ip_frag_create(net, iph, user);
+ }
+
+ /* Is the fragment too far ahead to be part of ipq? */
+ static inline int ip_frag_too_far(struct ipq *qp)
+ {
+ struct inet_peer *peer = qp->peer;
+- unsigned int max = sysctl_ipfrag_max_dist;
++ unsigned int max = qp->net->sysctl_ipfrag_max_dist;
+ unsigned int start, end;
+
+ int rc;
+@@ -442,7 +427,7 @@
+ {
+ struct sk_buff *fp;
+
+- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
++ if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) {
+ atomic_inc(&qp->refcnt);
+ return -ETIMEDOUT;
+ }
+@@ -450,7 +435,7 @@
+ fp = qp->fragments;
+ do {
+ struct sk_buff *xp = fp->next;
+- frag_kfree_skb(fp, NULL);
++ frag_kfree_skb(qp->net, fp, NULL);
+ fp = xp;
+ } while (fp);
+
+@@ -466,6 +451,7 @@
+ /* Add new segment to existing queue. */
+ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
++ struct net *net = qp->net;
+ struct sk_buff *prev, *next;
+ int flags, offset;
+ int ihl, end;
+@@ -576,7 +562,7 @@
+ qp->fragments = next;
+
+ qp->meat -= free_it->len;
+- frag_kfree_skb(free_it, NULL);
++ frag_kfree_skb(net, free_it, NULL);
+ }
+ }
+
+@@ -594,12 +580,12 @@
+ skb->dev = NULL;
+ qp->stamp = skb->tstamp;
+ qp->meat += skb->len;
+- atomic_add(skb->truesize, &ip_frag_mem);
++ atomic_add(skb->truesize, &net->ip_frag_mem);
+ if (offset == 0)
+ qp->last_in |= FIRST_IN;
+
+ write_lock(&ipfrag_lock);
+- list_move_tail(&qp->lru_list, &ipq_lru_list);
++ list_move_tail(&qp->lru_list, &net->ipq_lru_list);
+ write_unlock(&ipfrag_lock);
+
+ return;
+@@ -613,6 +599,7 @@
+
+ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+ {
++ struct net *net = qp->net;
+ struct iphdr *iph;
+ struct sk_buff *fp, *head = qp->fragments;
+ int len;
+@@ -654,12 +641,12 @@
+ head->len -= clone->len;
+ clone->csum = 0;
+ clone->ip_summed = head->ip_summed;
+- atomic_add(clone->truesize, &ip_frag_mem);
++ atomic_add(clone->truesize, &net->ip_frag_mem);
+ }
+
+ skb_shinfo(head)->frag_list = head->next;
+ skb_push(head, head->data - skb_network_header(head));
+- atomic_sub(head->truesize, &ip_frag_mem);
++ atomic_sub(head->truesize, &net->ip_frag_mem);
+
+ for (fp=head->next; fp; fp = fp->next) {
+ head->data_len += fp->len;
+@@ -669,7 +656,7 @@
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+- atomic_sub(fp->truesize, &ip_frag_mem);
++ atomic_sub(fp->truesize, &net->ip_frag_mem);
+ }
+
+ head->next = NULL;
+@@ -700,19 +687,20 @@
+ /* Process an incoming IP datagram fragment. */
+ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct ipq *qp;
+ struct net_device *dev;
+
+ IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+
+ /* Start by cleaning up the memory. */
+- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+- ip_evictor();
++ if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh)
++ ip_evictor(net);
+
+ dev = skb->dev;
+
+ /* Lookup (or create) queue header */
+- if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
++ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
+ struct sk_buff *ret = NULL;
+
+ spin_lock(&qp->lock);
+@@ -733,15 +721,70 @@
+ return NULL;
+ }
+
+-void __init ipfrag_init(void)
++static int ipfrag_net_init(struct net *net)
+ {
+- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
++ struct timer_list *secret_timer;
++ int i;
++
++ /* Fragment cache limits. We will commit 256K at one time. Should we
++ * cross that limit we will prune down to 192K. This should cope with
++ * even the most extreme cases without allowing an attacker to measurably
++ * harm machine performance.
++ */
++ net->sysctl_ipfrag_high_thresh = 256*1024;
++ net->sysctl_ipfrag_low_thresh = 192*1024;
++ net->sysctl_ipfrag_max_dist = 64;
++
++ /* Important NOTE! Fragment queue must be destroyed before MSL expires.
++ * RFC 791 is wrong to propose extending the timer by the TTL on each fragment arrival.
++ */
++ net->sysctl_ipfrag_time = IP_FRAG_TIME;
++
++ net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
++
++ net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL);
++ if (!net->ipq_hash)
++ return -ENOMEM;
++
++ for (i = 0; i < IPQ_HASHSZ; i++)
++ INIT_HLIST_HEAD(&net->ipq_hash[i]);
++ INIT_LIST_HEAD(&net->ipq_lru_list);
++ net->ip_frag_nqueues = 0;
++ atomic_set(&net->ip_frag_mem, 0);
++
++
++ net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ (jiffies ^ (jiffies >> 6)));
+
+- init_timer(&ipfrag_secret_timer);
+- ipfrag_secret_timer.function = ipfrag_secret_rebuild;
+- ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
+- add_timer(&ipfrag_secret_timer);
++ secret_timer = &net->ipfrag_secret_timer;
++ init_timer(secret_timer);
++ secret_timer->function = ipfrag_secret_rebuild;
++ secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval;
++ secret_timer->data = (unsigned long)net;
++ add_timer(secret_timer);
++
++ return 0;
++}
++
++static void ipfrag_net_exit(struct net *net)
++{
++ del_timer(&net->ipfrag_secret_timer);
++
++ net->sysctl_ipfrag_low_thresh = 0;
++ while (atomic_read(&net->ip_frag_mem))
++ ip_evictor(net);
++
++ kfree(net->ipq_hash);
++}
++
++static struct pernet_operations ipfrag_net_ops = {
++ .init = ipfrag_net_init,
++ .exit = ipfrag_net_exit,
++};
++
++void ipfrag_init(void)
++{
++ register_pernet_subsys(&ipfrag_net_ops);
+ }
+
+ EXPORT_SYMBOL(ip_defrag);
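
Unlike icmp_init() earlier in this patch, ipfrag_init() above discards the return value of register_pernet_subsys(); if the init hook fails for the initial namespace, fragment handling is silently left unconfigured. A sketch of the checked variant, mirroring the panic icmp_init() uses; example_init is a placeholder name and ipfrag_net_ops is the pernet_operations defined in the hunk above:

#include <linux/kernel.h>
#include <net/net_namespace.h>

void __init example_init(void)
{
	if (register_pernet_subsys(&ipfrag_net_ops))
		panic("ip_fragment: failed to register pernet operations\n");
}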
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_gre.c linux-2.6.22-591/net/ipv4/ip_gre.c
+--- linux-2.6.22-570/net/ipv4/ip_gre.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_gre.c 2007-12-21 15:36:15.000000000 -0500
+@@ -262,7 +262,7 @@
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(name, "gre%d", i);
+- if (__dev_get_by_name(name) == NULL)
++ if (__dev_get_by_name(&init_net, name) == NULL)
+ break;
+ }
+ if (i==100)
+@@ -397,6 +397,9 @@
+ struct flowi fl;
+ struct rtable *rt;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (p[1] != htons(ETH_P_IP))
+ return;
+
+@@ -475,6 +478,7 @@
+
+ /* Try to guess incoming interface */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.fl4_dst = eiph->saddr;
+ fl.fl4_tos = RT_TOS(eiph->tos);
+ fl.proto = IPPROTO_GRE;
+@@ -559,6 +563,10 @@
+ struct ip_tunnel *tunnel;
+ int offset = 4;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (!pskb_may_pull(skb, 16))
+ goto drop_nolock;
+
+@@ -740,7 +748,8 @@
+ }
+
+ {
+- struct flowi fl = { .oif = tunnel->parms.link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = tunnel->parms.link,
+ .nl_u = { .ip4_u =
+ { .daddr = dst,
+ .saddr = tiph->saddr,
+@@ -1095,7 +1104,8 @@
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (MULTICAST(t->parms.iph.daddr)) {
+- struct flowi fl = { .oif = t->parms.link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = t->parms.link,
+ .nl_u = { .ip4_u =
+ { .daddr = t->parms.iph.daddr,
+ .saddr = t->parms.iph.saddr,
+@@ -1118,7 +1128,7 @@
+ {
+ struct ip_tunnel *t = netdev_priv(dev);
+ if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
+- struct in_device *in_dev = inetdev_by_index(t->mlink);
++ struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink);
+ if (in_dev) {
+ ip_mc_dec_group(in_dev, t->parms.iph.daddr);
+ in_dev_put(in_dev);
+@@ -1168,7 +1178,8 @@
+ /* Guess output device to choose reasonable mtu and hard_header_len */
+
+ if (iph->daddr) {
+- struct flowi fl = { .oif = tunnel->parms.link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = tunnel->parms.link,
+ .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .saddr = iph->saddr,
+@@ -1195,7 +1206,7 @@
+ }
+
+ if (!tdev && tunnel->parms.link)
+- tdev = __dev_get_by_index(tunnel->parms.link);
++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+ if (tdev) {
+ hlen = tdev->hard_header_len;
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_input.c linux-2.6.22-591/net/ipv4/ip_input.c
+--- linux-2.6.22-570/net/ipv4/ip_input.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -280,6 +280,10 @@
+ struct iphdr *iph;
+ struct net_device *dev = skb->dev;
+
++
++ if (skb->dev->nd_net != &init_net)
++ goto drop;
++
+ /* It looks as overkill, because not all
+ IP options require packet mangling.
+ But it is the easiest for now, especially taking
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_options.c linux-2.6.22-591/net/ipv4/ip_options.c
+--- linux-2.6.22-570/net/ipv4/ip_options.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/ip_options.c 2007-12-21 15:36:15.000000000 -0500
+@@ -151,7 +151,7 @@
+ __be32 addr;
+
+ memcpy(&addr, sptr+soffset-1, 4);
+- if (inet_addr_type(addr) != RTN_LOCAL) {
++ if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
+ dopt->ts_needtime = 1;
+ soffset += 8;
+ }
+@@ -400,7 +400,7 @@
+ {
+ __be32 addr;
+ memcpy(&addr, &optptr[optptr[2]-1], 4);
+- if (inet_addr_type(addr) == RTN_UNICAST)
++ if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
+ break;
+ if (skb)
+ timeptr = (__be32*)&optptr[optptr[2]+3];
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_output.c linux-2.6.22-591/net/ipv4/ip_output.c
+--- linux-2.6.22-570/net/ipv4/ip_output.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_output.c 2007-12-21 15:36:15.000000000 -0500
+@@ -83,8 +83,6 @@
+ #include <linux/netlink.h>
+ #include <linux/tcp.h>
+
+-int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
+-
+ /* Generate a checksum for an outgoing IP datagram. */
+ __inline__ void ip_send_check(struct iphdr *iph)
+ {
+@@ -317,7 +315,8 @@
+ daddr = opt->faddr;
+
+ {
+- struct flowi fl = { .oif = sk->sk_bound_dev_if,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = sk->sk_bound_dev_if,
+ .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = inet->saddr,
+@@ -837,7 +836,7 @@
+ */
+ if (transhdrlen &&
+ length + fragheaderlen <= mtu &&
+- rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
++ rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
+ !exthdrlen)
+ csummode = CHECKSUM_PARTIAL;
+
+@@ -1352,7 +1351,8 @@
+ }
+
+ {
+- struct flowi fl = { .oif = arg->bound_dev_if,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = arg->bound_dev_if,
+ .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = rt->rt_spec_dst,
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_sockglue.c linux-2.6.22-591/net/ipv4/ip_sockglue.c
+--- linux-2.6.22-570/net/ipv4/ip_sockglue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_sockglue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -411,6 +411,7 @@
+ static int do_ip_setsockopt(struct sock *sk, int level,
+ int optname, char __user *optval, int optlen)
+ {
++ struct net *net = sk->sk_net;
+ struct inet_sock *inet = inet_sk(sk);
+ int val=0,err;
+
+@@ -596,13 +597,13 @@
+ err = 0;
+ break;
+ }
+- dev = ip_dev_find(mreq.imr_address.s_addr);
++ dev = ip_dev_find(net, mreq.imr_address.s_addr);
+ if (dev) {
+ mreq.imr_ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ } else
+- dev = __dev_get_by_index(mreq.imr_ifindex);
++ dev = __dev_get_by_index(net, mreq.imr_ifindex);
+
+
+ err = -EADDRNOTAVAIL;
+@@ -956,6 +957,7 @@
+ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+ {
++ struct net *net = sk->sk_net;
+ struct inet_sock *inet = inet_sk(sk);
+ int val;
+ int len;
+@@ -1023,7 +1025,7 @@
+ break;
+ case IP_TTL:
+ val = (inet->uc_ttl == -1 ?
+- sysctl_ip_default_ttl :
++ net->sysctl_ip_default_ttl :
+ inet->uc_ttl);
+ break;
+ case IP_HDRINCL:
+diff -Nurb linux-2.6.22-570/net/ipv4/ipcomp.c linux-2.6.22-591/net/ipv4/ipcomp.c
+--- linux-2.6.22-570/net/ipv4/ipcomp.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/ipcomp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -175,6 +175,9 @@
+ struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+ struct xfrm_state *x;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ return;
+@@ -486,3 +489,4 @@
+ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+ MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
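
ipcomp4_err() above gains the transitional guard seen throughout this patch: handlers not yet converted to namespaces ignore (or drop) traffic whose device sits outside init_net, via the 2.6.22-era nd_net field. A sketch of both guard variants; the function names are placeholders:

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>

/* Error handlers return quietly, as in ipcomp4_err() above. */
static void example_err(struct sk_buff *skb, u32 info)
{
	if (skb->dev->nd_net != &init_net)
		return;
	/* ... normal error processing ... */
}

/* Receive handlers must also free the skb, as in igmp_rcv() earlier. */
static int example_rcv(struct sk_buff *skb)
{
	if (skb->dev->nd_net != &init_net) {
		kfree_skb(skb);
		return 0;
	}
	/* ... normal input processing ... */
	return 0;
}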
+diff -Nurb linux-2.6.22-570/net/ipv4/ipconfig.c linux-2.6.22-591/net/ipv4/ipconfig.c
+--- linux-2.6.22-570/net/ipv4/ipconfig.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipconfig.c 2007-12-21 15:36:15.000000000 -0500
+@@ -59,6 +59,7 @@
+ #include <net/ip.h>
+ #include <net/ipconfig.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+
+ #include <asm/uaccess.h>
+ #include <net/checksum.h>
+@@ -184,16 +185,18 @@
+ struct ic_device *d, **last;
+ struct net_device *dev;
+ unsigned short oflags;
++ struct net_device *lo;
+
+ last = &ic_first_dev;
+ rtnl_lock();
+
+ /* bring loopback device up first */
+- if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
+- printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
++ lo = &init_net.loopback_dev;
++ if (dev_change_flags(lo, lo->flags | IFF_UP) < 0)
++ printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name);
+
+- for_each_netdev(dev) {
+- if (dev == &loopback_dev)
++ for_each_netdev(&init_net, dev) {
++ if (dev == lo)
+ continue;
+ if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+ (!(dev->flags & IFF_LOOPBACK) &&
+@@ -283,7 +286,7 @@
+
+ mm_segment_t oldfs = get_fs();
+ set_fs(get_ds());
+- res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
++ res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
+ set_fs(oldfs);
+ return res;
+ }
+@@ -294,7 +297,7 @@
+
+ mm_segment_t oldfs = get_fs();
+ set_fs(get_ds());
+- res = ip_rt_ioctl(cmd, (void __user *) arg);
++ res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
+ set_fs(oldfs);
+ return res;
+ }
+@@ -425,6 +428,9 @@
+ unsigned char *sha, *tha; /* s for "source", t for "target" */
+ struct ic_device *d;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ return NET_RX_DROP;
+
+@@ -834,6 +840,9 @@
+ struct ic_device *d;
+ int len, ext_len;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ /* Perform verifications before taking the lock. */
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
+@@ -1253,7 +1262,7 @@
+ __be32 addr;
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops);
++ proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
+ #endif /* CONFIG_PROC_FS */
+
+ if (!ic_enable)
+diff -Nurb linux-2.6.22-570/net/ipv4/ipip.c linux-2.6.22-591/net/ipv4/ipip.c
+--- linux-2.6.22-570/net/ipv4/ipip.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipip.c 2007-12-21 15:36:15.000000000 -0500
+@@ -225,7 +225,7 @@
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(name, "tunl%d", i);
+- if (__dev_get_by_name(name) == NULL)
++ if (__dev_get_by_name(&init_net, name) == NULL)
+ break;
+ }
+ if (i==100)
+@@ -403,6 +403,7 @@
+
+ /* Try to guess incoming interface */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.fl4_daddr = eiph->saddr;
+ fl.fl4_tos = RT_TOS(eiph->tos);
+ fl.proto = IPPROTO_IPIP;
+@@ -542,7 +543,8 @@
+ }
+
+ {
+- struct flowi fl = { .oif = tunnel->parms.link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = tunnel->parms.link,
+ .nl_u = { .ip4_u =
+ { .daddr = dst,
+ .saddr = tiph->saddr,
+@@ -806,7 +808,8 @@
+ memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+ if (iph->daddr) {
+- struct flowi fl = { .oif = tunnel->parms.link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = tunnel->parms.link,
+ .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .saddr = iph->saddr,
+@@ -821,7 +824,7 @@
+ }
+
+ if (!tdev && tunnel->parms.link)
+- tdev = __dev_get_by_index(tunnel->parms.link);
++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
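
Both tunnel hunks above show the same change to routing: each struct flowi is stamped with the namespace it should be resolved in via the fl_net member, which this patch set adds (it is not in stock 2.6.22). A minimal sketch of a namespace-qualified lookup, with a hypothetical helper name:

        #include <linux/types.h>
        #include <linux/in.h>
        #include <net/route.h>
        #include <net/net_namespace.h>

        static struct rtable *example_route_lookup(__be32 daddr, __be32 saddr,
                                                   int oif, u8 tos)
        {
                struct rtable *rt;
                struct flowi fl = { .fl_net = &init_net, /* namespace for the lookup */
                                    .oif = oif,
                                    .nl_u = { .ip4_u = { .daddr = daddr,
                                                         .saddr = saddr,
                                                         .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };

                if (ip_route_output_key(&rt, &fl))
                        return NULL;    /* no route in the initial namespace */
                return rt;
        }
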
+diff -Nurb linux-2.6.22-570/net/ipv4/ipmr.c linux-2.6.22-591/net/ipv4/ipmr.c
+--- linux-2.6.22-570/net/ipv4/ipmr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipmr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -62,6 +62,7 @@
+ #include <linux/netfilter_ipv4.h>
+ #include <net/ipip.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ #include <net/netlink.h>
+
+ #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
+@@ -124,7 +125,7 @@
+ {
+ struct net_device *dev;
+
+- dev = __dev_get_by_name("tunl0");
++ dev = __dev_get_by_name(&init_net, "tunl0");
+
+ if (dev) {
+ int err;
+@@ -148,7 +149,7 @@
+
+ dev = NULL;
+
+- if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
++ if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
+ dev->flags |= IFF_MULTICAST;
+
+ in_dev = __in_dev_get_rtnl(dev);
+@@ -320,7 +321,7 @@
+ e->error = -ETIMEDOUT;
+ memset(&e->msg, 0, sizeof(e->msg));
+
+- rtnl_unicast(skb, NETLINK_CB(skb).pid);
++ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ } else
+ kfree_skb(skb);
+ }
+@@ -422,7 +423,7 @@
+ return -ENOBUFS;
+ break;
+ case 0:
+- dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
++ dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ dev_put(dev);
+@@ -532,7 +533,7 @@
+ memset(&e->msg, 0, sizeof(e->msg));
+ }
+
+- rtnl_unicast(skb, NETLINK_CB(skb).pid);
++ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ } else
+ ip_mr_forward(skb, c, 0);
+ }
+@@ -848,7 +849,7 @@
+ {
+ rtnl_lock();
+ if (sk == mroute_socket) {
+- IPV4_DEVCONF_ALL(MC_FORWARDING)--;
++ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
+
+ write_lock_bh(&mrt_lock);
+ mroute_socket=NULL;
+@@ -897,7 +898,7 @@
+ mroute_socket=sk;
+ write_unlock_bh(&mrt_lock);
+
+- IPV4_DEVCONF_ALL(MC_FORWARDING)++;
++ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
+ }
+ rtnl_unlock();
+ return ret;
+@@ -1082,13 +1083,18 @@
+
+ static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
++ struct net_device *dev = ptr;
+ struct vif_device *v;
+ int ct;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+ v=&vif_table[0];
+ for (ct=0;ct<maxvif;ct++,v++) {
+- if (v->dev==ptr)
++ if (v->dev==dev)
+ vif_delete(ct);
+ }
+ return NOTIFY_DONE;
+@@ -1171,7 +1177,8 @@
+ #endif
+
+ if (vif->flags&VIFF_TUNNEL) {
+- struct flowi fl = { .oif = vif->link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = vif->remote,
+ .saddr = vif->local,
+@@ -1181,7 +1188,8 @@
+ goto out_free;
+ encap = sizeof(struct iphdr);
+ } else {
+- struct flowi fl = { .oif = vif->link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .tos = RT_TOS(iph->tos) } },
+@@ -1498,6 +1506,10 @@
+ struct iphdr *encap;
+ struct net_device *reg_dev = NULL;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+ goto drop;
+
+@@ -1922,7 +1934,7 @@
+ ipmr_expire_timer.function=ipmr_expire_process;
+ register_netdevice_notifier(&ip_mr_notifier);
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
+- proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
++ proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
++ proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+ #endif
+ }
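
The ipmr hunks demonstrate the two guard styles this patch applies throughout: netdevice notifiers bail out with NOTIFY_DONE for devices in foreign namespaces, while packet input paths free the skb. A minimal sketch of both, with hypothetical function names; the nd_net tests are exactly the ones added above:

        #include <linux/netdevice.h>
        #include <linux/notifier.h>
        #include <linux/skbuff.h>
        #include <net/net_namespace.h>

        static int example_device_event(struct notifier_block *this,
                                        unsigned long event, void *ptr)
        {
                struct net_device *dev = ptr;

                if (dev->nd_net != &init_net)   /* event from another namespace */
                        return NOTIFY_DONE;
                /* ... handle NETDEV_UNREGISTER etc. for init_net only ... */
                return NOTIFY_DONE;
        }

        static int example_rcv(struct sk_buff *skb)
        {
                if (skb->dev->nd_net != &init_net) {    /* foreign-namespace packet */
                        kfree_skb(skb);
                        return 0;
                }
                /* ... normal receive processing ... */
                return 0;
        }
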
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c 2007-12-21 15:36:15.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/mutex.h>
++#include <net/net_namespace.h>
+
+ #include <net/ip_vs.h>
+
+@@ -616,12 +617,12 @@
+ int ip_vs_app_init(void)
+ {
+ /* we will replace it with proc_net_ipvs_create() soon */
+- proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
++ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+ return 0;
+ }
+
+
+ void ip_vs_app_cleanup(void)
+ {
+- proc_net_remove("ip_vs_app");
++ proc_net_remove(&init_net, "ip_vs_app");
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c 2007-12-21 15:36:15.000000000 -0500
+@@ -34,6 +34,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/jhash.h>
+ #include <linux/random.h>
++#include <net/net_namespace.h>
+
+ #include <net/ip_vs.h>
+
+@@ -922,7 +923,7 @@
+ rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ }
+
+- proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
++ proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+
+ /* calculate the random value for connection hash */
+ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+@@ -938,6 +939,6 @@
+
+ /* Release the empty cache */
+ kmem_cache_destroy(ip_vs_conn_cachep);
+- proc_net_remove("ip_vs_conn");
++ proc_net_remove(&init_net, "ip_vs_conn");
+ vfree(ip_vs_conn_tab);
+ }
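
The IPVS proc hunks show the namespace-aware /proc/net helpers: proc_net_fops_create() and proc_net_remove() now take the struct net whose proc tree should hold the entry. A minimal registration sketch under that assumption (example names and fops are hypothetical):

        #include <linux/init.h>
        #include <linux/module.h>
        #include <linux/fs.h>
        #include <linux/proc_fs.h>
        #include <net/net_namespace.h>

        extern const struct file_operations example_fops;       /* defined elsewhere */

        static int __init example_init(void)
        {
                /* create /proc/net/example inside the initial namespace */
                if (!proc_net_fops_create(&init_net, "example", 0444, &example_fops))
                        return -ENOMEM;
                return 0;
        }

        static void __exit example_exit(void)
        {
                proc_net_remove(&init_net, "example");
        }
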
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c 2007-12-21 15:36:15.000000000 -0500
+@@ -460,7 +460,7 @@
+ and the destination is RTN_UNICAST (and not local), then create
+ a cache_bypass connection entry */
+ if (sysctl_ip_vs_cache_bypass && svc->fwmark
+- && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
++ && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
+ int ret, cs;
+ struct ip_vs_conn *cp;
+
+@@ -530,6 +530,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (!((*pskb)->ipvs_property))
+ return NF_ACCEPT;
+ /* The packet was sent from IPVS, exit this chain */
+@@ -734,6 +738,10 @@
+ struct ip_vs_conn *cp;
+ int ihl;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ EnterFunction(11);
+
+ if (skb->ipvs_property)
+@@ -818,7 +826,7 @@
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
++ if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0)
+ goto drop;
+ skb = *pskb;
+
+@@ -956,12 +964,16 @@
+ int ret, restart;
+ int ihl;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /*
+ * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+ * ... don't know why 1st test DOES NOT include 2nd (?)
+ */
+ if (unlikely(skb->pkt_type != PACKET_HOST
+- || skb->dev == &loopback_dev || skb->sk)) {
++ || skb->dev == &init_net.loopback_dev || skb->sk)) {
+ IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+ skb->pkt_type,
+ ip_hdr(skb)->protocol,
+@@ -1062,6 +1074,10 @@
+ {
+ int r;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+ return NF_ACCEPT;
+
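
Every IPVS netfilter hook above gains the same prologue. Depending on the hook point, in or out may be NULL (never both: in is NULL at LOCAL_OUT, out is NULL at PRE_ROUTING and LOCAL_IN), so the (in ? in : out) expression picks whichever device exists, and traffic from any other namespace passes through untouched. A sketch of the idiom on the 2.6.22 hook signature:

        #include <linux/netfilter.h>
        #include <linux/netdevice.h>
        #include <linux/skbuff.h>
        #include <net/net_namespace.h>

        static unsigned int example_hook(unsigned int hooknum,
                                         struct sk_buff **pskb,
                                         const struct net_device *in,
                                         const struct net_device *out,
                                         int (*okfn)(struct sk_buff *))
        {
                /* Only filter packets in the initial network namespace */
                if ((in ? in : out)->nd_net != &init_net)
                        return NF_ACCEPT;

                /* ... filtering for init_net traffic goes here ... */
                return NF_ACCEPT;
        }
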
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c 2007-12-21 15:36:15.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <net/ip.h>
+ #include <net/route.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+
+ #include <asm/uaccess.h>
+
+@@ -679,7 +680,7 @@
+ conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+
+ /* check if local node and update the flags */
+- if (inet_addr_type(udest->addr) == RTN_LOCAL) {
++ if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
+ conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ | IP_VS_CONN_F_LOCALNODE;
+ }
+@@ -731,7 +732,7 @@
+
+ EnterFunction(2);
+
+- atype = inet_addr_type(udest->addr);
++ atype = inet_addr_type(&init_net, udest->addr);
+ if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+ return -EINVAL;
+
+@@ -1932,6 +1933,9 @@
+ struct ip_vs_service *svc;
+ struct ip_vs_dest_user *udest;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -2196,6 +2200,9 @@
+ unsigned char arg[128];
+ int ret = 0;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -2356,8 +2363,8 @@
+ return ret;
+ }
+
+- proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
+- proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
++ proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
++ proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+
+ sysctl_header = register_sysctl_table(vs_root_table);
+
+@@ -2390,8 +2397,8 @@
+ cancel_work_sync(&defense_work.work);
+ ip_vs_kill_estimator(&ip_vs_stats);
+ unregister_sysctl_table(sysctl_header);
+- proc_net_remove("ip_vs_stats");
+- proc_net_remove("ip_vs");
++ proc_net_remove(&init_net, "ip_vs_stats");
++ proc_net_remove(&init_net, "ip_vs");
+ nf_unregister_sockopt(&ip_vs_sockopts);
+ LeaveFunction(2);
+ }
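
The ctl hunks gate the IPVS sockopt interface: a socket from any other namespace gets -ENOPROTOOPT before the capability check even runs. A minimal sketch of that entry-point shape (hypothetical names; sk_net is the socket field used throughout this patch):

        #include <linux/capability.h>
        #include <net/sock.h>
        #include <net/net_namespace.h>

        static int example_set_ctl(struct sock *sk, int cmd,
                                   void __user *user, unsigned int len)
        {
                if (sk->sk_net != &init_net)    /* not visible outside init_net */
                        return -ENOPROTOOPT;

                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;

                /* ... per-command handling ... */
                return 0;
        }
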
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -843,7 +843,7 @@
+ INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
+ sysctl_header = register_sysctl_table(lblcr_root_table);
+ #ifdef CONFIG_IP_VS_LBLCR_DEBUG
+- proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
++ proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
+ #endif
+ return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ }
+@@ -852,7 +852,7 @@
+ static void __exit ip_vs_lblcr_cleanup(void)
+ {
+ #ifdef CONFIG_IP_VS_LBLCR_DEBUG
+- proc_net_remove("ip_vs_lblcr");
++ proc_net_remove(&init_net, "ip_vs_lblcr");
+ #endif
+ unregister_sysctl_table(sysctl_header);
+ unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c 2007-12-21 15:36:15.000000000 -0500
+@@ -387,7 +387,7 @@
+ struct net_device *dev;
+ struct inet_sock *inet = inet_sk(sk);
+
+- if ((dev = __dev_get_by_name(ifname)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ return -ENODEV;
+
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+@@ -412,7 +412,7 @@
+ int num;
+
+ if (sync_state == IP_VS_STATE_MASTER) {
+- if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ return -ENODEV;
+
+ num = (dev->mtu - sizeof(struct iphdr) -
+@@ -423,7 +423,7 @@
+ IP_VS_DBG(7, "setting the maximum length of sync sending "
+ "message %d.\n", sync_send_mesg_maxlen);
+ } else if (sync_state == IP_VS_STATE_BACKUP) {
+- if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ return -ENODEV;
+
+ sync_recv_mesg_maxlen = dev->mtu -
+@@ -451,7 +451,7 @@
+ memset(&mreq, 0, sizeof(mreq));
+ memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+
+- if ((dev = __dev_get_by_name(ifname)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ return -ENODEV;
+ if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ return -EINVAL;
+@@ -472,7 +472,7 @@
+ __be32 addr;
+ struct sockaddr_in sin;
+
+- if ((dev = __dev_get_by_name(ifname)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ return -ENODEV;
+
+ addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c 2007-12-21 15:36:15.000000000 -0500
+@@ -70,6 +70,7 @@
+ if (!(rt = (struct rtable *)
+ __ip_vs_dst_check(dest, rtos, 0))) {
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+@@ -93,6 +94,7 @@
+ spin_unlock(&dest->dst_lock);
+ } else {
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+@@ -160,6 +162,7 @@
+ u8 tos = iph->tos;
+ int mtu;
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath.c linux-2.6.22-591/net/ipv4/multipath.c
+--- linux-2.6.22-570/net/ipv4/multipath.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/multipath.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,55 +0,0 @@
+-/* multipath.c: IPV4 multipath algorithm support.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#include <linux/module.h>
+-#include <linux/errno.h>
+-#include <linux/netdevice.h>
+-#include <linux/spinlock.h>
+-
+-#include <net/ip_mp_alg.h>
+-
+-static DEFINE_SPINLOCK(alg_table_lock);
+-struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
+-
+-int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+-{
+- struct ip_mp_alg_ops **slot;
+- int err;
+-
+- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
+- !ops->mp_alg_select_route)
+- return -EINVAL;
+-
+- spin_lock(&alg_table_lock);
+- slot = &ip_mp_alg_table[n];
+- if (*slot != NULL) {
+- err = -EBUSY;
+- } else {
+- *slot = ops;
+- err = 0;
+- }
+- spin_unlock(&alg_table_lock);
+-
+- return err;
+-}
+-EXPORT_SYMBOL(multipath_alg_register);
+-
+-void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+-{
+- struct ip_mp_alg_ops **slot;
+-
+- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
+- return;
+-
+- spin_lock(&alg_table_lock);
+- slot = &ip_mp_alg_table[n];
+- if (*slot == ops)
+- *slot = NULL;
+- spin_unlock(&alg_table_lock);
+-
+- synchronize_net();
+-}
+-EXPORT_SYMBOL(multipath_alg_unregister);
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_drr.c linux-2.6.22-591/net/ipv4/multipath_drr.c
+--- linux-2.6.22-570/net/ipv4/multipath_drr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/multipath_drr.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,249 +0,0 @@
+-/*
+- * Device round robin policy for multipath.
+- *
+- *
+- * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
+- *
+- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-struct multipath_device {
+- int ifi; /* interface index of device */
+- atomic_t usecount;
+- int allocated;
+-};
+-
+-#define MULTIPATH_MAX_DEVICECANDIDATES 10
+-
+-static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
+-static DEFINE_SPINLOCK(state_lock);
+-
+-static int inline __multipath_findslot(void)
+-{
+- int i;
+-
+- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+- if (state[i].allocated == 0)
+- return i;
+- }
+- return -1;
+-}
+-
+-static int inline __multipath_finddev(int ifindex)
+-{
+- int i;
+-
+- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+- if (state[i].allocated != 0 &&
+- state[i].ifi == ifindex)
+- return i;
+- }
+- return -1;
+-}
+-
+-static int drr_dev_event(struct notifier_block *this,
+- unsigned long event, void *ptr)
+-{
+- struct net_device *dev = ptr;
+- int devidx;
+-
+- switch (event) {
+- case NETDEV_UNREGISTER:
+- case NETDEV_DOWN:
+- spin_lock_bh(&state_lock);
+-
+- devidx = __multipath_finddev(dev->ifindex);
+- if (devidx != -1) {
+- state[devidx].allocated = 0;
+- state[devidx].ifi = 0;
+- atomic_set(&state[devidx].usecount, 0);
+- }
+-
+- spin_unlock_bh(&state_lock);
+- break;
+- }
+-
+- return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block drr_dev_notifier = {
+- .notifier_call = drr_dev_event,
+-};
+-
+-
+-static void drr_safe_inc(atomic_t *usecount)
+-{
+- int n;
+-
+- atomic_inc(usecount);
+-
+- n = atomic_read(usecount);
+- if (n <= 0) {
+- int i;
+-
+- spin_lock_bh(&state_lock);
+-
+- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
+- atomic_set(&state[i].usecount, 0);
+-
+- spin_unlock_bh(&state_lock);
+- }
+-}
+-
+-static void drr_select_route(const struct flowi *flp,
+- struct rtable *first, struct rtable **rp)
+-{
+- struct rtable *nh, *result, *cur_min;
+- int min_usecount = -1;
+- int devidx = -1;
+- int cur_min_devidx = -1;
+-
+- /* 1. make sure all alt. nexthops have the same GC related data */
+- /* 2. determine the new candidate to be returned */
+- result = NULL;
+- cur_min = NULL;
+- for (nh = rcu_dereference(first); nh;
+- nh = rcu_dereference(nh->u.dst.rt_next)) {
+- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+- multipath_comparekeys(&nh->fl, flp)) {
+- int nh_ifidx = nh->u.dst.dev->ifindex;
+-
+- nh->u.dst.lastuse = jiffies;
+- nh->u.dst.__use++;
+- if (result != NULL)
+- continue;
+-
+- /* search for the output interface */
+-
+- /* this is not SMP safe, only add/remove are
+- * SMP safe as wrong usecount updates have no big
+- * impact
+- */
+- devidx = __multipath_finddev(nh_ifidx);
+- if (devidx == -1) {
+- /* add the interface to the array
+- * SMP safe
+- */
+- spin_lock_bh(&state_lock);
+-
+- /* due to SMP: search again */
+- devidx = __multipath_finddev(nh_ifidx);
+- if (devidx == -1) {
+- /* add entry for device */
+- devidx = __multipath_findslot();
+- if (devidx == -1) {
+- /* unlikely but possible */
+- continue;
+- }
+-
+- state[devidx].allocated = 1;
+- state[devidx].ifi = nh_ifidx;
+- atomic_set(&state[devidx].usecount, 0);
+- min_usecount = 0;
+- }
+-
+- spin_unlock_bh(&state_lock);
+- }
+-
+- if (min_usecount == 0) {
+- /* if the device has not been used it is
+- * the primary target
+- */
+- drr_safe_inc(&state[devidx].usecount);
+- result = nh;
+- } else {
+- int count =
+- atomic_read(&state[devidx].usecount);
+-
+- if (min_usecount == -1 ||
+- count < min_usecount) {
+- cur_min = nh;
+- cur_min_devidx = devidx;
+- min_usecount = count;
+- }
+- }
+- }
+- }
+-
+- if (!result) {
+- if (cur_min) {
+- drr_safe_inc(&state[cur_min_devidx].usecount);
+- result = cur_min;
+- } else {
+- result = first;
+- }
+- }
+-
+- *rp = result;
+-}
+-
+-static struct ip_mp_alg_ops drr_ops = {
+- .mp_alg_select_route = drr_select_route,
+-};
+-
+-static int __init drr_init(void)
+-{
+- int err = register_netdevice_notifier(&drr_dev_notifier);
+-
+- if (err)
+- return err;
+-
+- err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
+- if (err)
+- goto fail;
+-
+- return 0;
+-
+-fail:
+- unregister_netdevice_notifier(&drr_dev_notifier);
+- return err;
+-}
+-
+-static void __exit drr_exit(void)
+-{
+- unregister_netdevice_notifier(&drr_dev_notifier);
+- multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
+-}
+-
+-module_init(drr_init);
+-module_exit(drr_exit);
+-MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_random.c linux-2.6.22-591/net/ipv4/multipath_random.c
+--- linux-2.6.22-570/net/ipv4/multipath_random.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/multipath_random.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,114 +0,0 @@
+-/*
+- * Random policy for multipath.
+- *
+- *
+- * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
+- *
+- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <linux/random.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-#define MULTIPATH_MAX_CANDIDATES 40
+-
+-static void random_select_route(const struct flowi *flp,
+- struct rtable *first,
+- struct rtable **rp)
+-{
+- struct rtable *rt;
+- struct rtable *decision;
+- unsigned char candidate_count = 0;
+-
+- /* count all candidate */
+- for (rt = rcu_dereference(first); rt;
+- rt = rcu_dereference(rt->u.dst.rt_next)) {
+- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+- multipath_comparekeys(&rt->fl, flp))
+- ++candidate_count;
+- }
+-
+- /* choose a random candidate */
+- decision = first;
+- if (candidate_count > 1) {
+- unsigned char i = 0;
+- unsigned char candidate_no = (unsigned char)
+- (random32() % candidate_count);
+-
+- /* find chosen candidate and adjust GC data for all candidates
+- * to ensure they stay in cache
+- */
+- for (rt = first; rt; rt = rt->u.dst.rt_next) {
+- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+- multipath_comparekeys(&rt->fl, flp)) {
+- rt->u.dst.lastuse = jiffies;
+-
+- if (i == candidate_no)
+- decision = rt;
+-
+- if (i >= candidate_count)
+- break;
+-
+- i++;
+- }
+- }
+- }
+-
+- decision->u.dst.__use++;
+- *rp = decision;
+-}
+-
+-static struct ip_mp_alg_ops random_ops = {
+- .mp_alg_select_route = random_select_route,
+-};
+-
+-static int __init random_init(void)
+-{
+- return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
+-}
+-
+-static void __exit random_exit(void)
+-{
+- multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
+-}
+-
+-module_init(random_init);
+-module_exit(random_exit);
+-MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_rr.c linux-2.6.22-591/net/ipv4/multipath_rr.c
+--- linux-2.6.22-570/net/ipv4/multipath_rr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/multipath_rr.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,95 +0,0 @@
+-/*
+- * Round robin policy for multipath.
+- *
+- *
+- * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
+- *
+- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-static void rr_select_route(const struct flowi *flp,
+- struct rtable *first, struct rtable **rp)
+-{
+- struct rtable *nh, *result, *min_use_cand = NULL;
+- int min_use = -1;
+-
+- /* 1. make sure all alt. nexthops have the same GC related data
+- * 2. determine the new candidate to be returned
+- */
+- result = NULL;
+- for (nh = rcu_dereference(first); nh;
+- nh = rcu_dereference(nh->u.dst.rt_next)) {
+- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+- multipath_comparekeys(&nh->fl, flp)) {
+- nh->u.dst.lastuse = jiffies;
+-
+- if (min_use == -1 || nh->u.dst.__use < min_use) {
+- min_use = nh->u.dst.__use;
+- min_use_cand = nh;
+- }
+- }
+- }
+- result = min_use_cand;
+- if (!result)
+- result = first;
+-
+- result->u.dst.__use++;
+- *rp = result;
+-}
+-
+-static struct ip_mp_alg_ops rr_ops = {
+- .mp_alg_select_route = rr_select_route,
+-};
+-
+-static int __init rr_init(void)
+-{
+- return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
+-}
+-
+-static void __exit rr_exit(void)
+-{
+- multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
+-}
+-
+-module_init(rr_init);
+-module_exit(rr_exit);
+-MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_wrandom.c linux-2.6.22-591/net/ipv4/multipath_wrandom.c
+--- linux-2.6.22-570/net/ipv4/multipath_wrandom.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/multipath_wrandom.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,329 +0,0 @@
+-/*
+- * Weighted random policy for multipath.
+- *
+- *
+- * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
+- *
+- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <linux/random.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_fib.h>
+-#include <net/ip_mp_alg.h>
+-
+-#define MULTIPATH_STATE_SIZE 15
+-
+-struct multipath_candidate {
+- struct multipath_candidate *next;
+- int power;
+- struct rtable *rt;
+-};
+-
+-struct multipath_dest {
+- struct list_head list;
+-
+- const struct fib_nh *nh_info;
+- __be32 netmask;
+- __be32 network;
+- unsigned char prefixlen;
+-
+- struct rcu_head rcu;
+-};
+-
+-struct multipath_bucket {
+- struct list_head head;
+- spinlock_t lock;
+-};
+-
+-struct multipath_route {
+- struct list_head list;
+-
+- int oif;
+- __be32 gw;
+- struct list_head dests;
+-
+- struct rcu_head rcu;
+-};
+-
+-/* state: primarily weight per route information */
+-static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
+-
+-static unsigned char __multipath_lookup_weight(const struct flowi *fl,
+- const struct rtable *rt)
+-{
+- const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
+- struct multipath_route *r;
+- struct multipath_route *target_route = NULL;
+- struct multipath_dest *d;
+- int weight = 1;
+-
+- /* lookup the weight information for a certain route */
+- rcu_read_lock();
+-
+- /* find state entry for gateway or add one if necessary */
+- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+- if (r->gw == rt->rt_gateway &&
+- r->oif == rt->idev->dev->ifindex) {
+- target_route = r;
+- break;
+- }
+- }
+-
+- if (!target_route) {
+- /* this should not happen... but we are prepared */
+- printk( KERN_CRIT"%s: missing state for gateway: %u and " \
+- "device %d\n", __FUNCTION__, rt->rt_gateway,
+- rt->idev->dev->ifindex);
+- goto out;
+- }
+-
+- /* find state entry for destination */
+- list_for_each_entry_rcu(d, &target_route->dests, list) {
+- __be32 targetnetwork = fl->fl4_dst &
+- inet_make_mask(d->prefixlen);
+-
+- if ((targetnetwork & d->netmask) == d->network) {
+- weight = d->nh_info->nh_weight;
+- goto out;
+- }
+- }
+-
+-out:
+- rcu_read_unlock();
+- return weight;
+-}
+-
+-static void wrandom_init_state(void)
+-{
+- int i;
+-
+- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+- INIT_LIST_HEAD(&state[i].head);
+- spin_lock_init(&state[i].lock);
+- }
+-}
+-
+-static void wrandom_select_route(const struct flowi *flp,
+- struct rtable *first,
+- struct rtable **rp)
+-{
+- struct rtable *rt;
+- struct rtable *decision;
+- struct multipath_candidate *first_mpc = NULL;
+- struct multipath_candidate *mpc, *last_mpc = NULL;
+- int power = 0;
+- int last_power;
+- int selector;
+- const size_t size_mpc = sizeof(struct multipath_candidate);
+-
+- /* collect all candidates and identify their weights */
+- for (rt = rcu_dereference(first); rt;
+- rt = rcu_dereference(rt->u.dst.rt_next)) {
+- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+- multipath_comparekeys(&rt->fl, flp)) {
+- struct multipath_candidate* mpc =
+- (struct multipath_candidate*)
+- kmalloc(size_mpc, GFP_ATOMIC);
+-
+- if (!mpc)
+- return;
+-
+- power += __multipath_lookup_weight(flp, rt) * 10000;
+-
+- mpc->power = power;
+- mpc->rt = rt;
+- mpc->next = NULL;
+-
+- if (!first_mpc)
+- first_mpc = mpc;
+- else
+- last_mpc->next = mpc;
+-
+- last_mpc = mpc;
+- }
+- }
+-
+- /* choose a weighted random candidate */
+- decision = first;
+- selector = random32() % power;
+- last_power = 0;
+-
+- /* select candidate, adjust GC data and cleanup local state */
+- decision = first;
+- last_mpc = NULL;
+- for (mpc = first_mpc; mpc; mpc = mpc->next) {
+- mpc->rt->u.dst.lastuse = jiffies;
+- if (last_power <= selector && selector < mpc->power)
+- decision = mpc->rt;
+-
+- last_power = mpc->power;
+- kfree(last_mpc);
+- last_mpc = mpc;
+- }
+-
+- /* concurrent __multipath_flush may lead to !last_mpc */
+- kfree(last_mpc);
+-
+- decision->u.dst.__use++;
+- *rp = decision;
+-}
+-
+-static void wrandom_set_nhinfo(__be32 network,
+- __be32 netmask,
+- unsigned char prefixlen,
+- const struct fib_nh *nh)
+-{
+- const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
+- struct multipath_route *r, *target_route = NULL;
+- struct multipath_dest *d, *target_dest = NULL;
+-
+- /* store the weight information for a certain route */
+- spin_lock_bh(&state[state_idx].lock);
+-
+- /* find state entry for gateway or add one if necessary */
+- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+- if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
+- target_route = r;
+- break;
+- }
+- }
+-
+- if (!target_route) {
+- const size_t size_rt = sizeof(struct multipath_route);
+- target_route = (struct multipath_route *)
+- kmalloc(size_rt, GFP_ATOMIC);
+-
+- target_route->gw = nh->nh_gw;
+- target_route->oif = nh->nh_oif;
+- memset(&target_route->rcu, 0, sizeof(struct rcu_head));
+- INIT_LIST_HEAD(&target_route->dests);
+-
+- list_add_rcu(&target_route->list, &state[state_idx].head);
+- }
+-
+- /* find state entry for destination or add one if necessary */
+- list_for_each_entry_rcu(d, &target_route->dests, list) {
+- if (d->nh_info == nh) {
+- target_dest = d;
+- break;
+- }
+- }
+-
+- if (!target_dest) {
+- const size_t size_dst = sizeof(struct multipath_dest);
+- target_dest = (struct multipath_dest*)
+- kmalloc(size_dst, GFP_ATOMIC);
+-
+- target_dest->nh_info = nh;
+- target_dest->network = network;
+- target_dest->netmask = netmask;
+- target_dest->prefixlen = prefixlen;
+- memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
+-
+- list_add_rcu(&target_dest->list, &target_route->dests);
+- }
+- /* else: we already stored this info for another destination =>
+- * we are finished
+- */
+-
+- spin_unlock_bh(&state[state_idx].lock);
+-}
+-
+-static void __multipath_free(struct rcu_head *head)
+-{
+- struct multipath_route *rt = container_of(head, struct multipath_route,
+- rcu);
+- kfree(rt);
+-}
+-
+-static void __multipath_free_dst(struct rcu_head *head)
+-{
+- struct multipath_dest *dst = container_of(head,
+- struct multipath_dest,
+- rcu);
+- kfree(dst);
+-}
+-
+-static void wrandom_flush(void)
+-{
+- int i;
+-
+- /* defer delete of all entries */
+- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+- struct multipath_route *r;
+-
+- spin_lock_bh(&state[i].lock);
+- list_for_each_entry_rcu(r, &state[i].head, list) {
+- struct multipath_dest *d;
+- list_for_each_entry_rcu(d, &r->dests, list) {
+- list_del_rcu(&d->list);
+- call_rcu(&d->rcu,
+- __multipath_free_dst);
+- }
+- list_del_rcu(&r->list);
+- call_rcu(&r->rcu,
+- __multipath_free);
+- }
+-
+- spin_unlock_bh(&state[i].lock);
+- }
+-}
+-
+-static struct ip_mp_alg_ops wrandom_ops = {
+- .mp_alg_select_route = wrandom_select_route,
+- .mp_alg_flush = wrandom_flush,
+- .mp_alg_set_nhinfo = wrandom_set_nhinfo,
+-};
+-
+-static int __init wrandom_init(void)
+-{
+- wrandom_init_state();
+-
+- return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
+-}
+-
+-static void __exit wrandom_exit(void)
+-{
+- multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
+-}
+-
+-module_init(wrandom_init);
+-module_exit(wrandom_exit);
+-MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c
+--- linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <net/sock.h>
+
+ #include <asm/uaccess.h>
+ #include <linux/mutex.h>
+@@ -773,7 +774,7 @@
+ int ret;
+ struct arpt_table *t;
+
+- t = xt_find_table_lock(NF_ARP, entries->name);
++ t = xt_find_table_lock(&init_net, NF_ARP, entries->name);
+ if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n",
+@@ -843,7 +844,7 @@
+
+ duprintf("arp_tables: Translated table\n");
+
+- t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
++ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name),
+ "arptable_%s", tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -936,7 +937,7 @@
+ goto free;
+ }
+
+- t = xt_find_table_lock(NF_ARP, tmp.name);
++ t = xt_find_table_lock(&init_net, NF_ARP, tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -971,6 +972,9 @@
+ {
+ int ret;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -995,6 +999,9 @@
+ {
+ int ret;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -1016,7 +1023,7 @@
+ }
+ name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+
+- t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
++ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name),
+ "arptable_%s", name);
+ if (t && !IS_ERR(t)) {
+ struct arpt_getinfo info;
+@@ -1116,7 +1123,7 @@
+ return ret;
+ }
+
+- ret = xt_register_table(table, &bootstrap, newinfo);
++ ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c
+--- linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -61,6 +61,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return arpt_do_table(pskb, hook, in, out, &packet_filter);
+ }
+
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <linux/mutex.h>
+ #include <net/sock.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+
+ #define IPQ_QMAX_DEFAULT 1024
+ #define IPQ_PROC_FS_NAME "ip_queue"
+@@ -556,6 +557,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ ipq_dev_drop(dev->ifindex);
+@@ -575,7 +579,7 @@
+ if (event == NETLINK_URELEASE &&
+ n->protocol == NETLINK_FIREWALL && n->pid) {
+ write_lock_bh(&queue_lock);
+- if (n->pid == peer_pid)
++ if ((n->net == &init_net) && (n->pid == peer_pid))
+ __ipq_reset();
+ write_unlock_bh(&queue_lock);
+ }
+@@ -667,14 +671,14 @@
+ struct proc_dir_entry *proc;
+
+ netlink_register_notifier(&ipq_nl_notifier);
+- ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
+- NULL, THIS_MODULE);
++ ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
++ ipq_rcv_sk, NULL, THIS_MODULE);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
++ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ if (proc)
+ proc->owner = THIS_MODULE;
+ else {
+@@ -695,8 +699,7 @@
+ cleanup_sysctl:
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+- proc_net_remove(IPQ_PROC_FS_NAME);
+-
++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+ cleanup_ipqnl:
+ sock_release(ipqnl->sk_socket);
+ mutex_lock(&ipqnl_mutex);
+@@ -715,7 +718,7 @@
+
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+- proc_net_remove(IPQ_PROC_FS_NAME);
++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+ sock_release(ipqnl->sk_socket);
+ mutex_lock(&ipqnl_mutex);
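
Two details in the ip_queue conversion are worth spelling out: kernel-side netlink sockets are now created inside a namespace, and the NETLINK_URELEASE notifier matches the peer on both namespace and pid, since the same netlink pid can now exist in more than one namespace. A sketch of the creation half, assuming the six-argument netlink_kernel_create() used above:

        #include <linux/module.h>
        #include <linux/netlink.h>
        #include <net/sock.h>
        #include <net/net_namespace.h>

        static struct sock *example_nl;

        static int __init example_init(void)
        {
                /* input callback omitted; a real user passes its rcv handler */
                example_nl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
                                                   NULL, NULL, THIS_MODULE);
                return example_nl ? 0 : -ENOMEM;
        }
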
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -1039,7 +1039,7 @@
+ }
+ #endif
+
+-static int get_info(void __user *user, int *len, int compat)
++static int get_info(struct net *net, void __user *user, int *len, int compat)
+ {
+ char name[IPT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+@@ -1059,7 +1059,7 @@
+ if (compat)
+ xt_compat_lock(AF_INET);
+ #endif
+- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
++ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (t && !IS_ERR(t)) {
+ struct ipt_getinfo info;
+@@ -1099,7 +1099,7 @@
+ }
+
+ static int
+-get_entries(struct ipt_get_entries __user *uptr, int *len)
++get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
+ {
+ int ret;
+ struct ipt_get_entries get;
+@@ -1119,7 +1119,7 @@
+ return -EINVAL;
+ }
+
+- t = xt_find_table_lock(AF_INET, get.name);
++ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n",
+@@ -1142,7 +1142,7 @@
+ }
+
+ static int
+-__do_replace(const char *name, unsigned int valid_hooks,
++__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_table_info *newinfo, unsigned int num_counters,
+ void __user *counters_ptr)
+ {
+@@ -1159,7 +1159,7 @@
+ goto out;
+ }
+
+- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
++ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -1211,7 +1211,7 @@
+ }
+
+ static int
+-do_replace(void __user *user, unsigned int len)
++do_replace(struct net *net, void __user *user, unsigned int len)
+ {
+ int ret;
+ struct ipt_replace tmp;
+@@ -1252,7 +1252,7 @@
+
+ duprintf("ip_tables: Translated table\n");
+
+- ret = __do_replace(tmp.name, tmp.valid_hooks,
++ ret = __do_replace(net, tmp.name, tmp.valid_hooks,
+ newinfo, tmp.num_counters,
+ tmp.counters);
+ if (ret)
+@@ -1289,7 +1289,7 @@
+ }
+
+ static int
+-do_add_counters(void __user *user, unsigned int len, int compat)
++do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
+ {
+ unsigned int i;
+ struct xt_counters_info tmp;
+@@ -1341,7 +1341,7 @@
+ goto free;
+ }
+
+- t = xt_find_table_lock(AF_INET, name);
++ t = xt_find_table_lock(net, AF_INET, name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -1745,7 +1745,7 @@
+ }
+
+ static int
+-compat_do_replace(void __user *user, unsigned int len)
++compat_do_replace(struct net *net, void __user *user, unsigned int len)
+ {
+ int ret;
+ struct compat_ipt_replace tmp;
+@@ -1786,7 +1786,7 @@
+
+ duprintf("compat_do_replace: Translated table\n");
+
+- ret = __do_replace(tmp.name, tmp.valid_hooks,
++ ret = __do_replace(net, tmp.name, tmp.valid_hooks,
+ newinfo, tmp.num_counters,
+ compat_ptr(tmp.counters));
+ if (ret)
+@@ -1811,11 +1811,11 @@
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+- ret = compat_do_replace(user, len);
++ ret = compat_do_replace(sk->sk_net, user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+- ret = do_add_counters(user, len, 1);
++ ret = do_add_counters(sk->sk_net, user, len, 1);
+ break;
+
+ default:
+@@ -1904,7 +1904,7 @@
+ }
+
+ static int
+-compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
++compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len)
+ {
+ int ret;
+ struct compat_ipt_get_entries get;
+@@ -1928,7 +1928,7 @@
+ }
+
+ xt_compat_lock(AF_INET);
+- t = xt_find_table_lock(AF_INET, get.name);
++ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+@@ -1966,10 +1966,10 @@
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+- ret = get_info(user, len, 1);
++ ret = get_info(sk->sk_net, user, len, 1);
+ break;
+ case IPT_SO_GET_ENTRIES:
+- ret = compat_get_entries(user, len);
++ ret = compat_get_entries(sk->sk_net, user, len);
+ break;
+ default:
+ ret = do_ipt_get_ctl(sk, cmd, user, len);
+@@ -1988,11 +1988,11 @@
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+- ret = do_replace(user, len);
++ ret = do_replace(sk->sk_net, user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+- ret = do_add_counters(user, len, 0);
++ ret = do_add_counters(sk->sk_net, user, len, 0);
+ break;
+
+ default:
+@@ -2013,11 +2013,11 @@
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+- ret = get_info(user, len, 0);
++ ret = get_info(sk->sk_net, user, len, 0);
+ break;
+
+ case IPT_SO_GET_ENTRIES:
+- ret = get_entries(user, len);
++ ret = get_entries(sk->sk_net, user, len);
+ break;
+
+ case IPT_SO_GET_REVISION_MATCH:
+@@ -2054,7 +2054,7 @@
+ return ret;
+ }
+
+-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
++int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl)
+ {
+ int ret;
+ struct xt_table_info *newinfo;
+@@ -2082,7 +2082,7 @@
+ return ret;
+ }
+
+- ret = xt_register_table(table, &bootstrap, newinfo);
++ ret = xt_register_table(net, table, &bootstrap, newinfo);
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
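
The ip_tables rework threads struct net from the sockopt boundary down through get_info(), get_entries(), __do_replace() and do_add_counters(), instead of letting xt_find_table_lock() assume a global table list: the namespace is read once from sk->sk_net and passed explicitly. A condensed sketch of the dispatch shape used above (get_info and get_entries are the static helpers from this file; the outer function name is hypothetical):

        #include <linux/netfilter_ipv4/ip_tables.h>
        #include <net/sock.h>

        static int example_get_ctl(struct sock *sk, int cmd,
                                   void __user *user, int *len)
        {
                struct net *net = sk->sk_net;   /* namespace of the caller */

                switch (cmd) {
                case IPT_SO_GET_INFO:
                        return get_info(net, user, len, 0);
                case IPT_SO_GET_ENTRIES:
                        return get_entries(net, user, len);
                default:
                        return -EINVAL;
                }
        }
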
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-12-21 15:36:15.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+
+ #define CLUSTERIP_VERSION "0.8"
+
+@@ -427,7 +428,7 @@
+ return 0;
+ }
+
+- dev = dev_get_by_name(e->ip.iniface);
++ dev = dev_get_by_name(&init_net, e->ip.iniface);
+ if (!dev) {
+ printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+ return 0;
+@@ -523,6 +524,10 @@
+ struct arp_payload *payload;
+ struct clusterip_config *c;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* we don't care about non-ethernet and non-ipv4 ARP */
+ if (arp->ar_hrd != htons(ARPHRD_ETHER)
+ || arp->ar_pro != htons(ETH_P_IP)
+@@ -735,7 +740,7 @@
+ goto cleanup_target;
+
+ #ifdef CONFIG_PROC_FS
+- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
++ clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
+ if (!clusterip_procdir) {
+ printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+ ret = -ENOMEM;
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-12-21 15:36:15.000000000 -0500
+@@ -131,6 +131,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ conntracks which were associated with that device,
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c 2007-12-21 15:36:15.000000000 -0500
+@@ -137,7 +137,7 @@
+ )
+ addr_type = RTN_LOCAL;
+
+- if (ip_route_me_harder(&nskb, addr_type))
++ if (ip_route_me_harder(&init_net, &nskb, addr_type))
+ goto free_nskb;
+
+ nskb->ip_summed = CHECKSUM_NONE;
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c 2007-12-21 15:36:15.000000000 -0500
+@@ -419,7 +419,8 @@
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+ setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+
+- nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
++ nflognl = netlink_kernel_create(&init_net,
++ NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
+ NULL, THIS_MODULE);
+ if (!nflognl)
+ return -ENOMEM;
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c 2007-12-21 15:36:15.000000000 -0500
+@@ -24,7 +24,7 @@
+
+ static inline int match_type(__be32 addr, u_int16_t mask)
+ {
+- return !!(mask & (1 << inet_addr_type(addr)));
++ return !!(mask & (1 << inet_addr_type(&init_net, addr)));
+ }
+
+ static int match(const struct sk_buff *skb,
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c
+--- linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c 2007-12-21 15:36:15.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/bitops.h>
+ #include <linux/skbuff.h>
+ #include <linux/inet.h>
++#include <net/net_namespace.h>
+
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_ipv4/ipt_recent.h>
+@@ -485,7 +486,7 @@
+ #ifdef CONFIG_PROC_FS
+ if (err)
+ return err;
+- proc_dir = proc_mkdir("ipt_recent", proc_net);
++ proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+ if (proc_dir == NULL) {
+ xt_unregister_match(&recent_match);
+ err = -ENOMEM;
+@@ -499,7 +500,7 @@
+ BUG_ON(!list_empty(&tables));
+ xt_unregister_match(&recent_match);
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("ipt_recent", proc_net);
++ remove_proc_entry("ipt_recent", init_net.proc_net);
+ #endif
+ }
+
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c
+--- linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -26,7 +26,7 @@
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+-} initial_table __initdata = {
++} initial_table = {
+ .repl = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+@@ -51,7 +51,7 @@
+ .term = IPT_ERROR_INIT, /* ERROR */
+ };
+
+-static struct xt_table packet_filter = {
++static struct xt_table ip_packet_filter_dflt = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+@@ -67,7 +67,9 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
+- return ipt_do_table(pskb, hook, in, out, &packet_filter);
++ struct net *net = (in?in:out)->nd_net;
++
++ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
+ }
+
+ static unsigned int
+@@ -77,6 +79,8 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ struct net *net = (in?in:out)->nd_net;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -86,7 +90,7 @@
+ return NF_ACCEPT;
+ }
+
+- return ipt_do_table(pskb, hook, in, out, &packet_filter);
++ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
+ }
+
+ static struct nf_hook_ops ipt_ops[] = {
+@@ -117,6 +121,30 @@
+ static int forward = NF_ACCEPT;
+ module_param(forward, bool, 0000);
+
++static int iptable_filter_net_init(struct net *net)
++{
++ /* Allocate the table */
++ net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt,
++ sizeof(*net->ip_packet_filter),
++ GFP_KERNEL);
++ if (!net->ip_packet_filter)
++ return -ENOMEM;
++
++ /* Register table */
++ return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl);
++}
++
++static void iptable_filter_net_exit(struct net *net)
++{
++ ipt_unregister_table(net->ip_packet_filter);
++ kfree(net->ip_packet_filter);
++}
++
++static struct pernet_operations iptable_filter_net_ops = {
++ .init = iptable_filter_net_init,
++ .exit = iptable_filter_net_exit,
++};
++
+ static int __init iptable_filter_init(void)
+ {
+ int ret;
+@@ -130,7 +158,7 @@
+ initial_table.entries[1].target.verdict = -forward - 1;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_filter, &initial_table.repl);
++ ret = register_pernet_subsys(&iptable_filter_net_ops);
+ if (ret < 0)
+ return ret;
+
+@@ -142,14 +170,14 @@
+ return ret;
+
+ cleanup_table:
+- ipt_unregister_table(&packet_filter);
++ unregister_pernet_subsys(&iptable_filter_net_ops);
+ return ret;
+ }
+
+ static void __exit iptable_filter_fini(void)
+ {
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+- ipt_unregister_table(&packet_filter);
++ unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
+
+ module_init(iptable_filter_init);
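
iptable_filter is the one table converted all the way to per-namespace state rather than guarded with an init_net check: the template xt_table is kmemdup()'ed for each namespace, registered through the pernet init hook, and torn down in exit, with register_pernet_subsys() replacing the direct ipt_register_table() call. Note that initial_table loses its __initdata annotation above, presumably because the pernet init hook can run after the init sections are discarded. A condensed sketch of the lifecycle (net->ip_packet_filter is a struct net field this patch set adds; ip_packet_filter_dflt and initial_table are the objects defined in this file):

        #include <linux/slab.h>
        #include <linux/string.h>
        #include <net/net_namespace.h>

        static int example_net_init(struct net *net)
        {
                /* each namespace gets a private copy of the table template */
                net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt,
                                                sizeof(*net->ip_packet_filter),
                                                GFP_KERNEL);
                if (!net->ip_packet_filter)
                        return -ENOMEM;
                return ipt_register_table(net, net->ip_packet_filter,
                                          &initial_table.repl);
        }

        static void example_net_exit(struct net *net)
        {
                ipt_unregister_table(net->ip_packet_filter);
                kfree(net->ip_packet_filter);
        }

        static struct pernet_operations example_net_ops = {
                .init = example_net_init,
                .exit = example_net_exit,
        };

Module init then calls register_pernet_subsys(&example_net_ops), which runs the init hook for every existing namespace and for each one created later; unregister_pernet_subsys() in module exit runs the exit hook symmetrically.
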
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c
+--- linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c 2007-12-21 15:36:15.000000000 -0500
+@@ -80,6 +80,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ }
+
+@@ -96,6 +100,10 @@
+ __be32 saddr, daddr;
+ u_int32_t mark;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -121,7 +129,7 @@
+ iph->daddr != daddr ||
+ (*pskb)->mark != mark ||
+ iph->tos != tos)
+- if (ip_route_me_harder(pskb, RTN_UNSPEC))
++ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+
+@@ -171,7 +179,7 @@
+ int ret;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
++ ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl);
+ if (ret < 0)
+ return ret;
+
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c
+--- linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c 2007-12-21 15:36:15.000000000 -0500
+@@ -52,6 +52,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ }
+
+@@ -96,7 +100,7 @@
+ int ret;
+
+ /* Register table */
+- ret = ipt_register_table(&packet_raw, &initial_table.repl);
++ ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl);
+ if (ret < 0)
+ return ret;
+
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -120,6 +120,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(pskb);
+ }
+@@ -135,6 +139,10 @@
+ struct nf_conn_help *help;
+ struct nf_conntrack_helper *helper;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+@@ -157,6 +165,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* Previously seen (loopback)? Ignore. Do this before
+ fragment check. */
+ if ((*pskb)->nfct)
+@@ -180,6 +192,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return nf_conntrack_in(PF_INET, hooknum, pskb);
+ }
+
+@@ -189,6 +205,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -325,6 +345,9 @@
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ NF_CT_TUPLE_U_BLANK(&tuple);
+ tuple.src.u3.ip = inet->rcv_saddr;
+ tuple.src.u.tcp.port = inet->sport;
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-12-21 15:36:15.000000000 -0500
+@@ -11,6 +11,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/percpu.h>
++#include <net/net_namespace.h>
+
+ #include <linux/netfilter.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+@@ -378,16 +379,16 @@
+ {
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+
+- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
++ proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops);
+ if (!proc)
+ goto err1;
+
+- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
++ proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440,
+ &ip_exp_file_ops);
+ if (!proc_exp)
+ goto err2;
+
+- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
++ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat);
+ if (!proc_stat)
+ goto err3;
+
+@@ -397,16 +398,16 @@
+ return 0;
+
+ err3:
+- proc_net_remove("ip_conntrack_expect");
++ proc_net_remove(&init_net, "ip_conntrack_expect");
+ err2:
+- proc_net_remove("ip_conntrack");
++ proc_net_remove(&init_net, "ip_conntrack");
+ err1:
+ return -ENOMEM;
+ }
+
+ void __exit nf_conntrack_ipv4_compat_fini(void)
+ {
+- remove_proc_entry("ip_conntrack", proc_net_stat);
+- proc_net_remove("ip_conntrack_expect");
+- proc_net_remove("ip_conntrack");
++ remove_proc_entry("ip_conntrack", init_net.proc_net_stat);
++ proc_net_remove(&init_net, "ip_conntrack_expect");
++ proc_net_remove(&init_net, "ip_conntrack");
+ }
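
The compat /proc code keeps its files pinned to the initial namespace: proc_net_fops_create() and proc_net_remove() now name the struct net whose /proc/net tree is meant, and the per-CPU statistics entry hangs off net->proc_net_stat instead of the old global proc_net_stat. The error-unwinding shape stays the same; a reduced sketch of it, with a hypothetical fops:

#include <linux/proc_fs.h>
#include <net/net_namespace.h>

extern const struct file_operations example_fops;       /* hypothetical */

static int example_proc_init(struct net *net)
{
        if (!proc_net_fops_create(net, "example", 0440, &example_fops))
                return -ENOMEM; /* nothing registered yet, nothing to undo */
        return 0;
}

static void example_proc_exit(struct net *net)
{
        proc_net_remove(net, "example");
}
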
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c 2007-12-21 15:36:12.000000000 -0500
+@@ -178,7 +178,7 @@
+ datalen = (*pskb)->len - iph->ihl*4;
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+- (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
++ (*pskb)->dev->features & NETIF_F_V4_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+@@ -265,7 +265,7 @@
+
+ if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+- (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
++ (*pskb)->dev->features & NETIF_F_V4_CSUM) {
+ (*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ (*pskb)->csum_start = skb_headroom(*pskb) +
+ skb_network_offset(*pskb) +
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c 2007-12-21 15:36:15.000000000 -0500
+@@ -98,7 +98,10 @@
+ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+ {
+ static int warned = 0;
+- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
++ struct flowi fl = {
++ .fl_net = &init_net,
++ .nl_u = { .ip4_u = { .daddr = dstip } }
++ };
+ struct rtable *rt;
+
+ if (ip_route_output_key(&rt, &fl) != 0)
+@@ -252,7 +255,7 @@
+ {
+ int ret;
+
+- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
++ ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl);
+ if (ret != 0)
+ return ret;
+ ret = xt_register_target(&ipt_snat_reg);
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c 2007-12-21 15:36:15.000000000 -0500
+@@ -83,6 +83,10 @@
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* We never see fragments: conntrack defrags on pre-routing
+ and local-out, and nf_nat_out protects post-routing. */
+ NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+@@ -172,6 +176,10 @@
+ unsigned int ret;
+ __be32 daddr = ip_hdr(*pskb)->daddr;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ daddr != ip_hdr(*pskb)->daddr) {
+@@ -194,6 +202,10 @@
+ #endif
+ unsigned int ret;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr) ||
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
+@@ -227,6 +239,10 @@
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr) ||
+ ip_hdrlen(*pskb) < sizeof(struct iphdr))
+@@ -239,7 +255,7 @@
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+- if (ip_route_me_harder(pskb, RTN_UNSPEC))
++ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ #ifdef CONFIG_XFRM
+@@ -262,6 +278,10 @@
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in ? in : out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+ DEBUGP("nf_nat_standalone: adjusting sequence number\n");
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter.c linux-2.6.22-591/net/ipv4/netfilter.c
+--- linux-2.6.22-570/net/ipv4/netfilter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/netfilter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -8,7 +8,7 @@
+ #include <net/ip.h>
+
+ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
++int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type)
+ {
+ const struct iphdr *iph = ip_hdr(*pskb);
+ struct rtable *rt;
+@@ -17,7 +17,8 @@
+ unsigned int hh_len;
+ unsigned int type;
+
+- type = inet_addr_type(iph->saddr);
++ fl.fl_net = net;
++ type = inet_addr_type(net, iph->saddr);
+ if (addr_type == RTN_UNSPEC)
+ addr_type = type;
+
+@@ -155,12 +156,13 @@
+ const struct ip_rt_info *rt_info = nf_info_reroute(info);
+
+ if (info->hook == NF_IP_LOCAL_OUT) {
++ struct net *net = (info->indev ? info->indev : info->outdev)->nd_net;
+ const struct iphdr *iph = ip_hdr(*pskb);
+
+ if (!(iph->tos == rt_info->tos
+ && iph->daddr == rt_info->daddr
+ && iph->saddr == rt_info->saddr))
+- return ip_route_me_harder(pskb, RTN_UNSPEC);
++ return ip_route_me_harder(net, pskb, RTN_UNSPEC);
+ }
+ return 0;
+ }
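
ip_route_me_harder() now takes the namespace to re-route in as its first argument. Callers that already bailed out for foreign namespaces simply pass &init_net, while the reroute path above derives the net from whichever device the queued packet carries. An illustrative caller under that convention:

/* Sketch only: dev is whichever of the hook's in/out devices is set. */
static int example_reroute(struct sk_buff **pskb,
                           const struct net_device *dev)
{
        return ip_route_me_harder(dev->nd_net, pskb, RTN_UNSPEC);
}
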
+diff -Nurb linux-2.6.22-570/net/ipv4/proc.c linux-2.6.22-591/net/ipv4/proc.c
+--- linux-2.6.22-570/net/ipv4/proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -44,6 +44,7 @@
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
+ #include <net/raw.h>
++#include <net/net_namespace.h>
+
+ static int fold_prot_inuse(struct proto *proto)
+ {
+@@ -69,8 +70,9 @@
+ seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
+ seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
+ seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+- seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
+- atomic_read(&ip_frag_mem));
++ seq_printf(seq, "FRAG: inuse %d memory %d\n",
++ init_net.ip_frag_nqueues,
++ atomic_read(&init_net.ip_frag_mem));
+ return 0;
+ }
+
+@@ -260,7 +262,8 @@
+ seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
+
+ seq_printf(seq, "\nIp: %d %d",
+- IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
++ IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
++ init_net.sysctl_ip_default_ttl);
+
+ for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ seq_printf(seq, " %lu",
+@@ -380,20 +383,20 @@
+ {
+ int rc = 0;
+
+- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
++ if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
+ goto out_netstat;
+
+- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
++ if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
+ goto out_snmp;
+
+- if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
++ if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
+ goto out_sockstat;
+ out:
+ return rc;
+ out_sockstat:
+- proc_net_remove("snmp");
++ proc_net_remove(&init_net, "snmp");
+ out_snmp:
+- proc_net_remove("netstat");
++ proc_net_remove(&init_net, "netstat");
+ out_netstat:
+ rc = -ENOMEM;
+ goto out;
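
In proc.c the global fragment counters become per-net fields read through init_net, matching the storage this series moves into struct net. An illustrative reader parameterized on the namespace rather than hard-wired to init_net:

/* Sketch of the same FRAG report for an arbitrary net. */
static void example_frag_report(struct seq_file *seq, struct net *net)
{
        seq_printf(seq, "FRAG: inuse %d memory %d\n",
                   net->ip_frag_nqueues,
                   atomic_read(&net->ip_frag_mem));
}
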
+diff -Nurb linux-2.6.22-570/net/ipv4/raw.c linux-2.6.22-591/net/ipv4/raw.c
+--- linux-2.6.22-570/net/ipv4/raw.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/raw.c 2007-12-21 15:36:15.000000000 -0500
+@@ -73,6 +73,7 @@
+ #include <net/inet_common.h>
+ #include <net/checksum.h>
+ #include <net/xfrm.h>
++#include <net/net_namespace.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+@@ -101,7 +102,7 @@
+ write_unlock_bh(&raw_v4_lock);
+ }
+
+-struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
++struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num,
+ __be32 raddr, __be32 laddr,
+ int dif, int tag)
+ {
+@@ -110,6 +111,9 @@
+ sk_for_each_from(sk, node) {
+ struct inet_sock *inet = inet_sk(sk);
+
++ if (sk->sk_net != net)
++ continue;
++
+ if (inet->num == num &&
+ !(inet->daddr && inet->daddr != raddr) &&
+ (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) &&
+@@ -152,6 +156,7 @@
+ */
+ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct sock *sk;
+ struct hlist_head *head;
+ int delivered = 0;
+@@ -160,7 +165,7 @@
+ head = &raw_v4_htable[hash];
+ if (hlist_empty(head))
+ goto out;
+- sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
++ sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
+ iph->saddr, iph->daddr,
+ skb->dev->ifindex, skb->skb_tag);
+
+@@ -173,7 +178,7 @@
+ if (clone)
+ raw_rcv(sk, clone);
+ }
+- sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
++ sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
+ iph->saddr, iph->daddr,
+ skb->dev->ifindex, skb->skb_tag);
+ }
+@@ -484,7 +489,8 @@
+ }
+
+ {
+- struct flowi fl = { .oif = ipc.oif,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = ipc.oif,
+ .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = saddr,
+@@ -574,7 +580,7 @@
+ if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
+ goto out;
+ v4_map_sock_addr(inet, addr, &nsa);
+- chk_addr_ret = inet_addr_type(nsa.saddr);
++ chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr);
+ ret = -EADDRNOTAVAIL;
+ if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
+ chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+@@ -798,6 +804,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ struct raw_iter_state {
++ struct net *net;
+ int bucket;
+ };
+
+@@ -811,11 +818,14 @@
+ for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
+ struct hlist_node *node;
+
+- sk_for_each(sk, node, &raw_v4_htable[state->bucket])
++ sk_for_each(sk, node, &raw_v4_htable[state->bucket]) {
++ if (sk->sk_net != state->net)
++ continue;
+ if (sk->sk_family == PF_INET &&
+ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ goto found;
+ }
++ }
+ sk = NULL;
+ found:
+ return sk;
+@@ -830,7 +840,7 @@
+ try_again:
+ ;
+ } while (sk && (sk->sk_family != PF_INET ||
+- !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net)));
+
+ if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
+ sk = sk_head(&raw_v4_htable[state->bucket]);
+@@ -933,6 +943,7 @@
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -940,23 +951,46 @@
+ goto out;
+ }
+
++static int raw_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct raw_iter_state *state = seq->private;
++ put_net(state->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations raw_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = raw_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = raw_seq_release,
+ };
+
+-int __init raw_proc_init(void)
++static int raw_proc_net_init(struct net *net)
+ {
+- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
++ if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
++static void raw_proc_net_exit(struct net *net)
++{
++ proc_net_remove(net, "raw");
++}
++
++static struct pernet_operations raw_proc_net_ops = {
++ .init = raw_proc_net_init,
++ .exit = raw_proc_net_exit,
++};
++
++int __init raw_proc_init(void)
++{
++ return register_pernet_subsys(&raw_proc_net_ops);
++}
++
+ void __init raw_proc_exit(void)
+ {
+- proc_net_remove("raw");
++ unregister_pernet_subsys(&raw_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
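
The raw socket iterator grows a struct net captured at open time with get_net(PROC_NET(inode)) and dropped in a dedicated release handler, so the namespace cannot disappear while /proc/net/raw is held open; every lookup and iteration step then filters on sk->sk_net. The reference pairing, reduced to its essentials (this mirrors raw_seq_release() above; PROC_NET() maps a /proc/net inode to its owning namespace in this tree):

static int example_seq_release(struct inode *inode, struct file *file)
{
        struct seq_file *seq = file->private_data;
        struct raw_iter_state *state = seq->private;

        put_net(state->net);    /* balances get_net() done at open time */
        return seq_release_private(inode, file);
}
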
+diff -Nurb linux-2.6.22-570/net/ipv4/route.c linux-2.6.22-591/net/ipv4/route.c
+--- linux-2.6.22-570/net/ipv4/route.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -101,8 +101,8 @@
+ #include <net/tcp.h>
+ #include <net/icmp.h>
+ #include <net/xfrm.h>
+-#include <net/ip_mp_alg.h>
+ #include <net/netevent.h>
++#include <net/net_namespace.h>
+ #include <net/rtnetlink.h>
+ #ifdef CONFIG_SYSCTL
+ #include <linux/sysctl.h>
+@@ -266,6 +266,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ struct rt_cache_iter_state {
++ struct net *net;
+ int bucket;
+ };
+
+@@ -334,6 +335,7 @@
+
+ static int rt_cache_seq_show(struct seq_file *seq, void *v)
+ {
++ struct rt_cache_iter_state *st = seq->private;
+ if (v == SEQ_START_TOKEN)
+ seq_printf(seq, "%-127s\n",
+ "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
+@@ -343,6 +345,9 @@
+ struct rtable *r = v;
+ char temp[256];
+
++ if (r->fl.fl_net != st->net)
++ return 0;
++
+ sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
+ "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
+ r->u.dst.dev ? r->u.dst.dev->name : "*",
+@@ -385,6 +390,7 @@
+ seq = file->private_data;
+ seq->private = s;
+ memset(s, 0, sizeof(*s));
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -392,12 +398,20 @@
+ goto out;
+ }
+
++static int rt_cache_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct rt_cache_iter_state *st = seq->private;
++ put_net(st->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations rt_cache_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = rt_cache_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = rt_cache_seq_release,
+ };
+
+
+@@ -495,13 +509,11 @@
+
+ static __inline__ void rt_free(struct rtable *rt)
+ {
+- multipath_remove(rt);
+ call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+ }
+
+ static __inline__ void rt_drop(struct rtable *rt)
+ {
+- multipath_remove(rt);
+ ip_rt_put(rt);
+ call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+ }
+@@ -565,61 +577,16 @@
+
+ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+ {
+- return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
++ return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
+ (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
+ (fl1->mark ^ fl2->mark) |
+ (*(u16 *)&fl1->nl_u.ip4_u.tos ^
+ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+ (fl1->oif ^ fl2->oif) |
+- (fl1->iif ^ fl2->iif)) == 0;
++ (fl1->iif ^ fl2->iif)) == 0) &&
++ fl1->fl_net == fl2->fl_net;
+ }
+
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
+- struct rtable *expentry,
+- int *removed_count)
+-{
+- int passedexpired = 0;
+- struct rtable **nextstep = NULL;
+- struct rtable **rthp = chain_head;
+- struct rtable *rth;
+-
+- if (removed_count)
+- *removed_count = 0;
+-
+- while ((rth = *rthp) != NULL) {
+- if (rth == expentry)
+- passedexpired = 1;
+-
+- if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
+- compare_keys(&(*rthp)->fl, &expentry->fl)) {
+- if (*rthp == expentry) {
+- *rthp = rth->u.dst.rt_next;
+- continue;
+- } else {
+- *rthp = rth->u.dst.rt_next;
+- rt_free(rth);
+- if (removed_count)
+- ++(*removed_count);
+- }
+- } else {
+- if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
+- passedexpired && !nextstep)
+- nextstep = &rth->u.dst.rt_next;
+-
+- rthp = &rth->u.dst.rt_next;
+- }
+- }
+-
+- rt_free(expentry);
+- if (removed_count)
+- ++(*removed_count);
+-
+- return nextstep;
+-}
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-
+-
+ /* This runs via a timer and thus is always in BH context. */
+ static void rt_check_expire(unsigned long dummy)
+ {
+@@ -658,23 +625,9 @@
+ }
+
+ /* Cleanup aged off entries. */
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- /* remove all related balanced entries if necessary */
+- if (rth->u.dst.flags & DST_BALANCED) {
+- rthp = rt_remove_balanced_route(
+- &rt_hash_table[i].chain,
+- rth, NULL);
+- if (!rthp)
+- break;
+- } else {
+ *rthp = rth->u.dst.rt_next;
+ rt_free(rth);
+ }
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- *rthp = rth->u.dst.rt_next;
+- rt_free(rth);
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- }
+ spin_unlock(rt_hash_lock_addr(i));
+
+ /* Fallback loop breaker. */
+@@ -721,9 +674,6 @@
+ if (delay < 0)
+ delay = ip_rt_min_delay;
+
+- /* flush existing multipath state*/
+- multipath_flush();
+-
+ spin_lock_bh(&rt_flush_lock);
+
+ if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
+@@ -842,31 +792,10 @@
+ rthp = &rth->u.dst.rt_next;
+ continue;
+ }
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- /* remove all related balanced entries
+- * if necessary
+- */
+- if (rth->u.dst.flags & DST_BALANCED) {
+- int r;
+-
+- rthp = rt_remove_balanced_route(
+- &rt_hash_table[k].chain,
+- rth,
+- &r);
+- goal -= r;
+- if (!rthp)
+- break;
+- } else {
+ *rthp = rth->u.dst.rt_next;
+ rt_free(rth);
+ goal--;
+ }
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- *rthp = rth->u.dst.rt_next;
+- rt_free(rth);
+- goal--;
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- }
+ spin_unlock_bh(rt_hash_lock_addr(k));
+ if (goal <= 0)
+ break;
+@@ -939,12 +868,7 @@
+
+ spin_lock_bh(rt_hash_lock_addr(hash));
+ while ((rth = *rthp) != NULL) {
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- if (!(rth->u.dst.flags & DST_BALANCED) &&
+- compare_keys(&rth->fl, &rt->fl)) {
+-#else
+ if (compare_keys(&rth->fl, &rt->fl)) {
+-#endif
+ /* Put it first */
+ *rthp = rth->u.dst.rt_next;
+ /*
+@@ -1055,7 +979,7 @@
+ static DEFINE_SPINLOCK(rt_peer_lock);
+ struct inet_peer *peer;
+
+- peer = inet_getpeer(rt->rt_dst, create);
++ peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create);
+
+ spin_lock_bh(&rt_peer_lock);
+ if (rt->peer == NULL) {
+@@ -1148,7 +1072,7 @@
+ if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
+ goto reject_redirect;
+ } else {
+- if (inet_addr_type(new_gw) != RTN_UNICAST)
++ if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST)
+ goto reject_redirect;
+ }
+
+@@ -1189,6 +1113,7 @@
+
+ /* Copy all the information. */
+ *rt = *rth;
++ hold_net(rt->fl.fl_net);
+ INIT_RCU_HEAD(&rt->u.dst.rcu_head);
+ rt->u.dst.__use = 1;
+ atomic_set(&rt->u.dst.__refcnt, 1);
+@@ -1407,7 +1332,7 @@
+ __be32 daddr = iph->daddr;
+ unsigned short est_mtu = 0;
+
+- if (ipv4_config.no_pmtu_disc)
++ if (init_net.sysctl_ipv4_no_pmtu_disc)
+ return 0;
+
+ for (i = 0; i < 2; i++) {
+@@ -1489,6 +1414,7 @@
+ rt->idev = NULL;
+ in_dev_put(idev);
+ }
++ release_net(rt->fl.fl_net);
+ }
+
+ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+@@ -1496,8 +1422,9 @@
+ {
+ struct rtable *rt = (struct rtable *) dst;
+ struct in_device *idev = rt->idev;
+- if (dev != &loopback_dev && idev && idev->dev == dev) {
+- struct in_device *loopback_idev = in_dev_get(&loopback_dev);
++ struct net *net = dev->nd_net;
++ if (dev != &net->loopback_dev && idev && idev->dev == dev) {
++ struct in_device *loopback_idev = in_dev_get(&net->loopback_dev);
+ if (loopback_idev) {
+ rt->idev = loopback_idev;
+ in_dev_put(idev);
+@@ -1584,7 +1511,7 @@
+ rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+
+ if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+- rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
++ rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl;
+ if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
+ rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+ if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
+@@ -1605,6 +1532,7 @@
+ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, int our)
+ {
++ struct net *net = dev->nd_net;
+ unsigned hash;
+ struct rtable *rth;
+ __be32 spec_dst;
+@@ -1638,6 +1566,7 @@
+ rth->u.dst.flags= DST_HOST;
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ rth->u.dst.flags |= DST_NOPOLICY;
++ rth->fl.fl_net = hold_net(net);
+ rth->fl.fl4_dst = daddr;
+ rth->rt_dst = daddr;
+ rth->fl.fl4_tos = tos;
+@@ -1649,7 +1578,7 @@
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &net->loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->fl.oif = 0;
+@@ -1774,14 +1703,11 @@
+
+ atomic_set(&rth->u.dst.__refcnt, 1);
+ rth->u.dst.flags= DST_HOST;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- if (res->fi->fib_nhs > 1)
+- rth->u.dst.flags |= DST_BALANCED;
+-#endif
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ rth->u.dst.flags |= DST_NOPOLICY;
+ if (IN_DEV_CONF_GET(out_dev, NOXFRM))
+ rth->u.dst.flags |= DST_NOXFRM;
++ rth->fl.fl_net = hold_net(in_dev->dev->nd_net);
+ rth->fl.fl4_dst = daddr;
+ rth->rt_dst = daddr;
+ rth->fl.fl4_tos = tos;
+@@ -1812,7 +1738,7 @@
+ return err;
+ }
+
+-static inline int ip_mkroute_input_def(struct sk_buff *skb,
++static inline int ip_mkroute_input(struct sk_buff *skb,
+ struct fib_result* res,
+ const struct flowi *fl,
+ struct in_device *in_dev,
+@@ -1837,63 +1763,6 @@
+ return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+ }
+
+-static inline int ip_mkroute_input(struct sk_buff *skb,
+- struct fib_result* res,
+- const struct flowi *fl,
+- struct in_device *in_dev,
+- __be32 daddr, __be32 saddr, u32 tos)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- struct rtable* rth = NULL, *rtres;
+- unsigned char hop, hopcount;
+- int err = -EINVAL;
+- unsigned int hash;
+-
+- if (res->fi)
+- hopcount = res->fi->fib_nhs;
+- else
+- hopcount = 1;
+-
+- /* distinguish between multipath and singlepath */
+- if (hopcount < 2)
+- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
+- saddr, tos);
+-
+- /* add all alternatives to the routing cache */
+- for (hop = 0; hop < hopcount; hop++) {
+- res->nh_sel = hop;
+-
+- /* put reference to previous result */
+- if (hop)
+- ip_rt_put(rtres);
+-
+- /* create a routing cache entry */
+- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
+- &rth);
+- if (err)
+- return err;
+-
+- /* put it into the cache */
+- hash = rt_hash(daddr, saddr, fl->iif);
+- err = rt_intern_hash(hash, rth, &rtres);
+- if (err)
+- return err;
+-
+- /* forward hop information to multipath impl. */
+- multipath_set_nhinfo(rth,
+- FIB_RES_NETWORK(*res),
+- FIB_RES_NETMASK(*res),
+- res->prefixlen,
+- &FIB_RES_NH(*res));
+- }
+- skb->dst = &rtres->u.dst;
+- return err;
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-}
+-
+-
+ /*
+ * NOTE. We drop all the packets that has local source
+ * addresses, because every properly looped back packet
+@@ -1907,9 +1776,11 @@
+ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev)
+ {
++ struct net *net = dev->nd_net;
+ struct fib_result res;
+ struct in_device *in_dev = in_dev_get(dev);
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = net,
++ .nl_u = { .ip4_u =
+ { .daddr = daddr,
+ .saddr = saddr,
+ .tos = tos,
+@@ -1967,7 +1838,7 @@
+ if (res.type == RTN_LOCAL) {
+ int result;
+ result = fib_validate_source(saddr, daddr, tos,
+- loopback_dev.ifindex,
++ net->loopback_dev.ifindex,
+ dev, &spec_dst, &itag);
+ if (result < 0)
+ goto martian_source;
+@@ -2023,6 +1894,7 @@
+ rth->u.dst.flags= DST_HOST;
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ rth->u.dst.flags |= DST_NOPOLICY;
++ rth->fl.fl_net = hold_net(net);
+ rth->fl.fl4_dst = daddr;
+ rth->rt_dst = daddr;
+ rth->fl.fl4_tos = tos;
+@@ -2034,7 +1906,7 @@
+ #endif
+ rth->rt_iif =
+ rth->fl.iif = dev->ifindex;
+- rth->u.dst.dev = &loopback_dev;
++ rth->u.dst.dev = &net->loopback_dev;
+ dev_hold(rth->u.dst.dev);
+ rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->rt_gateway = daddr;
+@@ -2092,6 +1964,7 @@
+ struct rtable * rth;
+ unsigned hash;
+ int iif = dev->ifindex;
++ struct net *net = dev->nd_net;
+
+ tos &= IPTOS_RT_MASK;
+ hash = rt_hash(daddr, saddr, iif);
+@@ -2104,7 +1977,8 @@
+ rth->fl.iif == iif &&
+ rth->fl.oif == 0 &&
+ rth->fl.mark == skb->mark &&
+- rth->fl.fl4_tos == tos) {
++ rth->fl.fl4_tos == tos &&
++ rth->fl.fl_net == net) {
+ rth->u.dst.lastuse = jiffies;
+ dst_hold(&rth->u.dst);
+ rth->u.dst.__use++;
+@@ -2211,18 +2085,12 @@
+
+ atomic_set(&rth->u.dst.__refcnt, 1);
+ rth->u.dst.flags= DST_HOST;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- if (res->fi) {
+- rth->rt_multipath_alg = res->fi->fib_mp_alg;
+- if (res->fi->fib_nhs > 1)
+- rth->u.dst.flags |= DST_BALANCED;
+- }
+-#endif
+ if (IN_DEV_CONF_GET(in_dev, NOXFRM))
+ rth->u.dst.flags |= DST_NOXFRM;
+ if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ rth->u.dst.flags |= DST_NOPOLICY;
+
++ rth->fl.fl_net = hold_net(oldflp->fl_net);
+ rth->fl.fl4_dst = oldflp->fl4_dst;
+ rth->fl.fl4_tos = tos;
+ rth->fl.fl4_src = oldflp->fl4_src;
+@@ -2277,7 +2145,7 @@
+ return err;
+ }
+
+-static inline int ip_mkroute_output_def(struct rtable **rp,
++static inline int ip_mkroute_output(struct rtable **rp,
+ struct fib_result* res,
+ const struct flowi *fl,
+ const struct flowi *oldflp,
+@@ -2295,68 +2163,6 @@
+ return err;
+ }
+
+-static inline int ip_mkroute_output(struct rtable** rp,
+- struct fib_result* res,
+- const struct flowi *fl,
+- const struct flowi *oldflp,
+- struct net_device *dev_out,
+- unsigned flags)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- unsigned char hop;
+- unsigned hash;
+- int err = -EINVAL;
+- struct rtable *rth = NULL;
+-
+- if (res->fi && res->fi->fib_nhs > 1) {
+- unsigned char hopcount = res->fi->fib_nhs;
+-
+- for (hop = 0; hop < hopcount; hop++) {
+- struct net_device *dev2nexthop;
+-
+- res->nh_sel = hop;
+-
+- /* hold a work reference to the output device */
+- dev2nexthop = FIB_RES_DEV(*res);
+- dev_hold(dev2nexthop);
+-
+- /* put reference to previous result */
+- if (hop)
+- ip_rt_put(*rp);
+-
+- err = __mkroute_output(&rth, res, fl, oldflp,
+- dev2nexthop, flags);
+-
+- if (err != 0)
+- goto cleanup;
+-
+- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
+- oldflp->oif);
+- err = rt_intern_hash(hash, rth, rp);
+-
+- /* forward hop information to multipath impl. */
+- multipath_set_nhinfo(rth,
+- FIB_RES_NETWORK(*res),
+- FIB_RES_NETMASK(*res),
+- res->prefixlen,
+- &FIB_RES_NH(*res));
+- cleanup:
+- /* release work reference to output device */
+- dev_put(dev2nexthop);
+-
+- if (err != 0)
+- return err;
+- }
+- return err;
+- } else {
+- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
+- flags);
+- }
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
+-#endif
+-}
+-
+ /*
+ * Major route resolver routine.
+ */
+@@ -2364,7 +2170,9 @@
+ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+ {
+ u32 tos = RT_FL_TOS(oldflp);
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct net *net = oldflp->fl_net;
++ struct flowi fl = { .fl_net = net,
++ .nl_u = { .ip4_u =
+ { .daddr = oldflp->fl4_dst,
+ .saddr = oldflp->fl4_src,
+ .tos = tos & IPTOS_RT_MASK,
+@@ -2373,7 +2181,7 @@
+ RT_SCOPE_UNIVERSE),
+ } },
+ .mark = oldflp->mark,
+- .iif = loopback_dev.ifindex,
++ .iif = net->loopback_dev.ifindex,
+ .oif = oldflp->oif };
+ struct fib_result res;
+ unsigned flags = 0;
+@@ -2395,7 +2203,7 @@
+ goto out;
+
+ /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+- dev_out = ip_dev_find(oldflp->fl4_src);
++ dev_out = ip_dev_find(net, oldflp->fl4_src);
+ if (dev_out == NULL)
+ goto out;
+
+@@ -2434,7 +2242,7 @@
+
+
+ if (oldflp->oif) {
+- dev_out = dev_get_by_index(oldflp->oif);
++ dev_out = dev_get_by_index(net, oldflp->oif);
+ err = -ENODEV;
+ if (dev_out == NULL)
+ goto out;
+@@ -2467,9 +2275,9 @@
+ fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &net->loopback_dev;
+ dev_hold(dev_out);
+- fl.oif = loopback_dev.ifindex;
++ fl.oif = net->loopback_dev.ifindex;
+ res.type = RTN_LOCAL;
+ flags |= RTCF_LOCAL;
+ goto make_route;
+@@ -2514,7 +2322,7 @@
+ fl.fl4_src = fl.fl4_dst;
+ if (dev_out)
+ dev_put(dev_out);
+- dev_out = &loopback_dev;
++ dev_out = &net->loopback_dev;
+ dev_hold(dev_out);
+ fl.oif = dev_out->ifindex;
+ if (res.fi)
+@@ -2568,19 +2376,9 @@
+ rth->fl.iif == 0 &&
+ rth->fl.oif == flp->oif &&
+ rth->fl.mark == flp->mark &&
++ rth->fl.fl_net == flp->fl_net &&
+ !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ (IPTOS_RT_MASK | RTO_ONLINK))) {
+-
+- /* check for multipath routes and choose one if
+- * necessary
+- */
+- if (multipath_select_route(flp, rth, rp)) {
+- dst_hold(&(*rp)->u.dst);
+- RT_CACHE_STAT_INC(out_hit);
+- rcu_read_unlock_bh();
+- return 0;
+- }
+-
+ rth->u.dst.lastuse = jiffies;
+ dst_hold(&rth->u.dst);
+ rth->u.dst.__use++;
+@@ -2729,10 +2527,6 @@
+ if (rt->u.dst.tclassid)
+ NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+ #endif
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+- if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+- NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
+-#endif
+ if (rt->fl.iif)
+ NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
+ else if (rt->rt_src != rt->fl.fl4_src)
+@@ -2759,7 +2553,7 @@
+ __be32 dst = rt->rt_dst;
+
+ if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+- IPV4_DEVCONF_ALL(MC_FORWARDING)) {
++ IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
+ int err = ipmr_get_route(skb, r, nowait);
+ if (err <= 0) {
+ if (!nowait) {
+@@ -2790,6 +2584,7 @@
+
+ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ struct rtmsg *rtm;
+ struct nlattr *tb[RTA_MAX+1];
+ struct rtable *rt = NULL;
+@@ -2828,7 +2623,7 @@
+ if (iif) {
+ struct net_device *dev;
+
+- dev = __dev_get_by_index(iif);
++ dev = __dev_get_by_index(net, iif);
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto errout_free;
+@@ -2845,6 +2640,7 @@
+ err = -rt->u.dst.error;
+ } else {
+ struct flowi fl = {
++ .fl_net = net,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dst,
+@@ -2869,7 +2665,7 @@
+ if (err <= 0)
+ goto errout_free;
+
+- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ errout:
+ return err;
+
+@@ -3182,6 +2978,48 @@
+ }
+ __setup("rhash_entries=", set_rhash_entries);
+
++
++static void ip_rt_net_exit(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++# ifdef CONFIG_NET_CLS_ROUTE
++ proc_net_remove(net, "rt_acct");
++# endif
++ remove_proc_entry("rt_cache", net->proc_net_stat);
++ proc_net_remove(net, "rt_cache");
++#endif
++ rt_run_flush(0);
++}
++
++static int ip_rt_net_init(struct net *net)
++{
++ int error = -ENOMEM;
++#ifdef CONFIG_PROC_FS
++ struct proc_dir_entry *rtstat_pde;
++ if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops))
++ goto out;
++ if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
++ net->proc_net_stat)))
++ goto out;
++ rtstat_pde->proc_fops = &rt_cpu_seq_fops;
++# ifdef CONFIG_NET_CLS_ROUTE
++ if (!create_proc_read_entry("rt_acct", 0, net->proc_net,
++ ip_rt_acct_read, NULL))
++ goto out;
++# endif
++#endif
++ error = 0;
++out:
++ if (error)
++ ip_rt_net_exit(net);
++ return error;
++}
++
++struct pernet_operations ip_rt_net_ops = {
++ .init = ip_rt_net_init,
++ .exit = ip_rt_net_exit,
++};
++
+ int __init ip_rt_init(void)
+ {
+ int rc = 0;
+@@ -3245,20 +3083,7 @@
+ ip_rt_secret_interval;
+ add_timer(&rt_secret_timer);
+
+-#ifdef CONFIG_PROC_FS
+- {
+- struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
+- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
+- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
+- proc_net_stat))) {
+- return -ENOMEM;
+- }
+- rtstat_pde->proc_fops = &rt_cpu_seq_fops;
+- }
+-#ifdef CONFIG_NET_CLS_ROUTE
+- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+-#endif
+-#endif
++ register_pernet_subsys(&ip_rt_net_ops);
+ #ifdef CONFIG_XFRM
+ xfrm_init();
+ xfrm4_init();
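
Throughout route.c the namespace becomes part of the routing key: struct flowi gains an fl_net member, compare_keys() matches on it, cached rtables pin it with hold_net()/release_net(), and the pre-namespace multipath-cache code is removed outright. Every route lookup must therefore fill in fl_net; an illustrative output lookup, assuming the fl_net member this patch adds:

static int example_route_output(struct net *net, __be32 daddr,
                                struct rtable **rtp)
{
        struct flowi fl = {
                .fl_net = net,
                .nl_u = { .ip4_u = { .daddr = daddr } },
        };

        return ip_route_output_key(rtp, &fl);
}
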
+diff -Nurb linux-2.6.22-570/net/ipv4/syncookies.c linux-2.6.22-591/net/ipv4/syncookies.c
+--- linux-2.6.22-570/net/ipv4/syncookies.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/syncookies.c 2007-12-21 15:36:15.000000000 -0500
+@@ -253,7 +253,8 @@
+ * no easy way to do this.
+ */
+ {
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = &init_net,
++ .nl_u = { .ip4_u =
+ { .daddr = ((opt && opt->srr) ?
+ opt->faddr :
+ ireq->rmt_addr),
+diff -Nurb linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c
+--- linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:02.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -29,21 +29,21 @@
+ static int ip_local_port_range_max[] = { 65535, 65535 };
+ #endif
+
+-struct ipv4_config ipv4_config;
+-
+ #ifdef CONFIG_SYSCTL
+
+ static
+ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+- int val = IPV4_DEVCONF_ALL(FORWARDING);
++ struct net *net = ctl->extra2;
++ int *valp = ctl->data;
++ int old = *valp;
+ int ret;
+
+ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+- if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
+- inet_forward_change();
++ if (write && *valp != old)
++ inet_forward_change(net);
+
+ return ret;
+ }
+@@ -53,6 +53,7 @@
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+ {
++ struct net *net = table->extra2;
+ int *valp = table->data;
+ int new;
+
+@@ -85,7 +86,7 @@
+ }
+
+ *valp = new;
+- inet_forward_change();
++ inet_forward_change(net);
+ return 1;
+ }
+
+@@ -188,22 +189,6 @@
+
+ ctl_table ipv4_table[] = {
+ {
+- .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
+- .procname = "tcp_timestamps",
+- .data = &sysctl_tcp_timestamps,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_TCP_WINDOW_SCALING,
+- .procname = "tcp_window_scaling",
+- .data = &sysctl_tcp_window_scaling,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+ .ctl_name = NET_IPV4_TCP_SACK,
+ .procname = "tcp_sack",
+ .data = &sysctl_tcp_sack,
+@@ -220,40 +205,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_FORWARD,
+- .procname = "ip_forward",
+- .data = &IPV4_DEVCONF_ALL(FORWARDING),
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &ipv4_sysctl_forward,
+- .strategy = &ipv4_sysctl_forward_strategy
+- },
+- {
+- .ctl_name = NET_IPV4_DEFAULT_TTL,
+- .procname = "ip_default_ttl",
+- .data = &sysctl_ip_default_ttl,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &ipv4_doint_and_flush,
+- .strategy = &ipv4_doint_and_flush_strategy,
+- },
+- {
+- .ctl_name = NET_IPV4_NO_PMTU_DISC,
+- .procname = "ip_no_pmtu_disc",
+- .data = &ipv4_config.no_pmtu_disc,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_NONLOCAL_BIND,
+- .procname = "ip_nonlocal_bind",
+- .data = &sysctl_ip_nonlocal_bind,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+ .ctl_name = NET_IPV4_TCP_SYN_RETRIES,
+ .procname = "tcp_syn_retries",
+ .data = &sysctl_tcp_syn_retries,
+@@ -286,39 +237,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
+- .procname = "ipfrag_high_thresh",
+- .data = &sysctl_ipfrag_high_thresh,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
+- .procname = "ipfrag_low_thresh",
+- .data = &sysctl_ipfrag_low_thresh,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_DYNADDR,
+- .procname = "ip_dynaddr",
+- .data = &sysctl_ip_dynaddr,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_IPFRAG_TIME,
+- .procname = "ipfrag_time",
+- .data = &sysctl_ipfrag_time,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+ .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
+ .procname = "tcp_keepalive_time",
+ .data = &sysctl_tcp_keepalive_time,
+@@ -422,17 +340,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
+- .procname = "ip_local_port_range",
+- .data = &sysctl_local_port_range,
+- .maxlen = sizeof(sysctl_local_port_range),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .strategy = &sysctl_intvec,
+- .extra1 = ip_local_port_range_min,
+- .extra2 = ip_local_port_range_max
+- },
+- {
+ .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
+ .procname = "icmp_echo_ignore_all",
+ .data = &sysctl_icmp_echo_ignore_all,
+@@ -534,50 +441,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
+- .procname = "inet_peer_threshold",
+- .data = &inet_peer_threshold,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_MINTTL,
+- .procname = "inet_peer_minttl",
+- .data = &inet_peer_minttl,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
+- .procname = "inet_peer_maxttl",
+- .data = &inet_peer_maxttl,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME,
+- .procname = "inet_peer_gc_mintime",
+- .data = &inet_peer_gc_mintime,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME,
+- .procname = "inet_peer_gc_maxtime",
+- .data = &inet_peer_gc_maxtime,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+ .ctl_name = NET_TCP_ORPHAN_RETRIES,
+ .procname = "tcp_orphan_retries",
+ .data = &sysctl_tcp_orphan_retries,
+@@ -706,24 +569,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
+- .procname = "ipfrag_secret_interval",
+- .data = &sysctl_ipfrag_secret_interval,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
+- .procname = "ipfrag_max_dist",
+- .data = &sysctl_ipfrag_max_dist,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .extra1 = &zero
+- },
+- {
+ .ctl_name = NET_TCP_NO_METRICS_SAVE,
+ .procname = "tcp_no_metrics_save",
+ .data = &sysctl_tcp_nometrics_save,
+@@ -865,6 +710,170 @@
+ { .ctl_name = 0 }
+ };
+
+-#endif /* CONFIG_SYSCTL */
++struct ctl_table multi_ipv4_table[] = {
++ {
++ /* .data is filled in by devinet_net_init.
++ * As a consequence this table entry must be the first
++ * entry in multi_ipv4_table.
++ */
++ .ctl_name = NET_IPV4_FORWARD,
++ .procname = "ip_forward",
++ .data = NULL,
++ .extra2 = &init_net,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_sysctl_forward,
++ .strategy = &ipv4_sysctl_forward_strategy
++ },
++ {
++ .ctl_name = NET_IPV4_DEFAULT_TTL,
++ .procname = "ip_default_ttl",
++ .data = &init_net.sysctl_ip_default_ttl,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_doint_and_flush,
++ .strategy = &ipv4_doint_and_flush_strategy,
++ },
++ {
++ .ctl_name = NET_IPV4_NO_PMTU_DISC,
++ .procname = "ip_no_pmtu_disc",
++ .data = &init_net.sysctl_ipv4_no_pmtu_disc,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_NONLOCAL_BIND,
++ .procname = "ip_nonlocal_bind",
++ .data = &init_net.sysctl_ip_nonlocal_bind,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
++ .procname = "ip_local_port_range",
++ .data = &init_net.sysctl_local_port_range,
++ .maxlen = sizeof(init_net.sysctl_local_port_range),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .strategy = &sysctl_intvec,
++ .extra1 = ip_local_port_range_min,
++ .extra2 = ip_local_port_range_max
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
++ .procname = "ipfrag_high_thresh",
++ .data = &init_net.sysctl_ipfrag_high_thresh,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
++ .procname = "ipfrag_low_thresh",
++ .data = &init_net.sysctl_ipfrag_low_thresh,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_TIME,
++ .procname = "ipfrag_time",
++ .data = &init_net.sysctl_ipfrag_time,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
++ .procname = "ipfrag_secret_interval",
++ .data = &init_net.sysctl_ipfrag_secret_interval,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
++ .procname = "ipfrag_max_dist",
++ .data = &init_net.sysctl_ipfrag_max_dist,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = &zero
++ },
++ {
++ .ctl_name = NET_IPV4_DYNADDR,
++ .procname = "ip_dynaddr",
++ .data = &init_net.sysctl_ip_dynaddr,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
++ .procname = "inet_peer_threshold",
++ .data = &init_net.inet_peer_threshold,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_MINTTL,
++ .procname = "inet_peer_minttl",
++ .data = &init_net.inet_peer_minttl,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
++ .procname = "inet_peer_maxttl",
++ .data = &init_net.inet_peer_maxttl,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME,
++ .procname = "inet_peer_gc_mintime",
++ .data = &init_net.inet_peer_gc_mintime,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME,
++ .procname = "inet_peer_gc_maxtime",
++ .data = &init_net.inet_peer_gc_maxtime,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
++ .procname = "tcp_timestamps",
++ .data = &init_net.sysctl_tcp_timestamps,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++
++ },
++ {
++ .ctl_name = NET_IPV4_TCP_WINDOW_SCALING,
++ .procname = "tcp_window_scaling",
++ .data = &init_net.sysctl_tcp_window_scaling,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {}
++};
+
+-EXPORT_SYMBOL(ipv4_config);
++#endif /* CONFIG_SYSCTL */
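
The sysctl rework drops the old globals (ipv4_config and friends) from ipv4_table and re-homes them in multi_ipv4_table, whose .data pointers reference fields of struct net; for now every entry points into init_net, and the ip_forward entry's .data is left NULL to be filled in by devinet_net_init, which is why its position as the first entry is load-bearing. The shape of one such entry, using a field the patch itself adds (the table is expected to be cloned per namespace with .data rewritten for each net):

static struct ctl_table example_ipv4_table[] = {
        {
                .ctl_name       = NET_IPV4_DEFAULT_TTL,
                .procname       = "ip_default_ttl",
                .data           = &init_net.sysctl_ip_default_ttl,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
        {}
};
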
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp.c linux-2.6.22-591/net/ipv4/tcp.c
+--- linux-2.6.22-570/net/ipv4/tcp.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/tcp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -2409,6 +2409,23 @@
+ }
+ __setup("thash_entries=", set_thash_entries);
+
++static int tcp_net_init(struct net *net)
++{
++ /*
++ * This array holds the first and last local port number.
++ */
++ net->sysctl_local_port_range[0] = 32768;
++ net->sysctl_local_port_range[1] = 61000;
++
++ net->sysctl_tcp_timestamps = 1;
++ net->sysctl_tcp_window_scaling = 1;
++ return 0;
++}
++
++static struct pernet_operations tcp_net_ops = {
++ .init = tcp_net_init,
++};
++
+ void __init tcp_init(void)
+ {
+ struct sk_buff *skb = NULL;
+@@ -2502,6 +2519,8 @@
+ sysctl_tcp_rmem[1] = 87380;
+ sysctl_tcp_rmem[2] = max(87380, max_share);
+
++ register_pernet_subsys(&tcp_net_ops);
++
+ printk(KERN_INFO "TCP: Hash tables configured "
+ "(established %d bind %d)\n",
+ tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size);
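
tcp_net_init() seeds each new namespace with the defaults the old globals carried: a 32768..61000 ephemeral port range and timestamps/window scaling enabled. No .exit is supplied, since the fields live inside struct net itself and vanish with it. Consumers read them through the namespace; a sketch (the helper name is hypothetical):

/* Per-net replacement for the old global port-range lookup. */
static void example_port_range(struct net *net, int *low, int *high)
{
        *low  = net->sysctl_local_port_range[0];        /* 32768 by default */
        *high = net->sysctl_local_port_range[1];        /* 61000 by default */
}
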
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_input.c linux-2.6.22-591/net/ipv4/tcp_input.c
+--- linux-2.6.22-570/net/ipv4/tcp_input.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/tcp_input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -72,8 +72,6 @@
+ #include <asm/unaligned.h>
+ #include <net/netdma.h>
+
+-int sysctl_tcp_timestamps __read_mostly = 1;
+-int sysctl_tcp_window_scaling __read_mostly = 1;
+ int sysctl_tcp_sack __read_mostly = 1;
+ int sysctl_tcp_fack __read_mostly = 1;
+ int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+@@ -2922,7 +2920,7 @@
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
+- if (sysctl_tcp_window_scaling) {
++ if (init_net.sysctl_tcp_window_scaling) {
+ __u8 snd_wscale = *(__u8 *) ptr;
+ opt_rx->wscale_ok = 1;
+ if (snd_wscale > 14) {
+@@ -2938,7 +2936,7 @@
+ case TCPOPT_TIMESTAMP:
+ if (opsize==TCPOLEN_TIMESTAMP) {
+ if ((estab && opt_rx->tstamp_ok) ||
+- (!estab && sysctl_tcp_timestamps)) {
++ (!estab && init_net.sysctl_tcp_timestamps)) {
+ opt_rx->saw_tstamp = 1;
+ opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+ opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-591/net/ipv4/tcp_ipv4.c
+--- linux-2.6.22-570/net/ipv4/tcp_ipv4.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/tcp_ipv4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -71,6 +71,7 @@
+ #include <net/timewait_sock.h>
+ #include <net/xfrm.h>
+ #include <net/netdma.h>
++#include <net/net_namespace.h>
+
+ #include <linux/inet.h>
+ #include <linux/ipv6.h>
+@@ -353,6 +354,7 @@
+
+ void tcp_v4_err(struct sk_buff *skb, u32 info)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct iphdr *iph = (struct iphdr *)skb->data;
+ struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+ struct tcp_sock *tp;
+@@ -369,7 +371,7 @@
+ }
+
+ sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
+- th->source, inet_iif(skb));
++ th->source, inet_iif(skb), net);
+ if (!sk) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
+@@ -1499,7 +1501,8 @@
+ return tcp_check_req(sk, skb, req, prev);
+
+ nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+- iph->daddr, th->dest, inet_iif(skb));
++ iph->daddr, th->dest, inet_iif(skb),
++ sk->sk_net);
+
+ if (nsk) {
+ if (nsk->sk_state != TCP_TIME_WAIT) {
+@@ -1618,6 +1621,7 @@
+
+ int tcp_v4_rcv(struct sk_buff *skb)
+ {
++ struct net *net = skb->dev->nd_net;
+ const struct iphdr *iph;
+ struct tcphdr *th;
+ struct sock *sk;
+@@ -1657,7 +1661,7 @@
+ TCP_SKB_CB(skb)->sacked = 0;
+
+ sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+- iph->daddr, th->dest, inet_iif(skb));
++ iph->daddr, th->dest, inet_iif(skb), net);
+ if (!sk)
+ goto no_tcp_socket;
+
+@@ -1732,7 +1736,7 @@
+ case TCP_TW_SYN: {
+ struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+ iph->daddr, th->dest,
+- inet_iif(skb));
++ inet_iif(skb), net);
+ if (sk2) {
+ inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_put(inet_twsk(sk));
+@@ -1766,7 +1770,7 @@
+ int release_it = 0;
+
+ if (!rt || rt->rt_dst != inet->daddr) {
+- peer = inet_getpeer(inet->daddr, 1);
++ peer = inet_getpeer(sk->sk_net, inet->daddr, 1);
+ release_it = 1;
+ } else {
+ if (!rt->peer)
+@@ -1791,7 +1795,7 @@
+
+ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+ {
+- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
++ struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1);
+
+ if (peer) {
+ const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+@@ -1980,7 +1984,8 @@
+ if (req->sk &&
+ !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ continue;
+- if (req->rsk_ops->family == st->family) {
++ if ((req->rsk_ops->family == st->family) &&
++ (req->sk->sk_net == st->net)) {
+ cur = req;
+ goto out;
+ }
+@@ -2004,6 +2009,8 @@
+ }
+ get_sk:
+ sk_for_each_from(sk, node) {
++ if (sk->sk_net != st->net)
++ continue;
+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
+ sk, sk->sk_nid, nx_current_nid());
+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+@@ -2054,11 +2061,10 @@
+ struct hlist_node *node;
+ struct inet_timewait_sock *tw;
+
+- /* We can reschedule _before_ having picked the target: */
+- cond_resched_softirq();
+-
+- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
++ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
++ if (sk->sk_net != st->net)
++ continue;
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egf: %p [#%d] (from %d)",
+ sk, sk->sk_nid, nx_current_nid());
+@@ -2072,6 +2078,8 @@
+ st->state = TCP_SEQ_STATE_TIME_WAIT;
+ inet_twsk_for_each(tw, node,
+ &tcp_hashinfo.ehash[st->bucket].twchain) {
++ if (tw->tw_net != st->net)
++ continue;
+ vxdprintk(VXD_CBIT(net, 6),
+ "tw: %p [#%d] (from %d)",
+ tw, tw->tw_nid, nx_current_nid());
+@@ -2082,7 +2090,7 @@
+ rc = tw;
+ goto out;
+ }
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
++ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ }
+ out:
+@@ -2102,7 +2110,8 @@
+ tw = cur;
+ tw = tw_next(tw);
+ get_tw:
+- while (tw && (tw->tw_family != st->family ||
++ while (tw && ((tw->tw_net != st->net) ||
++ (tw->tw_family != st->family) ||
+ !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
+ tw = tw_next(tw);
+ }
+@@ -2110,14 +2119,11 @@
+ cur = tw;
+ goto out;
+ }
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
++ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+
+- /* We can reschedule between buckets: */
+- cond_resched_softirq();
+-
+ if (++st->bucket < tcp_hashinfo.ehash_size) {
+- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
++ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
+ } else {
+ cur = NULL;
+@@ -2130,6 +2136,8 @@
+ vxdprintk(VXD_CBIT(net, 6),
+ "sk,egn: %p [#%d] (from %d)",
+ sk, sk->sk_nid, nx_current_nid());
++ if (sk->sk_net != st->net)
++ continue;
+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ continue;
+ if (sk->sk_family == st->family)
+@@ -2167,7 +2175,6 @@
+
+ if (!rc) {
+ inet_listen_unlock(&tcp_hashinfo);
+- local_bh_disable();
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ rc = established_get_idx(seq, pos);
+ }
+@@ -2200,7 +2207,6 @@
+ rc = listening_get_next(seq, v);
+ if (!rc) {
+ inet_listen_unlock(&tcp_hashinfo);
+- local_bh_disable();
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ rc = established_get_first(seq);
+ }
+@@ -2232,8 +2238,7 @@
+ case TCP_SEQ_STATE_TIME_WAIT:
+ case TCP_SEQ_STATE_ESTABLISHED:
+ if (v)
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+- local_bh_enable();
++ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ break;
+ }
+ }
+@@ -2262,6 +2267,7 @@
+ goto out_kfree;
+ seq = file->private_data;
+ seq->private = s;
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -2269,20 +2275,30 @@
+ goto out;
+ }
+
+-int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
++static int tcp_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct tcp_iter_state *st = seq->private;
++ put_net(st->net);
++ return seq_release_private(inode, file);
++}
++
++int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
+ {
+ int rc = 0;
+ struct proc_dir_entry *p;
+
+ if (!afinfo)
+ return -EINVAL;
++ if (net == &init_net) {
+ afinfo->seq_fops->owner = afinfo->owner;
+ afinfo->seq_fops->open = tcp_seq_open;
+ afinfo->seq_fops->read = seq_read;
+ afinfo->seq_fops->llseek = seq_lseek;
+- afinfo->seq_fops->release = seq_release_private;
++ afinfo->seq_fops->release = tcp_seq_release;
++ }
+
+- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ if (p)
+ p->data = afinfo;
+ else
+@@ -2290,11 +2306,12 @@
+ return rc;
+ }
+
+-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
++void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
+ {
+ if (!afinfo)
+ return;
+- proc_net_remove(afinfo->name);
++ proc_net_remove(net, afinfo->name);
++ if (net == &init_net)
+ memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+
+@@ -2439,14 +2456,29 @@
+ .seq_fops = &tcp4_seq_fops,
+ };
+
++static int tcp4_proc_net_init(struct net *net)
++{
++ return tcp_proc_register(net, &tcp4_seq_afinfo);
++}
++
++static void tcp4_proc_net_exit(struct net *net)
++{
++ tcp_proc_unregister(net, &tcp4_seq_afinfo);
++}
++
++static struct pernet_operations tcp4_proc_net_ops = {
++ .init = tcp4_proc_net_init,
++ .exit = tcp4_proc_net_exit,
++};
++
+ int __init tcp4_proc_init(void)
+ {
+- return tcp_proc_register(&tcp4_seq_afinfo);
++ return register_pernet_subsys(&tcp4_proc_net_ops);
+ }
+
+ void tcp4_proc_exit(void)
+ {
+- tcp_proc_unregister(&tcp4_seq_afinfo);
++ unregister_pernet_subsys(&tcp4_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
+
+@@ -2508,6 +2540,5 @@
+ EXPORT_SYMBOL(tcp_proc_register);
+ EXPORT_SYMBOL(tcp_proc_unregister);
+ #endif
+-EXPORT_SYMBOL(sysctl_local_port_range);
+ EXPORT_SYMBOL(sysctl_tcp_low_latency);
+
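
tcp_proc_register()/tcp_proc_unregister() now take the target namespace, and the afinfo's static file_operations is initialized only when called for init_net, because every namespace reuses that one shared structure and it must be filled in (and later zeroed) exactly once. The same once-only guard in isolation:

/* Sketch of the shared-fops setup used above; afinfo->seq_fops is
 * common to every namespace's copy of the /proc file. */
static int example_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
        if (net == &init_net)   /* one-time setup of the shared fops */
                afinfo->seq_fops->release = tcp_seq_release;

        return proc_net_fops_create(net, afinfo->name, S_IRUGO,
                                    afinfo->seq_fops) ? 0 : -ENOMEM;
}
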
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig
+--- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,2483 +0,0 @@
+-/*
+- * INET An implementation of the TCP/IP protocol suite for the LINUX
+- * operating system. INET is implemented using the BSD Socket
+- * interface as the means of communication with the user level.
+- *
+- * Implementation of the Transmission Control Protocol (TCP).
+- *
+- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
+- *
+- * IPv4 specific functions
+- *
+- *
+- * code split from:
+- * linux/ipv4/tcp.c
+- * linux/ipv4/tcp_input.c
+- * linux/ipv4/tcp_output.c
+- *
+- * See tcp.c for author information
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-/*
+- * Changes:
+- * David S. Miller : New socket lookup architecture.
+- * This code is dedicated to John Dyson.
+- * David S. Miller : Change semantics of established hash,
+- * half is devoted to TIME_WAIT sockets
+- * and the rest go in the other half.
+- * Andi Kleen : Add support for syncookies and fixed
+- * some bugs: ip options weren't passed to
+- * the TCP layer, missed a check for an
+- * ACK bit.
+- * Andi Kleen : Implemented fast path mtu discovery.
+- * Fixed many serious bugs in the
+- * request_sock handling and moved
+- * most of it into the af independent code.
+- * Added tail drop and some other bugfixes.
+- * Added new listen semantics.
+- * Mike McLagan : Routing by source
+- * Juan Jose Ciarlante: ip_dynaddr bits
+- * Andi Kleen: various fixes.
+- * Vitaly E. Lavrov : Transparent proxy revived after year
+- * coma.
+- * Andi Kleen : Fix new listen.
+- * Andi Kleen : Fix accept error reporting.
+- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
+- * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
+- * a single port at the same time.
+- */
+-
+-
+-#include <linux/types.h>
+-#include <linux/fcntl.h>
+-#include <linux/module.h>
+-#include <linux/random.h>
+-#include <linux/cache.h>
+-#include <linux/jhash.h>
+-#include <linux/init.h>
+-#include <linux/times.h>
+-
+-#include <net/icmp.h>
+-#include <net/inet_hashtables.h>
+-#include <net/tcp.h>
+-#include <net/transp_v6.h>
+-#include <net/ipv6.h>
+-#include <net/inet_common.h>
+-#include <net/timewait_sock.h>
+-#include <net/xfrm.h>
+-#include <net/netdma.h>
+-
+-#include <linux/inet.h>
+-#include <linux/ipv6.h>
+-#include <linux/stddef.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-
+-#include <linux/crypto.h>
+-#include <linux/scatterlist.h>
+-
+-int sysctl_tcp_tw_reuse __read_mostly;
+-int sysctl_tcp_low_latency __read_mostly;
+-
+-/* Check TCP sequence numbers in ICMP packets. */
+-#define ICMP_MIN_LENGTH 8
+-
+-/* Socket used for sending RSTs */
+-static struct socket *tcp_socket __read_mostly;
+-
+-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+-
+-#ifdef CONFIG_TCP_MD5SIG
+-static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
+- __be32 addr);
+-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+- __be32 saddr, __be32 daddr,
+- struct tcphdr *th, int protocol,
+- int tcplen);
+-#endif
+-
+-struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
+- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
+- .lhash_users = ATOMIC_INIT(0),
+- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
+-};
+-
+-static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
+-{
+- return inet_csk_get_port(&tcp_hashinfo, sk, snum,
+- inet_csk_bind_conflict);
+-}
+-
+-static void tcp_v4_hash(struct sock *sk)
+-{
+- inet_hash(&tcp_hashinfo, sk);
+-}
+-
+-void tcp_unhash(struct sock *sk)
+-{
+- inet_unhash(&tcp_hashinfo, sk);
+-}
+-
+-static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
+-{
+- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
+- ip_hdr(skb)->saddr,
+- tcp_hdr(skb)->dest,
+- tcp_hdr(skb)->source);
+-}
+-
+-int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
+-{
+- const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+- struct tcp_sock *tp = tcp_sk(sk);
+-
+- /* With PAWS, it is safe from the viewpoint
+- of data integrity. Even without PAWS it is safe provided sequence
+- spaces do not overlap i.e. at data rates <= 80Mbit/sec.
+-
+- Actually, the idea is close to VJ's one, only the timestamp cache is
+- held not per host but per port pair, and the TW bucket is used as the
+- state holder.
+-
+- If the TW bucket has already been destroyed we fall back to VJ's scheme
+- and use the initial timestamp retrieved from the peer table.
+- */
+- if (tcptw->tw_ts_recent_stamp &&
+- (twp == NULL || (sysctl_tcp_tw_reuse &&
+- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+- if (tp->write_seq == 0)
+- tp->write_seq = 1;
+- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+- sock_hold(sktw);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-EXPORT_SYMBOL_GPL(tcp_twsk_unique);
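Why tw_snd_nxt + 65535 + 2 just above? The old incarnation's data can extend at most one maximum unscaled window past tw_snd_nxt, so seeding write_seq beyond that point keeps the two sequence spaces disjoint. A standalone illustration, with hypothetical numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tw_snd_nxt = 0x89abcdefu;	/* hypothetical old snd_nxt */
	uint32_t max_window = 65535;		/* largest unscaled window */
	uint32_t new_isn = tw_snd_nxt + max_window + 2;

	/* The old connection can have sent nothing past this point: */
	uint32_t old_end = tw_snd_nxt + max_window;

	/* Serial-number arithmetic: new_isn is strictly after old_end. */
	printf("gap between old data and new ISN: %d\n",
	       (int32_t)(new_isn - old_end));	/* prints 2 */
	return 0;
}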
+-
+-/* This will initiate an outgoing connection. */
+-int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+-{
+- struct inet_sock *inet = inet_sk(sk);
+- struct tcp_sock *tp = tcp_sk(sk);
+- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+- struct rtable *rt;
+- __be32 daddr, nexthop;
+- int tmp;
+- int err;
+-
+- if (addr_len < sizeof(struct sockaddr_in))
+- return -EINVAL;
+-
+- if (usin->sin_family != AF_INET)
+- return -EAFNOSUPPORT;
+-
+- nexthop = daddr = usin->sin_addr.s_addr;
+- if (inet->opt && inet->opt->srr) {
+- if (!daddr)
+- return -EINVAL;
+- nexthop = inet->opt->faddr;
+- }
+-
+- tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+- IPPROTO_TCP,
+- inet->sport, usin->sin_port, sk, 1);
+- if (tmp < 0) {
+- if (tmp == -ENETUNREACH)
+- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+- return tmp;
+- }
+-
+- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+- ip_rt_put(rt);
+- return -ENETUNREACH;
+- }
+-
+- if (!inet->opt || !inet->opt->srr)
+- daddr = rt->rt_dst;
+-
+- if (!inet->saddr)
+- inet->saddr = rt->rt_src;
+- inet->rcv_saddr = inet->saddr;
+-
+- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
+- /* Reset inherited state */
+- tp->rx_opt.ts_recent = 0;
+- tp->rx_opt.ts_recent_stamp = 0;
+- tp->write_seq = 0;
+- }
+-
+- if (tcp_death_row.sysctl_tw_recycle &&
+- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
+- struct inet_peer *peer = rt_get_peer(rt);
+- /*
+- * VJ's idea. We save last timestamp seen from
+- * the destination in peer table, when entering state
+- * TIME-WAIT and initialize rx_opt.ts_recent from it,
+- * when trying new connection.
+- */
+- if (peer != NULL &&
+- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
+- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+- tp->rx_opt.ts_recent = peer->tcp_ts;
+- }
+- }
+-
+- inet->dport = usin->sin_port;
+- inet->daddr = daddr;
+-
+- inet_csk(sk)->icsk_ext_hdr_len = 0;
+- if (inet->opt)
+- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+-
+- tp->rx_opt.mss_clamp = 536;
+-
+- /* Socket identity is still unknown (sport may be zero).
+- * However we set state to SYN-SENT and, without releasing the socket
+- * lock, select a source port, enter ourselves into the hash tables and
+- * complete initialization after this.
+- */
+- tcp_set_state(sk, TCP_SYN_SENT);
+- err = inet_hash_connect(&tcp_death_row, sk);
+- if (err)
+- goto failure;
+-
+- err = ip_route_newports(&rt, IPPROTO_TCP,
+- inet->sport, inet->dport, sk);
+- if (err)
+- goto failure;
+-
+- /* OK, now commit destination to socket. */
+- sk->sk_gso_type = SKB_GSO_TCPV4;
+- sk_setup_caps(sk, &rt->u.dst);
+-
+- if (!tp->write_seq)
+- tp->write_seq = secure_tcp_sequence_number(inet->saddr,
+- inet->daddr,
+- inet->sport,
+- usin->sin_port);
+-
+- inet->id = tp->write_seq ^ jiffies;
+-
+- err = tcp_connect(sk);
+- rt = NULL;
+- if (err)
+- goto failure;
+-
+- return 0;
+-
+-failure:
+- /*
+- * This unhashes the socket and releases the local port,
+- * if necessary.
+- */
+- tcp_set_state(sk, TCP_CLOSE);
+- ip_rt_put(rt);
+- sk->sk_route_caps = 0;
+- inet->dport = 0;
+- return err;
+-}
+-
+-/*
+- * This routine does path mtu discovery as defined in RFC1191.
+- */
+-static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
+-{
+- struct dst_entry *dst;
+- struct inet_sock *inet = inet_sk(sk);
+-
+- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
+- * sent out by Linux are always <576 bytes so they should go through
+- * unfragmented).
+- */
+- if (sk->sk_state == TCP_LISTEN)
+- return;
+-
+- /* We don't check in the dst entry if pmtu discovery is forbidden
+- * on this route. We just assume that no packet-too-big packets
+- * are sent back when pmtu discovery is not active.
+- * There is a small race when the user changes this flag in the
+- * route, but I think that's acceptable.
+- */
+- if ((dst = __sk_dst_check(sk, 0)) == NULL)
+- return;
+-
+- dst->ops->update_pmtu(dst, mtu);
+-
+- /* Something is about to go wrong... Remember the soft error
+- * for the case that this connection is not able to recover.
+- */
+- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
+- sk->sk_err_soft = EMSGSIZE;
+-
+- mtu = dst_mtu(dst);
+-
+- if (inet->pmtudisc != IP_PMTUDISC_DONT &&
+- inet_csk(sk)->icsk_pmtu_cookie > mtu) {
+- tcp_sync_mss(sk, mtu);
+-
+- /* Resend the TCP packet because it's
+- * clear that the old packet has been
+- * dropped. This is the new "fast" path mtu
+- * discovery.
+- */
+- tcp_simple_retransmit(sk);
+- } /* else let the usual retransmit timer handle it */
+-}
+-
+-/*
+- * This routine is called by the ICMP module when it gets some
+- * sort of error condition. If err < 0 then the socket should
+- * be closed and the error returned to the user. If err > 0
+- * it's just the icmp type << 8 | icmp code. After adjustment
+- * header points to the first 8 bytes of the tcp header. We need
+- * to find the appropriate port.
+- *
+- * The locking strategy used here is very "optimistic". When
+- * someone else accesses the socket the ICMP is just dropped
+- * and for some paths there is no check at all.
+- * A more general error queue to queue errors for later handling
+- * is probably better.
+- *
+- */
+-
+-void tcp_v4_err(struct sk_buff *skb, u32 info)
+-{
+- struct iphdr *iph = (struct iphdr *)skb->data;
+- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+- struct tcp_sock *tp;
+- struct inet_sock *inet;
+- const int type = icmp_hdr(skb)->type;
+- const int code = icmp_hdr(skb)->code;
+- struct sock *sk;
+- __u32 seq;
+- int err;
+-
+- if (skb->len < (iph->ihl << 2) + 8) {
+- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+- return;
+- }
+-
+- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
+- th->source, inet_iif(skb));
+- if (!sk) {
+- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+- return;
+- }
+- if (sk->sk_state == TCP_TIME_WAIT) {
+- inet_twsk_put(inet_twsk(sk));
+- return;
+- }
+-
+- bh_lock_sock(sk);
+- /* If too many ICMPs get dropped on busy
+- * servers this needs to be solved differently.
+- */
+- if (sock_owned_by_user(sk))
+- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+-
+- if (sk->sk_state == TCP_CLOSE)
+- goto out;
+-
+- tp = tcp_sk(sk);
+- seq = ntohl(th->seq);
+- if (sk->sk_state != TCP_LISTEN &&
+- !between(seq, tp->snd_una, tp->snd_nxt)) {
+- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+- goto out;
+- }
+-
+- switch (type) {
+- case ICMP_SOURCE_QUENCH:
+- /* Just silently ignore these. */
+- goto out;
+- case ICMP_PARAMETERPROB:
+- err = EPROTO;
+- break;
+- case ICMP_DEST_UNREACH:
+- if (code > NR_ICMP_UNREACH)
+- goto out;
+-
+- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+- if (!sock_owned_by_user(sk))
+- do_pmtu_discovery(sk, iph, info);
+- goto out;
+- }
+-
+- err = icmp_err_convert[code].errno;
+- break;
+- case ICMP_TIME_EXCEEDED:
+- err = EHOSTUNREACH;
+- break;
+- default:
+- goto out;
+- }
+-
+- switch (sk->sk_state) {
+- struct request_sock *req, **prev;
+- case TCP_LISTEN:
+- if (sock_owned_by_user(sk))
+- goto out;
+-
+- req = inet_csk_search_req(sk, &prev, th->dest,
+- iph->daddr, iph->saddr);
+- if (!req)
+- goto out;
+-
+- /* ICMPs are not backlogged, hence we cannot get
+- an established socket here.
+- */
+- BUG_TRAP(!req->sk);
+-
+- if (seq != tcp_rsk(req)->snt_isn) {
+- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+- goto out;
+- }
+-
+- /*
+- * Still in SYN_RECV, just remove it silently.
+- * There is no good way to pass the error to the newly
+- * created socket, and POSIX does not want network
+- * errors returned from accept().
+- */
+- inet_csk_reqsk_queue_drop(sk, req, prev);
+- goto out;
+-
+- case TCP_SYN_SENT:
+- case TCP_SYN_RECV: /* Cannot happen.
+- It can, e.g., if SYNs crossed.
+- */
+- if (!sock_owned_by_user(sk)) {
+- sk->sk_err = err;
+-
+- sk->sk_error_report(sk);
+-
+- tcp_done(sk);
+- } else {
+- sk->sk_err_soft = err;
+- }
+- goto out;
+- }
+-
+- /* If we've already connected we will keep trying
+- * until we time out, or the user gives up.
+- *
+- * rfc1122 4.2.3.9 allows considering as hard errors
+- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
+- * but it is obsoleted by pmtu discovery).
+- *
+- * Note that in the modern internet, where routing is unreliable
+- * and broken firewalls sit in each dark corner sending random
+- * errors ordered by their masters, even these two messages finally lose
+- * their original sense (even Linux sends invalid PORT_UNREACHs)
+- *
+- * Now we are in compliance with RFCs.
+- * --ANK (980905)
+- */
+-
+- inet = inet_sk(sk);
+- if (!sock_owned_by_user(sk) && inet->recverr) {
+- sk->sk_err = err;
+- sk->sk_error_report(sk);
+- } else { /* Only an error on timeout */
+- sk->sk_err_soft = err;
+- }
+-
+-out:
+- bh_unlock_sock(sk);
+- sock_put(sk);
+-}
+-
+-/* This routine computes an IPv4 TCP checksum. */
+-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+-{
+- struct inet_sock *inet = inet_sk(sk);
+- struct tcphdr *th = tcp_hdr(skb);
+-
+- if (skb->ip_summed == CHECKSUM_PARTIAL) {
+- th->check = ~tcp_v4_check(len, inet->saddr,
+- inet->daddr, 0);
+- skb->csum_start = skb_transport_header(skb) - skb->head;
+- skb->csum_offset = offsetof(struct tcphdr, check);
+- } else {
+- th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
+- csum_partial((char *)th,
+- th->doff << 2,
+- skb->csum));
+- }
+-}
+-
+-int tcp_v4_gso_send_check(struct sk_buff *skb)
+-{
+- const struct iphdr *iph;
+- struct tcphdr *th;
+-
+- if (!pskb_may_pull(skb, sizeof(*th)))
+- return -EINVAL;
+-
+- iph = ip_hdr(skb);
+- th = tcp_hdr(skb);
+-
+- th->check = 0;
+- th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
+- skb->csum_start = skb_transport_header(skb) - skb->head;
+- skb->csum_offset = offsetof(struct tcphdr, check);
+- skb->ip_summed = CHECKSUM_PARTIAL;
+- return 0;
+-}
+-
+-/*
+- * This routine will send an RST to the other tcp.
+- *
+- * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
+- * for reset.
+- * Answer: if a packet caused RST, it is not for a socket
+- * existing in our system; if it is matched to a socket,
+- * it is just a duplicate segment or a bug in the other side's TCP.
+- * So we build the reply based only on the parameters
+- * that arrived with the segment.
+- * Exception: precedence violation. We do not implement it in any case.
+- */
+-
+-static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
+-{
+- struct tcphdr *th = tcp_hdr(skb);
+- struct {
+- struct tcphdr th;
+-#ifdef CONFIG_TCP_MD5SIG
+- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
+-#endif
+- } rep;
+- struct ip_reply_arg arg;
+-#ifdef CONFIG_TCP_MD5SIG
+- struct tcp_md5sig_key *key;
+-#endif
+-
+- /* Never send a reset in response to a reset. */
+- if (th->rst)
+- return;
+-
+- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
+- return;
+-
+- /* Swap the send and the receive. */
+- memset(&rep, 0, sizeof(rep));
+- rep.th.dest = th->source;
+- rep.th.source = th->dest;
+- rep.th.doff = sizeof(struct tcphdr) / 4;
+- rep.th.rst = 1;
+-
+- if (th->ack) {
+- rep.th.seq = th->ack_seq;
+- } else {
+- rep.th.ack = 1;
+- rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
+- skb->len - (th->doff << 2));
+- }
+-
+- memset(&arg, 0, sizeof(arg));
+- arg.iov[0].iov_base = (unsigned char *)&rep;
+- arg.iov[0].iov_len = sizeof(rep.th);
+-
+-#ifdef CONFIG_TCP_MD5SIG
+- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
+- if (key) {
+- rep.opt[0] = htonl((TCPOPT_NOP << 24) |
+- (TCPOPT_NOP << 16) |
+- (TCPOPT_MD5SIG << 8) |
+- TCPOLEN_MD5SIG);
+- /* Update length and the length the header thinks exists */
+- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
+- rep.th.doff = arg.iov[0].iov_len / 4;
+-
+- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
+- key,
+- ip_hdr(skb)->daddr,
+- ip_hdr(skb)->saddr,
+- &rep.th, IPPROTO_TCP,
+- arg.iov[0].iov_len);
+- }
+-#endif
+- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+- ip_hdr(skb)->saddr, /* XXX */
+- sizeof(struct tcphdr), IPPROTO_TCP, 0);
+- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+-
+- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+-
+- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+-}
+-
+-/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
+- outside socket context, is certainly ugly. What can I do?
+- */
+-
+-static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
+- struct sk_buff *skb, u32 seq, u32 ack,
+- u32 win, u32 ts)
+-{
+- struct tcphdr *th = tcp_hdr(skb);
+- struct {
+- struct tcphdr th;
+- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
+-#ifdef CONFIG_TCP_MD5SIG
+- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
+-#endif
+- ];
+- } rep;
+- struct ip_reply_arg arg;
+-#ifdef CONFIG_TCP_MD5SIG
+- struct tcp_md5sig_key *key;
+- struct tcp_md5sig_key tw_key;
+-#endif
+-
+- memset(&rep.th, 0, sizeof(struct tcphdr));
+- memset(&arg, 0, sizeof(arg));
+-
+- arg.iov[0].iov_base = (unsigned char *)&rep;
+- arg.iov[0].iov_len = sizeof(rep.th);
+- if (ts) {
+- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+- (TCPOPT_TIMESTAMP << 8) |
+- TCPOLEN_TIMESTAMP);
+- rep.opt[1] = htonl(tcp_time_stamp);
+- rep.opt[2] = htonl(ts);
+- arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
+- }
+-
+- /* Swap the send and the receive. */
+- rep.th.dest = th->source;
+- rep.th.source = th->dest;
+- rep.th.doff = arg.iov[0].iov_len / 4;
+- rep.th.seq = htonl(seq);
+- rep.th.ack_seq = htonl(ack);
+- rep.th.ack = 1;
+- rep.th.window = htons(win);
+-
+-#ifdef CONFIG_TCP_MD5SIG
+- /*
+- * The SKB holds an incoming packet, but may not have a valid ->sk
+- * pointer. This is especially the case when we're dealing with a
+- * TIME_WAIT ack, because the sk structure is long gone, and only
+- * the tcp_timewait_sock remains. So the md5 key is stashed in that
+- * structure, and we use it in preference. I believe that (twsk ||
+- * skb->sk) holds true, but we program defensively.
+- */
+- if (!twsk && skb->sk) {
+- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
+- } else if (twsk && twsk->tw_md5_keylen) {
+- tw_key.key = twsk->tw_md5_key;
+- tw_key.keylen = twsk->tw_md5_keylen;
+- key = &tw_key;
+- } else
+- key = NULL;
+-
+- if (key) {
+- int offset = (ts) ? 3 : 0;
+-
+- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
+- (TCPOPT_NOP << 16) |
+- (TCPOPT_MD5SIG << 8) |
+- TCPOLEN_MD5SIG);
+- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
+- rep.th.doff = arg.iov[0].iov_len/4;
+-
+- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
+- key,
+- ip_hdr(skb)->daddr,
+- ip_hdr(skb)->saddr,
+- &rep.th, IPPROTO_TCP,
+- arg.iov[0].iov_len);
+- }
+-#endif
+- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+- ip_hdr(skb)->saddr, /* XXX */
+- arg.iov[0].iov_len, IPPROTO_TCP, 0);
+- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+- if (twsk)
+- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
+-
+- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
+-
+- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+-}
+-
+-static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
+-{
+- struct inet_timewait_sock *tw = inet_twsk(sk);
+- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+-
+- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+- tcptw->tw_ts_recent);
+-
+- inet_twsk_put(tw);
+-}
+-
+-static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
+- struct request_sock *req)
+-{
+- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
+- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+- req->ts_recent);
+-}
+-
+-/*
+- * Send a SYN-ACK after having received an ACK.
+- * This still operates on a request_sock only, not on a big
+- * socket.
+- */
+-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
+- struct dst_entry *dst)
+-{
+- const struct inet_request_sock *ireq = inet_rsk(req);
+- int err = -1;
+- struct sk_buff * skb;
+-
+- /* First, grab a route. */
+- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
+- goto out;
+-
+- skb = tcp_make_synack(sk, dst, req);
+-
+- if (skb) {
+- struct tcphdr *th = tcp_hdr(skb);
+-
+- th->check = tcp_v4_check(skb->len,
+- ireq->loc_addr,
+- ireq->rmt_addr,
+- csum_partial((char *)th, skb->len,
+- skb->csum));
+-
+- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+- ireq->rmt_addr,
+- ireq->opt);
+- err = net_xmit_eval(err);
+- }
+-
+-out:
+- dst_release(dst);
+- return err;
+-}
+-
+-/*
+- * IPv4 request_sock destructor.
+- */
+-static void tcp_v4_reqsk_destructor(struct request_sock *req)
+-{
+- kfree(inet_rsk(req)->opt);
+-}
+-
+-#ifdef CONFIG_SYN_COOKIES
+-static void syn_flood_warning(struct sk_buff *skb)
+-{
+- static unsigned long warntime;
+-
+- if (time_after(jiffies, (warntime + HZ * 60))) {
+- warntime = jiffies;
+- printk(KERN_INFO
+- "possible SYN flooding on port %d. Sending cookies.\n",
+- ntohs(tcp_hdr(skb)->dest));
+- }
+-}
+-#endif
+-
+-/*
+- * Save and compile IPv4 options into the request_sock if needed.
+- */
+-static struct ip_options *tcp_v4_save_options(struct sock *sk,
+- struct sk_buff *skb)
+-{
+- struct ip_options *opt = &(IPCB(skb)->opt);
+- struct ip_options *dopt = NULL;
+-
+- if (opt && opt->optlen) {
+- int opt_size = optlength(opt);
+- dopt = kmalloc(opt_size, GFP_ATOMIC);
+- if (dopt) {
+- if (ip_options_echo(dopt, skb)) {
+- kfree(dopt);
+- dopt = NULL;
+- }
+- }
+- }
+- return dopt;
+-}
+-
+-#ifdef CONFIG_TCP_MD5SIG
+-/*
+- * RFC2385 MD5 checksumming requires a mapping of
+- * IP address->MD5 Key.
+- * We need to maintain these in the sk structure.
+- */
+-
+-/* Find the Key structure for an address. */
+-static struct tcp_md5sig_key *
+- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+-{
+- struct tcp_sock *tp = tcp_sk(sk);
+- int i;
+-
+- if (!tp->md5sig_info || !tp->md5sig_info->entries4)
+- return NULL;
+- for (i = 0; i < tp->md5sig_info->entries4; i++) {
+- if (tp->md5sig_info->keys4[i].addr == addr)
+- return &tp->md5sig_info->keys4[i].base;
+- }
+- return NULL;
+-}
+-
+-struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
+- struct sock *addr_sk)
+-{
+- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
+-}
+-
+-EXPORT_SYMBOL(tcp_v4_md5_lookup);
+-
+-static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
+- struct request_sock *req)
+-{
+- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
+-}
+-
+-/* This can be called on a newly created socket, from other files */
+-int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
+- u8 *newkey, u8 newkeylen)
+-{
+- /* Add Key to the list */
+- struct tcp4_md5sig_key *key;
+- struct tcp_sock *tp = tcp_sk(sk);
+- struct tcp4_md5sig_key *keys;
+-
+- key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
+- if (key) {
+- /* Pre-existing entry - just update that one. */
+- kfree(key->base.key);
+- key->base.key = newkey;
+- key->base.keylen = newkeylen;
+- } else {
+- struct tcp_md5sig_info *md5sig;
+-
+- if (!tp->md5sig_info) {
+- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
+- GFP_ATOMIC);
+- if (!tp->md5sig_info) {
+- kfree(newkey);
+- return -ENOMEM;
+- }
+- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+- }
+- if (tcp_alloc_md5sig_pool() == NULL) {
+- kfree(newkey);
+- return -ENOMEM;
+- }
+- md5sig = tp->md5sig_info;
+-
+- if (md5sig->alloced4 == md5sig->entries4) {
+- keys = kmalloc((sizeof(*keys) *
+- (md5sig->entries4 + 1)), GFP_ATOMIC);
+- if (!keys) {
+- kfree(newkey);
+- tcp_free_md5sig_pool();
+- return -ENOMEM;
+- }
+-
+- if (md5sig->entries4)
+- memcpy(keys, md5sig->keys4,
+- sizeof(*keys) * md5sig->entries4);
+-
+- /* Free old key list, and reference new one */
+- if (md5sig->keys4)
+- kfree(md5sig->keys4);
+- md5sig->keys4 = keys;
+- md5sig->alloced4++;
+- }
+- md5sig->entries4++;
+- md5sig->keys4[md5sig->entries4 - 1].addr = addr;
+- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
+- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
+- }
+- return 0;
+-}
+-
+-EXPORT_SYMBOL(tcp_v4_md5_do_add);
+-
+-static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
+- u8 *newkey, u8 newkeylen)
+-{
+- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
+- newkey, newkeylen);
+-}
+-
+-int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
+-{
+- struct tcp_sock *tp = tcp_sk(sk);
+- int i;
+-
+- for (i = 0; i < tp->md5sig_info->entries4; i++) {
+- if (tp->md5sig_info->keys4[i].addr == addr) {
+- /* Free the key */
+- kfree(tp->md5sig_info->keys4[i].base.key);
+- tp->md5sig_info->entries4--;
+-
+- if (tp->md5sig_info->entries4 == 0) {
+- kfree(tp->md5sig_info->keys4);
+- tp->md5sig_info->keys4 = NULL;
+- tp->md5sig_info->alloced4 = 0;
+- } else if (tp->md5sig_info->entries4 != i) {
+- /* Need to do some manipulation */
+- memcpy(&tp->md5sig_info->keys4[i],
+- &tp->md5sig_info->keys4[i+1],
+- (tp->md5sig_info->entries4 - i) *
+- sizeof(struct tcp4_md5sig_key));
+- }
+- tcp_free_md5sig_pool();
+- return 0;
+- }
+- }
+- return -ENOENT;
+-}
+-
+-EXPORT_SYMBOL(tcp_v4_md5_do_del);
+-
+-static void tcp_v4_clear_md5_list(struct sock *sk)
+-{
+- struct tcp_sock *tp = tcp_sk(sk);
+-
+- /* Free each key, then the set of keys,
+- * the crypto element, and then decrement our
+- * hold on the last resort crypto.
+- */
+- if (tp->md5sig_info->entries4) {
+- int i;
+- for (i = 0; i < tp->md5sig_info->entries4; i++)
+- kfree(tp->md5sig_info->keys4[i].base.key);
+- tp->md5sig_info->entries4 = 0;
+- tcp_free_md5sig_pool();
+- }
+- if (tp->md5sig_info->keys4) {
+- kfree(tp->md5sig_info->keys4);
+- tp->md5sig_info->keys4 = NULL;
+- tp->md5sig_info->alloced4 = 0;
+- }
+-}
+-
+-static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
+- int optlen)
+-{
+- struct tcp_md5sig cmd;
+- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+- u8 *newkey;
+-
+- if (optlen < sizeof(cmd))
+- return -EINVAL;
+-
+- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+- return -EFAULT;
+-
+- if (sin->sin_family != AF_INET)
+- return -EINVAL;
+-
+- if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
+- if (!tcp_sk(sk)->md5sig_info)
+- return -ENOENT;
+- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
+- }
+-
+- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
+- return -EINVAL;
+-
+- if (!tcp_sk(sk)->md5sig_info) {
+- struct tcp_sock *tp = tcp_sk(sk);
+- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
+-
+- if (!p)
+- return -EINVAL;
+-
+- tp->md5sig_info = p;
+- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+- }
+-
+- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
+- if (!newkey)
+- return -ENOMEM;
+- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
+- newkey, cmd.tcpm_keylen);
+-}
+-
+-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+- __be32 saddr, __be32 daddr,
+- struct tcphdr *th, int protocol,
+- int tcplen)
+-{
+- struct scatterlist sg[4];
+- __u16 data_len;
+- int block = 0;
+- __sum16 old_checksum;
+- struct tcp_md5sig_pool *hp;
+- struct tcp4_pseudohdr *bp;
+- struct hash_desc *desc;
+- int err;
+- unsigned int nbytes = 0;
+-
+- /*
+- * Okay, so RFC2385 is turned on for this connection,
+- * so we need to generate the MD5 hash for the packet now.
+- */
+-
+- hp = tcp_get_md5sig_pool();
+- if (!hp)
+- goto clear_hash_noput;
+-
+- bp = &hp->md5_blk.ip4;
+- desc = &hp->md5_desc;
+-
+- /*
+- * 1. the TCP pseudo-header (in the order: source IP address,
+- * destination IP address, zero-padded protocol number, and
+- * segment length)
+- */
+- bp->saddr = saddr;
+- bp->daddr = daddr;
+- bp->pad = 0;
+- bp->protocol = protocol;
+- bp->len = htons(tcplen);
+- sg_set_buf(&sg[block++], bp, sizeof(*bp));
+- nbytes += sizeof(*bp);
+-
+- /* 2. the TCP header, excluding options, and assuming a
+- * checksum of zero.
+- */
+- old_checksum = th->check;
+- th->check = 0;
+- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
+- nbytes += sizeof(struct tcphdr);
+-
+- /* 3. the TCP segment data (if any) */
+- data_len = tcplen - (th->doff << 2);
+- if (data_len > 0) {
+- unsigned char *data = (unsigned char *)th + (th->doff << 2);
+- sg_set_buf(&sg[block++], data, data_len);
+- nbytes += data_len;
+- }
+-
+- /* 4. an independently-specified key or password, known to both
+- * TCPs and presumably connection-specific
+- */
+- sg_set_buf(&sg[block++], key->key, key->keylen);
+- nbytes += key->keylen;
+-
+- /* Now store the Hash into the packet */
+- err = crypto_hash_init(desc);
+- if (err)
+- goto clear_hash;
+- err = crypto_hash_update(desc, sg, nbytes);
+- if (err)
+- goto clear_hash;
+- err = crypto_hash_final(desc, md5_hash);
+- if (err)
+- goto clear_hash;
+-
+- /* Reset header, and free up the crypto */
+- tcp_put_md5sig_pool();
+- th->check = old_checksum;
+-
+-out:
+- return 0;
+-clear_hash:
+- tcp_put_md5sig_pool();
+-clear_hash_noput:
+- memset(md5_hash, 0, 16);
+- goto out;
+-}
+-
+-int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
+- struct sock *sk,
+- struct dst_entry *dst,
+- struct request_sock *req,
+- struct tcphdr *th, int protocol,
+- int tcplen)
+-{
+- __be32 saddr, daddr;
+-
+- if (sk) {
+- saddr = inet_sk(sk)->saddr;
+- daddr = inet_sk(sk)->daddr;
+- } else {
+- struct rtable *rt = (struct rtable *)dst;
+- BUG_ON(!rt);
+- saddr = rt->rt_src;
+- daddr = rt->rt_dst;
+- }
+- return tcp_v4_do_calc_md5_hash(md5_hash, key,
+- saddr, daddr,
+- th, protocol, tcplen);
+-}
+-
+-EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
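The digest input assembled in tcp_v4_do_calc_md5_hash() follows RFC2385: pseudo-header, TCP header with a zeroed checksum, payload, then the key. A small userspace sketch of just that layout (no real MD5 computation; the example struct name is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the tcp4_pseudohdr fields fed to the hash above. */
struct tcp4_pseudohdr_example {
	uint32_t saddr;
	uint32_t daddr;
	uint8_t  pad;		/* always zero */
	uint8_t  protocol;	/* IPPROTO_TCP == 6 */
	uint16_t len;		/* TCP segment length, network order */
};

int main(void)
{
	/* The digest input is the concatenation, in exactly this order: */
	static const char *parts[] = {
		"1. IPv4 pseudo-header (saddr, daddr, 0, proto, len)",
		"2. TCP header with th->check temporarily set to 0",
		"3. TCP payload, if any",
		"4. the connection's MD5 key",
	};
	size_t i;

	for (i = 0; i < sizeof(parts) / sizeof(parts[0]); i++)
		puts(parts[i]);
	printf("pseudo-header is %zu bytes\n",
	       sizeof(struct tcp4_pseudohdr_example));
	return 0;
}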
+-
+-static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
+-{
+- /*
+- * This gets called for each TCP segment that arrives
+- * so we want to be efficient.
+- * We have 3 drop cases:
+- * o No MD5 hash and one expected.
+- * o MD5 hash and we're not expecting one.
+- * o MD5 hash and it's wrong.
+- */
+- __u8 *hash_location = NULL;
+- struct tcp_md5sig_key *hash_expected;
+- const struct iphdr *iph = ip_hdr(skb);
+- struct tcphdr *th = tcp_hdr(skb);
+- int length = (th->doff << 2) - sizeof(struct tcphdr);
+- int genhash;
+- unsigned char *ptr;
+- unsigned char newhash[16];
+-
+- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+-
+- /*
+- * If the TCP option length is less than the TCP_MD5SIG
+- * option length, then we can shortcut
+- */
+- if (length < TCPOLEN_MD5SIG) {
+- if (hash_expected)
+- return 1;
+- else
+- return 0;
+- }
+-
+- /* Okay, we can't shortcut - we have to grub through the options */
+- ptr = (unsigned char *)(th + 1);
+- while (length > 0) {
+- int opcode = *ptr++;
+- int opsize;
+-
+- switch (opcode) {
+- case TCPOPT_EOL:
+- goto done_opts;
+- case TCPOPT_NOP:
+- length--;
+- continue;
+- default:
+- opsize = *ptr++;
+- if (opsize < 2)
+- goto done_opts;
+- if (opsize > length)
+- goto done_opts;
+-
+- if (opcode == TCPOPT_MD5SIG) {
+- hash_location = ptr;
+- goto done_opts;
+- }
+- }
+- ptr += opsize-2;
+- length -= opsize;
+- }
+-done_opts:
+- /* We've parsed the options - do we have a hash? */
+- if (!hash_expected && !hash_location)
+- return 0;
+-
+- if (hash_expected && !hash_location) {
+- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
+- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
+- NIPQUAD(iph->saddr), ntohs(th->source),
+- NIPQUAD(iph->daddr), ntohs(th->dest));
+- return 1;
+- }
+-
+- if (!hash_expected && hash_location) {
+- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
+- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
+- NIPQUAD(iph->saddr), ntohs(th->source),
+- NIPQUAD(iph->daddr), ntohs(th->dest));
+- return 1;
+- }
+-
+- /* Okay, so this is hash_expected and hash_location -
+- * so we need to calculate the checksum.
+- */
+- genhash = tcp_v4_do_calc_md5_hash(newhash,
+- hash_expected,
+- iph->saddr, iph->daddr,
+- th, sk->sk_protocol,
+- skb->len);
+-
+- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+- if (net_ratelimit()) {
+- printk(KERN_INFO "MD5 Hash failed for "
+- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
+- NIPQUAD(iph->saddr), ntohs(th->source),
+- NIPQUAD(iph->daddr), ntohs(th->dest),
+- genhash ? " tcp_v4_calc_md5_hash failed" : "");
+- }
+- return 1;
+- }
+- return 0;
+-}
+-
+-#endif
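The option walk in tcp_v4_inbound_md5_hash() is the standard kind/length scan over the bytes after the fixed TCP header. The same loop as a standalone userspace function (hypothetical buffer), returning a pointer to the data of a requested option kind:

#include <stddef.h>
#include <stdio.h>

#define TCPOPT_EOL    0
#define TCPOPT_NOP    1
#define TCPOPT_MD5SIG 19

/* Scan a TCP options block for 'kind'; return its data or NULL. */
static const unsigned char *find_tcp_option(const unsigned char *ptr,
					    int length, int kind)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return NULL;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return NULL;	/* malformed */
			if (opcode == kind)
				return ptr;	/* opsize - 2 data bytes */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return NULL;
}

int main(void)
{
	/* NOP, NOP, MD5SIG (kind 19, len 18, then 16 bytes of hash) */
	unsigned char opts[20] = { TCPOPT_NOP, TCPOPT_NOP, TCPOPT_MD5SIG, 18 };
	const unsigned char *md5 =
		find_tcp_option(opts, (int)sizeof(opts), TCPOPT_MD5SIG);

	printf("md5 option %sfound\n", md5 ? "" : "not ");
	return 0;
}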
+-
+-struct request_sock_ops tcp_request_sock_ops __read_mostly = {
+- .family = PF_INET,
+- .obj_size = sizeof(struct tcp_request_sock),
+- .rtx_syn_ack = tcp_v4_send_synack,
+- .send_ack = tcp_v4_reqsk_send_ack,
+- .destructor = tcp_v4_reqsk_destructor,
+- .send_reset = tcp_v4_send_reset,
+-};
+-
+-#ifdef CONFIG_TCP_MD5SIG
+-static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+- .md5_lookup = tcp_v4_reqsk_md5_lookup,
+-};
+-#endif
+-
+-static struct timewait_sock_ops tcp_timewait_sock_ops = {
+- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
+- .twsk_unique = tcp_twsk_unique,
+- .twsk_destructor= tcp_twsk_destructor,
+-};
+-
+-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+-{
+- struct inet_request_sock *ireq;
+- struct tcp_options_received tmp_opt;
+- struct request_sock *req;
+- __be32 saddr = ip_hdr(skb)->saddr;
+- __be32 daddr = ip_hdr(skb)->daddr;
+- __u32 isn = TCP_SKB_CB(skb)->when;
+- struct dst_entry *dst = NULL;
+-#ifdef CONFIG_SYN_COOKIES
+- int want_cookie = 0;
+-#else
+-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
+-#endif
+-
+- /* Never answer SYNs sent to broadcast or multicast */
+- if (((struct rtable *)skb->dst)->rt_flags &
+- (RTCF_BROADCAST | RTCF_MULTICAST))
+- goto drop;
+-
+- /* TW buckets are converted to open requests without
+- * limitations; they conserve resources and the peer is
+- * evidently a real one.
+- */
+- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+-#ifdef CONFIG_SYN_COOKIES
+- if (sysctl_tcp_syncookies) {
+- want_cookie = 1;
+- } else
+-#endif
+- goto drop;
+- }
+-
+- /* Accept backlog is full. If we have already queued enough
+- * warm entries in the syn queue, drop the request. That is better than
+- * clogging the syn queue with openreqs with exponentially increasing
+- * timeout.
+- */
+- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+- goto drop;
+-
+- req = reqsk_alloc(&tcp_request_sock_ops);
+- if (!req)
+- goto drop;
+-
+-#ifdef CONFIG_TCP_MD5SIG
+- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
+-#endif
+-
+- tcp_clear_options(&tmp_opt);
+- tmp_opt.mss_clamp = 536;
+- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
+-
+- tcp_parse_options(skb, &tmp_opt, 0);
+-
+- if (want_cookie) {
+- tcp_clear_options(&tmp_opt);
+- tmp_opt.saw_tstamp = 0;
+- }
+-
+- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
+- /* Some OSes (unknown ones, but I see them on a web server, which
+- * contains information interesting only to Windows
+- * users) do not send their stamp in SYN. It is an easy case.
+- * We simply do not advertise TS support.
+- */
+- tmp_opt.saw_tstamp = 0;
+- tmp_opt.tstamp_ok = 0;
+- }
+- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
+-
+- tcp_openreq_init(req, &tmp_opt, skb);
+-
+- if (security_inet_conn_request(sk, skb, req))
+- goto drop_and_free;
+-
+- ireq = inet_rsk(req);
+- ireq->loc_addr = daddr;
+- ireq->rmt_addr = saddr;
+- ireq->opt = tcp_v4_save_options(sk, skb);
+- if (!want_cookie)
+- TCP_ECN_create_request(req, tcp_hdr(skb));
+-
+- if (want_cookie) {
+-#ifdef CONFIG_SYN_COOKIES
+- syn_flood_warning(skb);
+-#endif
+- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+- } else if (!isn) {
+- struct inet_peer *peer = NULL;
+-
+- /* VJ's idea. We save last timestamp seen
+- * from the destination in peer table, when entering
+- * state TIME-WAIT, and check against it before
+- * accepting new connection request.
+- *
+- * If "isn" is not zero, this request hit alive
+- * timewait bucket, so that all the necessary checks
+- * are made in the function processing timewait state.
+- */
+- if (tmp_opt.saw_tstamp &&
+- tcp_death_row.sysctl_tw_recycle &&
+- (dst = inet_csk_route_req(sk, req)) != NULL &&
+- (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
+- peer->v4daddr == saddr) {
+- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+- (s32)(peer->tcp_ts - req->ts_recent) >
+- TCP_PAWS_WINDOW) {
+- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
+- dst_release(dst);
+- goto drop_and_free;
+- }
+- }
+- /* Kill the following clause, if you dislike this way. */
+- else if (!sysctl_tcp_syncookies &&
+- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+- (sysctl_max_syn_backlog >> 2)) &&
+- (!peer || !peer->tcp_ts_stamp) &&
+- (!dst || !dst_metric(dst, RTAX_RTT))) {
+- /* Without syncookies the last quarter of the
+- * backlog is filled with destinations
+- * proven to be alive.
+- * It means that we continue to communicate
+- * with destinations already remembered
+- * at the moment of the synflood.
+- */
+- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
+- "request from %u.%u.%u.%u/%u\n",
+- NIPQUAD(saddr),
+- ntohs(tcp_hdr(skb)->source));
+- dst_release(dst);
+- goto drop_and_free;
+- }
+-
+- isn = tcp_v4_init_sequence(skb);
+- }
+- tcp_rsk(req)->snt_isn = isn;
+-
+- if (tcp_v4_send_synack(sk, req, dst))
+- goto drop_and_free;
+-
+- if (want_cookie) {
+- reqsk_free(req);
+- } else {
+- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+- }
+- return 0;
+-
+-drop_and_free:
+- reqsk_free(req);
+-drop:
+- return 0;
+-}
+-
+-
+-/*
+- * The three way handshake has completed - we got a valid synack -
+- * now create the new socket.
+- */
+-struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+- struct request_sock *req,
+- struct dst_entry *dst)
+-{
+- struct inet_request_sock *ireq;
+- struct inet_sock *newinet;
+- struct tcp_sock *newtp;
+- struct sock *newsk;
+-#ifdef CONFIG_TCP_MD5SIG
+- struct tcp_md5sig_key *key;
+-#endif
+-
+- if (sk_acceptq_is_full(sk))
+- goto exit_overflow;
+-
+- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
+- goto exit;
+-
+- newsk = tcp_create_openreq_child(sk, req, skb);
+- if (!newsk)
+- goto exit;
+-
+- newsk->sk_gso_type = SKB_GSO_TCPV4;
+- sk_setup_caps(newsk, dst);
+-
+- newtp = tcp_sk(newsk);
+- newinet = inet_sk(newsk);
+- ireq = inet_rsk(req);
+- newinet->daddr = ireq->rmt_addr;
+- newinet->rcv_saddr = ireq->loc_addr;
+- newinet->saddr = ireq->loc_addr;
+- newinet->opt = ireq->opt;
+- ireq->opt = NULL;
+- newinet->mc_index = inet_iif(skb);
+- newinet->mc_ttl = ip_hdr(skb)->ttl;
+- inet_csk(newsk)->icsk_ext_hdr_len = 0;
+- if (newinet->opt)
+- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
+- newinet->id = newtp->write_seq ^ jiffies;
+-
+- tcp_mtup_init(newsk);
+- tcp_sync_mss(newsk, dst_mtu(dst));
+- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+- tcp_initialize_rcv_mss(newsk);
+-
+-#ifdef CONFIG_TCP_MD5SIG
+- /* Copy over the MD5 key from the original socket */
+- if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
+- /*
+- * We're using one, so create a matching key
+- * on the newsk structure. If we fail to get
+- * memory, then we end up not copying the key
+- * across. Shucks.
+- */
+- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
+- if (newkey != NULL)
+- tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
+- newkey, key->keylen);
+- }
+-#endif
+-
+- __inet_hash(&tcp_hashinfo, newsk, 0);
+- __inet_inherit_port(&tcp_hashinfo, sk, newsk);
+-
+- return newsk;
+-
+-exit_overflow:
+- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+-exit:
+- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+- dst_release(dst);
+- return NULL;
+-}
+-
+-static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+-{
+- struct tcphdr *th = tcp_hdr(skb);
+- const struct iphdr *iph = ip_hdr(skb);
+- struct sock *nsk;
+- struct request_sock **prev;
+- /* Find possible connection requests. */
+- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
+- iph->saddr, iph->daddr);
+- if (req)
+- return tcp_check_req(sk, skb, req, prev);
+-
+- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+- iph->daddr, th->dest, inet_iif(skb));
+-
+- if (nsk) {
+- if (nsk->sk_state != TCP_TIME_WAIT) {
+- bh_lock_sock(nsk);
+- return nsk;
+- }
+- inet_twsk_put(inet_twsk(nsk));
+- return NULL;
+- }
+-
+-#ifdef CONFIG_SYN_COOKIES
+- if (!th->rst && !th->syn && th->ack)
+- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
+-#endif
+- return sk;
+-}
+-
+-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
+-{
+- const struct iphdr *iph = ip_hdr(skb);
+-
+- if (skb->ip_summed == CHECKSUM_COMPLETE) {
+- if (!tcp_v4_check(skb->len, iph->saddr,
+- iph->daddr, skb->csum)) {
+- skb->ip_summed = CHECKSUM_UNNECESSARY;
+- return 0;
+- }
+- }
+-
+- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+- skb->len, IPPROTO_TCP, 0);
+-
+- if (skb->len <= 76) {
+- return __skb_checksum_complete(skb);
+- }
+- return 0;
+-}
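tcp_v4_checksum_init() defers full verification unless the packet is small (<= 76 bytes); the underlying primitive is the RFC 1071 ones'-complement sum that csum_tcpudp_nofold() seeds with the pseudo-header. A minimal userspace version of the sum-and-fold, for illustration only:

#include <stdint.h>
#include <stdio.h>

/* RFC 1071 ones'-complement sum over 16-bit words. */
static uint16_t ip_checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)				/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Example data from RFC 1071: folds to 0xddf2, checksum 0x220d. */
	uint8_t buf[] = { 0x00, 0x01, 0xf2, 0x03, 0xf4, 0xf5, 0xf6, 0xf7 };

	printf("checksum = 0x%04x\n", ip_checksum(buf, sizeof(buf)));
	return 0;
}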
+-
+-
+-/* The socket must have its spinlock held when we get
+- * here.
+- *
+- * We have a potential double-lock case here, so even when
+- * doing backlog processing we use the BH locking scheme.
+- * This is because we cannot sleep with the original spinlock
+- * held.
+- */
+-int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+-{
+- struct sock *rsk;
+-#ifdef CONFIG_TCP_MD5SIG
+- /*
+- * We really want to reject the packet as early as possible
+- * if:
+- * o We're expecting an MD5'd packet and there is no MD5 tcp option
+- * o There is an MD5 option and we're not expecting one
+- */
+- if (tcp_v4_inbound_md5_hash(sk, skb))
+- goto discard;
+-#endif
+-
+- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+- TCP_CHECK_TIMER(sk);
+- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
+- rsk = sk;
+- goto reset;
+- }
+- TCP_CHECK_TIMER(sk);
+- return 0;
+- }
+-
+- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
+- goto csum_err;
+-
+- if (sk->sk_state == TCP_LISTEN) {
+- struct sock *nsk = tcp_v4_hnd_req(sk, skb);
+- if (!nsk)
+- goto discard;
+-
+- if (nsk != sk) {
+- if (tcp_child_process(sk, nsk, skb)) {
+- rsk = nsk;
+- goto reset;
+- }
+- return 0;
+- }
+- }
+-
+- TCP_CHECK_TIMER(sk);
+- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
+- rsk = sk;
+- goto reset;
+- }
+- TCP_CHECK_TIMER(sk);
+- return 0;
+-
+-reset:
+- tcp_v4_send_reset(rsk, skb);
+-discard:
+- kfree_skb(skb);
+- /* Be careful here. If this function gets more complicated and
+- * gcc suffers from register pressure on the x86, sk (in %ebx)
+- * might be destroyed here. This current version compiles correctly,
+- * but you have been warned.
+- */
+- return 0;
+-
+-csum_err:
+- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+- goto discard;
+-}
+-
+-/*
+- * From tcp_input.c
+- */
+-
+-int tcp_v4_rcv(struct sk_buff *skb)
+-{
+- const struct iphdr *iph;
+- struct tcphdr *th;
+- struct sock *sk;
+- int ret;
+-
+- if (skb->pkt_type != PACKET_HOST)
+- goto discard_it;
+-
+- /* Count it even if it's bad */
+- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+-
+- if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
+- goto discard_it;
+-
+- th = tcp_hdr(skb);
+-
+- if (th->doff < sizeof(struct tcphdr) / 4)
+- goto bad_packet;
+- if (!pskb_may_pull(skb, th->doff * 4))
+- goto discard_it;
+-
+- /* An explanation is required here, I think.
+- * Packet length and doff are validated by header prediction,
+- * provided case of th->doff==0 is eliminated.
+- * So, we defer the checks. */
+- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
+- goto bad_packet;
+-
+- th = tcp_hdr(skb);
+- iph = ip_hdr(skb);
+- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+- skb->len - th->doff * 4);
+- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+- TCP_SKB_CB(skb)->when = 0;
+- TCP_SKB_CB(skb)->flags = iph->tos;
+- TCP_SKB_CB(skb)->sacked = 0;
+-
+- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+- iph->daddr, th->dest, inet_iif(skb));
+- if (!sk)
+- goto no_tcp_socket;
+-
+-process:
+- if (sk->sk_state == TCP_TIME_WAIT)
+- goto do_time_wait;
+-
+- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+- goto discard_and_relse;
+- nf_reset(skb);
+-
+- if (sk_filter(sk, skb))
+- goto discard_and_relse;
+-
+- skb->dev = NULL;
+-
+- bh_lock_sock_nested(sk);
+- ret = 0;
+- if (!sock_owned_by_user(sk)) {
+-#ifdef CONFIG_NET_DMA
+- struct tcp_sock *tp = tcp_sk(sk);
+- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+- tp->ucopy.dma_chan = get_softnet_dma();
+- if (tp->ucopy.dma_chan)
+- ret = tcp_v4_do_rcv(sk, skb);
+- else
+-#endif
+- {
+- if (!tcp_prequeue(sk, skb))
+- ret = tcp_v4_do_rcv(sk, skb);
+- }
+- } else
+- sk_add_backlog(sk, skb);
+- bh_unlock_sock(sk);
+-
+- sock_put(sk);
+-
+- return ret;
+-
+-no_tcp_socket:
+- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+- goto discard_it;
+-
+- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+-bad_packet:
+- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+- } else {
+- tcp_v4_send_reset(NULL, skb);
+- }
+-
+-discard_it:
+- /* Discard frame. */
+- kfree_skb(skb);
+- return 0;
+-
+-discard_and_relse:
+- sock_put(sk);
+- goto discard_it;
+-
+-do_time_wait:
+- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+- inet_twsk_put(inet_twsk(sk));
+- goto discard_it;
+- }
+-
+- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+- inet_twsk_put(inet_twsk(sk));
+- goto discard_it;
+- }
+- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+- case TCP_TW_SYN: {
+- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+- iph->daddr, th->dest,
+- inet_iif(skb));
+- if (sk2) {
+- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+- inet_twsk_put(inet_twsk(sk));
+- sk = sk2;
+- goto process;
+- }
+- /* Fall through to ACK */
+- }
+- case TCP_TW_ACK:
+- tcp_v4_timewait_ack(sk, skb);
+- break;
+- case TCP_TW_RST:
+- goto no_tcp_socket;
+- case TCP_TW_SUCCESS:;
+- }
+- goto discard_it;
+-}
+-
+-/* VJ's idea. Save last timestamp seen from this destination
+- * and hold it at least for normal timewait interval to use for duplicate
+- * segment detection in subsequent connections, before they enter synchronized
+- * state.
+- */
+-
+-int tcp_v4_remember_stamp(struct sock *sk)
+-{
+- struct inet_sock *inet = inet_sk(sk);
+- struct tcp_sock *tp = tcp_sk(sk);
+- struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
+- struct inet_peer *peer = NULL;
+- int release_it = 0;
+-
+- if (!rt || rt->rt_dst != inet->daddr) {
+- peer = inet_getpeer(inet->daddr, 1);
+- release_it = 1;
+- } else {
+- if (!rt->peer)
+- rt_bind_peer(rt, 1);
+- peer = rt->peer;
+- }
+-
+- if (peer) {
+- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
+- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
+- peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
+- peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
+- peer->tcp_ts = tp->rx_opt.ts_recent;
+- }
+- if (release_it)
+- inet_putpeer(peer);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+-{
+- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
+-
+- if (peer) {
+- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+-
+- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
+- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
+- peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
+- peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
+- peer->tcp_ts = tcptw->tw_ts_recent;
+- }
+- inet_putpeer(peer);
+- return 1;
+- }
+-
+- return 0;
+-}
+-
+-struct inet_connection_sock_af_ops ipv4_specific = {
+- .queue_xmit = ip_queue_xmit,
+- .send_check = tcp_v4_send_check,
+- .rebuild_header = inet_sk_rebuild_header,
+- .conn_request = tcp_v4_conn_request,
+- .syn_recv_sock = tcp_v4_syn_recv_sock,
+- .remember_stamp = tcp_v4_remember_stamp,
+- .net_header_len = sizeof(struct iphdr),
+- .setsockopt = ip_setsockopt,
+- .getsockopt = ip_getsockopt,
+- .addr2sockaddr = inet_csk_addr2sockaddr,
+- .sockaddr_len = sizeof(struct sockaddr_in),
+-#ifdef CONFIG_COMPAT
+- .compat_setsockopt = compat_ip_setsockopt,
+- .compat_getsockopt = compat_ip_getsockopt,
+-#endif
+-};
+-
+-#ifdef CONFIG_TCP_MD5SIG
+-static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
+- .md5_lookup = tcp_v4_md5_lookup,
+- .calc_md5_hash = tcp_v4_calc_md5_hash,
+- .md5_add = tcp_v4_md5_add_func,
+- .md5_parse = tcp_v4_parse_md5_keys,
+-};
+-#endif
+-
+-/* NOTE: A lot of things are set to zero explicitly by the call to
+- * sk_alloc() so need not be done here.
+- */
+-static int tcp_v4_init_sock(struct sock *sk)
+-{
+- struct inet_connection_sock *icsk = inet_csk(sk);
+- struct tcp_sock *tp = tcp_sk(sk);
+-
+- skb_queue_head_init(&tp->out_of_order_queue);
+- tcp_init_xmit_timers(sk);
+- tcp_prequeue_init(tp);
+-
+- icsk->icsk_rto = TCP_TIMEOUT_INIT;
+- tp->mdev = TCP_TIMEOUT_INIT;
+-
+- /* So many TCP implementations out there (incorrectly) count the
+- * initial SYN frame in their delayed-ACK and congestion control
+- * algorithms that we must have the following bandaid to talk
+- * efficiently to them. -DaveM
+- */
+- tp->snd_cwnd = 2;
+-
+- /* See draft-stevens-tcpca-spec-01 for discussion of the
+- * initialization of these values.
+- */
+- tp->snd_ssthresh = 0x7fffffff; /* Infinity */
+- tp->snd_cwnd_clamp = ~0;
+- tp->mss_cache = 536;
+-
+- tp->reordering = sysctl_tcp_reordering;
+- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+-
+- sk->sk_state = TCP_CLOSE;
+-
+- sk->sk_write_space = sk_stream_write_space;
+- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+-
+- icsk->icsk_af_ops = &ipv4_specific;
+- icsk->icsk_sync_mss = tcp_sync_mss;
+-#ifdef CONFIG_TCP_MD5SIG
+- tp->af_specific = &tcp_sock_ipv4_specific;
+-#endif
+-
+- sk->sk_sndbuf = sysctl_tcp_wmem[1];
+- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+-
+- atomic_inc(&tcp_sockets_allocated);
+-
+- return 0;
+-}
+-
+-int tcp_v4_destroy_sock(struct sock *sk)
+-{
+- struct tcp_sock *tp = tcp_sk(sk);
+-
+- tcp_clear_xmit_timers(sk);
+-
+- tcp_cleanup_congestion_control(sk);
+-
+- /* Clean up the write buffer. */
+- tcp_write_queue_purge(sk);
+-
+- /* Cleans up our, hopefully empty, out_of_order_queue. */
+- __skb_queue_purge(&tp->out_of_order_queue);
+-
+-#ifdef CONFIG_TCP_MD5SIG
+- /* Clean up the MD5 key list, if any */
+- if (tp->md5sig_info) {
+- tcp_v4_clear_md5_list(sk);
+- kfree(tp->md5sig_info);
+- tp->md5sig_info = NULL;
+- }
+-#endif
+-
+-#ifdef CONFIG_NET_DMA
+- /* Cleans up our sk_async_wait_queue */
+- __skb_queue_purge(&sk->sk_async_wait_queue);
+-#endif
+-
+- /* Clean up the prequeue; it really must be empty. */
+- __skb_queue_purge(&tp->ucopy.prequeue);
+-
+- /* Clean up a referenced TCP bind bucket. */
+- if (inet_csk(sk)->icsk_bind_hash)
+- inet_put_port(&tcp_hashinfo, sk);
+-
+- /*
+- * If sendmsg cached page exists, toss it.
+- */
+- if (sk->sk_sndmsg_page) {
+- __free_page(sk->sk_sndmsg_page);
+- sk->sk_sndmsg_page = NULL;
+- }
+-
+- atomic_dec(&tcp_sockets_allocated);
+-
+- return 0;
+-}
+-
+-EXPORT_SYMBOL(tcp_v4_destroy_sock);
+-
+-#ifdef CONFIG_PROC_FS
+-/* Proc filesystem TCP sock list dumping. */
+-
+-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+-{
+- return hlist_empty(head) ? NULL :
+- list_entry(head->first, struct inet_timewait_sock, tw_node);
+-}
+-
+-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
+-{
+- return tw->tw_node.next ?
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+-}
+-
+-static void *listening_get_next(struct seq_file *seq, void *cur)
+-{
+- struct inet_connection_sock *icsk;
+- struct hlist_node *node;
+- struct sock *sk = cur;
+- struct tcp_iter_state* st = seq->private;
+-
+- if (!sk) {
+- st->bucket = 0;
+- sk = sk_head(&tcp_hashinfo.listening_hash[0]);
+- goto get_sk;
+- }
+-
+- ++st->num;
+-
+- if (st->state == TCP_SEQ_STATE_OPENREQ) {
+- struct request_sock *req = cur;
+-
+- icsk = inet_csk(st->syn_wait_sk);
+- req = req->dl_next;
+- while (1) {
+- while (req) {
+- if (req->rsk_ops->family == st->family) {
+- cur = req;
+- goto out;
+- }
+- req = req->dl_next;
+- }
+- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
+- break;
+-get_req:
+- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
+- }
+- sk = sk_next(st->syn_wait_sk);
+- st->state = TCP_SEQ_STATE_LISTENING;
+- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- } else {
+- icsk = inet_csk(sk);
+- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- if (reqsk_queue_len(&icsk->icsk_accept_queue))
+- goto start_req;
+- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- sk = sk_next(sk);
+- }
+-get_sk:
+- sk_for_each_from(sk, node) {
+- if (sk->sk_family == st->family) {
+- cur = sk;
+- goto out;
+- }
+- icsk = inet_csk(sk);
+- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
+-start_req:
+- st->uid = sock_i_uid(sk);
+- st->syn_wait_sk = sk;
+- st->state = TCP_SEQ_STATE_OPENREQ;
+- st->sbucket = 0;
+- goto get_req;
+- }
+- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- }
+- if (++st->bucket < INET_LHTABLE_SIZE) {
+- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
+- goto get_sk;
+- }
+- cur = NULL;
+-out:
+- return cur;
+-}
+-
+-static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
+-{
+- void *rc = listening_get_next(seq, NULL);
+-
+- while (rc && *pos) {
+- rc = listening_get_next(seq, rc);
+- --*pos;
+- }
+- return rc;
+-}
+-
+-static void *established_get_first(struct seq_file *seq)
+-{
+- struct tcp_iter_state* st = seq->private;
+- void *rc = NULL;
+-
+- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
+- struct sock *sk;
+- struct hlist_node *node;
+- struct inet_timewait_sock *tw;
+-
+- /* We can reschedule _before_ having picked the target: */
+- cond_resched_softirq();
+-
+- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+- if (sk->sk_family != st->family) {
+- continue;
+- }
+- rc = sk;
+- goto out;
+- }
+- st->state = TCP_SEQ_STATE_TIME_WAIT;
+- inet_twsk_for_each(tw, node,
+- &tcp_hashinfo.ehash[st->bucket].twchain) {
+- if (tw->tw_family != st->family) {
+- continue;
+- }
+- rc = tw;
+- goto out;
+- }
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+- st->state = TCP_SEQ_STATE_ESTABLISHED;
+- }
+-out:
+- return rc;
+-}
+-
+-static void *established_get_next(struct seq_file *seq, void *cur)
+-{
+- struct sock *sk = cur;
+- struct inet_timewait_sock *tw;
+- struct hlist_node *node;
+- struct tcp_iter_state* st = seq->private;
+-
+- ++st->num;
+-
+- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
+- tw = cur;
+- tw = tw_next(tw);
+-get_tw:
+- while (tw && tw->tw_family != st->family) {
+- tw = tw_next(tw);
+- }
+- if (tw) {
+- cur = tw;
+- goto out;
+- }
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+- st->state = TCP_SEQ_STATE_ESTABLISHED;
+-
+- /* We can reschedule between buckets: */
+- cond_resched_softirq();
+-
+- if (++st->bucket < tcp_hashinfo.ehash_size) {
+- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
+- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
+- } else {
+- cur = NULL;
+- goto out;
+- }
+- } else
+- sk = sk_next(sk);
+-
+- sk_for_each_from(sk, node) {
+- if (sk->sk_family == st->family)
+- goto found;
+- }
+-
+- st->state = TCP_SEQ_STATE_TIME_WAIT;
+- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
+- goto get_tw;
+-found:
+- cur = sk;
+-out:
+- return cur;
+-}
+-
+-static void *established_get_idx(struct seq_file *seq, loff_t pos)
+-{
+- void *rc = established_get_first(seq);
+-
+- while (rc && pos) {
+- rc = established_get_next(seq, rc);
+- --pos;
+- }
+- return rc;
+-}
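Both *_get_idx() helpers above use the same idiom: take the first element, then call get_next while decrementing pos. The same pattern in miniature over a plain array (purely illustrative; all names hypothetical):

#include <stdio.h>

static int items[] = { 10, 20, 30, 40 };
static const size_t n_items = sizeof(items) / sizeof(items[0]);

static int *get_first(void)
{
	return n_items ? &items[0] : NULL;
}

static int *get_next(int *cur)
{
	size_t idx = (size_t)(cur - items) + 1;
	return idx < n_items ? &items[idx] : NULL;
}

/* Mirror of established_get_idx(): advance from first, pos steps. */
static int *get_idx(long pos)
{
	int *rc = get_first();

	while (rc && pos) {
		rc = get_next(rc);
		--pos;
	}
	return rc;
}

int main(void)
{
	int *it = get_idx(2);

	printf("item at pos 2: %d\n", it ? *it : -1);	/* prints 30 */
	return 0;
}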
+-
+-static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
+-{
+- void *rc;
+- struct tcp_iter_state* st = seq->private;
+-
+- inet_listen_lock(&tcp_hashinfo);
+- st->state = TCP_SEQ_STATE_LISTENING;
+- rc = listening_get_idx(seq, &pos);
+-
+- if (!rc) {
+- inet_listen_unlock(&tcp_hashinfo);
+- local_bh_disable();
+- st->state = TCP_SEQ_STATE_ESTABLISHED;
+- rc = established_get_idx(seq, pos);
+- }
+-
+- return rc;
+-}
+-
+-static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
+-{
+- struct tcp_iter_state* st = seq->private;
+- st->state = TCP_SEQ_STATE_LISTENING;
+- st->num = 0;
+- return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+-}
+-
+-static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+-{
+- void *rc = NULL;
+- struct tcp_iter_state* st;
+-
+- if (v == SEQ_START_TOKEN) {
+- rc = tcp_get_idx(seq, 0);
+- goto out;
+- }
+- st = seq->private;
+-
+- switch (st->state) {
+- case TCP_SEQ_STATE_OPENREQ:
+- case TCP_SEQ_STATE_LISTENING:
+- rc = listening_get_next(seq, v);
+- if (!rc) {
+- inet_listen_unlock(&tcp_hashinfo);
+- local_bh_disable();
+- st->state = TCP_SEQ_STATE_ESTABLISHED;
+- rc = established_get_first(seq);
+- }
+- break;
+- case TCP_SEQ_STATE_ESTABLISHED:
+- case TCP_SEQ_STATE_TIME_WAIT:
+- rc = established_get_next(seq, v);
+- break;
+- }
+-out:
+- ++*pos;
+- return rc;
+-}
+-
+-static void tcp_seq_stop(struct seq_file *seq, void *v)
+-{
+- struct tcp_iter_state* st = seq->private;
+-
+- switch (st->state) {
+- case TCP_SEQ_STATE_OPENREQ:
+- if (v) {
+- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
+- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+- }
+- case TCP_SEQ_STATE_LISTENING:
+- if (v != SEQ_START_TOKEN)
+- inet_listen_unlock(&tcp_hashinfo);
+- break;
+- case TCP_SEQ_STATE_TIME_WAIT:
+- case TCP_SEQ_STATE_ESTABLISHED:
+- if (v)
+- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+- local_bh_enable();
+- break;
+- }
+-}
+-
+-static int tcp_seq_open(struct inode *inode, struct file *file)
+-{
+- struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
+- struct seq_file *seq;
+- struct tcp_iter_state *s;
+- int rc;
+-
+- if (unlikely(afinfo == NULL))
+- return -EINVAL;
+-
+- s = kzalloc(sizeof(*s), GFP_KERNEL);
+- if (!s)
+- return -ENOMEM;
+- s->family = afinfo->family;
+- s->seq_ops.start = tcp_seq_start;
+- s->seq_ops.next = tcp_seq_next;
+- s->seq_ops.show = afinfo->seq_show;
+- s->seq_ops.stop = tcp_seq_stop;
+-
+- rc = seq_open(file, &s->seq_ops);
+- if (rc)
+- goto out_kfree;
+- seq = file->private_data;
+- seq->private = s;
+-out:
+- return rc;
+-out_kfree:
+- kfree(s);
+- goto out;
+-}
+-
+-int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
+-{
+- int rc = 0;
+- struct proc_dir_entry *p;
+-
+- if (!afinfo)
+- return -EINVAL;
+- afinfo->seq_fops->owner = afinfo->owner;
+- afinfo->seq_fops->open = tcp_seq_open;
+- afinfo->seq_fops->read = seq_read;
+- afinfo->seq_fops->llseek = seq_lseek;
+- afinfo->seq_fops->release = seq_release_private;
+-
+- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
+- if (p)
+- p->data = afinfo;
+- else
+- rc = -ENOMEM;
+- return rc;
+-}
+-
+-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
+-{
+- if (!afinfo)
+- return;
+- proc_net_remove(afinfo->name);
+- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+-}
+-
+-static void get_openreq4(struct sock *sk, struct request_sock *req,
+- char *tmpbuf, int i, int uid)
+-{
+- const struct inet_request_sock *ireq = inet_rsk(req);
+- int ttd = req->expires - jiffies;
+-
+- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
+- i,
+- ireq->loc_addr,
+- ntohs(inet_sk(sk)->sport),
+- ireq->rmt_addr,
+- ntohs(ireq->rmt_port),
+- TCP_SYN_RECV,
+- 0, 0, /* could print option size, but that is af dependent. */
+- 1, /* timers active (only the expire timer) */
+- jiffies_to_clock_t(ttd),
+- req->retrans,
+- uid,
+- 0, /* non standard timer */
+- 0, /* open_requests have no inode */
+- atomic_read(&sk->sk_refcnt),
+- req);
+-}
+-
+-static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
+-{
+- int timer_active;
+- unsigned long timer_expires;
+- struct tcp_sock *tp = tcp_sk(sk);
+- const struct inet_connection_sock *icsk = inet_csk(sk);
+- struct inet_sock *inet = inet_sk(sk);
+- __be32 dest = inet->daddr;
+- __be32 src = inet->rcv_saddr;
+- __u16 destp = ntohs(inet->dport);
+- __u16 srcp = ntohs(inet->sport);
+-
+- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+- timer_active = 1;
+- timer_expires = icsk->icsk_timeout;
+- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+- timer_active = 4;
+- timer_expires = icsk->icsk_timeout;
+- } else if (timer_pending(&sk->sk_timer)) {
+- timer_active = 2;
+- timer_expires = sk->sk_timer.expires;
+- } else {
+- timer_active = 0;
+- timer_expires = jiffies;
+- }
+-
+- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
+- "%08X %5d %8d %lu %d %p %u %u %u %u %d",
+- i, src, srcp, dest, destp, sk->sk_state,
+- tp->write_seq - tp->snd_una,
+- sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
+- (tp->rcv_nxt - tp->copied_seq),
+- timer_active,
+- jiffies_to_clock_t(timer_expires - jiffies),
+- icsk->icsk_retransmits,
+- sock_i_uid(sk),
+- icsk->icsk_probes_out,
+- sock_i_ino(sk),
+- atomic_read(&sk->sk_refcnt), sk,
+- icsk->icsk_rto,
+- icsk->icsk_ack.ato,
+- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+- tp->snd_cwnd,
+- tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
+-}
+-
+-static void get_timewait4_sock(struct inet_timewait_sock *tw,
+- char *tmpbuf, int i)
+-{
+- __be32 dest, src;
+- __u16 destp, srcp;
+- int ttd = tw->tw_ttd - jiffies;
+-
+- if (ttd < 0)
+- ttd = 0;
+-
+- dest = tw->tw_daddr;
+- src = tw->tw_rcv_saddr;
+- destp = ntohs(tw->tw_dport);
+- srcp = ntohs(tw->tw_sport);
+-
+- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
+- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
+- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+- atomic_read(&tw->tw_refcnt), tw);
+-}
+-
+-#define TMPSZ 150
+-
+-static int tcp4_seq_show(struct seq_file *seq, void *v)
+-{
+- struct tcp_iter_state* st;
+- char tmpbuf[TMPSZ + 1];
+-
+- if (v == SEQ_START_TOKEN) {
+- seq_printf(seq, "%-*s\n", TMPSZ - 1,
+- " sl local_address rem_address st tx_queue "
+- "rx_queue tr tm->when retrnsmt uid timeout "
+- "inode");
+- goto out;
+- }
+- st = seq->private;
+-
+- switch (st->state) {
+- case TCP_SEQ_STATE_LISTENING:
+- case TCP_SEQ_STATE_ESTABLISHED:
+- get_tcp4_sock(v, tmpbuf, st->num);
+- break;
+- case TCP_SEQ_STATE_OPENREQ:
+- get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
+- break;
+- case TCP_SEQ_STATE_TIME_WAIT:
+- get_timewait4_sock(v, tmpbuf, st->num);
+- break;
+- }
+- seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
+-out:
+- return 0;
+-}
+-
+-static struct file_operations tcp4_seq_fops;
+-static struct tcp_seq_afinfo tcp4_seq_afinfo = {
+- .owner = THIS_MODULE,
+- .name = "tcp",
+- .family = AF_INET,
+- .seq_show = tcp4_seq_show,
+- .seq_fops = &tcp4_seq_fops,
+-};
+-
+-int __init tcp4_proc_init(void)
+-{
+- return tcp_proc_register(&tcp4_seq_afinfo);
+-}
+-
+-void tcp4_proc_exit(void)
+-{
+- tcp_proc_unregister(&tcp4_seq_afinfo);
+-}
+-#endif /* CONFIG_PROC_FS */
+-
+-struct proto tcp_prot = {
+- .name = "TCP",
+- .owner = THIS_MODULE,
+- .close = tcp_close,
+- .connect = tcp_v4_connect,
+- .disconnect = tcp_disconnect,
+- .accept = inet_csk_accept,
+- .ioctl = tcp_ioctl,
+- .init = tcp_v4_init_sock,
+- .destroy = tcp_v4_destroy_sock,
+- .shutdown = tcp_shutdown,
+- .setsockopt = tcp_setsockopt,
+- .getsockopt = tcp_getsockopt,
+- .recvmsg = tcp_recvmsg,
+- .backlog_rcv = tcp_v4_do_rcv,
+- .hash = tcp_v4_hash,
+- .unhash = tcp_unhash,
+- .get_port = tcp_v4_get_port,
+- .enter_memory_pressure = tcp_enter_memory_pressure,
+- .sockets_allocated = &tcp_sockets_allocated,
+- .orphan_count = &tcp_orphan_count,
+- .memory_allocated = &tcp_memory_allocated,
+- .memory_pressure = &tcp_memory_pressure,
+- .sysctl_mem = sysctl_tcp_mem,
+- .sysctl_wmem = sysctl_tcp_wmem,
+- .sysctl_rmem = sysctl_tcp_rmem,
+- .max_header = MAX_TCP_HEADER,
+- .obj_size = sizeof(struct tcp_sock),
+- .twsk_prot = &tcp_timewait_sock_ops,
+- .rsk_prot = &tcp_request_sock_ops,
+-#ifdef CONFIG_COMPAT
+- .compat_setsockopt = compat_tcp_setsockopt,
+- .compat_getsockopt = compat_tcp_getsockopt,
+-#endif
+-};
+-
+-void __init tcp_v4_init(struct net_proto_family *ops)
+-{
+- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
+- IPPROTO_TCP) < 0)
+- panic("Failed to create the TCP control socket.\n");
+-}
+-
+-EXPORT_SYMBOL(ipv4_specific);
+-EXPORT_SYMBOL(tcp_hashinfo);
+-EXPORT_SYMBOL(tcp_prot);
+-EXPORT_SYMBOL(tcp_unhash);
+-EXPORT_SYMBOL(tcp_v4_conn_request);
+-EXPORT_SYMBOL(tcp_v4_connect);
+-EXPORT_SYMBOL(tcp_v4_do_rcv);
+-EXPORT_SYMBOL(tcp_v4_remember_stamp);
+-EXPORT_SYMBOL(tcp_v4_send_check);
+-EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
+-
+-#ifdef CONFIG_PROC_FS
+-EXPORT_SYMBOL(tcp_proc_register);
+-EXPORT_SYMBOL(tcp_proc_unregister);
+-#endif
+-EXPORT_SYMBOL(sysctl_local_port_range);
+-EXPORT_SYMBOL(sysctl_tcp_low_latency);
+-
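
The tcp_ipv4.c block removed above is the classic three-stage /proc/net/tcp iterator: walk the listening hash (including each listener's SYN queue), then the established hash, then the time-wait chain, swapping locks at every boundary. As orientation, a minimal sketch of the seq_file contract that code implements, with a hypothetical fixed three-record table standing in for the socket hashes; only the stock seq_file API is assumed:

#include <linux/seq_file.h>

/* start: position the cursor; this is where the walk takes its locks */
static void *demo_seq_start(struct seq_file *seq, loff_t *pos)
{
	return *pos < 3 ? pos : NULL;		/* three records: 0, 1, 2 */
}

/* next: advance the cursor, or return NULL at the end */
static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return *pos < 3 ? pos : NULL;
}

/* stop: undo whatever start/next acquired (cf. tcp_seq_stop above) */
static void demo_seq_stop(struct seq_file *seq, void *v)
{
}

/* show: format exactly one record */
static int demo_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "record %lld\n", (long long)*(loff_t *)v);
	return 0;
}

static struct seq_operations demo_seq_ops = {
	.start = demo_seq_start,
	.next  = demo_seq_next,
	.stop  = demo_seq_stop,
	.show  = demo_seq_show,
};

Wiring demo_seq_ops to a /proc entry follows the same open/read/llseek/release plumbing as tcp_seq_open above.
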
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-591/net/ipv4/tcp_output.c
+--- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/tcp_output.c 2007-12-21 15:36:15.000000000 -0500
+@@ -432,11 +432,11 @@
+ sysctl_flags = 0;
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+- if (sysctl_tcp_timestamps) {
++ if (sk->sk_net->sysctl_tcp_timestamps) {
+ tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
+ }
+- if (sysctl_tcp_window_scaling) {
++ if (sk->sk_net->sysctl_tcp_window_scaling) {
+ tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+ sysctl_flags |= SYSCTL_FLAG_WSCALE;
+ }
+@@ -2215,7 +2215,7 @@
+ * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
+ */
+ tp->tcp_header_len = sizeof(struct tcphdr) +
+- (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
++ (sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+
+ #ifdef CONFIG_TCP_MD5SIG
+ if (tp->af_specific->md5_lookup(sk, sk) != NULL)
+@@ -2238,7 +2238,7 @@
+ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+ &tp->rcv_wnd,
+ &tp->window_clamp,
+- sysctl_tcp_window_scaling,
++ sk->sk_net->sysctl_tcp_window_scaling,
+ &rcv_wscale);
+
+ tp->rx_opt.rcv_wscale = rcv_wscale;
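
Both tcp_output.c hunks above apply one mechanical conversion: a file-scope sysctl becomes a field of struct net, reached through the socket's own namespace pointer. A minimal sketch of the before/after, assuming (as the hunks imply) that the sk_net and sysctl_tcp_timestamps members were added to struct sock and struct net earlier in this series:

#include <net/sock.h>

/* Before: one global knob, visible to every namespace alike.
 *	extern int sysctl_tcp_timestamps;
 */

/* After: each namespace carries its own copy of the knob. */
static inline int tcp_ts_enabled(const struct sock *sk)
{
	return sk->sk_net->sysctl_tcp_timestamps;	/* per-net field from this series */
}

Routing the access through sk keeps call sites namespace-correct without threading an extra struct net * parameter through every function.
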
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_probe.c linux-2.6.22-591/net/ipv4/tcp_probe.c
+--- linux-2.6.22-570/net/ipv4/tcp_probe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/tcp_probe.c 2007-12-21 15:36:15.000000000 -0500
+@@ -172,7 +172,7 @@
+ if (IS_ERR(tcpw.fifo))
+ return PTR_ERR(tcpw.fifo);
+
+- if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
++ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops))
+ goto err0;
+
+ ret = register_jprobe(&tcp_probe);
+@@ -182,7 +182,7 @@
+ pr_info("TCP watch registered (port=%d)\n", port);
+ return 0;
+ err1:
+- proc_net_remove(procname);
++ proc_net_remove(&init_net, procname);
+ err0:
+ kfifo_free(tcpw.fifo);
+ return ret;
+@@ -192,7 +192,7 @@
+ static __exit void tcpprobe_exit(void)
+ {
+ kfifo_free(tcpw.fifo);
+- proc_net_remove(procname);
++ proc_net_remove(&init_net, procname);
+ unregister_jprobe(&tcp_probe);
+
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/tunnel4.c linux-2.6.22-591/net/ipv4/tunnel4.c
+--- linux-2.6.22-570/net/ipv4/tunnel4.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/tunnel4.c 2007-12-21 15:36:15.000000000 -0500
+@@ -75,6 +75,10 @@
+ {
+ struct xfrm_tunnel *handler;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+@@ -113,6 +117,9 @@
+ {
+ struct xfrm_tunnel *handler;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ for (handler = tunnel4_handlers; handler; handler = handler->next)
+ if (!handler->err_handler(skb, info))
+ break;
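
tunnel4.c has not been converted to namespaces yet, so both hunks above simply refuse traffic from any namespace other than init_net. Restated on its own, this is the stock guard the series drops into every unconverted receive path (nd_net being the per-device namespace pointer the series introduces):

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>

static int demo_rcv(struct sk_buff *skb)
{
	/* Unconverted handler: drop anything not from init_net. */
	if (skb->dev->nd_net != &init_net) {
		kfree_skb(skb);		/* consume, not leak, the skb */
		return 0;
	}
	/* ... normal protocol processing would follow ... */
	return 0;
}

The err_handler variant just returns instead of freeing, since the ICMP skb there still belongs to its caller.
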
+diff -Nurb linux-2.6.22-570/net/ipv4/udp.c linux-2.6.22-591/net/ipv4/udp.c
+--- linux-2.6.22-570/net/ipv4/udp.c 2007-12-21 15:36:02.000000000 -0500
++++ linux-2.6.22-591/net/ipv4/udp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -101,6 +101,7 @@
+ #include <net/route.h>
+ #include <net/checksum.h>
+ #include <net/xfrm.h>
++#include <net/net_namespace.h>
+ #include "udp_impl.h"
+
+ /*
+@@ -112,16 +113,17 @@
+ struct hlist_head udp_hash[UDP_HTABLE_SIZE];
+ DEFINE_RWLOCK(udp_hash_lock);
+
+-static int udp_port_rover;
+-
+-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
++static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[])
+ {
+ struct sock *sk;
+ struct hlist_node *node;
+
+- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
++ sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) {
++ if (sk->sk_net != net)
++ continue;
+ if (sk->sk_hash == num)
+ return 1;
++ }
+ return 0;
+ }
+
+@@ -148,9 +150,9 @@
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+
+- if (*port_rover > sysctl_local_port_range[1] ||
+- *port_rover < sysctl_local_port_range[0])
+- *port_rover = sysctl_local_port_range[0];
++ if (*port_rover > sk->sk_net->sysctl_local_port_range[1] ||
++ *port_rover < sk->sk_net->sysctl_local_port_range[0])
++ *port_rover = sk->sk_net->sysctl_local_port_range[0];
+ best_size_so_far = 32767;
+ best = result = *port_rover;
+ for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+@@ -158,9 +160,9 @@
+
+ head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+ if (hlist_empty(head)) {
+- if (result > sysctl_local_port_range[1])
+- result = sysctl_local_port_range[0] +
+- ((result - sysctl_local_port_range[0]) &
++ if (result > sk->sk_net->sysctl_local_port_range[1])
++ result = sk->sk_net->sysctl_local_port_range[0] +
++ ((result - sk->sk_net->sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ goto gotit;
+ }
+@@ -177,11 +179,11 @@
+ result = best;
+ for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+ i++, result += UDP_HTABLE_SIZE) {
+- if (result > sysctl_local_port_range[1])
+- result = sysctl_local_port_range[0]
+- + ((result - sysctl_local_port_range[0]) &
++ if (result > sk->sk_net->sysctl_local_port_range[1])
++ result = sk->sk_net->sysctl_local_port_range[0]
++ + ((result - sk->sk_net->sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+- if (! __udp_lib_lport_inuse(result, udptable))
++ if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable))
+ break;
+ }
+ if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+@@ -194,6 +196,7 @@
+ sk_for_each(sk2, node, head)
+ if (sk2->sk_hash == snum &&
+ sk2 != sk &&
++ sk->sk_net == sk2->sk_net &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+@@ -216,7 +219,7 @@
+ int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*scmp)(const struct sock *, const struct sock *))
+ {
+- return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
++ return __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp);
+ }
+
+ extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2);
+@@ -229,7 +232,8 @@
+ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
+ * harder than this. -DaveM
+ */
+-static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
++static struct sock *__udp4_lib_lookup(struct net *net,
++ __be32 saddr, __be16 sport,
+ __be32 daddr, __be16 dport,
+ int dif, struct hlist_head udptable[])
+ {
+@@ -243,6 +247,9 @@
+ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ struct inet_sock *inet = inet_sk(sk);
+
++ if (sk->sk_net != net)
++ continue;
++
+ if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+ int score = (sk->sk_family == PF_INET ? 1 : 0);
+
+@@ -299,6 +306,9 @@
+ sk_for_each_from(s, node) {
+ struct inet_sock *inet = inet_sk(s);
+
++ if (s->sk_net != sk->sk_net)
++ continue;
++
+ if (s->sk_hash != hnum ||
+ (inet->daddr && inet->daddr != rmt_addr) ||
+ (inet->dport != rmt_port && inet->dport) ||
+@@ -328,6 +338,7 @@
+
+ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
+ {
++ struct net *net = skb->dev->nd_net;
+ struct inet_sock *inet;
+ struct iphdr *iph = (struct iphdr*)skb->data;
+ struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
+@@ -337,7 +348,7 @@
+ int harderr;
+ int err;
+
+- sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
++ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source,
+ skb->dev->ifindex, udptable );
+ if (sk == NULL) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+@@ -623,7 +634,8 @@
+ rt = (struct rtable*)sk_dst_check(sk, 0);
+
+ if (rt == NULL) {
+- struct flowi fl = { .oif = ipc.oif,
++ struct flowi fl = { .fl_net = sk->sk_net,
++ .oif = ipc.oif,
+ .nl_u = { .ip4_u =
+ { .daddr = faddr,
+ .saddr = saddr,
+@@ -1288,6 +1300,7 @@
+ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
+ int proto)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct sock *sk;
+ struct udphdr *uh = udp_hdr(skb);
+ unsigned short ulen;
+@@ -1322,7 +1335,7 @@
+ udp_ping_of_death(skb, uh, saddr);
+ #endif
+
+- sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
++ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest,
+ skb->dev->ifindex, udptable );
+
+ if (sk != NULL) {
+@@ -1651,7 +1664,7 @@
+ sk = sk_next(sk);
+ try_again:
+ ;
+- } while (sk && (sk->sk_family != state->family ||
++ } while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family ||
+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
+
+ if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+@@ -1717,6 +1730,7 @@
+
+ seq = file->private_data;
+ seq->private = s;
++ s->net = get_net(PROC_NET(inode));
+ out:
+ return rc;
+ out_kfree:
+@@ -1724,21 +1738,31 @@
+ goto out;
+ }
+
++static int udp_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct udp_iter_state *state = seq->private;
++ put_net(state->net);
++ return seq_release_private(inode, file);
++}
++
+ /* ------------------------------------------------------------------------ */
+-int udp_proc_register(struct udp_seq_afinfo *afinfo)
++int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
+ {
+ struct proc_dir_entry *p;
+ int rc = 0;
+
+ if (!afinfo)
+ return -EINVAL;
++ if (net == &init_net) {
+ afinfo->seq_fops->owner = afinfo->owner;
+ afinfo->seq_fops->open = udp_seq_open;
+ afinfo->seq_fops->read = seq_read;
+ afinfo->seq_fops->llseek = seq_lseek;
+- afinfo->seq_fops->release = seq_release_private;
++ afinfo->seq_fops->release = udp_seq_release;
++ }
+
+- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ if (p)
+ p->data = afinfo;
+ else
+@@ -1746,11 +1770,12 @@
+ return rc;
+ }
+
+-void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
++void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
+ {
+ if (!afinfo)
+ return;
+- proc_net_remove(afinfo->name);
++ proc_net_remove(net, afinfo->name);
++ if (net == &init_net)
+ memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+
+@@ -1803,14 +1828,30 @@
+ .seq_fops = &udp4_seq_fops,
+ };
+
++
++static int udp4_proc_net_init(struct net *net)
++{
++ return udp_proc_register(net, &udp4_seq_afinfo);
++}
++
++static void udp4_proc_net_exit(struct net *net)
++{
++ udp_proc_unregister(net, &udp4_seq_afinfo);
++}
++
++static struct pernet_operations udp4_proc_net_ops = {
++ .init = udp4_proc_net_init,
++ .exit = udp4_proc_net_exit,
++};
++
+ int __init udp4_proc_init(void)
+ {
+- return udp_proc_register(&udp4_seq_afinfo);
++ return register_pernet_subsys(&udp4_proc_net_ops);
+ }
+
+ void udp4_proc_exit(void)
+ {
+- udp_proc_unregister(&udp4_seq_afinfo);
++ unregister_pernet_subsys(&udp4_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
+
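
The closing udp.c hunk is the canonical per-namespace lifecycle hookup: a pernet_operations pair whose init/exit run once per namespace, registered through register_pernet_subsys(). The same pattern in isolation, with hypothetical demo_* names:

#include <net/net_namespace.h>

static int demo_net_init(struct net *net)
{
	/* allocate this namespace's private state, /proc entries, ... */
	return 0;
}

static void demo_net_exit(struct net *net)
{
	/* tear that per-namespace state back down */
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.exit = demo_net_exit,
};

/* register_pernet_subsys(&demo_net_ops) calls .init for every namespace
 * already in existence and for each one created afterwards;
 * unregister_pernet_subsys() runs .exit for each before returning. */
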
+diff -Nurb linux-2.6.22-570/net/ipv4/udplite.c linux-2.6.22-591/net/ipv4/udplite.c
+--- linux-2.6.22-570/net/ipv4/udplite.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/udplite.c 2007-12-21 15:36:15.000000000 -0500
+@@ -31,11 +31,18 @@
+
+ static int udplite_rcv(struct sk_buff *skb)
+ {
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
+ }
+
+ static void udplite_err(struct sk_buff *skb, u32 info)
+ {
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ return __udp4_lib_err(skb, info, udplite_hash);
+ }
+
+@@ -103,7 +110,7 @@
+ inet_register_protosw(&udplite4_protosw);
+
+ #ifdef CONFIG_PROC_FS
+- if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
++ if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */
+ printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
+ #endif
+ return;
+diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_input.c linux-2.6.22-591/net/ipv4/xfrm4_input.c
+--- linux-2.6.22-570/net/ipv4/xfrm4_input.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/xfrm4_input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -18,6 +18,10 @@
+
+ int xfrm4_rcv(struct sk_buff *skb)
+ {
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ return xfrm4_rcv_encap(skb, 0);
+ }
+
+diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_policy.c linux-2.6.22-591/net/ipv4/xfrm4_policy.c
+--- linux-2.6.22-570/net/ipv4/xfrm4_policy.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/xfrm4_policy.c 2007-12-21 15:36:15.000000000 -0500
+@@ -25,6 +25,7 @@
+ {
+ struct rtable *rt;
+ struct flowi fl_tunnel = {
++ .fl_net = &init_net,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = daddr->a4,
+@@ -73,6 +74,7 @@
+ struct rtable *rt0 = (struct rtable*)(*dst_p);
+ struct rtable *rt = rt0;
+ struct flowi fl_tunnel = {
++ .fl_net = &init_net,
+ .nl_u = {
+ .ip4_u = {
+ .saddr = fl->fl4_src,
+@@ -213,6 +215,7 @@
+ u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+
+ memset(fl, 0, sizeof(struct flowi));
++ fl->fl_net = &init_net;
+ if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
+ switch (iph->protocol) {
+ case IPPROTO_UDP:
+@@ -306,7 +309,7 @@
+
+ xdst = (struct xfrm_dst *)dst;
+ if (xdst->u.rt.idev->dev == dev) {
+- struct in_device *loopback_idev = in_dev_get(&loopback_dev);
++ struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev);
+ BUG_ON(!loopback_idev);
+
+ do {
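
The xfrm4_policy.c hunks all make the same point: a struct flowi must now say which namespace it is to be routed in, via the fl_net member this series adds; a flow key left with a NULL fl_net would derail the lookup. A minimal sketch of a routed lookup under that convention (ip_route_output_key is the stock 2.6.22 API):

#include <net/flow.h>
#include <net/route.h>
#include <net/net_namespace.h>

static int demo_route_output(__be32 daddr, __be32 saddr, struct rtable **rtp)
{
	struct flowi fl = {
		.fl_net = &init_net,	/* namespace to route in (field from this series) */
		.nl_u = {
			.ip4_u = {
				.daddr = daddr,
				.saddr = saddr,
			},
		},
	};
	return ip_route_output_key(rtp, &fl);
}
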
+diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_state.c linux-2.6.22-591/net/ipv4/xfrm4_state.c
+--- linux-2.6.22-570/net/ipv4/xfrm4_state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/xfrm4_state.c 2007-12-21 15:36:15.000000000 -0500
+@@ -16,7 +16,7 @@
+
+ static int xfrm4_init_flags(struct xfrm_state *x)
+ {
+- if (ipv4_config.no_pmtu_disc)
++ if (init_net.sysctl_ipv4_no_pmtu_disc)
+ x->props.flags |= XFRM_STATE_NOPMTUDISC;
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c
+--- linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c 2007-12-21 15:36:12.000000000 -0500
+@@ -109,3 +109,4 @@
+ module_init(ipip_init);
+ module_exit(ipip_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
+diff -Nurb linux-2.6.22-570/net/ipv6/Kconfig linux-2.6.22-591/net/ipv6/Kconfig
+--- linux-2.6.22-570/net/ipv6/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/Kconfig 2007-12-21 15:36:12.000000000 -0500
+@@ -109,7 +109,7 @@
+ If unsure, say Y.
+
+ config IPV6_MIP6
+- bool "IPv6: Mobility (EXPERIMENTAL)"
++ tristate "IPv6: Mobility (EXPERIMENTAL)"
+ depends on IPV6 && EXPERIMENTAL
+ select XFRM
+ ---help---
+diff -Nurb linux-2.6.22-570/net/ipv6/Makefile linux-2.6.22-591/net/ipv6/Makefile
+--- linux-2.6.22-570/net/ipv6/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -14,7 +14,6 @@
+ xfrm6_output.o
+ ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+-ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ ipv6-$(CONFIG_PROC_FS) += proc.o
+
+ ipv6-objs += $(ipv6-y)
+@@ -28,6 +27,7 @@
+ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
++obj-$(CONFIG_IPV6_MIP6) += mip6.o
+ obj-$(CONFIG_NETFILTER) += netfilter/
+
+ obj-$(CONFIG_IPV6_SIT) += sit.o
+diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-591/net/ipv6/addrconf.c
+--- linux-2.6.22-570/net/ipv6/addrconf.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/addrconf.c 2007-12-21 15:36:15.000000000 -0500
+@@ -73,6 +73,7 @@
+ #include <net/tcp.h>
+ #include <net/ip.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ #include <net/pkt_sched.h>
+ #include <linux/if_tunnel.h>
+ #include <linux/rtnetlink.h>
+@@ -457,7 +458,7 @@
+ struct inet6_dev *idev;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+@@ -920,7 +921,7 @@
+ read_lock(&dev_base_lock);
+ rcu_read_lock();
+
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+
+@@ -1047,7 +1048,7 @@
+ }
+
+ /* Rule 4: Prefer home address */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ if (hiscore.rule < 4) {
+ if (ifa_result->flags & IFA_F_HOMEADDRESS)
+ hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+@@ -1882,7 +1883,7 @@
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ goto err_exit;
+
+- dev = __dev_get_by_index(ireq.ifr6_ifindex);
++ dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex);
+
+ err = -ENODEV;
+ if (dev == NULL)
+@@ -1913,7 +1914,7 @@
+
+ if (err == 0) {
+ err = -ENOBUFS;
+- if ((dev = __dev_get_by_name(p.name)) == NULL)
++ if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL)
+ goto err_exit;
+ err = dev_open(dev);
+ }
+@@ -1943,7 +1944,7 @@
+ if (!valid_lft || prefered_lft > valid_lft)
+ return -EINVAL;
+
+- if ((dev = __dev_get_by_index(ifindex)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ return -ENODEV;
+
+ if ((idev = addrconf_add_dev(dev)) == NULL)
+@@ -1994,7 +1995,7 @@
+ struct inet6_dev *idev;
+ struct net_device *dev;
+
+- if ((dev = __dev_get_by_index(ifindex)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ return -ENODEV;
+
+ if ((idev = __in6_dev_get(dev)) == NULL)
+@@ -2089,7 +2090,7 @@
+ return;
+ }
+
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ struct in_device * in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev && (dev->flags & IFF_UP)) {
+ struct in_ifaddr * ifa;
+@@ -2245,12 +2246,12 @@
+
+ /* first try to inherit the link-local address from the link device */
+ if (idev->dev->iflink &&
+- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
++ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+ /* then try to inherit it from any device */
+- for_each_netdev(link_dev) {
++ for_each_netdev(&init_net, link_dev) {
+ if (!ipv6_inherit_linklocal(idev, link_dev))
+ return;
+ }
+@@ -2282,6 +2283,9 @@
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ int run_pending = 0;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch(event) {
+ case NETDEV_REGISTER:
+ if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+@@ -2419,7 +2423,7 @@
+
+ ASSERT_RTNL();
+
+- if (dev == &loopback_dev && how == 1)
++ if (dev == &init_net.loopback_dev && how == 1)
+ how = 0;
+
+ rt6_ifdown(dev);
+@@ -2850,18 +2854,18 @@
+
+ int __init if6_proc_init(void)
+ {
+- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
++ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+ void if6_proc_exit(void)
+ {
+- proc_net_remove("if_inet6");
++ proc_net_remove(&init_net, "if_inet6");
+ }
+ #endif /* CONFIG_PROC_FS */
+
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ /* Check if address is a home address configured on any interface. */
+ int ipv6_chk_home_addr(struct in6_addr *addr)
+ {
+@@ -3017,11 +3021,15 @@
+ static int
+ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+@@ -3074,6 +3082,7 @@
+ static int
+ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *pfx;
+@@ -3083,6 +3092,9 @@
+ u8 ifa_flags;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ return err;
+@@ -3103,7 +3115,7 @@
+ valid_lft = INFINITY_LIFE_TIME;
+ }
+
+- dev = __dev_get_by_index(ifm->ifa_index);
++ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+ if (dev == NULL)
+ return -ENODEV;
+
+@@ -3292,7 +3304,7 @@
+ s_ip_idx = ip_idx = cb->args[1];
+
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+@@ -3367,26 +3379,42 @@
+
+ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = UNICAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+ static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = MULTICAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+
+ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ enum addr_type_t type = ANYCAST_ADDR;
++
++ if (net != &init_net)
++ return 0;
++
+ return inet6_dump_addr(skb, cb, type);
+ }
+
+ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ void *arg)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ struct ifaddrmsg *ifm;
+ struct nlattr *tb[IFA_MAX+1];
+ struct in6_addr *addr = NULL;
+@@ -3395,6 +3423,9 @@
+ struct sk_buff *skb;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ if (err < 0)
+ goto errout;
+@@ -3407,7 +3438,7 @@
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_index)
+- dev = __dev_get_by_index(ifm->ifa_index);
++ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+
+ if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+ err = -EADDRNOTAVAIL;
+@@ -3427,7 +3458,7 @@
+ kfree_skb(skb);
+ goto errout_ifa;
+ }
+- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ errout_ifa:
+ in6_ifa_put(ifa);
+ errout:
+@@ -3450,10 +3481,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+
+ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+@@ -3612,19 +3643,22 @@
+
+ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx, err;
+ int s_idx = cb->args[0];
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
+
++ if (net != &init_net)
++ return 0;
+ /* FIXME: maybe disable ipv6 on non v6 guests?
+ if (skb->sk && skb->sk->sk_vx_info)
+ return skb->len; */
+
+ read_lock(&dev_base_lock);
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (!v6_dev_in_nx_info(dev, nxi))
+@@ -3661,10 +3695,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+
+ static inline size_t inet6_prefix_nlmsg_size(void)
+@@ -3730,10 +3764,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
+ }
+
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+@@ -4244,16 +4278,16 @@
+ * device and it being up should be removed.
+ */
+ rtnl_lock();
+- if (!ipv6_add_dev(&loopback_dev))
++ if (!ipv6_add_dev(&init_net.loopback_dev))
+ err = -ENOMEM;
+ rtnl_unlock();
+ if (err)
+ return err;
+
+- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
++ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
++ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
++ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #endif
+
+ register_netdevice_notifier(&ipv6_dev_notf);
+@@ -4304,12 +4338,12 @@
+ * clean dev list.
+ */
+
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((idev = __in6_dev_get(dev)) == NULL)
+ continue;
+ addrconf_ifdown(dev, 1);
+ }
+- addrconf_ifdown(&loopback_dev, 2);
++ addrconf_ifdown(&init_net.loopback_dev, 2);
+
+ /*
+ * Check hash table.
+@@ -4335,6 +4369,6 @@
+ rtnl_unlock();
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_remove("if_inet6");
++ proc_net_remove(&init_net, "if_inet6");
+ #endif
+ }
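
Throughout the addrconf.c hunks the bare device walk becomes for_each_netdev(&init_net, dev): the iterator now names the namespace whose device list it traverses instead of assuming one global list. The converted loop on its own, under the same dev_base_lock discipline addrconf.c already uses:

#include <linux/netdevice.h>
#include <net/net_namespace.h>

static void demo_walk_devices(void)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for_each_netdev(&init_net, dev)		/* iterator now takes the net */
		printk(KERN_DEBUG "saw device %s\n", dev->name);
	read_unlock(&dev_base_lock);
}
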
+diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-591/net/ipv6/addrconf.c.orig
+--- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,4301 +0,0 @@
+-/*
+- * IPv6 Address [auto]configuration
+- * Linux INET6 implementation
+- *
+- * Authors:
+- * Pedro Roque <roque@di.fc.ul.pt>
+- * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+- *
+- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- */
+-
+-/*
+- * Changes:
+- *
+- * Janos Farkas : delete timer on ifdown
+- * <chexum@bankinf.banki.hu>
+- * Andi Kleen : kill double kfree on module
+- * unload.
+- * Maciej W. Rozycki : FDDI support
+- * sekiya@USAGI : Don't send too many RS
+- * packets.
+- * yoshfuji@USAGI : Fixed interval between DAD
+- * packets.
+- * YOSHIFUJI Hideaki @USAGI : improved accuracy of
+- * address validation timer.
+- * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
+- * support.
+- * Yuji SEKIYA @USAGI : Don't assign a same IPv6
+- * address on a same interface.
+- * YOSHIFUJI Hideaki @USAGI : ARCnet support
+- * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
+- * seq_file.
+- * YOSHIFUJI Hideaki @USAGI : improved source address
+- * selection; consider scope,
+- * status etc.
+- */
+-
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/socket.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/in6.h>
+-#include <linux/netdevice.h>
+-#include <linux/if_addr.h>
+-#include <linux/if_arp.h>
+-#include <linux/if_arcnet.h>
+-#include <linux/if_infiniband.h>
+-#include <linux/route.h>
+-#include <linux/inetdevice.h>
+-#include <linux/init.h>
+-#ifdef CONFIG_SYSCTL
+-#include <linux/sysctl.h>
+-#endif
+-#include <linux/capability.h>
+-#include <linux/delay.h>
+-#include <linux/notifier.h>
+-#include <linux/string.h>
+-
+-#include <net/sock.h>
+-#include <net/snmp.h>
+-
+-#include <net/ipv6.h>
+-#include <net/protocol.h>
+-#include <net/ndisc.h>
+-#include <net/ip6_route.h>
+-#include <net/addrconf.h>
+-#include <net/tcp.h>
+-#include <net/ip.h>
+-#include <net/netlink.h>
+-#include <net/pkt_sched.h>
+-#include <linux/if_tunnel.h>
+-#include <linux/rtnetlink.h>
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+-#include <linux/random.h>
+-#endif
+-
+-#include <asm/uaccess.h>
+-#include <asm/unaligned.h>
+-
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-
+-/* Set to 3 to get tracing... */
+-#define ACONF_DEBUG 2
+-
+-#if ACONF_DEBUG >= 3
+-#define ADBG(x) printk x
+-#else
+-#define ADBG(x)
+-#endif
+-
+-#define INFINITY_LIFE_TIME 0xFFFFFFFF
+-#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+-
+-#ifdef CONFIG_SYSCTL
+-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+-static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+-#endif
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+-static int __ipv6_regen_rndid(struct inet6_dev *idev);
+-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+-static void ipv6_regen_rndid(unsigned long data);
+-
+-static int desync_factor = MAX_DESYNC_FACTOR * HZ;
+-#endif
+-
+-static int ipv6_count_addresses(struct inet6_dev *idev);
+-
+-/*
+- * Configured unicast address hash table
+- */
+-static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
+-static DEFINE_RWLOCK(addrconf_hash_lock);
+-
+-static void addrconf_verify(unsigned long);
+-
+-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
+-static DEFINE_SPINLOCK(addrconf_verify_lock);
+-
+-static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
+-static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
+-
+-static int addrconf_ifdown(struct net_device *dev, int how);
+-
+-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
+-static void addrconf_dad_timer(unsigned long data);
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+-static void addrconf_dad_run(struct inet6_dev *idev);
+-static void addrconf_rs_timer(unsigned long data);
+-static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+-static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+-
+-static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+- struct prefix_info *pinfo);
+-static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
+-
+-static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+-
+-struct ipv6_devconf ipv6_devconf __read_mostly = {
+- .forwarding = 0,
+- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+- .mtu6 = IPV6_MIN_MTU,
+- .accept_ra = 1,
+- .accept_redirects = 1,
+- .autoconf = 1,
+- .force_mld_version = 0,
+- .dad_transmits = 1,
+- .rtr_solicits = MAX_RTR_SOLICITATIONS,
+- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+-#ifdef CONFIG_IPV6_PRIVACY
+- .use_tempaddr = 0,
+- .temp_valid_lft = TEMP_VALID_LIFETIME,
+- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+- .regen_max_retry = REGEN_MAX_RETRY,
+- .max_desync_factor = MAX_DESYNC_FACTOR,
+-#endif
+- .max_addresses = IPV6_MAX_ADDRESSES,
+- .accept_ra_defrtr = 1,
+- .accept_ra_pinfo = 1,
+-#ifdef CONFIG_IPV6_ROUTER_PREF
+- .accept_ra_rtr_pref = 1,
+- .rtr_probe_interval = 60 * HZ,
+-#ifdef CONFIG_IPV6_ROUTE_INFO
+- .accept_ra_rt_info_max_plen = 0,
+-#endif
+-#endif
+- .proxy_ndp = 0,
+- .accept_source_route = 0, /* we do not accept RH0 by default. */
+-};
+-
+-static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
+- .forwarding = 0,
+- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+- .mtu6 = IPV6_MIN_MTU,
+- .accept_ra = 1,
+- .accept_redirects = 1,
+- .autoconf = 1,
+- .dad_transmits = 1,
+- .rtr_solicits = MAX_RTR_SOLICITATIONS,
+- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+-#ifdef CONFIG_IPV6_PRIVACY
+- .use_tempaddr = 0,
+- .temp_valid_lft = TEMP_VALID_LIFETIME,
+- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+- .regen_max_retry = REGEN_MAX_RETRY,
+- .max_desync_factor = MAX_DESYNC_FACTOR,
+-#endif
+- .max_addresses = IPV6_MAX_ADDRESSES,
+- .accept_ra_defrtr = 1,
+- .accept_ra_pinfo = 1,
+-#ifdef CONFIG_IPV6_ROUTER_PREF
+- .accept_ra_rtr_pref = 1,
+- .rtr_probe_interval = 60 * HZ,
+-#ifdef CONFIG_IPV6_ROUTE_INFO
+- .accept_ra_rt_info_max_plen = 0,
+-#endif
+-#endif
+- .proxy_ndp = 0,
+- .accept_source_route = 0, /* we do not accept RH0 by default. */
+-};
+-
+-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+-
+-/* Check if a valid qdisc is available */
+-static inline int addrconf_qdisc_ok(struct net_device *dev)
+-{
+- return (dev->qdisc != &noop_qdisc);
+-}
+-
+-static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+-{
+- if (del_timer(&ifp->timer))
+- __in6_ifa_put(ifp);
+-}
+-
+-enum addrconf_timer_t
+-{
+- AC_NONE,
+- AC_DAD,
+- AC_RS,
+-};
+-
+-static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
+- enum addrconf_timer_t what,
+- unsigned long when)
+-{
+- if (!del_timer(&ifp->timer))
+- in6_ifa_hold(ifp);
+-
+- switch (what) {
+- case AC_DAD:
+- ifp->timer.function = addrconf_dad_timer;
+- break;
+- case AC_RS:
+- ifp->timer.function = addrconf_rs_timer;
+- break;
+- default:;
+- }
+- ifp->timer.expires = jiffies + when;
+- add_timer(&ifp->timer);
+-}
+-
+-static int snmp6_alloc_dev(struct inet6_dev *idev)
+-{
+- int err = -ENOMEM;
+-
+- if (!idev || !idev->dev)
+- return -EINVAL;
+-
+- if (snmp_mib_init((void **)idev->stats.ipv6,
+- sizeof(struct ipstats_mib),
+- __alignof__(struct ipstats_mib)) < 0)
+- goto err_ip;
+- if (snmp_mib_init((void **)idev->stats.icmpv6,
+- sizeof(struct icmpv6_mib),
+- __alignof__(struct icmpv6_mib)) < 0)
+- goto err_icmp;
+-
+- return 0;
+-
+-err_icmp:
+- snmp_mib_free((void **)idev->stats.ipv6);
+-err_ip:
+- return err;
+-}
+-
+-static int snmp6_free_dev(struct inet6_dev *idev)
+-{
+- snmp_mib_free((void **)idev->stats.icmpv6);
+- snmp_mib_free((void **)idev->stats.ipv6);
+- return 0;
+-}
+-
+-/* Nobody refers to this device, we may destroy it. */
+-
+-static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+-{
+- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+- kfree(idev);
+-}
+-
+-void in6_dev_finish_destroy(struct inet6_dev *idev)
+-{
+- struct net_device *dev = idev->dev;
+- BUG_TRAP(idev->addr_list==NULL);
+- BUG_TRAP(idev->mc_list==NULL);
+-#ifdef NET_REFCNT_DEBUG
+- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
+-#endif
+- dev_put(dev);
+- if (!idev->dead) {
+- printk("Freeing alive inet6 device %p\n", idev);
+- return;
+- }
+- snmp6_free_dev(idev);
+- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
+-}
+-
+-EXPORT_SYMBOL(in6_dev_finish_destroy);
+-
+-static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+-{
+- struct inet6_dev *ndev;
+- struct in6_addr maddr;
+-
+- ASSERT_RTNL();
+-
+- if (dev->mtu < IPV6_MIN_MTU)
+- return NULL;
+-
+- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+-
+- if (ndev == NULL)
+- return NULL;
+-
+- rwlock_init(&ndev->lock);
+- ndev->dev = dev;
+- memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+- ndev->cnf.mtu6 = dev->mtu;
+- ndev->cnf.sysctl = NULL;
+- ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+- if (ndev->nd_parms == NULL) {
+- kfree(ndev);
+- return NULL;
+- }
+- /* We refer to the device */
+- dev_hold(dev);
+-
+- if (snmp6_alloc_dev(ndev) < 0) {
+- ADBG((KERN_WARNING
+- "%s(): cannot allocate memory for statistics; dev=%s.\n",
+- __FUNCTION__, dev->name));
+- neigh_parms_release(&nd_tbl, ndev->nd_parms);
+- ndev->dead = 1;
+- in6_dev_finish_destroy(ndev);
+- return NULL;
+- }
+-
+- if (snmp6_register_dev(ndev) < 0) {
+- ADBG((KERN_WARNING
+- "%s(): cannot create /proc/net/dev_snmp6/%s\n",
+- __FUNCTION__, dev->name));
+- neigh_parms_release(&nd_tbl, ndev->nd_parms);
+- ndev->dead = 1;
+- in6_dev_finish_destroy(ndev);
+- return NULL;
+- }
+-
+- /* One reference from device. We must do this before
+- * we invoke __ipv6_regen_rndid().
+- */
+- in6_dev_hold(ndev);
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+- init_timer(&ndev->regen_timer);
+- ndev->regen_timer.function = ipv6_regen_rndid;
+- ndev->regen_timer.data = (unsigned long) ndev;
+- if ((dev->flags&IFF_LOOPBACK) ||
+- dev->type == ARPHRD_TUNNEL ||
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+- dev->type == ARPHRD_SIT ||
+-#endif
+- dev->type == ARPHRD_NONE) {
+- printk(KERN_INFO
+- "%s: Disabled Privacy Extensions\n",
+- dev->name);
+- ndev->cnf.use_tempaddr = -1;
+- } else {
+- in6_dev_hold(ndev);
+- ipv6_regen_rndid((unsigned long) ndev);
+- }
+-#endif
+-
+- if (netif_running(dev) && addrconf_qdisc_ok(dev))
+- ndev->if_flags |= IF_READY;
+-
+- ipv6_mc_init_dev(ndev);
+- ndev->tstamp = jiffies;
+-#ifdef CONFIG_SYSCTL
+- neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
+- NET_IPV6_NEIGH, "ipv6",
+- &ndisc_ifinfo_sysctl_change,
+- NULL);
+- addrconf_sysctl_register(ndev, &ndev->cnf);
+-#endif
+- /* protected by rtnl_lock */
+- rcu_assign_pointer(dev->ip6_ptr, ndev);
+-
+- /* Join all-node multicast group */
+- ipv6_addr_all_nodes(&maddr);
+- ipv6_dev_mc_inc(dev, &maddr);
+-
+- return ndev;
+-}
+-
+-static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+-{
+- struct inet6_dev *idev;
+-
+- ASSERT_RTNL();
+-
+- if ((idev = __in6_dev_get(dev)) == NULL) {
+- if ((idev = ipv6_add_dev(dev)) == NULL)
+- return NULL;
+- }
+-
+- if (dev->flags&IFF_UP)
+- ipv6_mc_up(idev);
+- return idev;
+-}
+-
+-#ifdef CONFIG_SYSCTL
+-static void dev_forward_change(struct inet6_dev *idev)
+-{
+- struct net_device *dev;
+- struct inet6_ifaddr *ifa;
+- struct in6_addr addr;
+-
+- if (!idev)
+- return;
+- dev = idev->dev;
+- if (dev && (dev->flags & IFF_MULTICAST)) {
+- ipv6_addr_all_routers(&addr);
+-
+- if (idev->cnf.forwarding)
+- ipv6_dev_mc_inc(dev, &addr);
+- else
+- ipv6_dev_mc_dec(dev, &addr);
+- }
+- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+- if (ifa->flags&IFA_F_TENTATIVE)
+- continue;
+- if (idev->cnf.forwarding)
+- addrconf_join_anycast(ifa);
+- else
+- addrconf_leave_anycast(ifa);
+- }
+-}
+-
+-
+-static void addrconf_forward_change(void)
+-{
+- struct net_device *dev;
+- struct inet6_dev *idev;
+-
+- read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
+- rcu_read_lock();
+- idev = __in6_dev_get(dev);
+- if (idev) {
+- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
+- idev->cnf.forwarding = ipv6_devconf.forwarding;
+- if (changed)
+- dev_forward_change(idev);
+- }
+- rcu_read_unlock();
+- }
+- read_unlock(&dev_base_lock);
+-}
+-#endif
+-
+-/* Nobody refers to this ifaddr, destroy it */
+-
+-void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
+-{
+- BUG_TRAP(ifp->if_next==NULL);
+- BUG_TRAP(ifp->lst_next==NULL);
+-#ifdef NET_REFCNT_DEBUG
+- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+-#endif
+-
+- in6_dev_put(ifp->idev);
+-
+- if (del_timer(&ifp->timer))
+- printk("Timer is still running, when freeing ifa=%p\n", ifp);
+-
+- if (!ifp->dead) {
+- printk("Freeing alive inet6 address %p\n", ifp);
+- return;
+- }
+- dst_release(&ifp->rt->u.dst);
+-
+- kfree(ifp);
+-}
+-
+-static void
+-ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
+-{
+- struct inet6_ifaddr *ifa, **ifap;
+- int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
+-
+- /*
+- * Each device address list is sorted in order of scope -
+- * global before linklocal.
+- */
+- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
+- ifap = &ifa->if_next) {
+- if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
+- break;
+- }
+-
+- ifp->if_next = *ifap;
+- *ifap = ifp;
+-}
+-
+-/* On success it returns ifp with increased reference count */
+-
+-static struct inet6_ifaddr *
+-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
+- int scope, u32 flags)
+-{
+- struct inet6_ifaddr *ifa = NULL;
+- struct rt6_info *rt;
+- int hash;
+- int err = 0;
+-
+- rcu_read_lock_bh();
+- if (idev->dead) {
+- err = -ENODEV; /*XXX*/
+- goto out2;
+- }
+-
+- write_lock(&addrconf_hash_lock);
+-
+- /* Ignore adding duplicate addresses on an interface */
+- if (ipv6_chk_same_addr(addr, idev->dev)) {
+- ADBG(("ipv6_add_addr: already assigned\n"));
+- err = -EEXIST;
+- goto out;
+- }
+-
+- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+-
+- if (ifa == NULL) {
+- ADBG(("ipv6_add_addr: malloc failed\n"));
+- err = -ENOBUFS;
+- goto out;
+- }
+-
+- rt = addrconf_dst_alloc(idev, addr, 0);
+- if (IS_ERR(rt)) {
+- err = PTR_ERR(rt);
+- goto out;
+- }
+-
+- ipv6_addr_copy(&ifa->addr, addr);
+-
+- spin_lock_init(&ifa->lock);
+- init_timer(&ifa->timer);
+- ifa->timer.data = (unsigned long) ifa;
+- ifa->scope = scope;
+- ifa->prefix_len = pfxlen;
+- ifa->flags = flags | IFA_F_TENTATIVE;
+- ifa->cstamp = ifa->tstamp = jiffies;
+-
+- ifa->rt = rt;
+-
+- /*
+- * part one of RFC 4429, section 3.3
+- * We should not configure an address as
+- * optimistic if we do not yet know the link
+- * layer address of our nexhop router
+- */
+-
+- if (rt->rt6i_nexthop == NULL)
+- ifa->flags &= ~IFA_F_OPTIMISTIC;
+-
+- ifa->idev = idev;
+- in6_dev_hold(idev);
+- /* For caller */
+- in6_ifa_hold(ifa);
+-
+- /* Add to big hash table */
+- hash = ipv6_addr_hash(addr);
+-
+- ifa->lst_next = inet6_addr_lst[hash];
+- inet6_addr_lst[hash] = ifa;
+- in6_ifa_hold(ifa);
+- write_unlock(&addrconf_hash_lock);
+-
+- write_lock(&idev->lock);
+- /* Add to inet6_dev unicast addr list. */
+- ipv6_link_dev_addr(idev, ifa);
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+- if (ifa->flags&IFA_F_TEMPORARY) {
+- ifa->tmp_next = idev->tempaddr_list;
+- idev->tempaddr_list = ifa;
+- in6_ifa_hold(ifa);
+- }
+-#endif
+-
+- in6_ifa_hold(ifa);
+- write_unlock(&idev->lock);
+-out2:
+- rcu_read_unlock_bh();
+-
+- if (likely(err == 0))
+- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+- else {
+- kfree(ifa);
+- ifa = ERR_PTR(err);
+- }
+-
+- return ifa;
+-out:
+- write_unlock(&addrconf_hash_lock);
+- goto out2;
+-}
+-
+-/* This function wants to get referenced ifp and releases it before return */
+-
+-static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+-{
+- struct inet6_ifaddr *ifa, **ifap;
+- struct inet6_dev *idev = ifp->idev;
+- int hash;
+- int deleted = 0, onlink = 0;
+- unsigned long expires = jiffies;
+-
+- hash = ipv6_addr_hash(&ifp->addr);
+-
+- ifp->dead = 1;
+-
+- write_lock_bh(&addrconf_hash_lock);
+- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
+- ifap = &ifa->lst_next) {
+- if (ifa == ifp) {
+- *ifap = ifa->lst_next;
+- __in6_ifa_put(ifp);
+- ifa->lst_next = NULL;
+- break;
+- }
+- }
+- write_unlock_bh(&addrconf_hash_lock);
+-
+- write_lock_bh(&idev->lock);
+-#ifdef CONFIG_IPV6_PRIVACY
+- if (ifp->flags&IFA_F_TEMPORARY) {
+- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
+- ifap = &ifa->tmp_next) {
+- if (ifa == ifp) {
+- *ifap = ifa->tmp_next;
+- if (ifp->ifpub) {
+- in6_ifa_put(ifp->ifpub);
+- ifp->ifpub = NULL;
+- }
+- __in6_ifa_put(ifp);
+- ifa->tmp_next = NULL;
+- break;
+- }
+- }
+- }
+-#endif
+-
+- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
+- if (ifa == ifp) {
+- *ifap = ifa->if_next;
+- __in6_ifa_put(ifp);
+- ifa->if_next = NULL;
+- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
+- break;
+- deleted = 1;
+- continue;
+- } else if (ifp->flags & IFA_F_PERMANENT) {
+- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+- ifp->prefix_len)) {
+- if (ifa->flags & IFA_F_PERMANENT) {
+- onlink = 1;
+- if (deleted)
+- break;
+- } else {
+- unsigned long lifetime;
+-
+- if (!onlink)
+- onlink = -1;
+-
+- spin_lock(&ifa->lock);
+- lifetime = min_t(unsigned long,
+- ifa->valid_lft, 0x7fffffffUL/HZ);
+- if (time_before(expires,
+- ifa->tstamp + lifetime * HZ))
+- expires = ifa->tstamp + lifetime * HZ;
+- spin_unlock(&ifa->lock);
+- }
+- }
+- }
+- ifap = &ifa->if_next;
+- }
+- write_unlock_bh(&idev->lock);
+-
+- ipv6_ifa_notify(RTM_DELADDR, ifp);
+-
+- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
+-
+- addrconf_del_timer(ifp);
+-
+- /*
+- * Purge or update corresponding prefix
+- *
+- * 1) we don't purge prefix here if address was not permanent.
+- * prefix is managed by its own lifetime.
+- * 2) if there're no addresses, delete prefix.
+- * 3) if there're still other permanent address(es),
+- * corresponding prefix is still permanent.
+- * 4) otherwise, update prefix lifetime to the
+- * longest valid lifetime among the corresponding
+- * addresses on the device.
+- * Note: subsequent RA will update lifetime.
+- *
+- * --yoshfuji
+- */
+- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
+- struct in6_addr prefix;
+- struct rt6_info *rt;
+-
+- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
+-
+- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+- if (onlink == 0) {
+- ip6_del_rt(rt);
+- rt = NULL;
+- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+- rt->rt6i_expires = expires;
+- rt->rt6i_flags |= RTF_EXPIRES;
+- }
+- }
+- dst_release(&rt->u.dst);
+- }
+-
+- in6_ifa_put(ifp);
+-}
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+-{
+- struct inet6_dev *idev = ifp->idev;
+- struct in6_addr addr, *tmpaddr;
+- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
+- int tmp_plen;
+- int ret = 0;
+- int max_addresses;
+- u32 addr_flags;
+-
+- write_lock(&idev->lock);
+- if (ift) {
+- spin_lock_bh(&ift->lock);
+- memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
+- spin_unlock_bh(&ift->lock);
+- tmpaddr = &addr;
+- } else {
+- tmpaddr = NULL;
+- }
+-retry:
+- in6_dev_hold(idev);
+- if (idev->cnf.use_tempaddr <= 0) {
+- write_unlock(&idev->lock);
+- printk(KERN_INFO
+- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+- in6_dev_put(idev);
+- ret = -1;
+- goto out;
+- }
+- spin_lock_bh(&ifp->lock);
+- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
+- idev->cnf.use_tempaddr = -1; /*XXX*/
+- spin_unlock_bh(&ifp->lock);
+- write_unlock(&idev->lock);
+- printk(KERN_WARNING
+- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+- in6_dev_put(idev);
+- ret = -1;
+- goto out;
+- }
+- in6_ifa_hold(ifp);
+- memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
+- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
+- spin_unlock_bh(&ifp->lock);
+- write_unlock(&idev->lock);
+- printk(KERN_WARNING
+- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
+- in6_ifa_put(ifp);
+- in6_dev_put(idev);
+- ret = -1;
+- goto out;
+- }
+- memcpy(&addr.s6_addr[8], idev->rndid, 8);
+- tmp_valid_lft = min_t(__u32,
+- ifp->valid_lft,
+- idev->cnf.temp_valid_lft);
+- tmp_prefered_lft = min_t(__u32,
+- ifp->prefered_lft,
+- idev->cnf.temp_prefered_lft - desync_factor / HZ);
+- tmp_plen = ifp->prefix_len;
+- max_addresses = idev->cnf.max_addresses;
+- tmp_cstamp = ifp->cstamp;
+- tmp_tstamp = ifp->tstamp;
+- spin_unlock_bh(&ifp->lock);
+-
+- write_unlock(&idev->lock);
+-
+- addr_flags = IFA_F_TEMPORARY;
+- /* set in addrconf_prefix_rcv() */
+- if (ifp->flags & IFA_F_OPTIMISTIC)
+- addr_flags |= IFA_F_OPTIMISTIC;
+-
+- ift = !max_addresses ||
+- ipv6_count_addresses(idev) < max_addresses ?
+- ipv6_add_addr(idev, &addr, tmp_plen,
+- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+- addr_flags) : NULL;
+- if (!ift || IS_ERR(ift)) {
+- in6_ifa_put(ifp);
+- in6_dev_put(idev);
+- printk(KERN_INFO
+- "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+- tmpaddr = &addr;
+- write_lock(&idev->lock);
+- goto retry;
+- }
+-
+- spin_lock_bh(&ift->lock);
+- ift->ifpub = ifp;
+- ift->valid_lft = tmp_valid_lft;
+- ift->prefered_lft = tmp_prefered_lft;
+- ift->cstamp = tmp_cstamp;
+- ift->tstamp = tmp_tstamp;
+- spin_unlock_bh(&ift->lock);
+-
+- addrconf_dad_start(ift, 0);
+- in6_ifa_put(ift);
+- in6_dev_put(idev);
+-out:
+- return ret;
+-}
+-#endif
+-
+-/*
+- * Choose an appropriate source address (RFC3484)
+- */
+-struct ipv6_saddr_score {
+- int addr_type;
+- unsigned int attrs;
+- int matchlen;
+- int scope;
+- unsigned int rule;
+-};
+-
+-#define IPV6_SADDR_SCORE_LOCAL 0x0001
+-#define IPV6_SADDR_SCORE_PREFERRED 0x0004
+-#define IPV6_SADDR_SCORE_HOA 0x0008
+-#define IPV6_SADDR_SCORE_OIF 0x0010
+-#define IPV6_SADDR_SCORE_LABEL 0x0020
+-#define IPV6_SADDR_SCORE_PRIVACY 0x0040
+-
+-static inline int ipv6_saddr_preferred(int type)
+-{
+- if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
+- IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
+- return 1;
+- return 0;
+-}
+-
+-/* static matching label */
+-static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
+-{
+- /*
+- * prefix (longest match) label
+- * -----------------------------
+- * ::1/128 0
+- * ::/0 1
+- * 2002::/16 2
+- * ::/96 3
+- * ::ffff:0:0/96 4
+- * fc00::/7 5
+- * 2001::/32 6
+- */
+- if (type & IPV6_ADDR_LOOPBACK)
+- return 0;
+- else if (type & IPV6_ADDR_COMPATv4)
+- return 3;
+- else if (type & IPV6_ADDR_MAPPED)
+- return 4;
+- else if (addr->s6_addr32[0] == htonl(0x20010000))
+- return 6;
+- else if (addr->s6_addr16[0] == htons(0x2002))
+- return 2;
+- else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
+- return 5;
+- return 1;
+-}
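+-/*
+- * Illustrative example of the label table above: a 6to4 destination
+- * such as 2002:836b:213c::1 gets label 2, so a 2002::/16 source
+- * address (also label 2) is preferred by rule 6 below, while an
+- * ordinary global source (label 1) is not.
+- */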
+-
+-int ipv6_dev_get_saddr(struct net_device *daddr_dev,
+- struct in6_addr *daddr, struct in6_addr *saddr)
+-{
+- struct ipv6_saddr_score hiscore;
+- struct inet6_ifaddr *ifa_result = NULL;
+- int daddr_type = __ipv6_addr_type(daddr);
+- int daddr_scope = __ipv6_addr_src_scope(daddr_type);
+- u32 daddr_label = ipv6_saddr_label(daddr, daddr_type);
+- struct net_device *dev;
+-
+- memset(&hiscore, 0, sizeof(hiscore));
+-
+- read_lock(&dev_base_lock);
+- rcu_read_lock();
+-
+- for_each_netdev(dev) {
+- struct inet6_dev *idev;
+- struct inet6_ifaddr *ifa;
+-
+- /* Rule 0: Candidate Source Address (section 4)
+- * - For multicast and link-local destination
+- * addresses, the set of candidate source
+- * addresses MUST only include addresses
+- * assigned to interfaces belonging to the same
+- * link as the outgoing interface.
+- * (- For site-local destination addresses, the
+- * set of candidate source addresses MUST only
+- * include addresses assigned to interfaces
+- * belonging to the same site as the outgoing
+- * interface.)
+- */
+- if ((daddr_type & IPV6_ADDR_MULTICAST ||
+- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
+- daddr_dev && dev != daddr_dev)
+- continue;
+-
+- idev = __in6_dev_get(dev);
+- if (!idev)
+- continue;
+-
+- read_lock_bh(&idev->lock);
+- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+- struct ipv6_saddr_score score;
+-
+- score.addr_type = __ipv6_addr_type(&ifa->addr);
+-
+- /* Rule 0:
+- * - Tentative Address (RFC2462 section 5.4)
+- * - A tentative address is not considered
+- * "assigned to an interface" in the traditional
+- * sense, unless it is also flagged as optimistic.
+- * - Candidate Source Address (section 4)
+- * - In any case, anycast addresses, multicast
+- * addresses, and the unspecified address MUST
+- * NOT be included in a candidate set.
+- */
+- if ((ifa->flags & IFA_F_TENTATIVE) &&
+- (!(ifa->flags & IFA_F_OPTIMISTIC)))
+- continue;
+- if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
+- score.addr_type & IPV6_ADDR_MULTICAST)) {
+- LIMIT_NETDEBUG(KERN_DEBUG
+- "ADDRCONF: unspecified / multicast address"
+- "assigned as unicast address on %s",
+- dev->name);
+- continue;
+- }
+-
+- score.attrs = 0;
+- score.matchlen = 0;
+- score.scope = 0;
+- score.rule = 0;
+-
+- if (ifa_result == NULL) {
+- /* record the first available entry */
+- goto record_it;
+- }
+-
+- /* Rule 1: Prefer same address */
+- if (hiscore.rule < 1) {
+- if (ipv6_addr_equal(&ifa_result->addr, daddr))
+- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL;
+- hiscore.rule++;
+- }
+- if (ipv6_addr_equal(&ifa->addr, daddr)) {
+- score.attrs |= IPV6_SADDR_SCORE_LOCAL;
+- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) {
+- score.rule = 1;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)
+- continue;
+- }
+-
+- /* Rule 2: Prefer appropriate scope */
+- if (hiscore.rule < 2) {
+- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type);
+- hiscore.rule++;
+- }
+- score.scope = __ipv6_addr_src_scope(score.addr_type);
+- if (hiscore.scope < score.scope) {
+- if (hiscore.scope < daddr_scope) {
+- score.rule = 2;
+- goto record_it;
+- } else
+- continue;
+- } else if (score.scope < hiscore.scope) {
+- if (score.scope < daddr_scope)
+- break; /* addresses sorted by scope */
+- else {
+- score.rule = 2;
+- goto record_it;
+- }
+- }
+-
+- /* Rule 3: Avoid deprecated and optimistic addresses */
+- if (hiscore.rule < 3) {
+- if (ipv6_saddr_preferred(hiscore.addr_type) ||
+- (((ifa_result->flags &
+- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
+- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
+- hiscore.rule++;
+- }
+- if (ipv6_saddr_preferred(score.addr_type) ||
+- (((ifa->flags &
+- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
+- score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
+- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
+- score.rule = 3;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)
+- continue;
+- }
+-
+- /* Rule 4: Prefer home address */
+-#ifdef CONFIG_IPV6_MIP6
+- if (hiscore.rule < 4) {
+- if (ifa_result->flags & IFA_F_HOMEADDRESS)
+- hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+- hiscore.rule++;
+- }
+- if (ifa->flags & IFA_F_HOMEADDRESS) {
+- score.attrs |= IPV6_SADDR_SCORE_HOA;
+- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
+- score.rule = 4;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
+- continue;
+- }
+-#else
+- if (hiscore.rule < 4)
+- hiscore.rule++;
+-#endif
+-
+- /* Rule 5: Prefer outgoing interface */
+- if (hiscore.rule < 5) {
+- if (daddr_dev == NULL ||
+- daddr_dev == ifa_result->idev->dev)
+- hiscore.attrs |= IPV6_SADDR_SCORE_OIF;
+- hiscore.rule++;
+- }
+- if (daddr_dev == NULL ||
+- daddr_dev == ifa->idev->dev) {
+- score.attrs |= IPV6_SADDR_SCORE_OIF;
+- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) {
+- score.rule = 5;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF)
+- continue;
+- }
+-
+- /* Rule 6: Prefer matching label */
+- if (hiscore.rule < 6) {
+- if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label)
+- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
+- hiscore.rule++;
+- }
+- if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) {
+- score.attrs |= IPV6_SADDR_SCORE_LABEL;
+- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
+- score.rule = 6;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL)
+- continue;
+- }
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+- /* Rule 7: Prefer public address
+- * Note: prefer a temporary address if use_tempaddr >= 2
+- */
+- if (hiscore.rule < 7) {
+- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^
+- (ifa_result->idev->cnf.use_tempaddr >= 2))
+- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY;
+- hiscore.rule++;
+- }
+- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^
+- (ifa->idev->cnf.use_tempaddr >= 2)) {
+- score.attrs |= IPV6_SADDR_SCORE_PRIVACY;
+- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) {
+- score.rule = 7;
+- goto record_it;
+- }
+- } else {
+- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
+- continue;
+- }
+-#else
+- if (hiscore.rule < 7)
+- hiscore.rule++;
+-#endif
+- /* Rule 8: Use longest matching prefix */
+- if (hiscore.rule < 8) {
+- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr);
+- hiscore.rule++;
+- }
+- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr);
+- if (score.matchlen > hiscore.matchlen) {
+- score.rule = 8;
+- goto record_it;
+- }
+-#if 0
+- else if (score.matchlen < hiscore.matchlen)
+- continue;
+-#endif
+-
+- /* Final Rule: choose first available one */
+- continue;
+-record_it:
+- if (ifa_result)
+- in6_ifa_put(ifa_result);
+- in6_ifa_hold(ifa);
+- ifa_result = ifa;
+- hiscore = score;
+- }
+- read_unlock_bh(&idev->lock);
+- }
+- rcu_read_unlock();
+- read_unlock(&dev_base_lock);
+-
+- if (!ifa_result)
+- return -EADDRNOTAVAIL;
+-
+- ipv6_addr_copy(saddr, &ifa_result->addr);
+- in6_ifa_put(ifa_result);
+- return 0;
+-}
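+-/*
+- * The rules above are applied in order and the first difference wins;
+- * e.g. for a link-local destination, rule 2 (appropriate scope)
+- * selects a link-local source over a global one, and rule 8 (longest
+- * matching prefix) is only reached on ties of the earlier rules.
+- */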
+-
+-
+-int ipv6_get_saddr(struct dst_entry *dst,
+- struct in6_addr *daddr, struct in6_addr *saddr)
+-{
+- return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
+-}
+-
+-EXPORT_SYMBOL(ipv6_get_saddr);
+-
+-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+- unsigned char banned_flags)
+-{
+- struct inet6_dev *idev;
+- int err = -EADDRNOTAVAIL;
+-
+- rcu_read_lock();
+- if ((idev = __in6_dev_get(dev)) != NULL) {
+- struct inet6_ifaddr *ifp;
+-
+- read_lock_bh(&idev->lock);
+- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
+- ipv6_addr_copy(addr, &ifp->addr);
+- err = 0;
+- break;
+- }
+- }
+- read_unlock_bh(&idev->lock);
+- }
+- rcu_read_unlock();
+- return err;
+-}
+-
+-static int ipv6_count_addresses(struct inet6_dev *idev)
+-{
+- int cnt = 0;
+- struct inet6_ifaddr *ifp;
+-
+- read_lock_bh(&idev->lock);
+- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+- cnt++;
+- read_unlock_bh(&idev->lock);
+- return cnt;
+-}
+-
+-int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
+-{
+- struct inet6_ifaddr * ifp;
+- u8 hash = ipv6_addr_hash(addr);
+-
+- read_lock_bh(&addrconf_hash_lock);
+- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+- if (ipv6_addr_equal(&ifp->addr, addr) &&
+- !(ifp->flags&IFA_F_TENTATIVE)) {
+- if (dev == NULL || ifp->idev->dev == dev ||
+- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
+- break;
+- }
+- }
+- read_unlock_bh(&addrconf_hash_lock);
+- return ifp != NULL;
+-}
+-
+-EXPORT_SYMBOL(ipv6_chk_addr);
+-
+-static
+-int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
+-{
+- struct inet6_ifaddr * ifp;
+- u8 hash = ipv6_addr_hash(addr);
+-
+- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+- if (ipv6_addr_equal(&ifp->addr, addr)) {
+- if (dev == NULL || ifp->idev->dev == dev)
+- break;
+- }
+- }
+- return ifp != NULL;
+-}
+-
+-struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
+-{
+- struct inet6_ifaddr * ifp;
+- u8 hash = ipv6_addr_hash(addr);
+-
+- read_lock_bh(&addrconf_hash_lock);
+- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+- if (ipv6_addr_equal(&ifp->addr, addr)) {
+- if (dev == NULL || ifp->idev->dev == dev ||
+- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+- in6_ifa_hold(ifp);
+- break;
+- }
+- }
+- }
+- read_unlock_bh(&addrconf_hash_lock);
+-
+- return ifp;
+-}
+-
+-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+-{
+- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
+- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
+- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+- int sk_ipv6only = ipv6_only_sock(sk);
+- int sk2_ipv6only = inet_v6_ipv6only(sk2);
+- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+-
+- if (!sk2_rcv_saddr && !sk_ipv6only)
+- return 1;
+-
+- if (addr_type2 == IPV6_ADDR_ANY &&
+- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+- return 1;
+-
+- if (addr_type == IPV6_ADDR_ANY &&
+- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
+- return 1;
+-
+- if (sk2_rcv_saddr6 &&
+- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+- return 1;
+-
+- if (addr_type == IPV6_ADDR_MAPPED &&
+- !sk2_ipv6only &&
+- (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
+- return 1;
+-
+- return 0;
+-}
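+-/*
+- * Example of the mapped-address case above: an IPv6 socket bound to
+- * ::ffff:192.0.2.1 conflicts with an IPv4 socket bound to 192.0.2.1
+- * (addr_type == IPV6_ADDR_MAPPED, sk2 is not v6-only and the v4
+- * addresses compare equal), so 1 is returned.
+- */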
+-
+-/* Gets referenced address, destroys ifaddr */
+-
+-static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
+-{
+- if (ifp->flags&IFA_F_PERMANENT) {
+- spin_lock_bh(&ifp->lock);
+- addrconf_del_timer(ifp);
+- ifp->flags |= IFA_F_TENTATIVE;
+- spin_unlock_bh(&ifp->lock);
+- in6_ifa_put(ifp);
+-#ifdef CONFIG_IPV6_PRIVACY
+- } else if (ifp->flags&IFA_F_TEMPORARY) {
+- struct inet6_ifaddr *ifpub;
+- spin_lock_bh(&ifp->lock);
+- ifpub = ifp->ifpub;
+- if (ifpub) {
+- in6_ifa_hold(ifpub);
+- spin_unlock_bh(&ifp->lock);
+- ipv6_create_tempaddr(ifpub, ifp);
+- in6_ifa_put(ifpub);
+- } else {
+- spin_unlock_bh(&ifp->lock);
+- }
+- ipv6_del_addr(ifp);
+-#endif
+- } else
+- ipv6_del_addr(ifp);
+-}
+-
+-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+-{
+- if (net_ratelimit())
+- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+- addrconf_dad_stop(ifp);
+-}
+-
+- /* Join the solicited-node multicast address group. */
+-
+-void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+-{
+- struct in6_addr maddr;
+-
+- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+- return;
+-
+- addrconf_addr_solict_mult(addr, &maddr);
+- ipv6_dev_mc_inc(dev, &maddr);
+-}
+-
+-void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
+-{
+- struct in6_addr maddr;
+-
+- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+- return;
+-
+- addrconf_addr_solict_mult(addr, &maddr);
+- __ipv6_dev_mc_dec(idev, &maddr);
+-}
+-
+-static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
+-{
+- struct in6_addr addr;
+- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+- if (ipv6_addr_any(&addr))
+- return;
+- ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+-}
+-
+-static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
+-{
+- struct in6_addr addr;
+- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+- if (ipv6_addr_any(&addr))
+- return;
+- __ipv6_dev_ac_dec(ifp->idev, &addr);
+-}
+-
+-static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+-{
+- if (dev->addr_len != ETH_ALEN)
+- return -1;
+- memcpy(eui, dev->dev_addr, 3);
+- memcpy(eui + 5, dev->dev_addr + 3, 3);
+-
+- /*
+- * The zSeries OSA network cards can be shared among various
+- * OS instances, but the OSA cards have only one MAC address.
+- * This leads to duplicate address conflicts in conjunction
+- * with IPv6 if more than one instance uses the same card.
+- *
+- * The driver for these cards can deliver a unique 16-bit
+- * identifier for each instance sharing the same card. It is
+- * placed instead of 0xFFFE in the interface identifier. The
+- * "u" bit of the interface identifier is not inverted in this
+- * case. Hence the resulting interface identifier has local
+- * scope according to RFC2373.
+- */
+- if (dev->dev_id) {
+- eui[3] = (dev->dev_id >> 8) & 0xFF;
+- eui[4] = dev->dev_id & 0xFF;
+- } else {
+- eui[3] = 0xFF;
+- eui[4] = 0xFE;
+- eui[0] ^= 2;
+- }
+- return 0;
+-}
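+-/*
+- * Worked example (ordinary Ethernet, dev_id == 0): the MAC address
+- * 00:11:22:33:44:55 yields the interface identifier
+- * 02:11:22:ff:fe:33:44:55 -- 0xFFFE is inserted between the upper
+- * and lower three octets and the universal/local bit is inverted.
+- */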
+-
+-static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
+-{
+- /* XXX: inherit EUI-64 from other interface -- yoshfuji */
+- if (dev->addr_len != ARCNET_ALEN)
+- return -1;
+- memset(eui, 0, 7);
+- eui[7] = *(u8*)dev->dev_addr;
+- return 0;
+-}
+-
+-static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
+-{
+- if (dev->addr_len != INFINIBAND_ALEN)
+- return -1;
+- memcpy(eui, dev->dev_addr + 12, 8);
+- eui[0] |= 2;
+- return 0;
+-}
+-
+-static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
+-{
+- switch (dev->type) {
+- case ARPHRD_ETHER:
+- case ARPHRD_FDDI:
+- case ARPHRD_IEEE802_TR:
+- return addrconf_ifid_eui48(eui, dev);
+- case ARPHRD_ARCNET:
+- return addrconf_ifid_arcnet(eui, dev);
+- case ARPHRD_INFINIBAND:
+- return addrconf_ifid_infiniband(eui, dev);
+- }
+- return -1;
+-}
+-
+-static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
+-{
+- int err = -1;
+- struct inet6_ifaddr *ifp;
+-
+- read_lock_bh(&idev->lock);
+- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+- memcpy(eui, ifp->addr.s6_addr+8, 8);
+- err = 0;
+- break;
+- }
+- }
+- read_unlock_bh(&idev->lock);
+- return err;
+-}
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+-/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
+-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+-{
+-regen:
+- get_random_bytes(idev->rndid, sizeof(idev->rndid));
+- idev->rndid[0] &= ~0x02;
+-
+- /*
+- * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
+- * check that the generated address is not inappropriate:
+- *
+- * - Reserved subnet anycast (RFC 2526)
+- * 11111101 11....11 1xxxxxxx
+- * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
+- * 00-00-5E-FE-xx-xx-xx-xx
+- * - value 0
+- * - XXX: already assigned to an address on the device
+- */
+- if (idev->rndid[0] == 0xfd &&
+- (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
+- (idev->rndid[7]&0x80))
+- goto regen;
+- if ((idev->rndid[0]|idev->rndid[1]) == 0) {
+- if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
+- goto regen;
+- if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
+- goto regen;
+- }
+-
+- return 0;
+-}
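+-/*
+- * Example of a rejected identifier: fd:ff:ff:ff:ff:ff:ff:80 matches
+- * the reserved subnet anycast pattern (RFC 2526) and is regenerated,
+- * as are 00:00:5e:fe:xx:xx:xx:xx (ISATAP) and the all-zero value.
+- */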
+-
+-static void ipv6_regen_rndid(unsigned long data)
+-{
+- struct inet6_dev *idev = (struct inet6_dev *) data;
+- unsigned long expires;
+-
+- rcu_read_lock_bh();
+- write_lock_bh(&idev->lock);
+-
+- if (idev->dead)
+- goto out;
+-
+- if (__ipv6_regen_rndid(idev) < 0)
+- goto out;
+-
+- expires = jiffies +
+- idev->cnf.temp_prefered_lft * HZ -
+- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+- if (time_before(expires, jiffies)) {
+- printk(KERN_WARNING
+- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
+- idev->dev->name);
+- goto out;
+- }
+-
+- if (!mod_timer(&idev->regen_timer, expires))
+- in6_dev_hold(idev);
+-
+-out:
+- write_unlock_bh(&idev->lock);
+- rcu_read_unlock_bh();
+- in6_dev_put(idev);
+-}
+-
+-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
+- int ret = 0;
+-
+- if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
+- ret = __ipv6_regen_rndid(idev);
+- return ret;
+-}
+-#endif
+-
+-/*
+- * Add prefix route.
+- */
+-
+-static void
+-addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
+- unsigned long expires, u32 flags)
+-{
+- struct fib6_config cfg = {
+- .fc_table = RT6_TABLE_PREFIX,
+- .fc_metric = IP6_RT_PRIO_ADDRCONF,
+- .fc_ifindex = dev->ifindex,
+- .fc_expires = expires,
+- .fc_dst_len = plen,
+- .fc_flags = RTF_UP | flags,
+- };
+-
+- ipv6_addr_copy(&cfg.fc_dst, pfx);
+-
+- /* Prevent useless cloning on PtP SIT.
+- This is done here on the assumption that the whole
+- class of non-broadcast devices does not need cloning.
+- */
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+- if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+- cfg.fc_flags |= RTF_NONEXTHOP;
+-#endif
+-
+- ip6_route_add(&cfg);
+-}
+-
+-/* Create "default" multicast route to the interface */
+-
+-static void addrconf_add_mroute(struct net_device *dev)
+-{
+- struct fib6_config cfg = {
+- .fc_table = RT6_TABLE_LOCAL,
+- .fc_metric = IP6_RT_PRIO_ADDRCONF,
+- .fc_ifindex = dev->ifindex,
+- .fc_dst_len = 8,
+- .fc_flags = RTF_UP,
+- };
+-
+- ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+-
+- ip6_route_add(&cfg);
+-}
+-
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+-static void sit_route_add(struct net_device *dev)
+-{
+- struct fib6_config cfg = {
+- .fc_table = RT6_TABLE_MAIN,
+- .fc_metric = IP6_RT_PRIO_ADDRCONF,
+- .fc_ifindex = dev->ifindex,
+- .fc_dst_len = 96,
+- .fc_flags = RTF_UP | RTF_NONEXTHOP,
+- };
+-
+- /* prefix length - 96 bits "::d.d.d.d" */
+- ip6_route_add(&cfg);
+-}
+-#endif
+-
+-static void addrconf_add_lroute(struct net_device *dev)
+-{
+- struct in6_addr addr;
+-
+- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
+- addrconf_prefix_route(&addr, 64, dev, 0, 0);
+-}
+-
+-static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
+-{
+- struct inet6_dev *idev;
+-
+- ASSERT_RTNL();
+-
+- if ((idev = ipv6_find_idev(dev)) == NULL)
+- return NULL;
+-
+- /* Add default multicast route */
+- addrconf_add_mroute(dev);
+-
+- /* Add link local route */
+- addrconf_add_lroute(dev);
+- return idev;
+-}
+-
+-void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+-{
+- struct prefix_info *pinfo;
+- __u32 valid_lft;
+- __u32 prefered_lft;
+- int addr_type;
+- unsigned long rt_expires;
+- struct inet6_dev *in6_dev;
+-
+- pinfo = (struct prefix_info *) opt;
+-
+- if (len < sizeof(struct prefix_info)) {
+- ADBG(("addrconf: prefix option too short\n"));
+- return;
+- }
+-
+- /*
+- * Validation checks ([ADDRCONF], page 19)
+- */
+-
+- addr_type = ipv6_addr_type(&pinfo->prefix);
+-
+- if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
+- return;
+-
+- valid_lft = ntohl(pinfo->valid);
+- prefered_lft = ntohl(pinfo->prefered);
+-
+- if (prefered_lft > valid_lft) {
+- if (net_ratelimit())
+- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+- return;
+- }
+-
+- in6_dev = in6_dev_get(dev);
+-
+- if (in6_dev == NULL) {
+- if (net_ratelimit())
+- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+- return;
+- }
+-
+- /*
+- * Two things going on here:
+- * 1) Add routes for on-link prefixes
+- * 2) Configure prefixes with the auto flag set
+- */
+-
+- /* Avoid arithmetic overflow. Really, we could
+- save rt_expires in seconds, likely valid_lft,
+- but that would require a division in the fib gc,
+- which is not good.
+- */
+- if (valid_lft >= 0x7FFFFFFF/HZ)
+- rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
+- else
+- rt_expires = valid_lft * HZ;
+-
+- /*
+- * We convert this (in jiffies) to clock_t later.
+- * Avoid arithmetic overflow there as well.
+- * Overflow can happen only if HZ < USER_HZ.
+- */
+- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+- rt_expires = 0x7FFFFFFF / USER_HZ;
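+- /*
+- * E.g. with HZ == 100 the clamps above keep rt_expires within
+- * 0x7FFFFFFF jiffies (roughly 248 days); longer advertised
+- * lifetimes are silently truncated rather than wrapped.
+- */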
+-
+- if (pinfo->onlink) {
+- struct rt6_info *rt;
+- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+-
+- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+- if (rt->rt6i_flags&RTF_EXPIRES) {
+- if (valid_lft == 0) {
+- ip6_del_rt(rt);
+- rt = NULL;
+- } else {
+- rt->rt6i_expires = jiffies + rt_expires;
+- }
+- }
+- } else if (valid_lft) {
+- addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+- }
+- if (rt)
+- dst_release(&rt->u.dst);
+- }
+-
+- /* Try to figure out our local address for this prefix */
+-
+- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+- struct inet6_ifaddr * ifp;
+- struct in6_addr addr;
+- int create = 0, update_lft = 0;
+-
+- if (pinfo->prefix_len == 64) {
+- memcpy(&addr, &pinfo->prefix, 8);
+- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+- in6_dev_put(in6_dev);
+- return;
+- }
+- goto ok;
+- }
+- if (net_ratelimit())
+- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
+- pinfo->prefix_len);
+- in6_dev_put(in6_dev);
+- return;
+-
+-ok:
+-
+- ifp = ipv6_get_ifaddr(&addr, dev, 1);
+-
+- if (ifp == NULL && valid_lft) {
+- int max_addresses = in6_dev->cnf.max_addresses;
+- u32 addr_flags = 0;
+-
+-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+- if (in6_dev->cnf.optimistic_dad &&
+- !ipv6_devconf.forwarding)
+- addr_flags = IFA_F_OPTIMISTIC;
+-#endif
+-
+- /* Do not allow the creation of too many autoconfigured
+- * addresses; this would be too easy a way to crash the kernel.
+- */
+- if (!max_addresses ||
+- ipv6_count_addresses(in6_dev) < max_addresses)
+- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+- addr_type&IPV6_ADDR_SCOPE_MASK,
+- addr_flags);
+-
+- if (!ifp || IS_ERR(ifp)) {
+- in6_dev_put(in6_dev);
+- return;
+- }
+-
+- update_lft = create = 1;
+- ifp->cstamp = jiffies;
+- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+- }
+-
+- if (ifp) {
+- int flags;
+- unsigned long now;
+-#ifdef CONFIG_IPV6_PRIVACY
+- struct inet6_ifaddr *ift;
+-#endif
+- u32 stored_lft;
+-
+- /* update lifetime (RFC2462 5.5.3 e) */
+- spin_lock(&ifp->lock);
+- now = jiffies;
+- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+- else
+- stored_lft = 0;
+- if (!update_lft && stored_lft) {
+- if (valid_lft > MIN_VALID_LIFETIME ||
+- valid_lft > stored_lft)
+- update_lft = 1;
+- else if (stored_lft <= MIN_VALID_LIFETIME) {
+- /* valid_lft <= stored_lft is always true */
+- /* XXX: IPsec */
+- update_lft = 0;
+- } else {
+- valid_lft = MIN_VALID_LIFETIME;
+- if (valid_lft < prefered_lft)
+- prefered_lft = valid_lft;
+- update_lft = 1;
+- }
+- }
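+- /*
+- * E.g. with 5 hours of stored lifetime left and an
+- * advertised valid_lft of 10 minutes, neither branch
+- * above accepts the advertisement and the remaining
+- * lifetime is clamped to MIN_VALID_LIFETIME (two
+- * hours), so an unauthenticated RA cannot expire the
+- * address immediately.
+- */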
+-
+- if (update_lft) {
+- ifp->valid_lft = valid_lft;
+- ifp->prefered_lft = prefered_lft;
+- ifp->tstamp = now;
+- flags = ifp->flags;
+- ifp->flags &= ~IFA_F_DEPRECATED;
+- spin_unlock(&ifp->lock);
+-
+- if (!(flags&IFA_F_TENTATIVE))
+- ipv6_ifa_notify(0, ifp);
+- } else
+- spin_unlock(&ifp->lock);
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+- read_lock_bh(&in6_dev->lock);
+- /* update all temporary addresses in the list */
+- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+- /*
+- * When adjusting the lifetimes of an existing
+- * temporary address, only lower the lifetimes.
+- * Implementations must not increase the
+- * lifetimes of an existing temporary address
+- * when processing a Prefix Information Option.
+- */
+- spin_lock(&ift->lock);
+- flags = ift->flags;
+- if (ift->valid_lft > valid_lft &&
+- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
+- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
+- if (ift->prefered_lft > prefered_lft &&
+- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
+- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
+- spin_unlock(&ift->lock);
+- if (!(flags&IFA_F_TENTATIVE))
+- ipv6_ifa_notify(0, ift);
+- }
+-
+- if (create && in6_dev->cnf.use_tempaddr > 0) {
+- /*
+- * When a new public address is created as described in [ADDRCONF],
+- * also create a new temporary address.
+- */
+- read_unlock_bh(&in6_dev->lock);
+- ipv6_create_tempaddr(ifp, NULL);
+- } else {
+- read_unlock_bh(&in6_dev->lock);
+- }
+-#endif
+- in6_ifa_put(ifp);
+- addrconf_verify(0);
+- }
+- }
+- inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+- in6_dev_put(in6_dev);
+-}
+-
+-/*
+- * Set destination address.
+- * Special case for SIT interfaces where we create a new "virtual"
+- * device.
+- */
+-int addrconf_set_dstaddr(void __user *arg)
+-{
+- struct in6_ifreq ireq;
+- struct net_device *dev;
+- int err = -EINVAL;
+-
+- rtnl_lock();
+-
+- err = -EFAULT;
+- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+- goto err_exit;
+-
+- dev = __dev_get_by_index(ireq.ifr6_ifindex);
+-
+- err = -ENODEV;
+- if (dev == NULL)
+- goto err_exit;
+-
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+- if (dev->type == ARPHRD_SIT) {
+- struct ifreq ifr;
+- mm_segment_t oldfs;
+- struct ip_tunnel_parm p;
+-
+- err = -EADDRNOTAVAIL;
+- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
+- goto err_exit;
+-
+- memset(&p, 0, sizeof(p));
+- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
+- p.iph.saddr = 0;
+- p.iph.version = 4;
+- p.iph.ihl = 5;
+- p.iph.protocol = IPPROTO_IPV6;
+- p.iph.ttl = 64;
+- ifr.ifr_ifru.ifru_data = (void __user *)&p;
+-
+- oldfs = get_fs(); set_fs(KERNEL_DS);
+- err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+- set_fs(oldfs);
+-
+- if (err == 0) {
+- err = -ENOBUFS;
+- if ((dev = __dev_get_by_name(p.name)) == NULL)
+- goto err_exit;
+- err = dev_open(dev);
+- }
+- }
+-#endif
+-
+-err_exit:
+- rtnl_unlock();
+- return err;
+-}
+-
+-/*
+- * Manual configuration of address on an interface
+- */
+-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
+- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
+-{
+- struct inet6_ifaddr *ifp;
+- struct inet6_dev *idev;
+- struct net_device *dev;
+- int scope;
+- u32 flags = RTF_EXPIRES;
+-
+- ASSERT_RTNL();
+-
+- /* check the lifetime */
+- if (!valid_lft || prefered_lft > valid_lft)
+- return -EINVAL;
+-
+- if ((dev = __dev_get_by_index(ifindex)) == NULL)
+- return -ENODEV;
+-
+- if ((idev = addrconf_add_dev(dev)) == NULL)
+- return -ENOBUFS;
+-
+- scope = ipv6_addr_scope(pfx);
+-
+- if (valid_lft == INFINITY_LIFE_TIME) {
+- ifa_flags |= IFA_F_PERMANENT;
+- flags = 0;
+- } else if (valid_lft >= 0x7FFFFFFF/HZ)
+- valid_lft = 0x7FFFFFFF/HZ;
+-
+- if (prefered_lft == 0)
+- ifa_flags |= IFA_F_DEPRECATED;
+- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
+- (prefered_lft != INFINITY_LIFE_TIME))
+- prefered_lft = 0x7FFFFFFF/HZ;
+-
+- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+-
+- if (!IS_ERR(ifp)) {
+- spin_lock_bh(&ifp->lock);
+- ifp->valid_lft = valid_lft;
+- ifp->prefered_lft = prefered_lft;
+- ifp->tstamp = jiffies;
+- spin_unlock_bh(&ifp->lock);
+-
+- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
+- jiffies_to_clock_t(valid_lft * HZ), flags);
+- /*
+- * Note that section 3.1 of RFC 4429 indicates
+- * that the Optimistic flag should not be set for
+- * manually configured addresses
+- */
+- addrconf_dad_start(ifp, 0);
+- in6_ifa_put(ifp);
+- addrconf_verify(0);
+- return 0;
+- }
+-
+- return PTR_ERR(ifp);
+-}
+-
+-static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
+-{
+- struct inet6_ifaddr *ifp;
+- struct inet6_dev *idev;
+- struct net_device *dev;
+-
+- if ((dev = __dev_get_by_index(ifindex)) == NULL)
+- return -ENODEV;
+-
+- if ((idev = __in6_dev_get(dev)) == NULL)
+- return -ENXIO;
+-
+- read_lock_bh(&idev->lock);
+- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+- if (ifp->prefix_len == plen &&
+- ipv6_addr_equal(pfx, &ifp->addr)) {
+- in6_ifa_hold(ifp);
+- read_unlock_bh(&idev->lock);
+-
+- ipv6_del_addr(ifp);
+-
+- /* If the last address is deleted administratively,
+- disable IPv6 on this interface.
+- */
+- if (idev->addr_list == NULL)
+- addrconf_ifdown(idev->dev, 1);
+- return 0;
+- }
+- }
+- read_unlock_bh(&idev->lock);
+- return -EADDRNOTAVAIL;
+-}
+-
+-
+-int addrconf_add_ifaddr(void __user *arg)
+-{
+- struct in6_ifreq ireq;
+- int err;
+-
+- if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+- return -EFAULT;
+-
+- rtnl_lock();
+- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
+- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+- rtnl_unlock();
+- return err;
+-}
+-
+-int addrconf_del_ifaddr(void __user *arg)
+-{
+- struct in6_ifreq ireq;
+- int err;
+-
+- if (!capable(CAP_NET_ADMIN))
+- return -EPERM;
+-
+- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+- return -EFAULT;
+-
+- rtnl_lock();
+- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
+- rtnl_unlock();
+- return err;
+-}
+-
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+-static void sit_add_v4_addrs(struct inet6_dev *idev)
+-{
+- struct inet6_ifaddr * ifp;
+- struct in6_addr addr;
+- struct net_device *dev;
+- int scope;
+-
+- ASSERT_RTNL();
+-
+- memset(&addr, 0, sizeof(struct in6_addr));
+- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+-
+- if (idev->dev->flags&IFF_POINTOPOINT) {
+- addr.s6_addr32[0] = htonl(0xfe800000);
+- scope = IFA_LINK;
+- } else {
+- scope = IPV6_ADDR_COMPATv4;
+- }
+-
+- if (addr.s6_addr32[3]) {
+- ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
+- if (!IS_ERR(ifp)) {
+- spin_lock_bh(&ifp->lock);
+- ifp->flags &= ~IFA_F_TENTATIVE;
+- spin_unlock_bh(&ifp->lock);
+- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+- in6_ifa_put(ifp);
+- }
+- return;
+- }
+-
+- for_each_netdev(dev) {
+- struct in_device * in_dev = __in_dev_get_rtnl(dev);
+- if (in_dev && (dev->flags & IFF_UP)) {
+- struct in_ifaddr * ifa;
+-
+- int flag = scope;
+-
+- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+- int plen;
+-
+- addr.s6_addr32[3] = ifa->ifa_local;
+-
+- if (ifa->ifa_scope == RT_SCOPE_LINK)
+- continue;
+- if (ifa->ifa_scope >= RT_SCOPE_HOST) {
+- if (idev->dev->flags&IFF_POINTOPOINT)
+- continue;
+- flag |= IFA_HOST;
+- }
+- if (idev->dev->flags&IFF_POINTOPOINT)
+- plen = 64;
+- else
+- plen = 96;
+-
+- ifp = ipv6_add_addr(idev, &addr, plen, flag,
+- IFA_F_PERMANENT);
+- if (!IS_ERR(ifp)) {
+- spin_lock_bh(&ifp->lock);
+- ifp->flags &= ~IFA_F_TENTATIVE;
+- spin_unlock_bh(&ifp->lock);
+- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+- in6_ifa_put(ifp);
+- }
+- }
+- }
+- }
+-}
+-#endif
+-
+-static void init_loopback(struct net_device *dev)
+-{
+- struct inet6_dev *idev;
+- struct inet6_ifaddr * ifp;
+-
+- /* ::1 */
+-
+- ASSERT_RTNL();
+-
+- if ((idev = ipv6_find_idev(dev)) == NULL) {
+- printk(KERN_DEBUG "init loopback: add_dev failed\n");
+- return;
+- }
+-
+- ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
+- if (!IS_ERR(ifp)) {
+- spin_lock_bh(&ifp->lock);
+- ifp->flags &= ~IFA_F_TENTATIVE;
+- spin_unlock_bh(&ifp->lock);
+- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+- in6_ifa_put(ifp);
+- }
+-}
+-
+-static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
+-{
+- struct inet6_ifaddr * ifp;
+- u32 addr_flags = IFA_F_PERMANENT;
+-
+-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+- if (idev->cnf.optimistic_dad &&
+- !ipv6_devconf.forwarding)
+- addr_flags |= IFA_F_OPTIMISTIC;
+-#endif
+-
+-
+- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
+- if (!IS_ERR(ifp)) {
+- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
+- addrconf_dad_start(ifp, 0);
+- in6_ifa_put(ifp);
+- }
+-}
+-
+-static void addrconf_dev_config(struct net_device *dev)
+-{
+- struct in6_addr addr;
+- struct inet6_dev * idev;
+-
+- ASSERT_RTNL();
+-
+- if ((dev->type != ARPHRD_ETHER) &&
+- (dev->type != ARPHRD_FDDI) &&
+- (dev->type != ARPHRD_IEEE802_TR) &&
+- (dev->type != ARPHRD_ARCNET) &&
+- (dev->type != ARPHRD_INFINIBAND)) {
+- /* Alas, we support only Ethernet autoconfiguration. */
+- return;
+- }
+-
+- idev = addrconf_add_dev(dev);
+- if (idev == NULL)
+- return;
+-
+- memset(&addr, 0, sizeof(struct in6_addr));
+- addr.s6_addr32[0] = htonl(0xFE800000);
+-
+- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
+- addrconf_add_linklocal(idev, &addr);
+-}
+-
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+-static void addrconf_sit_config(struct net_device *dev)
+-{
+- struct inet6_dev *idev;
+-
+- ASSERT_RTNL();
+-
+- /*
+- * Configure the tunnel with one of our IPv4
+- * addresses... we should configure all of
+- * our v4 addrs in the tunnel
+- */
+-
+- if ((idev = ipv6_find_idev(dev)) == NULL) {
+- printk(KERN_DEBUG "init sit: add_dev failed\n");
+- return;
+- }
+-
+- sit_add_v4_addrs(idev);
+-
+- if (dev->flags&IFF_POINTOPOINT) {
+- addrconf_add_mroute(dev);
+- addrconf_add_lroute(dev);
+- } else
+- sit_route_add(dev);
+-}
+-#endif
+-
+-static inline int
+-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
+-{
+- struct in6_addr lladdr;
+-
+- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
+- addrconf_add_linklocal(idev, &lladdr);
+- return 0;
+- }
+- return -1;
+-}
+-
+-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
+-{
+- struct net_device *link_dev;
+-
+- /* first try to inherit the link-local address from the link device */
+- if (idev->dev->iflink &&
+- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
+- if (!ipv6_inherit_linklocal(idev, link_dev))
+- return;
+- }
+- /* then try to inherit it from any device */
+- for_each_netdev(link_dev) {
+- if (!ipv6_inherit_linklocal(idev, link_dev))
+- return;
+- }
+- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
+-}
+-
+-/*
+- * Autoconfigure tunnel with a link-local address so routing protocols,
+- * DHCPv6, MLD etc. can be run over the virtual link
+- */
+-
+-static void addrconf_ip6_tnl_config(struct net_device *dev)
+-{
+- struct inet6_dev *idev;
+-
+- ASSERT_RTNL();
+-
+- if ((idev = addrconf_add_dev(dev)) == NULL) {
+- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+- return;
+- }
+- ip6_tnl_add_linklocal(idev);
+-}
+-
+-static int addrconf_notify(struct notifier_block *this, unsigned long event,
+- void * data)
+-{
+- struct net_device *dev = (struct net_device *) data;
+- struct inet6_dev *idev = __in6_dev_get(dev);
+- int run_pending = 0;
+-
+- switch(event) {
+- case NETDEV_REGISTER:
+- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+- idev = ipv6_add_dev(dev);
+- if (!idev)
+- printk(KERN_WARNING "IPv6: add_dev failed for %s\n",
+- dev->name);
+- }
+- break;
+- case NETDEV_UP:
+- case NETDEV_CHANGE:
+- if (event == NETDEV_UP) {
+- if (!addrconf_qdisc_ok(dev)) {
+- /* device is not ready yet. */
+- printk(KERN_INFO
+- "ADDRCONF(NETDEV_UP): %s: "
+- "link is not ready\n",
+- dev->name);
+- break;
+- }
+-
+- if (idev)
+- idev->if_flags |= IF_READY;
+- } else {
+- if (!addrconf_qdisc_ok(dev)) {
+- /* device is still not ready. */
+- break;
+- }
+-
+- if (idev) {
+- if (idev->if_flags & IF_READY) {
+- /* device is already configured. */
+- break;
+- }
+- idev->if_flags |= IF_READY;
+- }
+-
+- printk(KERN_INFO
+- "ADDRCONF(NETDEV_CHANGE): %s: "
+- "link becomes ready\n",
+- dev->name);
+-
+- run_pending = 1;
+- }
+-
+- switch(dev->type) {
+-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
+- case ARPHRD_SIT:
+- addrconf_sit_config(dev);
+- break;
+-#endif
+- case ARPHRD_TUNNEL6:
+- addrconf_ip6_tnl_config(dev);
+- break;
+- case ARPHRD_LOOPBACK:
+- init_loopback(dev);
+- break;
+-
+- default:
+- addrconf_dev_config(dev);
+- break;
+- }
+- if (idev) {
+- if (run_pending)
+- addrconf_dad_run(idev);
+-
+- /* If the MTU changed while the interface was down, the
+- new MTU must be reflected in the idev as well as in
+- the routes when the interface comes back up.
+- */
+- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+- rt6_mtu_change(dev, dev->mtu);
+- idev->cnf.mtu6 = dev->mtu;
+- }
+- idev->tstamp = jiffies;
+- inet6_ifinfo_notify(RTM_NEWLINK, idev);
+- /* If the MTU changed while the interface was down and is now
+- lower than IPV6_MIN_MTU, stop IPv6 on this interface.
+- */
+- if (dev->mtu < IPV6_MIN_MTU)
+- addrconf_ifdown(dev, event != NETDEV_DOWN);
+- }
+- break;
+-
+- case NETDEV_CHANGEMTU:
+- if ( idev && dev->mtu >= IPV6_MIN_MTU) {
+- rt6_mtu_change(dev, dev->mtu);
+- idev->cnf.mtu6 = dev->mtu;
+- break;
+- }
+-
+- /* MTU fell below IPV6_MIN_MTU. Stop IPv6 on this interface. */
+-
+- case NETDEV_DOWN:
+- case NETDEV_UNREGISTER:
+- /*
+- * Remove all addresses from this interface.
+- */
+- addrconf_ifdown(dev, event != NETDEV_DOWN);
+- break;
+-
+- case NETDEV_CHANGENAME:
+- if (idev) {
+- snmp6_unregister_dev(idev);
+-#ifdef CONFIG_SYSCTL
+- addrconf_sysctl_unregister(&idev->cnf);
+- neigh_sysctl_unregister(idev->nd_parms);
+- neigh_sysctl_register(dev, idev->nd_parms,
+- NET_IPV6, NET_IPV6_NEIGH, "ipv6",
+- &ndisc_ifinfo_sysctl_change,
+- NULL);
+- addrconf_sysctl_register(idev, &idev->cnf);
+-#endif
+- snmp6_register_dev(idev);
+- }
+- break;
+- }
+-
+- return NOTIFY_OK;
+-}
+-
+-/*
+- * The addrconf module should be notified when a device goes up.
+- */
+-static struct notifier_block ipv6_dev_notf = {
+- .notifier_call = addrconf_notify,
+- .priority = 0
+-};
+-
+-static int addrconf_ifdown(struct net_device *dev, int how)
+-{
+- struct inet6_dev *idev;
+- struct inet6_ifaddr *ifa, **bifa;
+- int i;
+-
+- ASSERT_RTNL();
+-
+- if (dev == &loopback_dev && how == 1)
+- how = 0;
+-
+- rt6_ifdown(dev);
+- neigh_ifdown(&nd_tbl, dev);
+-
+- idev = __in6_dev_get(dev);
+- if (idev == NULL)
+- return -ENODEV;
+-
+- /* Step 1: remove reference to ipv6 device from parent device.
+- Do not dev_put!
+- */
+- if (how == 1) {
+- idev->dead = 1;
+-
+- /* protected by rtnl_lock */
+- rcu_assign_pointer(dev->ip6_ptr, NULL);
+-
+- /* Step 1.5: remove snmp6 entry */
+- snmp6_unregister_dev(idev);
+-
+- }
+-
+- /* Step 2: clear hash table */
+- for (i=0; i<IN6_ADDR_HSIZE; i++) {
+- bifa = &inet6_addr_lst[i];
+-
+- write_lock_bh(&addrconf_hash_lock);
+- while ((ifa = *bifa) != NULL) {
+- if (ifa->idev == idev) {
+- *bifa = ifa->lst_next;
+- ifa->lst_next = NULL;
+- addrconf_del_timer(ifa);
+- in6_ifa_put(ifa);
+- continue;
+- }
+- bifa = &ifa->lst_next;
+- }
+- write_unlock_bh(&addrconf_hash_lock);
+- }
+-
+- write_lock_bh(&idev->lock);
+-
+- /* Step 3: clear flags for stateless addrconf */
+- if (how != 1)
+- idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
+-
+- /* Step 4: clear address list */
+-#ifdef CONFIG_IPV6_PRIVACY
+- if (how == 1 && del_timer(&idev->regen_timer))
+- in6_dev_put(idev);
+-
+- /* clear tempaddr list */
+- while ((ifa = idev->tempaddr_list) != NULL) {
+- idev->tempaddr_list = ifa->tmp_next;
+- ifa->tmp_next = NULL;
+- ifa->dead = 1;
+- write_unlock_bh(&idev->lock);
+- spin_lock_bh(&ifa->lock);
+-
+- if (ifa->ifpub) {
+- in6_ifa_put(ifa->ifpub);
+- ifa->ifpub = NULL;
+- }
+- spin_unlock_bh(&ifa->lock);
+- in6_ifa_put(ifa);
+- write_lock_bh(&idev->lock);
+- }
+-#endif
+- while ((ifa = idev->addr_list) != NULL) {
+- idev->addr_list = ifa->if_next;
+- ifa->if_next = NULL;
+- ifa->dead = 1;
+- addrconf_del_timer(ifa);
+- write_unlock_bh(&idev->lock);
+-
+- __ipv6_ifa_notify(RTM_DELADDR, ifa);
+- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+- in6_ifa_put(ifa);
+-
+- write_lock_bh(&idev->lock);
+- }
+- write_unlock_bh(&idev->lock);
+-
+- /* Step 5: Discard multicast list */
+-
+- if (how == 1)
+- ipv6_mc_destroy_dev(idev);
+- else
+- ipv6_mc_down(idev);
+-
+- /* Step 6: netlink notification of this interface */
+- idev->tstamp = jiffies;
+- inet6_ifinfo_notify(RTM_DELLINK, idev);
+-
+- /* Shut the device down (if unregistered) */
+-
+- if (how == 1) {
+-#ifdef CONFIG_SYSCTL
+- addrconf_sysctl_unregister(&idev->cnf);
+- neigh_sysctl_unregister(idev->nd_parms);
+-#endif
+- neigh_parms_release(&nd_tbl, idev->nd_parms);
+- neigh_ifdown(&nd_tbl, dev);
+- in6_dev_put(idev);
+- }
+- return 0;
+-}
+-
+-static void addrconf_rs_timer(unsigned long data)
+-{
+- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+-
+- if (ifp->idev->cnf.forwarding)
+- goto out;
+-
+- if (ifp->idev->if_flags & IF_RA_RCVD) {
+- /*
+- * Announcement received after solicitation
+- * was sent
+- */
+- goto out;
+- }
+-
+- spin_lock(&ifp->lock);
+- if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+- struct in6_addr all_routers;
+-
+- /* The wait after the last probe can be shorter */
+- addrconf_mod_timer(ifp, AC_RS,
+- (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
+- ifp->idev->cnf.rtr_solicit_delay :
+- ifp->idev->cnf.rtr_solicit_interval);
+- spin_unlock(&ifp->lock);
+-
+- ipv6_addr_all_routers(&all_routers);
+-
+- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+- } else {
+- spin_unlock(&ifp->lock);
+- /*
+- * Note: we no longer support the deprecated
+- * "all on-link" assumption.
+- */
+- printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+- ifp->idev->dev->name);
+- }
+-
+-out:
+- in6_ifa_put(ifp);
+-}
+-
+-/*
+- * Duplicate Address Detection
+- */
+-static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+-{
+- unsigned long rand_num;
+- struct inet6_dev *idev = ifp->idev;
+-
+- if (ifp->flags & IFA_F_OPTIMISTIC)
+- rand_num = 0;
+- else
+- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+-
+- ifp->probes = idev->cnf.dad_transmits;
+- addrconf_mod_timer(ifp, AC_DAD, rand_num);
+-}
+-
+-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+-{
+- struct inet6_dev *idev = ifp->idev;
+- struct net_device *dev = idev->dev;
+-
+- addrconf_join_solict(dev, &ifp->addr);
+-
+- net_srandom(ifp->addr.s6_addr32[3]);
+-
+- read_lock_bh(&idev->lock);
+- if (ifp->dead)
+- goto out;
+- spin_lock_bh(&ifp->lock);
+-
+- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+- !(ifp->flags&IFA_F_TENTATIVE) ||
+- ifp->flags & IFA_F_NODAD) {
+- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
+- spin_unlock_bh(&ifp->lock);
+- read_unlock_bh(&idev->lock);
+-
+- addrconf_dad_completed(ifp);
+- return;
+- }
+-
+- if (!(idev->if_flags & IF_READY)) {
+- spin_unlock_bh(&ifp->lock);
+- read_unlock_bh(&idev->lock);
+- /*
+- * If the device is not ready:
+- * - keep it tentative if it is a permanent address.
+- * - otherwise, kill it.
+- */
+- in6_ifa_hold(ifp);
+- addrconf_dad_stop(ifp);
+- return;
+- }
+-
+- /*
+- * Optimistic nodes can start receiving
+- * frames right away.
+- */
+- if (ifp->flags & IFA_F_OPTIMISTIC)
+- ip6_ins_rt(ifp->rt);
+-
+- addrconf_dad_kick(ifp);
+- spin_unlock_bh(&ifp->lock);
+-out:
+- read_unlock_bh(&idev->lock);
+-}
+-
+-static void addrconf_dad_timer(unsigned long data)
+-{
+- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+- struct inet6_dev *idev = ifp->idev;
+- struct in6_addr unspec;
+- struct in6_addr mcaddr;
+-
+- read_lock_bh(&idev->lock);
+- if (idev->dead) {
+- read_unlock_bh(&idev->lock);
+- goto out;
+- }
+- spin_lock_bh(&ifp->lock);
+- if (ifp->probes == 0) {
+- /*
+- * DAD was successful
+- */
+-
+- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
+- spin_unlock_bh(&ifp->lock);
+- read_unlock_bh(&idev->lock);
+-
+- addrconf_dad_completed(ifp);
+-
+- goto out;
+- }
+-
+- ifp->probes--;
+- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+- spin_unlock_bh(&ifp->lock);
+- read_unlock_bh(&idev->lock);
+-
+- /* send a neighbour solicitation for our addr */
+- memset(&unspec, 0, sizeof(unspec));
+- addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
+- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+-out:
+- in6_ifa_put(ifp);
+-}
+-
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+-{
+- struct net_device * dev = ifp->idev->dev;
+-
+- /*
+- * Configure the address for reception. Now it is valid.
+- */
+-
+- ipv6_ifa_notify(RTM_NEWADDR, ifp);
+-
+- /* If the added address is link-local and forwarding is off,
+- start sending router solicitations.
+- */
+-
+- if (ifp->idev->cnf.forwarding == 0 &&
+- ifp->idev->cnf.rtr_solicits > 0 &&
+- (dev->flags&IFF_LOOPBACK) == 0 &&
+- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+- struct in6_addr all_routers;
+-
+- ipv6_addr_all_routers(&all_routers);
+-
+- /*
+- * If a host has already performed a random delay
+- * [...] as part of DAD [...] there is no need
+- * to delay again before sending the first RS
+- */
+- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+-
+- spin_lock_bh(&ifp->lock);
+- ifp->probes = 1;
+- ifp->idev->if_flags |= IF_RS_SENT;
+- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
+- spin_unlock_bh(&ifp->lock);
+- }
+-}
+-
+-static void addrconf_dad_run(struct inet6_dev *idev) {
+- struct inet6_ifaddr *ifp;
+-
+- read_lock_bh(&idev->lock);
+- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+- spin_lock_bh(&ifp->lock);
+- if (!(ifp->flags & IFA_F_TENTATIVE)) {
+- spin_unlock_bh(&ifp->lock);
+- continue;
+- }
+- spin_unlock_bh(&ifp->lock);
+- addrconf_dad_kick(ifp);
+- }
+- read_unlock_bh(&idev->lock);
+-}
+-
+-#ifdef CONFIG_PROC_FS
+-struct if6_iter_state {
+- int bucket;
+-};
+-
+-static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
+-{
+- struct inet6_ifaddr *ifa = NULL;
+- struct if6_iter_state *state = seq->private;
+-
+- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
+- ifa = inet6_addr_lst[state->bucket];
+- if (ifa)
+- break;
+- }
+- return ifa;
+-}
+-
+-static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+-{
+- struct if6_iter_state *state = seq->private;
+-
+- ifa = ifa->lst_next;
+-try_again:
+- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
+- ifa = inet6_addr_lst[state->bucket];
+- goto try_again;
+- }
+- return ifa;
+-}
+-
+-static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
+-{
+- struct inet6_ifaddr *ifa = if6_get_first(seq);
+-
+- if (ifa)
+- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
+- --pos;
+- return pos ? NULL : ifa;
+-}
+-
+-static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+-{
+- read_lock_bh(&addrconf_hash_lock);
+- return if6_get_idx(seq, *pos);
+-}
+-
+-static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+-{
+- struct inet6_ifaddr *ifa;
+-
+- ifa = if6_get_next(seq, v);
+- ++*pos;
+- return ifa;
+-}
+-
+-static void if6_seq_stop(struct seq_file *seq, void *v)
+-{
+- read_unlock_bh(&addrconf_hash_lock);
+-}
+-
+-static int if6_seq_show(struct seq_file *seq, void *v)
+-{
+- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
+- seq_printf(seq,
+- NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
+- NIP6(ifp->addr),
+- ifp->idev->dev->ifindex,
+- ifp->prefix_len,
+- ifp->scope,
+- ifp->flags,
+- ifp->idev->dev->name);
+- return 0;
+-}
+-
+-static struct seq_operations if6_seq_ops = {
+- .start = if6_seq_start,
+- .next = if6_seq_next,
+- .show = if6_seq_show,
+- .stop = if6_seq_stop,
+-};
+-
+-static int if6_seq_open(struct inode *inode, struct file *file)
+-{
+- struct seq_file *seq;
+- int rc = -ENOMEM;
+- struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
+-
+- if (!s)
+- goto out;
+-
+- rc = seq_open(file, &if6_seq_ops);
+- if (rc)
+- goto out_kfree;
+-
+- seq = file->private_data;
+- seq->private = s;
+-out:
+- return rc;
+-out_kfree:
+- kfree(s);
+- goto out;
+-}
+-
+-static const struct file_operations if6_fops = {
+- .owner = THIS_MODULE,
+- .open = if6_seq_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = seq_release_private,
+-};
+-
+-int __init if6_proc_init(void)
+-{
+- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
+- return -ENOMEM;
+- return 0;
+-}
+-
+-void if6_proc_exit(void)
+-{
+- proc_net_remove("if_inet6");
+-}
+-#endif /* CONFIG_PROC_FS */
+-
+-#ifdef CONFIG_IPV6_MIP6
+-/* Check if address is a home address configured on any interface. */
+-int ipv6_chk_home_addr(struct in6_addr *addr)
+-{
+- int ret = 0;
+- struct inet6_ifaddr * ifp;
+- u8 hash = ipv6_addr_hash(addr);
+- read_lock_bh(&addrconf_hash_lock);
+- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
+- (ifp->flags & IFA_F_HOMEADDRESS)) {
+- ret = 1;
+- break;
+- }
+- }
+- read_unlock_bh(&addrconf_hash_lock);
+- return ret;
+-}
+-#endif
+-
+-/*
+- * Periodic address status verification
+- */
+-
+-static void addrconf_verify(unsigned long foo)
+-{
+- struct inet6_ifaddr *ifp;
+- unsigned long now, next;
+- int i;
+-
+- spin_lock_bh(&addrconf_verify_lock);
+- now = jiffies;
+- next = now + ADDR_CHECK_FREQUENCY;
+-
+- del_timer(&addr_chk_timer);
+-
+- for (i=0; i < IN6_ADDR_HSIZE; i++) {
+-
+-restart:
+- read_lock(&addrconf_hash_lock);
+- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+- unsigned long age;
+-#ifdef CONFIG_IPV6_PRIVACY
+- unsigned long regen_advance;
+-#endif
+-
+- if (ifp->flags & IFA_F_PERMANENT)
+- continue;
+-
+- spin_lock(&ifp->lock);
+- age = (now - ifp->tstamp) / HZ;
+-
+-#ifdef CONFIG_IPV6_PRIVACY
+- regen_advance = ifp->idev->cnf.regen_max_retry *
+- ifp->idev->cnf.dad_transmits *
+- ifp->idev->nd_parms->retrans_time / HZ;
+-#endif
+-
+- if (ifp->valid_lft != INFINITY_LIFE_TIME &&
+- age >= ifp->valid_lft) {
+- spin_unlock(&ifp->lock);
+- in6_ifa_hold(ifp);
+- read_unlock(&addrconf_hash_lock);
+- ipv6_del_addr(ifp);
+- goto restart;
+- } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
+- spin_unlock(&ifp->lock);
+- continue;
+- } else if (age >= ifp->prefered_lft) {
+- /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
+- int deprecate = 0;
+-
+- if (!(ifp->flags&IFA_F_DEPRECATED)) {
+- deprecate = 1;
+- ifp->flags |= IFA_F_DEPRECATED;
+- }
+-
+- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+- next = ifp->tstamp + ifp->valid_lft * HZ;
+-
+- spin_unlock(&ifp->lock);
+-
+- if (deprecate) {
+- in6_ifa_hold(ifp);
+- read_unlock(&addrconf_hash_lock);
+-
+- ipv6_ifa_notify(0, ifp);
+- in6_ifa_put(ifp);
+- goto restart;
+- }
+-#ifdef CONFIG_IPV6_PRIVACY
+- } else if ((ifp->flags&IFA_F_TEMPORARY) &&
+- !(ifp->flags&IFA_F_TENTATIVE)) {
+- if (age >= ifp->prefered_lft - regen_advance) {
+- struct inet6_ifaddr *ifpub = ifp->ifpub;
+- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+- next = ifp->tstamp + ifp->prefered_lft * HZ;
+- if (!ifp->regen_count && ifpub) {
+- ifp->regen_count++;
+- in6_ifa_hold(ifp);
+- in6_ifa_hold(ifpub);
+- spin_unlock(&ifp->lock);
+- read_unlock(&addrconf_hash_lock);
+- spin_lock(&ifpub->lock);
+- ifpub->regen_count = 0;
+- spin_unlock(&ifpub->lock);
+- ipv6_create_tempaddr(ifpub, ifp);
+- in6_ifa_put(ifpub);
+- in6_ifa_put(ifp);
+- goto restart;
+- }
+- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+- spin_unlock(&ifp->lock);
+-#endif
+- } else {
+- /* ifp->prefered_lft <= ifp->valid_lft */
+- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+- next = ifp->tstamp + ifp->prefered_lft * HZ;
+- spin_unlock(&ifp->lock);
+- }
+- }
+- read_unlock(&addrconf_hash_lock);
+- }
+-
+- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
+- add_timer(&addr_chk_timer);
+- spin_unlock_bh(&addrconf_verify_lock);
+-}
+-
+-static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+-{
+- struct in6_addr *pfx = NULL;
+-
+- if (addr)
+- pfx = nla_data(addr);
+-
+- if (local) {
+- if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+- pfx = NULL;
+- else
+- pfx = nla_data(local);
+- }
+-
+- return pfx;
+-}
+-
+-static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
+- [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
+- [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
+- [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
+-};
+-
+-static int
+-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+-{
+- struct ifaddrmsg *ifm;
+- struct nlattr *tb[IFA_MAX+1];
+- struct in6_addr *pfx;
+- int err;
+-
+- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+- if (err < 0)
+- return err;
+-
+- ifm = nlmsg_data(nlh);
+- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+- if (pfx == NULL)
+- return -EINVAL;
+-
+- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+-}
+-
+-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+- u32 prefered_lft, u32 valid_lft)
+-{
+- u32 flags = RTF_EXPIRES;
+-
+- if (!valid_lft || (prefered_lft > valid_lft))
+- return -EINVAL;
+-
+- if (valid_lft == INFINITY_LIFE_TIME) {
+- ifa_flags |= IFA_F_PERMANENT;
+- flags = 0;
+- } else if (valid_lft >= 0x7FFFFFFF/HZ)
+- valid_lft = 0x7FFFFFFF/HZ;
+-
+- if (prefered_lft == 0)
+- ifa_flags |= IFA_F_DEPRECATED;
+- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
+- (prefered_lft != INFINITY_LIFE_TIME))
+- prefered_lft = 0x7FFFFFFF/HZ;
+-
+- spin_lock_bh(&ifp->lock);
+- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
+- ifp->tstamp = jiffies;
+- ifp->valid_lft = valid_lft;
+- ifp->prefered_lft = prefered_lft;
+-
+- spin_unlock_bh(&ifp->lock);
+- if (!(ifp->flags&IFA_F_TENTATIVE))
+- ipv6_ifa_notify(0, ifp);
+-
+- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
+- jiffies_to_clock_t(valid_lft * HZ), flags);
+- addrconf_verify(0);
+-
+- return 0;
+-}
+-
+-static int
+-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+-{
+- struct ifaddrmsg *ifm;
+- struct nlattr *tb[IFA_MAX+1];
+- struct in6_addr *pfx;
+- struct inet6_ifaddr *ifa;
+- struct net_device *dev;
+- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+- u8 ifa_flags;
+- int err;
+-
+- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+- if (err < 0)
+- return err;
+-
+- ifm = nlmsg_data(nlh);
+- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+- if (pfx == NULL)
+- return -EINVAL;
+-
+- if (tb[IFA_CACHEINFO]) {
+- struct ifa_cacheinfo *ci;
+-
+- ci = nla_data(tb[IFA_CACHEINFO]);
+- valid_lft = ci->ifa_valid;
+- preferred_lft = ci->ifa_prefered;
+- } else {
+- preferred_lft = INFINITY_LIFE_TIME;
+- valid_lft = INFINITY_LIFE_TIME;
+- }
+-
+- dev = __dev_get_by_index(ifm->ifa_index);
+- if (dev == NULL)
+- return -ENODEV;
+-
+- /* We ignore other flags so far. */
+- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+-
+- ifa = ipv6_get_ifaddr(pfx, dev, 1);
+- if (ifa == NULL) {
+- /*
+- * It would be best to check for !NLM_F_CREATE here but
+- * userspace already relies on not having to provide this.
+- */
+- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
+- ifa_flags, preferred_lft, valid_lft);
+- }
+-
+- if (nlh->nlmsg_flags & NLM_F_EXCL ||
+- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+- err = -EEXIST;
+- else
+- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+-
+- in6_ifa_put(ifa);
+-
+- return err;
+-}
+-
+-static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+- u8 scope, int ifindex)
+-{
+- struct ifaddrmsg *ifm;
+-
+- ifm = nlmsg_data(nlh);
+- ifm->ifa_family = AF_INET6;
+- ifm->ifa_prefixlen = prefixlen;
+- ifm->ifa_flags = flags;
+- ifm->ifa_scope = scope;
+- ifm->ifa_index = ifindex;
+-}
+-
+-static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+- unsigned long tstamp, u32 preferred, u32 valid)
+-{
+- struct ifa_cacheinfo ci;
+-
+- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
+- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
+- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+- ci.ifa_prefered = preferred;
+- ci.ifa_valid = valid;
+-
+- return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+-}
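+-/*
+- * The two-step TIME_DELTA conversion above reports timestamps in
+- * hundredths of a second while avoiding overflow: e.g. with HZ == 250
+- * a delta of 1000 jiffies gives 1000/250*100 + 0 = 400, i.e. 4.00
+- * seconds.
+- */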
+-
+-static inline int rt_scope(int ifa_scope)
+-{
+- if (ifa_scope & IFA_HOST)
+- return RT_SCOPE_HOST;
+- else if (ifa_scope & IFA_LINK)
+- return RT_SCOPE_LINK;
+- else if (ifa_scope & IFA_SITE)
+- return RT_SCOPE_SITE;
+- else
+- return RT_SCOPE_UNIVERSE;
+-}
+-
+-static inline int inet6_ifaddr_msgsize(void)
+-{
+- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+- + nla_total_size(16) /* IFA_ADDRESS */
+- + nla_total_size(sizeof(struct ifa_cacheinfo));
+-}
+-
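+-/*
+- * Fill one netlink message for a unicast address. Unless the address
+- * is permanent, the lifetimes reported in IFA_CACHEINFO are the
+- * remaining ones, i.e. reduced by the time elapsed since the last
+- * update (tstamp).
+- */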
+-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+- u32 pid, u32 seq, int event, unsigned int flags)
+-{
+- struct nlmsghdr *nlh;
+- u32 preferred, valid;
+-
+- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+- ifa->idev->dev->ifindex);
+-
+- if (!(ifa->flags&IFA_F_PERMANENT)) {
+- preferred = ifa->prefered_lft;
+- valid = ifa->valid_lft;
+- if (preferred != INFINITY_LIFE_TIME) {
+- long tval = (jiffies - ifa->tstamp)/HZ;
+- preferred -= tval;
+- if (valid != INFINITY_LIFE_TIME)
+- valid -= tval;
+- }
+- } else {
+- preferred = INFINITY_LIFE_TIME;
+- valid = INFINITY_LIFE_TIME;
+- }
+-
+- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
+- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+- }
+-
+- return nlmsg_end(skb, nlh);
+-}
+-
+-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+- u32 pid, u32 seq, int event, u16 flags)
+-{
+- struct nlmsghdr *nlh;
+- u8 scope = RT_SCOPE_UNIVERSE;
+- int ifindex = ifmca->idev->dev->ifindex;
+-
+- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+- scope = RT_SCOPE_SITE;
+-
+- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+- if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+- }
+-
+- return nlmsg_end(skb, nlh);
+-}
+-
+-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+- u32 pid, u32 seq, int event, unsigned int flags)
+-{
+- struct nlmsghdr *nlh;
+- u8 scope = RT_SCOPE_UNIVERSE;
+- int ifindex = ifaca->aca_idev->dev->ifindex;
+-
+- if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+- scope = RT_SCOPE_SITE;
+-
+- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+- if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+- }
+-
+- return nlmsg_end(skb, nlh);
+-}
+-
+-enum addr_type_t
+-{
+- UNICAST_ADDR,
+- MULTICAST_ADDR,
+- ANYCAST_ADDR,
+-};
+-
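+-/*
+- * Common dump loop for unicast, multicast and anycast addresses.
+- * cb->args[0] holds the device index and cb->args[1] the per-device
+- * address index, so a dump that fills the skb can resume where it
+- * left off on the next callback.
+- */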
+-static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
+- enum addr_type_t type)
+-{
+- int idx, ip_idx;
+- int s_idx, s_ip_idx;
+- int err = 1;
+- struct net_device *dev;
+- struct inet6_dev *idev = NULL;
+- struct inet6_ifaddr *ifa;
+- struct ifmcaddr6 *ifmca;
+- struct ifacaddr6 *ifaca;
+-
+- s_idx = cb->args[0];
+- s_ip_idx = ip_idx = cb->args[1];
+-
+- idx = 0;
+- for_each_netdev(dev) {
+- if (idx < s_idx)
+- goto cont;
+- if (idx > s_idx)
+- s_ip_idx = 0;
+- ip_idx = 0;
+- if ((idev = in6_dev_get(dev)) == NULL)
+- goto cont;
+- read_lock_bh(&idev->lock);
+- switch (type) {
+- case UNICAST_ADDR:
+- /* unicast address incl. temp addr */
+- for (ifa = idev->addr_list; ifa;
+- ifa = ifa->if_next, ip_idx++) {
+- if (ip_idx < s_ip_idx)
+- continue;
+- if ((err = inet6_fill_ifaddr(skb, ifa,
+- NETLINK_CB(cb->skb).pid,
+- cb->nlh->nlmsg_seq, RTM_NEWADDR,
+- NLM_F_MULTI)) <= 0)
+- goto done;
+- }
+- break;
+- case MULTICAST_ADDR:
+- /* multicast address */
+- for (ifmca = idev->mc_list; ifmca;
+- ifmca = ifmca->next, ip_idx++) {
+- if (ip_idx < s_ip_idx)
+- continue;
+- if ((err = inet6_fill_ifmcaddr(skb, ifmca,
+- NETLINK_CB(cb->skb).pid,
+- cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
+- NLM_F_MULTI)) <= 0)
+- goto done;
+- }
+- break;
+- case ANYCAST_ADDR:
+- /* anycast address */
+- for (ifaca = idev->ac_list; ifaca;
+- ifaca = ifaca->aca_next, ip_idx++) {
+- if (ip_idx < s_ip_idx)
+- continue;
+- if ((err = inet6_fill_ifacaddr(skb, ifaca,
+- NETLINK_CB(cb->skb).pid,
+- cb->nlh->nlmsg_seq, RTM_GETANYCAST,
+- NLM_F_MULTI)) <= 0)
+- goto done;
+- }
+- break;
+- default:
+- break;
+- }
+- read_unlock_bh(&idev->lock);
+- in6_dev_put(idev);
+-cont:
+- idx++;
+- }
+-done:
+- if (err <= 0) {
+- read_unlock_bh(&idev->lock);
+- in6_dev_put(idev);
+- }
+- cb->args[0] = idx;
+- cb->args[1] = ip_idx;
+- return skb->len;
+-}
+-
+-static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- enum addr_type_t type = UNICAST_ADDR;
+- return inet6_dump_addr(skb, cb, type);
+-}
+-
+-static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- enum addr_type_t type = MULTICAST_ADDR;
+- return inet6_dump_addr(skb, cb, type);
+-}
+-
+-
+-static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- enum addr_type_t type = ANYCAST_ADDR;
+- return inet6_dump_addr(skb, cb, type);
+-}
+-
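+-/*
+- * RTM_GETADDR handler for a single address: look the address up,
+- * fill a freshly allocated message and unicast it back to the
+- * requesting socket.
+- */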
+-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+- void *arg)
+-{
+- struct ifaddrmsg *ifm;
+- struct nlattr *tb[IFA_MAX+1];
+- struct in6_addr *addr = NULL;
+- struct net_device *dev = NULL;
+- struct inet6_ifaddr *ifa;
+- struct sk_buff *skb;
+- int err;
+-
+- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+- if (err < 0)
+- goto errout;
+-
+- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+- if (addr == NULL) {
+- err = -EINVAL;
+- goto errout;
+- }
+-
+- ifm = nlmsg_data(nlh);
+- if (ifm->ifa_index)
+- dev = __dev_get_by_index(ifm->ifa_index);
+-
+- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+- err = -EADDRNOTAVAIL;
+- goto errout;
+- }
+-
+- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
+- err = -ENOBUFS;
+- goto errout_ifa;
+- }
+-
+- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+- nlh->nlmsg_seq, RTM_NEWADDR, 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout_ifa;
+- }
+- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+-errout_ifa:
+- in6_ifa_put(ifa);
+-errout:
+- return err;
+-}
+-
+-static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+-{
+- struct sk_buff *skb;
+- int err = -ENOBUFS;
+-
+- skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
+- if (skb == NULL)
+- goto errout;
+-
+- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
+-}
+-
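+-/*
+- * Serialize a struct ipv6_devconf into the flat, DEVCONF_*-indexed
+- * s32 array carried by the IFLA_INET6_CONF attribute.
+- */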
+-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+- __s32 *array, int bytes)
+-{
+- BUG_ON(bytes < (DEVCONF_MAX * 4));
+-
+- memset(array, 0, bytes);
+- array[DEVCONF_FORWARDING] = cnf->forwarding;
+- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
+- array[DEVCONF_MTU6] = cnf->mtu6;
+- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
+- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
+- array[DEVCONF_AUTOCONF] = cnf->autoconf;
+- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
+- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
+- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+-#ifdef CONFIG_IPV6_PRIVACY
+- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
+- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
+- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
+- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
+- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
+-#endif
+- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
+- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+-#ifdef CONFIG_IPV6_ROUTER_PREF
+- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
+-#ifdef CONFIG_IPV6_ROUTE_INFO
+- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+-#endif
+-#endif
+- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+-#endif
+-}
+-
+-static inline size_t inet6_if_nlmsg_size(void)
+-{
+- return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+- + nla_total_size(4) /* IFLA_MTU */
+- + nla_total_size(4) /* IFLA_LINK */
+- + nla_total_size( /* IFLA_PROTINFO */
+- nla_total_size(4) /* IFLA_INET6_FLAGS */
+- + nla_total_size(sizeof(struct ifla_cacheinfo))
+- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+- );
+-}
+-
+-static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
+- int bytes)
+-{
+- int i;
+- int pad = bytes - sizeof(u64) * items;
+- BUG_ON(pad < 0);
+-
+- /* Use put_unaligned() because stats may not be aligned for u64. */
+- put_unaligned(items, &stats[0]);
+- for (i = 1; i < items; i++)
+- put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+-
+- memset(&stats[items], 0, pad);
+-}
+-
+-static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+- int bytes)
+-{
+- switch(attrtype) {
+- case IFLA_INET6_STATS:
+- __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+- break;
+- case IFLA_INET6_ICMP6STATS:
+- __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+- break;
+- }
+-}
+-
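+-/*
+- * Fill an RTM_NEWLINK message for an inet6 device: the generic link
+- * data plus a nested IFLA_PROTINFO carrying interface flags,
+- * cacheinfo, devconf and the IPv6/ICMPv6 SNMP counters.
+- */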
+-static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
+- u32 pid, u32 seq, int event, unsigned int flags)
+-{
+- struct net_device *dev = idev->dev;
+- struct nlattr *nla;
+- struct ifinfomsg *hdr;
+- struct nlmsghdr *nlh;
+- void *protoinfo;
+- struct ifla_cacheinfo ci;
+-
+- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- hdr = nlmsg_data(nlh);
+- hdr->ifi_family = AF_INET6;
+- hdr->__ifi_pad = 0;
+- hdr->ifi_type = dev->type;
+- hdr->ifi_index = dev->ifindex;
+- hdr->ifi_flags = dev_get_flags(dev);
+- hdr->ifi_change = 0;
+-
+- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+-
+- if (dev->addr_len)
+- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+-
+- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+- if (dev->ifindex != dev->iflink)
+- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+-
+- protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
+- if (protoinfo == NULL)
+- goto nla_put_failure;
+-
+- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+-
+- ci.max_reasm_len = IPV6_MAXPLEN;
+- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
+- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+- ci.reachable_time = idev->nd_parms->reachable_time;
+- ci.retrans_time = idev->nd_parms->retrans_time;
+- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+-
+- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+- if (nla == NULL)
+- goto nla_put_failure;
+- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+-
+- /* XXX - MC not implemented */
+-
+- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+- if (nla == NULL)
+- goto nla_put_failure;
+- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+-
+- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+- if (nla == NULL)
+- goto nla_put_failure;
+- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+-
+- nla_nest_end(skb, protoinfo);
+- return nlmsg_end(skb, nlh);
+-
+-nla_put_failure:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+-{
+- int idx, err;
+- int s_idx = cb->args[0];
+- struct net_device *dev;
+- struct inet6_dev *idev;
+-
+- read_lock(&dev_base_lock);
+- idx = 0;
+- for_each_netdev(dev) {
+- if (idx < s_idx)
+- goto cont;
+- if ((idev = in6_dev_get(dev)) == NULL)
+- goto cont;
+- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
+- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
+- in6_dev_put(idev);
+- if (err <= 0)
+- break;
+-cont:
+- idx++;
+- }
+- read_unlock(&dev_base_lock);
+- cb->args[0] = idx;
+-
+- return skb->len;
+-}
+-
+-void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
+-{
+- struct sk_buff *skb;
+- int err = -ENOBUFS;
+-
+- skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
+- if (skb == NULL)
+- goto errout;
+-
+- err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
+-}
+-
+-static inline size_t inet6_prefix_nlmsg_size(void)
+-{
+- return NLMSG_ALIGN(sizeof(struct prefixmsg))
+- + nla_total_size(sizeof(struct in6_addr))
+- + nla_total_size(sizeof(struct prefix_cacheinfo));
+-}
+-
+-static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
+- struct prefix_info *pinfo, u32 pid, u32 seq,
+- int event, unsigned int flags)
+-{
+- struct prefixmsg *pmsg;
+- struct nlmsghdr *nlh;
+- struct prefix_cacheinfo ci;
+-
+- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+- if (nlh == NULL)
+- return -EMSGSIZE;
+-
+- pmsg = nlmsg_data(nlh);
+- pmsg->prefix_family = AF_INET6;
+- pmsg->prefix_pad1 = 0;
+- pmsg->prefix_pad2 = 0;
+- pmsg->prefix_ifindex = idev->dev->ifindex;
+- pmsg->prefix_len = pinfo->prefix_len;
+- pmsg->prefix_type = pinfo->type;
+- pmsg->prefix_pad3 = 0;
+- pmsg->prefix_flags = 0;
+- if (pinfo->onlink)
+- pmsg->prefix_flags |= IF_PREFIX_ONLINK;
+- if (pinfo->autoconf)
+- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
+-
+- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
+-
+- ci.preferred_time = ntohl(pinfo->prefered);
+- ci.valid_time = ntohl(pinfo->valid);
+- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
+-
+- return nlmsg_end(skb, nlh);
+-
+-nla_put_failure:
+- nlmsg_cancel(skb, nlh);
+- return -EMSGSIZE;
+-}
+-
+-static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+- struct prefix_info *pinfo)
+-{
+- struct sk_buff *skb;
+- int err = -ENOBUFS;
+-
+- skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
+- if (skb == NULL)
+- goto errout;
+-
+- err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+- if (err < 0) {
+- /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */
+- WARN_ON(err == -EMSGSIZE);
+- kfree_skb(skb);
+- goto errout;
+- }
+- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+-errout:
+- if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
+-}
+-
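+-/*
+- * Core notification helper: emit the netlink event and keep the
+- * address's host route and anycast membership in sync with its
+- * state.
+- */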
+-static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+-{
+- inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
+-
+- switch (event) {
+- case RTM_NEWADDR:
+- /*
+- * If the address was optimistic
+- * we inserted the route at the start of
+- * our DAD process, so we don't need
+- * to do it again
+- */
+- if (!(ifp->rt->rt6i_node))
+- ip6_ins_rt(ifp->rt);
+- if (ifp->idev->cnf.forwarding)
+- addrconf_join_anycast(ifp);
+- break;
+- case RTM_DELADDR:
+- if (ifp->idev->cnf.forwarding)
+- addrconf_leave_anycast(ifp);
+- addrconf_leave_solict(ifp->idev, &ifp->addr);
+- dst_hold(&ifp->rt->u.dst);
+- if (ip6_del_rt(ifp->rt))
+- dst_free(&ifp->rt->u.dst);
+- break;
+- }
+-}
+-
+-static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+-{
+- rcu_read_lock_bh();
+- if (likely(ifp->idev->dead == 0))
+- __ipv6_ifa_notify(event, ifp);
+- rcu_read_unlock_bh();
+-}
+-
+-#ifdef CONFIG_SYSCTL
+-
+-static
+-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+- void __user *buffer, size_t *lenp, loff_t *ppos)
+-{
+- int *valp = ctl->data;
+- int val = *valp;
+- int ret;
+-
+- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+-
+- if (write && valp != &ipv6_devconf_dflt.forwarding) {
+- if (valp != &ipv6_devconf.forwarding) {
+- if ((!*valp) ^ (!val)) {
+- struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
+- if (idev == NULL)
+- return ret;
+- dev_forward_change(idev);
+- }
+- } else {
+- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+- addrconf_forward_change();
+- }
+- if (*valp)
+- rt6_purge_dflt_routers();
+- }
+-
+- return ret;
+-}
+-
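+-/*
+- * Binary sysctl(2) strategy for "forwarding": hand the old value
+- * back to userspace, then apply the same side effects as the proc
+- * handler above when the setting actually flips.
+- */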
+-static int addrconf_sysctl_forward_strategy(ctl_table *table,
+- int __user *name, int nlen,
+- void __user *oldval,
+- size_t __user *oldlenp,
+- void __user *newval, size_t newlen)
+-{
+- int *valp = table->data;
+- int new;
+-
+- if (!newval || !newlen)
+- return 0;
+- if (newlen != sizeof(int))
+- return -EINVAL;
+- if (get_user(new, (int __user *)newval))
+- return -EFAULT;
+- if (new == *valp)
+- return 0;
+- if (oldval && oldlenp) {
+- size_t len;
+- if (get_user(len, oldlenp))
+- return -EFAULT;
+- if (len) {
+- if (len > table->maxlen)
+- len = table->maxlen;
+- if (copy_to_user(oldval, valp, len))
+- return -EFAULT;
+- if (put_user(len, oldlenp))
+- return -EFAULT;
+- }
+- }
+-
+- if (valp != &ipv6_devconf_dflt.forwarding) {
+- if (valp != &ipv6_devconf.forwarding) {
+- struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
+- int changed;
+- if (unlikely(idev == NULL))
+- return -ENODEV;
+- changed = (!*valp) ^ (!new);
+- *valp = new;
+- if (changed)
+- dev_forward_change(idev);
+- } else {
+- *valp = new;
+- addrconf_forward_change();
+- }
+-
+- if (*valp)
+- rt6_purge_dflt_routers();
+- } else
+- *valp = new;
+-
+- return 1;
+-}
+-
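+-/*
+- * Template for the sysctl tree under /proc/sys/net/ipv6/conf/.  The
+- * data pointers reference the global ipv6_devconf and are rebound to
+- * the per-device ipv6_devconf when a copy is registered.
+- */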
+-static struct addrconf_sysctl_table
+-{
+- struct ctl_table_header *sysctl_header;
+- ctl_table addrconf_vars[__NET_IPV6_MAX];
+- ctl_table addrconf_dev[2];
+- ctl_table addrconf_conf_dir[2];
+- ctl_table addrconf_proto_dir[2];
+- ctl_table addrconf_root_dir[2];
+-} addrconf_sysctl __read_mostly = {
+- .sysctl_header = NULL,
+- .addrconf_vars = {
+- {
+- .ctl_name = NET_IPV6_FORWARDING,
+- .procname = "forwarding",
+- .data = &ipv6_devconf.forwarding,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &addrconf_sysctl_forward,
+- .strategy = &addrconf_sysctl_forward_strategy,
+- },
+- {
+- .ctl_name = NET_IPV6_HOP_LIMIT,
+- .procname = "hop_limit",
+- .data = &ipv6_devconf.hop_limit,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_MTU,
+- .procname = "mtu",
+- .data = &ipv6_devconf.mtu6,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_ACCEPT_RA,
+- .procname = "accept_ra",
+- .data = &ipv6_devconf.accept_ra,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_ACCEPT_REDIRECTS,
+- .procname = "accept_redirects",
+- .data = &ipv6_devconf.accept_redirects,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_AUTOCONF,
+- .procname = "autoconf",
+- .data = &ipv6_devconf.autoconf,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_DAD_TRANSMITS,
+- .procname = "dad_transmits",
+- .data = &ipv6_devconf.dad_transmits,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_RTR_SOLICITS,
+- .procname = "router_solicitations",
+- .data = &ipv6_devconf.rtr_solicits,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL,
+- .procname = "router_solicitation_interval",
+- .data = &ipv6_devconf.rtr_solicit_interval,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies,
+- },
+- {
+- .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY,
+- .procname = "router_solicitation_delay",
+- .data = &ipv6_devconf.rtr_solicit_delay,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies,
+- },
+- {
+- .ctl_name = NET_IPV6_FORCE_MLD_VERSION,
+- .procname = "force_mld_version",
+- .data = &ipv6_devconf.force_mld_version,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#ifdef CONFIG_IPV6_PRIVACY
+- {
+- .ctl_name = NET_IPV6_USE_TEMPADDR,
+- .procname = "use_tempaddr",
+- .data = &ipv6_devconf.use_tempaddr,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_TEMP_VALID_LFT,
+- .procname = "temp_valid_lft",
+- .data = &ipv6_devconf.temp_valid_lft,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_TEMP_PREFERED_LFT,
+- .procname = "temp_prefered_lft",
+- .data = &ipv6_devconf.temp_prefered_lft,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_REGEN_MAX_RETRY,
+- .procname = "regen_max_retry",
+- .data = &ipv6_devconf.regen_max_retry,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR,
+- .procname = "max_desync_factor",
+- .data = &ipv6_devconf.max_desync_factor,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#endif
+- {
+- .ctl_name = NET_IPV6_MAX_ADDRESSES,
+- .procname = "max_addresses",
+- .data = &ipv6_devconf.max_addresses,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR,
+- .procname = "accept_ra_defrtr",
+- .data = &ipv6_devconf.accept_ra_defrtr,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_ACCEPT_RA_PINFO,
+- .procname = "accept_ra_pinfo",
+- .data = &ipv6_devconf.accept_ra_pinfo,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#ifdef CONFIG_IPV6_ROUTER_PREF
+- {
+- .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF,
+- .procname = "accept_ra_rtr_pref",
+- .data = &ipv6_devconf.accept_ra_rtr_pref,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL,
+- .procname = "router_probe_interval",
+- .data = &ipv6_devconf.rtr_probe_interval,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies,
+- },
+-#ifdef CONFIG_IPV6_ROUTE_INFO
+- {
+- .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN,
+- .procname = "accept_ra_rt_info_max_plen",
+- .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#endif
+-#endif
+- {
+- .ctl_name = NET_IPV6_PROXY_NDP,
+- .procname = "proxy_ndp",
+- .data = &ipv6_devconf.proxy_ndp,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+- {
+- .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE,
+- .procname = "accept_source_route",
+- .data = &ipv6_devconf.accept_source_route,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+- },
+-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+- {
+- .ctl_name = CTL_UNNUMBERED,
+- .procname = "optimistic_dad",
+- .data = &ipv6_devconf.optimistic_dad,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec,
+-
+- },
+-#endif
+- {
+- .ctl_name = 0, /* sentinel */
+- }
+- },
+- .addrconf_dev = {
+- {
+- .ctl_name = NET_PROTO_CONF_ALL,
+- .procname = "all",
+- .mode = 0555,
+- .child = addrconf_sysctl.addrconf_vars,
+- },
+- {
+- .ctl_name = 0, /* sentinel */
+- }
+- },
+- .addrconf_conf_dir = {
+- {
+- .ctl_name = NET_IPV6_CONF,
+- .procname = "conf",
+- .mode = 0555,
+- .child = addrconf_sysctl.addrconf_dev,
+- },
+- {
+- .ctl_name = 0, /* sentinel */
+- }
+- },
+- .addrconf_proto_dir = {
+- {
+- .ctl_name = NET_IPV6,
+- .procname = "ipv6",
+- .mode = 0555,
+- .child = addrconf_sysctl.addrconf_conf_dir,
+- },
+- {
+- .ctl_name = 0, /* sentinel */
+- }
+- },
+- .addrconf_root_dir = {
+- {
+- .ctl_name = CTL_NET,
+- .procname = "net",
+- .mode = 0555,
+- .child = addrconf_sysctl.addrconf_proto_dir,
+- },
+- {
+- .ctl_name = 0, /* sentinel */
+- }
+- },
+-};
+-
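+-/*
+- * Duplicate the template above, point each entry at the given
+- * ipv6_devconf and register the copy under the device's name (or
+- * "default" for the default configuration).
+- */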
+-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+-{
+- int i;
+- struct net_device *dev = idev ? idev->dev : NULL;
+- struct addrconf_sysctl_table *t;
+- char *dev_name = NULL;
+-
+- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
+- if (t == NULL)
+- return;
+- for (i=0; t->addrconf_vars[i].data; i++) {
+- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+- }
+- if (dev) {
+- dev_name = dev->name;
+- t->addrconf_dev[0].ctl_name = dev->ifindex;
+- } else {
+- dev_name = "default";
+- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+- }
+-
+- /*
+- * Make a copy of dev_name, because '.procname' is regarded as const
+- * by sysctl and we wouldn't want anyone to change it under our feet
+- * (see SIOCSIFNAME).
+- */
+- dev_name = kstrdup(dev_name, GFP_KERNEL);
+- if (!dev_name)
+- goto free;
+-
+- t->addrconf_dev[0].procname = dev_name;
+-
+- t->addrconf_dev[0].child = t->addrconf_vars;
+- t->addrconf_conf_dir[0].child = t->addrconf_dev;
+- t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
+- t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+-
+- t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
+- if (t->sysctl_header == NULL)
+- goto free_procname;
+- else
+- p->sysctl = t;
+- return;
+-
+- /* error path */
+- free_procname:
+- kfree(dev_name);
+- free:
+- kfree(t);
+-
+- return;
+-}
+-
+-static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+-{
+- if (p->sysctl) {
+- struct addrconf_sysctl_table *t = p->sysctl;
+- p->sysctl = NULL;
+- unregister_sysctl_table(t->sysctl_header);
+- kfree(t->addrconf_dev[0].procname);
+- kfree(t);
+- }
+-}
+-
+-
+-#endif
+-
+-/*
+- * Device notifier
+- */
+-
+-int register_inet6addr_notifier(struct notifier_block *nb)
+-{
+- return atomic_notifier_chain_register(&inet6addr_chain, nb);
+-}
+-
+-EXPORT_SYMBOL(register_inet6addr_notifier);
+-
+-int unregister_inet6addr_notifier(struct notifier_block *nb)
+-{
+- return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
+-}
+-
+-EXPORT_SYMBOL(unregister_inet6addr_notifier);
+-
+-/*
+- * Init / cleanup code
+- */
+-
+-int __init addrconf_init(void)
+-{
+- int err = 0;
+-
+- /* The addrconf netdev notifier requires that loopback_dev
+- * has its ipv6 private information allocated and set up
+- * before it can bring up and give link-local addresses
+- * to other devices which are up.
+- *
+- * Unfortunately, loopback_dev is not necessarily the first
+- * entry in the global dev_base list of net devices. In fact,
+- * it is likely to be the very last entry on that list.
+- * So this causes the notifier registration below to try to
+- * give link-local addresses to all devices besides loopback_dev
+- * first, then loopback_dev, which causes all the non-loopback_dev
+- * devices to fail to get a link-local address.
+- *
+- * So, as a temporary fix, allocate the ipv6 structure for
+- * loopback_dev first by hand.
+- * Longer term, all of the dependencies ipv6 has upon the loopback
+- * device and it being up should be removed.
+- */
+- rtnl_lock();
+- if (!ipv6_add_dev(&loopback_dev))
+- err = -ENOMEM;
+- rtnl_unlock();
+- if (err)
+- return err;
+-
+- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+-#endif
+-
+- register_netdevice_notifier(&ipv6_dev_notf);
+-
+- addrconf_verify(0);
+-
+- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+- if (err < 0)
+- goto errout;
+-
+- /* Only the first call to __rtnl_register can fail */
+- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
+- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
+- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
+- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
+- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+-
+-#ifdef CONFIG_SYSCTL
+- addrconf_sysctl.sysctl_header =
+- register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
+- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+-#endif
+-
+- return 0;
+-errout:
+- unregister_netdevice_notifier(&ipv6_dev_notf);
+-
+- return err;
+-}
+-
+-void __exit addrconf_cleanup(void)
+-{
+- struct net_device *dev;
+- struct inet6_dev *idev;
+- struct inet6_ifaddr *ifa;
+- int i;
+-
+- unregister_netdevice_notifier(&ipv6_dev_notf);
+-
+-#ifdef CONFIG_SYSCTL
+- addrconf_sysctl_unregister(&ipv6_devconf_dflt);
+- addrconf_sysctl_unregister(&ipv6_devconf);
+-#endif
+-
+- rtnl_lock();
+-
+- /*
+- * clean dev list.
+- */
+-
+- for_each_netdev(dev) {
+- if ((idev = __in6_dev_get(dev)) == NULL)
+- continue;
+- addrconf_ifdown(dev, 1);
+- }
+- addrconf_ifdown(&loopback_dev, 2);
+-
+- /*
+- * Check hash table.
+- */
+-
+- write_lock_bh(&addrconf_hash_lock);
+- for (i=0; i < IN6_ADDR_HSIZE; i++) {
+- for (ifa=inet6_addr_lst[i]; ifa; ) {
+- struct inet6_ifaddr *bifa;
+-
+- bifa = ifa;
+- ifa = ifa->lst_next;
+- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
+- /* Do not free it; something is wrong.
+- Now we can investigate it with a debugger.
+- */
+- }
+- }
+- write_unlock_bh(&addrconf_hash_lock);
+-
+- del_timer(&addr_chk_timer);
+-
+- rtnl_unlock();
+-
+-#ifdef CONFIG_PROC_FS
+- proc_net_remove("if_inet6");
+-#endif
+-}
+diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-591/net/ipv6/af_inet6.c
+--- linux-2.6.22-570/net/ipv6/af_inet6.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/af_inet6.c 2007-12-21 15:36:15.000000000 -0500
+@@ -59,9 +59,6 @@
+ #ifdef CONFIG_IPV6_TUNNEL
+ #include <net/ip6_tunnel.h>
+ #endif
+-#ifdef CONFIG_IPV6_MIP6
+-#include <net/mip6.h>
+-#endif
+
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -85,7 +82,7 @@
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+ }
+
+-static int inet6_create(struct socket *sock, int protocol)
++static int inet6_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct inet_sock *inet;
+ struct ipv6_pinfo *np;
+@@ -98,6 +95,9 @@
+ int try_loading_module = 0;
+ int err;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (sock->type != SOCK_RAW &&
+ sock->type != SOCK_DGRAM &&
+ !inet_ehash_secret)
+@@ -166,7 +166,7 @@
+ BUG_TRAP(answer_prot->slab != NULL);
+
+ err = -ENOBUFS;
+- sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
++ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1);
+ if (sk == NULL)
+ goto out;
+
+@@ -209,7 +209,7 @@
+ inet->mc_index = 0;
+ inet->mc_list = NULL;
+
+- if (ipv4_config.no_pmtu_disc)
++ if (init_net.sysctl_ipv4_no_pmtu_disc)
+ inet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -290,7 +290,7 @@
+ /* Check if the address belongs to the host. */
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ v4addr = addr->sin6_addr.s6_addr32[3];
+- if (inet_addr_type(v4addr) != RTN_LOCAL) {
++ if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
+ err = -EADDRNOTAVAIL;
+ goto out;
+ }
+@@ -316,7 +316,7 @@
+ err = -EINVAL;
+ goto out;
+ }
+- dev = dev_get_by_index(sk->sk_bound_dev_if);
++ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
+ goto out;
+@@ -675,6 +675,7 @@
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = sk->sk_protocol;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+@@ -876,9 +877,6 @@
+ ipv6_frag_init();
+ ipv6_nodata_init();
+ ipv6_destopt_init();
+-#ifdef CONFIG_IPV6_MIP6
+- mip6_init();
+-#endif
+
+ /* Init v6 transport protocols. */
+ udpv6_init();
+@@ -944,9 +942,7 @@
+
+ /* Cleanup code parts. */
+ ipv6_packet_cleanup();
+-#ifdef CONFIG_IPV6_MIP6
+- mip6_fini();
+-#endif
++
+ addrconf_cleanup();
+ ip6_flowlabel_cleanup();
+ ip6_route_cleanup();
+diff -Nurb linux-2.6.22-570/net/ipv6/ah6.c linux-2.6.22-591/net/ipv6/ah6.c
+--- linux-2.6.22-570/net/ipv6/ah6.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/ah6.c 2007-12-21 15:36:12.000000000 -0500
+@@ -74,7 +74,7 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ /**
+ * ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ * @iph: IPv6 header
+@@ -132,6 +132,8 @@
+ bad:
+ return;
+ }
++#else
++static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
+ #endif
+
+ /**
+@@ -189,10 +191,8 @@
+ while (exthdr.raw < end) {
+ switch (nexthdr) {
+ case NEXTHDR_DEST:
+-#ifdef CONFIG_IPV6_MIP6
+ if (dir == XFRM_POLICY_OUT)
+ ipv6_rearrange_destopt(iph, exthdr.opth);
+-#endif
+ case NEXTHDR_HOP:
+ if (!zero_out_mutable_opts(exthdr.opth)) {
+ LIMIT_NETDEBUG(
+@@ -228,7 +228,7 @@
+ u8 nexthdr;
+ char tmp_base[8];
+ struct {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ struct in6_addr saddr;
+ #endif
+ struct in6_addr daddr;
+@@ -255,7 +255,7 @@
+ err = -ENOMEM;
+ goto error;
+ }
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ memcpy(tmp_ext, &top_iph->saddr, extlen);
+ #else
+ memcpy(tmp_ext, &top_iph->daddr, extlen);
+@@ -294,7 +294,7 @@
+
+ memcpy(top_iph, tmp_base, sizeof(tmp_base));
+ if (tmp_ext) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ memcpy(&top_iph->saddr, tmp_ext, extlen);
+ #else
+ memcpy(&top_iph->daddr, tmp_ext, extlen);
+@@ -554,3 +554,4 @@
+ module_exit(ah6_fini);
+
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
+diff -Nurb linux-2.6.22-570/net/ipv6/anycast.c linux-2.6.22-591/net/ipv6/anycast.c
+--- linux-2.6.22-570/net/ipv6/anycast.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/anycast.c 2007-12-21 15:36:15.000000000 -0500
+@@ -32,6 +32,7 @@
+
+ #include <net/sock.h>
+ #include <net/snmp.h>
++#include <net/net_namespace.h>
+
+ #include <net/ipv6.h>
+ #include <net/protocol.h>
+@@ -112,10 +113,10 @@
+ } else {
+ /* router, no matching interface: just pick one */
+
+- dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
++ dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK);
+ }
+ } else
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(&init_net, ifindex);
+
+ if (dev == NULL) {
+ err = -ENODEV;
+@@ -196,7 +197,7 @@
+
+ write_unlock_bh(&ipv6_sk_ac_lock);
+
+- dev = dev_get_by_index(pac->acl_ifindex);
++ dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ if (dev) {
+ ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ dev_put(dev);
+@@ -224,7 +225,7 @@
+ if (pac->acl_ifindex != prev_index) {
+ if (dev)
+ dev_put(dev);
+- dev = dev_get_by_index(pac->acl_ifindex);
++ dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ prev_index = pac->acl_ifindex;
+ }
+ if (dev)
+@@ -429,7 +430,7 @@
+ if (dev)
+ return ipv6_chk_acast_dev(dev, addr);
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev)
++ for_each_netdev(&init_net, dev)
+ if (ipv6_chk_acast_dev(dev, addr)) {
+ found = 1;
+ break;
+@@ -453,7 +454,7 @@
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ state->idev = NULL;
+- for_each_netdev(state->dev) {
++ for_each_netdev(&init_net, state->dev) {
+ struct inet6_dev *idev;
+ idev = in6_dev_get(state->dev);
+ if (!idev)
+@@ -579,7 +580,7 @@
+
+ int __init ac6_proc_init(void)
+ {
+- if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
++ if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+@@ -587,7 +588,7 @@
+
+ void ac6_proc_exit(void)
+ {
+- proc_net_remove("anycast6");
++ proc_net_remove(&init_net, "anycast6");
+ }
+ #endif
+
+diff -Nurb linux-2.6.22-570/net/ipv6/datagram.c linux-2.6.22-591/net/ipv6/datagram.c
+--- linux-2.6.22-570/net/ipv6/datagram.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/datagram.c 2007-12-21 15:36:15.000000000 -0500
+@@ -60,6 +60,7 @@
+ return -EAFNOSUPPORT;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+@@ -544,7 +545,7 @@
+ if (!src_info->ipi6_ifindex)
+ return -EINVAL;
+ else {
+- dev = dev_get_by_index(src_info->ipi6_ifindex);
++ dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex);
+ if (!dev)
+ return -ENODEV;
+ }
+@@ -658,7 +659,7 @@
+
+ switch (rthdr->type) {
+ case IPV6_SRCRT_TYPE_0:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ #endif
+ break;
+diff -Nurb linux-2.6.22-570/net/ipv6/esp6.c linux-2.6.22-591/net/ipv6/esp6.c
+--- linux-2.6.22-570/net/ipv6/esp6.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/esp6.c 2007-12-21 15:36:12.000000000 -0500
+@@ -421,3 +421,4 @@
+ module_exit(esp6_fini);
+
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
+diff -Nurb linux-2.6.22-570/net/ipv6/exthdrs.c linux-2.6.22-591/net/ipv6/exthdrs.c
+--- linux-2.6.22-570/net/ipv6/exthdrs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/exthdrs.c 2007-12-21 15:36:12.000000000 -0500
+@@ -42,7 +42,7 @@
+ #include <net/ndisc.h>
+ #include <net/ip6_route.h>
+ #include <net/addrconf.h>
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/xfrm.h>
+ #endif
+
+@@ -90,6 +90,7 @@
+ bad:
+ return -1;
+ }
++EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+ /*
+ * Parsing tlv encoded headers.
+@@ -196,7 +197,7 @@
+ Destination options header.
+ *****************************/
+
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+ {
+ struct sk_buff *skb = *skbp;
+@@ -270,7 +271,7 @@
+ #endif
+
+ static struct tlvtype_proc tlvprocdestopt_lst[] = {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ {
+ .type = IPV6_TLV_HAO,
+ .func = ipv6_dest_hao,
+@@ -283,7 +284,7 @@
+ {
+ struct sk_buff *skb = *skbp;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ __u16 dstbuf;
+ #endif
+ struct dst_entry *dst;
+@@ -298,7 +299,7 @@
+ }
+
+ opt->lastopt = opt->dst1 = skb_network_header_len(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ dstbuf = opt->dst1;
+ #endif
+
+@@ -308,7 +309,7 @@
+ skb = *skbp;
+ skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ opt = IP6CB(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ opt->nhoff = dstbuf;
+ #else
+ opt->nhoff = opt->dst1;
+@@ -427,7 +428,7 @@
+ looped_back:
+ if (hdr->segments_left == 0) {
+ switch (hdr->type) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ /* Silently discard type 2 header unless it was
+ * processed by the node itself
+@@ -463,7 +464,7 @@
+ return -1;
+ }
+ break;
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ /* Silently discard invalid RTH type 2 */
+ if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+@@ -520,7 +521,7 @@
+ addr += i - 1;
+
+ switch (hdr->type) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+ (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+diff -Nurb linux-2.6.22-570/net/ipv6/fib6_rules.c linux-2.6.22-591/net/ipv6/fib6_rules.c
+--- linux-2.6.22-570/net/ipv6/fib6_rules.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/fib6_rules.c 2007-12-21 15:36:15.000000000 -0500
+@@ -244,7 +244,7 @@
+ return -ENOBUFS;
+ }
+
+-static u32 fib6_rule_default_pref(void)
++static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
+ {
+ return 0x3FFF;
+ }
+@@ -277,10 +277,10 @@
+ list_add_tail(&local_rule.common.list, &fib6_rules);
+ list_add_tail(&main_rule.common.list, &fib6_rules);
+
+- fib_rules_register(&fib6_rules_ops);
++ fib_rules_register(&init_net, &fib6_rules_ops);
+ }
+
+ void fib6_rules_cleanup(void)
+ {
+- fib_rules_unregister(&fib6_rules_ops);
++ fib_rules_unregister(&init_net, &fib6_rules_ops);
+ }
+diff -Nurb linux-2.6.22-570/net/ipv6/icmp.c linux-2.6.22-591/net/ipv6/icmp.c
+--- linux-2.6.22-570/net/ipv6/icmp.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/icmp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -272,7 +272,7 @@
+ return 0;
+ }
+
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ static void mip6_addr_swap(struct sk_buff *skb)
+ {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+@@ -377,6 +377,7 @@
+ mip6_addr_swap(skb);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_ICMPV6;
+ ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+ if (saddr)
+@@ -495,6 +496,7 @@
+ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_ICMPV6;
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ if (saddr)
+diff -Nurb linux-2.6.22-570/net/ipv6/inet6_connection_sock.c linux-2.6.22-591/net/ipv6/inet6_connection_sock.c
+--- linux-2.6.22-570/net/ipv6/inet6_connection_sock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/inet6_connection_sock.c 2007-12-21 15:36:15.000000000 -0500
+@@ -149,6 +149,7 @@
+ struct in6_addr *final_p = NULL, final;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = sk->sk_protocol;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+diff -Nurb linux-2.6.22-570/net/ipv6/inet6_hashtables.c linux-2.6.22-591/net/ipv6/inet6_hashtables.c
+--- linux-2.6.22-570/net/ipv6/inet6_hashtables.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/inet6_hashtables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -61,7 +61,7 @@
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk;
+ const struct hlist_node *node;
+@@ -105,7 +105,7 @@
+
+ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+ const struct in6_addr *daddr,
+- const unsigned short hnum, const int dif)
++ const unsigned short hnum, const int dif, struct net *net)
+ {
+ struct sock *sk;
+ const struct hlist_node *node;
+@@ -113,7 +113,7 @@
+ int score, hiscore = 0;
+
+ read_lock(&hashinfo->lhash_lock);
+- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
++ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
+ if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+
+@@ -152,12 +152,12 @@
+ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+ const struct in6_addr *saddr, const __be16 sport,
+ const struct in6_addr *daddr, const __be16 dport,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk;
+
+ local_bh_disable();
+- sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
++ sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net);
+ local_bh_enable();
+
+ return sk;
+@@ -251,6 +251,7 @@
+ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+ {
++ struct net *net = sk->sk_net;
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+@@ -258,8 +259,8 @@
+ int ret;
+
+ if (snum == 0) {
+- const int low = sysctl_local_port_range[0];
+- const int high = sysctl_local_port_range[1];
++ const int low = sk->sk_net->sysctl_local_port_range[0];
++ const int high = sk->sk_net->sysctl_local_port_range[1];
+ const int range = high - low;
+ int i, port;
+ static u32 hint;
+@@ -270,7 +271,7 @@
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
++ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+@@ -278,7 +279,7 @@
+ * unique enough.
+ */
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+- if (tb->port == port) {
++ if ((tb->port == port) && (tb->net == net)) {
+ BUG_TRAP(!hlist_empty(&tb->owners));
+ if (tb->fastreuse >= 0)
+ goto next_port;
+@@ -291,7 +292,7 @@
+ }
+
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+- head, port);
++ head, net, port);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ break;
+@@ -326,7 +327,7 @@
+ goto out;
+ }
+
+- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
++ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
+ tb = inet_csk(sk)->icsk_bind_hash;
+ spin_lock_bh(&head->lock);
+
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_fib.c linux-2.6.22-591/net/ipv6/ip6_fib.c
+--- linux-2.6.22-570/net/ipv6/ip6_fib.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/ip6_fib.c 2007-12-21 15:36:15.000000000 -0500
+@@ -361,6 +361,7 @@
+
+ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ unsigned int h, s_h;
+ unsigned int e = 0, s_e;
+ struct rt6_rtnl_dump_arg arg;
+@@ -369,6 +370,9 @@
+ struct hlist_node *node;
+ int res = 0;
+
++ if (net != &init_net)
++ return 0;
++
+ s_h = cb->args[0];
+ s_e = cb->args[1];
+
+@@ -1311,6 +1315,11 @@
+
+ static int fib6_clean_node(struct fib6_walker_t *w)
+ {
++ struct nl_info info = {
++ .nlh = NULL,
++ .pid = 0,
++ .net = &init_net,
++ };
+ int res;
+ struct rt6_info *rt;
+ struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+@@ -1319,7 +1328,7 @@
+ res = c->func(rt, c->arg);
+ if (res < 0) {
+ w->leaf = rt;
+- res = fib6_del(rt, NULL);
++ res = fib6_del(rt, &info);
+ if (res) {
+ #if RT6_DEBUG >= 2
+ printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_flowlabel.c linux-2.6.22-591/net/ipv6/ip6_flowlabel.c
+--- linux-2.6.22-570/net/ipv6/ip6_flowlabel.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/ip6_flowlabel.c 2007-12-21 15:36:15.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/seq_file.h>
+
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+
+ #include <net/ipv6.h>
+ #include <net/ndisc.h>
+@@ -309,6 +310,7 @@
+
+ msg.msg_controllen = olen;
+ msg.msg_control = (void*)(fl->opt+1);
++ flowi.fl_net = &init_net;
+ flowi.oif = 0;
+
+ err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+@@ -690,7 +692,7 @@
+ void ip6_flowlabel_init(void)
+ {
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
++ proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+ #endif
+ }
+
+@@ -698,6 +700,6 @@
+ {
+ del_timer(&ip6_fl_gc_timer);
+ #ifdef CONFIG_PROC_FS
+- proc_net_remove("ip6_flowlabel");
++ proc_net_remove(&init_net, "ip6_flowlabel");
+ #endif
+ }
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_input.c linux-2.6.22-591/net/ipv6/ip6_input.c
+--- linux-2.6.22-570/net/ipv6/ip6_input.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/ip6_input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -61,6 +61,11 @@
+ u32 pkt_len;
+ struct inet6_dev *idev;
+
++ if (dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
++
+ if (skb->pkt_type == PACKET_OTHERHOST) {
+ kfree_skb(skb);
+ return 0;
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_output.c linux-2.6.22-591/net/ipv6/ip6_output.c
+--- linux-2.6.22-570/net/ipv6/ip6_output.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/ip6_output.c 2007-12-21 15:36:15.000000000 -0500
+@@ -423,7 +423,7 @@
+
+ /* XXX: idev->cnf.proxy_ndp? */
+ if (ipv6_devconf.proxy_ndp &&
+- pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
++ pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
+ int proxied = ip6_forward_proxy_check(skb);
+ if (proxied > 0)
+ return ip6_input(skb);
+@@ -543,7 +543,7 @@
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ break;
+ #endif
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_tunnel.c linux-2.6.22-591/net/ipv6/ip6_tunnel.c
+--- linux-2.6.22-570/net/ipv6/ip6_tunnel.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/ip6_tunnel.c 2007-12-21 15:36:15.000000000 -0500
+@@ -235,7 +235,7 @@
+ int i;
+ for (i = 1; i < IP6_TNL_MAX; i++) {
+ sprintf(name, "ip6tnl%d", i);
+- if (__dev_get_by_name(name) == NULL)
++ if (__dev_get_by_name(&init_net, name) == NULL)
+ break;
+ }
+ if (i == IP6_TNL_MAX)
+@@ -651,7 +651,7 @@
+ struct net_device *ldev = NULL;
+
+ if (p->link)
+- ldev = dev_get_by_index(p->link);
++ ldev = dev_get_by_index(&init_net, p->link);
+
+ if ((ipv6_addr_is_multicast(&p->laddr) ||
+ likely(ipv6_chk_addr(&p->laddr, ldev, 0))) &&
+@@ -787,7 +787,7 @@
+ struct net_device *ldev = NULL;
+
+ if (p->link)
+- ldev = dev_get_by_index(p->link);
++ ldev = dev_get_by_index(&init_net, p->link);
+
+ if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0)))
+ printk(KERN_WARNING
+diff -Nurb linux-2.6.22-570/net/ipv6/ipcomp6.c linux-2.6.22-591/net/ipv6/ipcomp6.c
+--- linux-2.6.22-570/net/ipv6/ipcomp6.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/ipcomp6.c 2007-12-21 15:36:12.000000000 -0500
+@@ -501,4 +501,4 @@
+ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+ MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+
+-
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
+diff -Nurb linux-2.6.22-570/net/ipv6/ipv6_sockglue.c linux-2.6.22-591/net/ipv6/ipv6_sockglue.c
+--- linux-2.6.22-570/net/ipv6/ipv6_sockglue.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/ipv6_sockglue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -123,7 +123,7 @@
+ struct ipv6hdr *ipv6h;
+ struct inet6_protocol *ops;
+
+- if (!(features & NETIF_F_HW_CSUM))
++ if (!(features & NETIF_F_V6_CSUM))
+ features &= ~NETIF_F_SG;
+
+ if (unlikely(skb_shinfo(skb)->gso_type &
+@@ -417,7 +417,7 @@
+ struct ipv6_rt_hdr *rthdr = opt->srcrt;
+ switch (rthdr->type) {
+ case IPV6_SRCRT_TYPE_0:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPV6_SRCRT_TYPE_2:
+ #endif
+ break;
+@@ -463,6 +463,7 @@
+ struct flowi fl;
+ int junk;
+
++ fl.fl_net = &init_net;
+ fl.fl6_flowlabel = 0;
+ fl.oif = sk->sk_bound_dev_if;
+
+@@ -547,7 +548,7 @@
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+ goto e_inval;
+
+- if (__dev_get_by_index(val) == NULL) {
++ if (__dev_get_by_index(&init_net, val) == NULL) {
+ retv = -ENODEV;
+ break;
+ }
+diff -Nurb linux-2.6.22-570/net/ipv6/mcast.c linux-2.6.22-591/net/ipv6/mcast.c
+--- linux-2.6.22-570/net/ipv6/mcast.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/mcast.c 2007-12-21 15:36:15.000000000 -0500
+@@ -51,6 +51,7 @@
+
+ #include <net/sock.h>
+ #include <net/snmp.h>
++#include <net/net_namespace.h>
+
+ #include <net/ipv6.h>
+ #include <net/protocol.h>
+@@ -214,7 +215,7 @@
+ dst_release(&rt->u.dst);
+ }
+ } else
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(&init_net, ifindex);
+
+ if (dev == NULL) {
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+@@ -265,7 +266,7 @@
+ *lnk = mc_lst->next;
+ write_unlock_bh(&ipv6_sk_mc_lock);
+
+- if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
++ if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) {
+ struct inet6_dev *idev = in6_dev_get(dev);
+
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
+@@ -300,7 +301,7 @@
+ dst_release(&rt->u.dst);
+ }
+ } else
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(&init_net, ifindex);
+
+ if (!dev)
+ return NULL;
+@@ -331,7 +332,7 @@
+ np->ipv6_mc_list = mc_lst->next;
+ write_unlock_bh(&ipv6_sk_mc_lock);
+
+- dev = dev_get_by_index(mc_lst->ifindex);
++ dev = dev_get_by_index(&init_net, mc_lst->ifindex);
+ if (dev) {
+ struct inet6_dev *idev = in6_dev_get(dev);
+
+@@ -2332,7 +2333,7 @@
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ state->idev = NULL;
+- for_each_netdev(state->dev) {
++ for_each_netdev(&init_net, state->dev) {
+ struct inet6_dev *idev;
+ idev = in6_dev_get(state->dev);
+ if (!idev)
+@@ -2476,7 +2477,7 @@
+
+ state->idev = NULL;
+ state->im = NULL;
+- for_each_netdev(state->dev) {
++ for_each_netdev(&init_net, state->dev) {
+ struct inet6_dev *idev;
+ idev = in6_dev_get(state->dev);
+ if (unlikely(idev == NULL))
+@@ -2658,8 +2659,8 @@
+ np->hop_limit = 1;
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+- proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
++ proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops);
++ proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+ #endif
+
+ return 0;
+@@ -2671,7 +2672,7 @@
+ igmp6_socket = NULL; /* for safety */
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_remove("mcfilter6");
+- proc_net_remove("igmp6");
++ proc_net_remove(&init_net, "mcfilter6");
++ proc_net_remove(&init_net, "igmp6");
+ #endif
+ }
+diff -Nurb linux-2.6.22-570/net/ipv6/mip6.c linux-2.6.22-591/net/ipv6/mip6.c
+--- linux-2.6.22-570/net/ipv6/mip6.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/mip6.c 2007-12-21 15:36:12.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <net/sock.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_checksum.h>
++#include <net/rawv6.h>
+ #include <net/xfrm.h>
+ #include <net/mip6.h>
+
+@@ -86,7 +87,7 @@
+ return len;
+ }
+
+-int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
++static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+ {
+ struct ip6_mh *mh;
+
+@@ -471,7 +472,7 @@
+ .remote_addr = mip6_xfrm_addr,
+ };
+
+-int __init mip6_init(void)
++static int __init mip6_init(void)
+ {
+ printk(KERN_INFO "Mobile IPv6\n");
+
+@@ -483,18 +484,35 @@
+ printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+ goto mip6_rthdr_xfrm_fail;
+ }
++ if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
++ printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__);
++ goto mip6_rawv6_mh_fail;
++ }
++
++
+ return 0;
+
++ mip6_rawv6_mh_fail:
++ xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+ mip6_rthdr_xfrm_fail:
+ xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
+ return -EAGAIN;
+ }
+
+-void __exit mip6_fini(void)
++static void __exit mip6_fini(void)
+ {
++ if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
++ printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__);
+ if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+ if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+ printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
+ }
++
++module_init(mip6_init);
++module_exit(mip6_fini);
++
++MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
+diff -Nurb linux-2.6.22-570/net/ipv6/ndisc.c linux-2.6.22-591/net/ipv6/ndisc.c
+--- linux-2.6.22-570/net/ipv6/ndisc.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/ndisc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -418,6 +418,7 @@
+ int oif)
+ {
+ memset(fl, 0, sizeof(*fl));
++ fl->fl_net = &init_net;
+ ipv6_addr_copy(&fl->fl6_src, saddr);
+ ipv6_addr_copy(&fl->fl6_dst, daddr);
+ fl->proto = IPPROTO_ICMPV6;
+@@ -760,7 +761,7 @@
+ if (ipv6_chk_acast_addr(dev, &msg->target) ||
+ (idev->cnf.forwarding &&
+ (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+- (pneigh = pneigh_lookup(&nd_tbl,
++ (pneigh = pneigh_lookup(&nd_tbl, &init_net,
+ &msg->target, dev, 0)) != NULL)) {
+ if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
+ skb->pkt_type != PACKET_HOST &&
+@@ -901,7 +902,7 @@
+ */
+ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+ ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+- pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
++ pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
+ /* XXX: idev->cnf.prixy_ndp */
+ goto out;
+ }
+@@ -1525,6 +1526,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_CHANGEADDR:
+ neigh_changeaddr(&nd_tbl, dev);
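
ndisc.c shows the flow-lookup side of the conversion: this tree adds an fl_net member to struct flowi, and every site that memset()s a flowi must now tag it with a namespace before the route lookup, alongside pneigh_lookup() gaining a struct net argument and the netdev notifier returning NOTIFY_DONE for devices from other namespaces. The flowi idiom in isolation (saddr, daddr and oif stand in for the caller's values):

    struct flowi fl;

    memset(&fl, 0, sizeof(fl));
    fl.fl_net = &init_net;          /* namespace the lookup runs in */
    fl.proto = IPPROTO_ICMPV6;
    fl.oif = oif;
    ipv6_addr_copy(&fl.fl6_src, saddr);
    ipv6_addr_copy(&fl.fl6_dst, daddr);
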
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/sysctl.h>
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+@@ -546,6 +547,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ ipq_dev_drop(dev->ifindex);
+@@ -565,7 +569,7 @@
+ if (event == NETLINK_URELEASE &&
+ n->protocol == NETLINK_IP6_FW && n->pid) {
+ write_lock_bh(&queue_lock);
+- if (n->pid == peer_pid)
++ if ((n->net == &init_net) && (n->pid == peer_pid))
+ __ipq_reset();
+ write_unlock_bh(&queue_lock);
+ }
+@@ -657,14 +661,14 @@
+ struct proc_dir_entry *proc;
+
+ netlink_register_notifier(&ipq_nl_notifier);
+- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
+- THIS_MODULE);
++ ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk,
++ NULL, THIS_MODULE);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
++ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ if (proc)
+ proc->owner = THIS_MODULE;
+ else {
+@@ -685,7 +689,7 @@
+ cleanup_sysctl:
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+- proc_net_remove(IPQ_PROC_FS_NAME);
++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+ cleanup_ipqnl:
+ sock_release(ipqnl->sk_socket);
+@@ -705,7 +709,7 @@
+
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+- proc_net_remove(IPQ_PROC_FS_NAME);
++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+
+ sock_release(ipqnl->sk_socket);
+ mutex_lock(&ipqnl_mutex);
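
ip6_queue keeps its single global netlink socket but pins it to the initial namespace at creation, and its NETLINK_URELEASE notifier now also compares the releasing socket's namespace, so a socket in another namespace that happens to share peer_pid can no longer reset the queue state. A sketch of the creation call per the netlink_kernel_create() signature this patch uses (the input callback name is illustrative):

    struct sock *nl;

    nl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
                               example_rcv_sk, NULL, THIS_MODULE);
    if (nl == NULL)
            return -ENOMEM;         /* no kernel netlink socket, bail out */
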
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -906,7 +906,7 @@
+ int ret;
+ struct xt_table *t;
+
+- t = xt_find_table_lock(AF_INET6, entries->name);
++ t = xt_find_table_lock(&init_net, AF_INET6, entries->name);
+ if (t && !IS_ERR(t)) {
+ struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n", private->number);
+@@ -972,7 +972,7 @@
+
+ duprintf("ip_tables: Translated table\n");
+
+- t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
++ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name),
+ "ip6table_%s", tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -1073,7 +1073,7 @@
+ goto free;
+ }
+
+- t = xt_find_table_lock(AF_INET6, tmp.name);
++ t = xt_find_table_lock(&init_net, AF_INET6, tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+@@ -1109,6 +1109,9 @@
+ {
+ int ret;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -1134,6 +1137,9 @@
+ {
+ int ret;
+
++ if (sk->sk_net != &init_net)
++ return -ENOPROTOOPT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+@@ -1155,7 +1161,7 @@
+ }
+ name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+
+- t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
++ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name),
+ "ip6table_%s", name);
+ if (t && !IS_ERR(t)) {
+ struct ip6t_getinfo info;
+@@ -1259,7 +1265,7 @@
+ return ret;
+ }
+
+- ret = xt_register_table(table, &bootstrap, newinfo);
++ ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
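
ip6_tables guards its user-facing edges: both sockopt entry points reject sockets from other namespaces with -ENOPROTOOPT before even checking CAP_NET_ADMIN, and every xt_find_table_lock()/xt_register_table() call names init_net explicitly. The guard as a standalone sketch (example_set_ctl is a made-up name):

    static int example_set_ctl(struct sock *sk, int cmd,
                               void __user *user, unsigned int len)
    {
            if (sk->sk_net != &init_net)
                    return -ENOPROTOOPT;    /* tables live in init_net only */

            if (!capable(CAP_NET_ADMIN))
                    return -EPERM;

            /* ... dispatch on cmd as before ... */
            return 0;
    }
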
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c 2007-12-21 15:36:15.000000000 -0500
+@@ -92,6 +92,7 @@
+ }
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
+ ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
+@@ -172,7 +173,7 @@
+ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
+ {
+ if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
+- skb_in->dev = &loopback_dev;
++ skb_in->dev = &init_net.loopback_dev;
+
+ icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
+ }
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -65,6 +65,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ }
+
+@@ -75,6 +79,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ #if 0
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c 2007-12-21 15:36:15.000000000 -0500
+@@ -79,6 +79,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ }
+
+@@ -95,6 +99,10 @@
+ u_int8_t hop_limit;
+ u_int32_t flowlabel, mark;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ #if 0
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c
+--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c 2007-12-21 15:36:15.000000000 -0500
+@@ -57,6 +57,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+ }
+
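
The three ip6table_* files above all receive the identical hook prologue. Exactly one of in/out is guaranteed non-NULL at any given hook point, so (in ? in : out)->nd_net is the namespace the packet is traversing, and packets from other namespaces are accepted untouched rather than run through tables they do not own. The check could be factored into a helper; a sketch (packet_in_init_net is not in the patch, purely illustrative):

    static inline int packet_in_init_net(const struct net_device *in,
                                         const struct net_device *out)
    {
            /* in is set on input/forward hooks, out on output hooks */
            return (in ? in : out)->nd_net == &init_net;
    }

    /* at the top of each nf_hookfn: */
    if (!packet_in_init_net(in, out))
            return NF_ACCEPT;
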
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+--- linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-12-21 15:36:15.000000000 -0500
+@@ -167,6 +167,10 @@
+ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+@@ -203,6 +207,10 @@
+ {
+ struct sk_buff *reasm;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* Previously seen (loopback)? */
+ if ((*pskb)->nfct)
+ return NF_ACCEPT;
+@@ -231,6 +239,10 @@
+ {
+ struct sk_buff *reasm = (*pskb)->nfct_reasm;
+
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* This packet is fragmented and has reassembled packet. */
+ if (reasm) {
+ /* Reassembled packet isn't parsed yet ? */
+@@ -256,6 +268,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+ if (net_ratelimit())
+diff -Nurb linux-2.6.22-570/net/ipv6/netfilter.c linux-2.6.22-591/net/ipv6/netfilter.c
+--- linux-2.6.22-570/net/ipv6/netfilter.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/netfilter.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,6 +14,7 @@
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ .mark = skb->mark,
+ .nl_u =
+diff -Nurb linux-2.6.22-570/net/ipv6/proc.c linux-2.6.22-591/net/ipv6/proc.c
+--- linux-2.6.22-570/net/ipv6/proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <net/tcp.h>
+ #include <net/transp_v6.h>
+ #include <net/ipv6.h>
++#include <net/net_namespace.h>
+
+ static struct proc_dir_entry *proc_net_devsnmp6;
+
+@@ -231,22 +232,22 @@
+ {
+ int rc = 0;
+
+- if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
++ if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+ goto proc_snmp6_fail;
+
+- proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
++ proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net);
+ if (!proc_net_devsnmp6)
+ goto proc_dev_snmp6_fail;
+
+- if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
++ if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops))
+ goto proc_sockstat6_fail;
+ out:
+ return rc;
+
+ proc_sockstat6_fail:
+- proc_net_remove("dev_snmp6");
++ proc_net_remove(&init_net, "dev_snmp6");
+ proc_dev_snmp6_fail:
+- proc_net_remove("snmp6");
++ proc_net_remove(&init_net, "snmp6");
+ proc_snmp6_fail:
+ rc = -ENOMEM;
+ goto out;
+@@ -254,8 +255,8 @@
+
+ void ipv6_misc_proc_exit(void)
+ {
+- proc_net_remove("sockstat6");
+- proc_net_remove("dev_snmp6");
+- proc_net_remove("snmp6");
++ proc_net_remove(&init_net, "sockstat6");
++ proc_net_remove(&init_net, "dev_snmp6");
++ proc_net_remove(&init_net, "snmp6");
+ }
+
+diff -Nurb linux-2.6.22-570/net/ipv6/raw.c linux-2.6.22-591/net/ipv6/raw.c
+--- linux-2.6.22-570/net/ipv6/raw.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/raw.c 2007-12-21 15:36:15.000000000 -0500
+@@ -49,7 +49,8 @@
+ #include <net/udp.h>
+ #include <net/inet_common.h>
+ #include <net/tcp_states.h>
+-#ifdef CONFIG_IPV6_MIP6
++#include <net/net_namespace.h>
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/mip6.h>
+ #endif
+
+@@ -137,6 +138,28 @@
+ return 0;
+ }
+
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
++static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
++
++int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
++ struct sk_buff *skb))
++{
++ rcu_assign_pointer(mh_filter, filter);
++ return 0;
++}
++EXPORT_SYMBOL(rawv6_mh_filter_register);
++
++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
++ struct sk_buff *skb))
++{
++ rcu_assign_pointer(mh_filter, NULL);
++ synchronize_rcu();
++ return 0;
++}
++EXPORT_SYMBOL(rawv6_mh_filter_unregister);
++
++#endif
++
+ /*
+ * demultiplex raw sockets.
+ * (should consider queueing the skb in the sock receive_queue
+@@ -178,16 +201,22 @@
+ case IPPROTO_ICMPV6:
+ filtered = icmpv6_filter(sk, skb);
+ break;
+-#ifdef CONFIG_IPV6_MIP6
++
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPPROTO_MH:
++ {
+ /* XXX: To validate MH only once for each packet,
+ * this is placed here. It should be after checking
+ * xfrm policy, however it doesn't. The checking xfrm
+ * policy is placed in rawv6_rcv() because it is
+ * required for each socket.
+ */
+- filtered = mip6_mh_filter(sk, skb);
++ int (*filter)(struct sock *sock, struct sk_buff *skb);
++
++ filter = rcu_dereference(mh_filter);
++ filtered = filter ? filter(sk, skb) : 0;
+ break;
++ }
+ #endif
+ default:
+ filtered = 0;
+@@ -254,7 +283,7 @@
+ if (!sk->sk_bound_dev_if)
+ goto out;
+
+- dev = dev_get_by_index(sk->sk_bound_dev_if);
++ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
+ goto out;
+@@ -611,9 +640,7 @@
+ struct iovec *iov;
+ u8 __user *type = NULL;
+ u8 __user *code = NULL;
+-#ifdef CONFIG_IPV6_MIP6
+ u8 len = 0;
+-#endif
+ int probed = 0;
+ int i;
+
+@@ -646,7 +673,6 @@
+ probed = 1;
+ }
+ break;
+-#ifdef CONFIG_IPV6_MIP6
+ case IPPROTO_MH:
+ if (iov->iov_base && iov->iov_len < 1)
+ break;
+@@ -660,7 +686,6 @@
+ len += iov->iov_len;
+
+ break;
+-#endif
+ default:
+ probed = 1;
+ break;
+@@ -704,6 +729,7 @@
+ * Get and verify the address.
+ */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+
+ if (sin6) {
+ if (addr_len < SIN6_LEN_RFC2133)
+@@ -1291,13 +1317,13 @@
+
+ int __init raw6_proc_init(void)
+ {
+- if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
++ if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops))
+ return -ENOMEM;
+ return 0;
+ }
+
+ void raw6_proc_exit(void)
+ {
+- proc_net_remove("raw6");
++ proc_net_remove(&init_net, "raw6");
+ }
+ #endif /* CONFIG_PROC_FS */
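
The raw.c change is what makes the modular mip6 possible: the hard call to mip6_mh_filter() becomes an RCU-protected function pointer. Registration publishes the pointer with the proper memory barrier, unregistration clears it and waits in synchronize_rcu() until no CPU can still be executing the old filter (so the module text may safely go away), and the receive path samples it with rcu_dereference(). The pattern reduced to its essentials (names are illustrative):

    static int (*hook)(struct sock *sk, struct sk_buff *skb);

    void hook_register(int (*fn)(struct sock *, struct sk_buff *))
    {
            rcu_assign_pointer(hook, fn);   /* publish after fn is ready */
    }

    void hook_unregister(void)
    {
            rcu_assign_pointer(hook, NULL);
            synchronize_rcu();              /* no reader still inside fn */
    }

    /* fast path */
    int (*fn)(struct sock *, struct sk_buff *) = rcu_dereference(hook);
    int filtered = fn ? fn(sk, skb) : 0;
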
+diff -Nurb linux-2.6.22-570/net/ipv6/reassembly.c linux-2.6.22-591/net/ipv6/reassembly.c
+--- linux-2.6.22-570/net/ipv6/reassembly.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/reassembly.c 2007-12-21 15:36:15.000000000 -0500
+@@ -301,7 +301,7 @@
+
+ fq_kill(fq);
+
+- dev = dev_get_by_index(fq->iif);
++ dev = dev_get_by_index(&init_net, fq->iif);
+ if (!dev)
+ goto out;
+
+diff -Nurb linux-2.6.22-570/net/ipv6/route.c linux-2.6.22-591/net/ipv6/route.c
+--- linux-2.6.22-570/net/ipv6/route.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -56,6 +56,7 @@
+ #include <net/xfrm.h>
+ #include <net/netevent.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+
+ #include <asm/uaccess.h>
+
+@@ -137,7 +138,7 @@
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+- .dev = &loopback_dev,
++ .dev = NULL,
+ .obsolete = -1,
+ .error = -ENETUNREACH,
+ .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -163,7 +164,7 @@
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+- .dev = &loopback_dev,
++ .dev = NULL,
+ .obsolete = -1,
+ .error = -EACCES,
+ .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -183,7 +184,7 @@
+ .dst = {
+ .__refcnt = ATOMIC_INIT(1),
+ .__use = 1,
+- .dev = &loopback_dev,
++ .dev = NULL,
+ .obsolete = -1,
+ .error = -EINVAL,
+ .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -223,8 +224,8 @@
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ struct inet6_dev *idev = rt->rt6i_idev;
+
+- if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
+- struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
++ if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) {
++ struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev);
+ if (loopback_idev != NULL) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+@@ -564,6 +565,7 @@
+ int oif, int strict)
+ {
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .oif = oif,
+ .nl_u = {
+ .ip6_u = {
+@@ -611,7 +613,12 @@
+
+ int ip6_ins_rt(struct rt6_info *rt)
+ {
+- return __ip6_ins_rt(rt, NULL);
++ struct nl_info info = {
++ .nlh = NULL,
++ .pid = 0,
++ .net = &init_net,
++ };
++ return __ip6_ins_rt(rt, &info);
+ }
+
+ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
+@@ -742,6 +749,7 @@
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct flowi fl = {
++ .fl_net = &init_net,
+ .iif = skb->dev->ifindex,
+ .nl_u = {
+ .ip6_u = {
+@@ -1129,7 +1137,7 @@
+ #endif
+ if (cfg->fc_ifindex) {
+ err = -ENODEV;
+- dev = dev_get_by_index(cfg->fc_ifindex);
++ dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
+ if (!dev)
+ goto out;
+ idev = in6_dev_get(dev);
+@@ -1187,12 +1195,12 @@
+ if ((cfg->fc_flags & RTF_REJECT) ||
+ (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
+ /* hold loopback dev/idev if we haven't done so. */
+- if (dev != &loopback_dev) {
++ if (dev != &init_net.loopback_dev) {
+ if (dev) {
+ dev_put(dev);
+ in6_dev_put(idev);
+ }
+- dev = &loopback_dev;
++ dev = &init_net.loopback_dev;
+ dev_hold(dev);
+ idev = in6_dev_get(dev);
+ if (!idev) {
+@@ -1333,7 +1341,12 @@
+
+ int ip6_del_rt(struct rt6_info *rt)
+ {
+- return __ip6_del_rt(rt, NULL);
++ struct nl_info info = {
++ .nlh = NULL,
++ .pid = 0,
++ .net = &init_net,
++ };
++ return __ip6_del_rt(rt, &info);
+ }
+
+ static int ip6_route_del(struct fib6_config *cfg)
+@@ -1444,6 +1457,7 @@
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip6rd_flowi rdfl = {
+ .fl = {
++ .fl_net = &init_net,
+ .oif = dev->ifindex,
+ .nl_u = {
+ .ip6_u = {
+@@ -1896,13 +1910,13 @@
+ if (rt == NULL)
+ return ERR_PTR(-ENOMEM);
+
+- dev_hold(&loopback_dev);
++ dev_hold(&init_net.loopback_dev);
+ in6_dev_hold(idev);
+
+ rt->u.dst.flags = DST_HOST;
+ rt->u.dst.input = ip6_input;
+ rt->u.dst.output = ip6_output;
+- rt->rt6i_dev = &loopback_dev;
++ rt->rt6i_dev = &init_net.loopback_dev;
+ rt->rt6i_idev = idev;
+ rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+ rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+@@ -2033,6 +2047,7 @@
+
+ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.nlh = nlh;
++ cfg->fc_nlinfo.net = skb->sk->sk_net;
+
+ if (tb[RTA_GATEWAY]) {
+ nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+@@ -2078,9 +2093,13 @@
+
+ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib6_config cfg;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+@@ -2090,9 +2109,13 @@
+
+ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct fib6_config cfg;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = rtm_to_fib6_config(skb, nlh, &cfg);
+ if (err < 0)
+ return err;
+@@ -2227,6 +2250,7 @@
+
+ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+ {
++ struct net *net = in_skb->sk->sk_net;
+ struct nlattr *tb[RTA_MAX+1];
+ struct rt6_info *rt;
+ struct sk_buff *skb;
+@@ -2234,12 +2258,16 @@
+ struct flowi fl;
+ int err, iif = 0;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+
+ if (tb[RTA_SRC]) {
+ if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+@@ -2263,7 +2291,7 @@
+
+ if (iif) {
+ struct net_device *dev;
+- dev = __dev_get_by_index(iif);
++ dev = __dev_get_by_index(&init_net, iif);
+ if (!dev) {
+ err = -ENODEV;
+ goto errout;
+@@ -2293,7 +2321,7 @@
+ goto errout;
+ }
+
+- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ errout:
+ return err;
+ }
+@@ -2301,17 +2329,10 @@
+ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+ {
+ struct sk_buff *skb;
+- u32 pid = 0, seq = 0;
+- struct nlmsghdr *nlh = NULL;
++ u32 pid = info->pid, seq = info->nlh ? info->nlh->nlmsg_seq : 0;
++ struct nlmsghdr *nlh = info->nlh;
+ int err = -ENOBUFS;
+
+- if (info) {
+- pid = info->pid;
+- nlh = info->nlh;
+- if (nlh)
+- seq = nlh->nlmsg_seq;
+- }
+-
+ skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ if (skb == NULL)
+ goto errout;
+@@ -2323,10 +2344,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
++ err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
+ }
+
+ /*
+@@ -2558,13 +2579,19 @@
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
+
++ /* Perform the initialization we can't perform at compile time */
++ ip6_null_entry.u.dst.dev = &init_net.loopback_dev;
++#ifdef CONFIG_IPV6_MULTIPLE_TABLES
++ ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev;
++ ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev;
++#endif
+ fib6_init();
+ #ifdef CONFIG_PROC_FS
+- p = proc_net_create("ipv6_route", 0, rt6_proc_info);
++ p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
+ if (p)
+ p->owner = THIS_MODULE;
+
+- proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
++ proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+ #endif
+ #ifdef CONFIG_XFRM
+ xfrm6_init();
+@@ -2584,8 +2611,8 @@
+ fib6_rules_cleanup();
+ #endif
+ #ifdef CONFIG_PROC_FS
+- proc_net_remove("ipv6_route");
+- proc_net_remove("rt6_stats");
++ proc_net_remove(&init_net, "ipv6_route");
++ proc_net_remove(&init_net, "rt6_stats");
+ #endif
+ #ifdef CONFIG_XFRM
+ xfrm6_fini();
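
route.c carries two separate fixups. The statically initialized error dsts lose their compile-time loopback device: loopback_dev is now a member of struct net, so this tree leaves .dev NULL in the initializer and patches in &init_net.loopback_dev at runtime in ip6_route_init(). Independently, ip6_ins_rt()/ip6_del_rt() now always hand __ip6_ins_rt()/__ip6_del_rt() a populated nl_info, which is what lets inet6_rt_notify() shed its info == NULL checks. The caller-side shape, per the nl_info layout this patch uses:

    struct nl_info info = {
            .nlh = NULL,            /* not triggered by a netlink request */
            .pid = 0,
            .net = &init_net,       /* namespace to notify in */
    };

    return __ip6_ins_rt(rt, &info);
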
+diff -Nurb linux-2.6.22-570/net/ipv6/sit.c linux-2.6.22-591/net/ipv6/sit.c
+--- linux-2.6.22-570/net/ipv6/sit.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/sit.c 2007-12-21 15:36:15.000000000 -0500
+@@ -167,7 +167,7 @@
+ int i;
+ for (i=1; i<100; i++) {
+ sprintf(name, "sit%d", i);
+- if (__dev_get_by_name(name) == NULL)
++ if (__dev_get_by_name(&init_net, name) == NULL)
+ break;
+ }
+ if (i==100)
+@@ -283,6 +283,9 @@
+ struct sk_buff *skb2;
+ struct rt6_info *rt6i;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (len < hlen + sizeof(struct ipv6hdr))
+ return;
+ iph6 = (struct ipv6hdr*)(dp + hlen);
+@@ -369,6 +372,10 @@
+ struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto out;
+
+@@ -474,7 +481,8 @@
+ }
+
+ {
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = &init_net,
++ .nl_u = { .ip4_u =
+ { .daddr = dst,
+ .saddr = tiph->saddr,
+ .tos = RT_TOS(tos) } },
+@@ -745,7 +753,8 @@
+ memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+ if (iph->daddr) {
+- struct flowi fl = { .nl_u = { .ip4_u =
++ struct flowi fl = { .fl_net = &init_net,
++ .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .tos = RT_TOS(iph->tos) } },
+@@ -760,7 +769,7 @@
+ }
+
+ if (!tdev && tunnel->parms.link)
+- tdev = __dev_get_by_index(tunnel->parms.link);
++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+
+ if (tdev) {
+ dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
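
sit.c collects three flavors of the conversion in one driver: name and index lookups (__dev_get_by_name(), __dev_get_by_index()) gain the &init_net argument, the tunnel's flowi initializers are tagged with .fl_net, and both packet entry points grow namespace guards. Note the asymmetry in those guards: the err handler simply returns, but the rcv path owns the skb and must free it. Sketched (example_rcv is a stand-in name):

    static int example_rcv(struct sk_buff *skb)
    {
            if (skb->dev->nd_net != &init_net) {
                    kfree_skb(skb); /* rcv owns the skb; dropping means freeing */
                    return 0;
            }
            /* ... normal tunnel receive ... */
            return 0;
    }
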
+diff -Nurb linux-2.6.22-570/net/ipv6/tcp_ipv6.c linux-2.6.22-591/net/ipv6/tcp_ipv6.c
+--- linux-2.6.22-570/net/ipv6/tcp_ipv6.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/tcp_ipv6.c 2007-12-21 15:36:15.000000000 -0500
+@@ -143,6 +143,7 @@
+ return(-EAFNOSUPPORT);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+@@ -330,6 +331,7 @@
+ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __be32 info)
+ {
++ struct net *net = skb->dev->nd_net;
+ struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+ const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+ struct ipv6_pinfo *np;
+@@ -339,7 +341,7 @@
+ __u32 seq;
+
+ sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
+- th->source, skb->dev->ifindex);
++ th->source, skb->dev->ifindex, net);
+
+ if (sk == NULL) {
+ ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+@@ -388,6 +390,7 @@
+ for now.
+ */
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+@@ -481,6 +484,7 @@
+ int err = -1;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+ ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
+@@ -1066,6 +1070,7 @@
+ buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
+
+@@ -1167,6 +1172,7 @@
+ buff->csum = csum_partial((char *)t1, tot_len, 0);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
+
+@@ -1224,7 +1230,8 @@
+
+ nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+ th->source, &ipv6_hdr(skb)->daddr,
+- ntohs(th->dest), inet6_iif(skb));
++ ntohs(th->dest), inet6_iif(skb),
++ sk->sk_net);
+
+ if (nsk) {
+ if (nsk->sk_state != TCP_TIME_WAIT) {
+@@ -1414,6 +1421,7 @@
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+ if (opt && opt->srcrt) {
+@@ -1700,6 +1708,7 @@
+ static int tcp_v6_rcv(struct sk_buff **pskb)
+ {
+ struct sk_buff *skb = *pskb;
++ struct net *net = skb->dev->nd_net;
+ struct tcphdr *th;
+ struct sock *sk;
+ int ret;
+@@ -1736,7 +1745,7 @@
+
+ sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest),
+- inet6_iif(skb));
++ inet6_iif(skb), net);
+
+ if (!sk)
+ goto no_tcp_socket;
+@@ -1816,7 +1825,8 @@
+
+ sk2 = inet6_lookup_listener(&tcp_hashinfo,
+ &ipv6_hdr(skb)->daddr,
+- ntohs(th->dest), inet6_iif(skb));
++ ntohs(th->dest), inet6_iif(skb),
++ net);
+ if (sk2 != NULL) {
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+ inet_twsk_deschedule(tw, &tcp_death_row);
+@@ -2121,12 +2131,12 @@
+
+ int __init tcp6_proc_init(void)
+ {
+- return tcp_proc_register(&tcp6_seq_afinfo);
++ return tcp_proc_register(&init_net, &tcp6_seq_afinfo);
+ }
+
+ void tcp6_proc_exit(void)
+ {
+- tcp_proc_unregister(&tcp6_seq_afinfo);
++ tcp_proc_unregister(&init_net, &tcp6_seq_afinfo);
+ }
+ #endif
+
+diff -Nurb linux-2.6.22-570/net/ipv6/udp.c linux-2.6.22-591/net/ipv6/udp.c
+--- linux-2.6.22-570/net/ipv6/udp.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/udp.c 2007-12-21 15:36:15.000000000 -0500
+@@ -657,6 +657,7 @@
+ ulen += sizeof(struct udphdr);
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+
+ if (sin6) {
+ if (sin6->sin6_port == 0)
+@@ -967,11 +968,11 @@
+
+ int __init udp6_proc_init(void)
+ {
+- return udp_proc_register(&udp6_seq_afinfo);
++ return udp_proc_register(&init_net, &udp6_seq_afinfo);
+ }
+
+ void udp6_proc_exit(void) {
+- udp_proc_unregister(&udp6_seq_afinfo);
++ udp_proc_unregister(&init_net, &udp6_seq_afinfo);
+ }
+ #endif /* CONFIG_PROC_FS */
+
+diff -Nurb linux-2.6.22-570/net/ipv6/udplite.c linux-2.6.22-591/net/ipv6/udplite.c
+--- linux-2.6.22-570/net/ipv6/udplite.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/udplite.c 2007-12-21 15:36:15.000000000 -0500
+@@ -95,11 +95,11 @@
+
+ int __init udplite6_proc_init(void)
+ {
+- return udp_proc_register(&udplite6_seq_afinfo);
++ return udp_proc_register(&init_net, &udplite6_seq_afinfo);
+ }
+
+ void udplite6_proc_exit(void)
+ {
+- udp_proc_unregister(&udplite6_seq_afinfo);
++ udp_proc_unregister(&init_net, &udplite6_seq_afinfo);
+ }
+ #endif
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_policy.c linux-2.6.22-591/net/ipv6/xfrm6_policy.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_policy.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/ipv6/xfrm6_policy.c 2007-12-21 15:36:15.000000000 -0500
+@@ -18,7 +18,7 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/mip6.h>
+ #endif
+
+@@ -40,6 +40,7 @@
+ {
+ struct rt6_info *rt;
+ struct flowi fl_tunnel = {
++ .fl_net = &init_net,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *(struct in6_addr *)&daddr->a6,
+@@ -132,6 +133,7 @@
+ struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
+ struct rt6_info *rt = rt0;
+ struct flowi fl_tunnel = {
++ .fl_net = &init_net,
+ .nl_u = {
+ .ip6_u = {
+ .saddr = fl->fl6_src,
+@@ -278,6 +280,7 @@
+ u8 nexthdr = nh[IP6CB(skb)->nhoff];
+
+ memset(fl, 0, sizeof(struct flowi));
++ fl->fl_net = &init_net;
+ ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
+ ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+
+@@ -318,7 +321,7 @@
+ fl->proto = nexthdr;
+ return;
+
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ case IPPROTO_MH:
+ if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ struct ip6_mh *mh;
+@@ -375,7 +378,7 @@
+
+ xdst = (struct xfrm_dst *)dst;
+ if (xdst->u.rt6.rt6i_idev->dev == dev) {
+- struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
++ struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev);
+ BUG_ON(!loopback_idev);
+
+ do {
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_state.c linux-2.6.22-591/net/ipv6/xfrm6_state.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/xfrm6_state.c 2007-12-21 15:36:12.000000000 -0500
+@@ -65,7 +65,7 @@
+ goto end;
+
+ /* Rule 2: select MIPv6 RO or inbound trigger */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+@@ -130,7 +130,7 @@
+ goto end;
+
+ /* Rule 2: select MIPv6 RO or inbound trigger */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ for (i = 0; i < n; i++) {
+ if (src[i] &&
+ (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c 2007-12-21 15:36:12.000000000 -0500
+@@ -379,3 +379,4 @@
+ module_init(xfrm6_tunnel_init);
+ module_exit(xfrm6_tunnel_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
+diff -Nurb linux-2.6.22-570/net/ipx/af_ipx.c linux-2.6.22-591/net/ipx/af_ipx.c
+--- linux-2.6.22-570/net/ipx/af_ipx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipx/af_ipx.c 2007-12-21 15:36:15.000000000 -0500
+@@ -347,6 +347,9 @@
+ struct net_device *dev = ptr;
+ struct ipx_interface *i, *tmp;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event != NETDEV_DOWN && event != NETDEV_UP)
+ goto out;
+
+@@ -986,7 +989,7 @@
+ if (intrfc)
+ ipxitf_put(intrfc);
+
+- dev = dev_get_by_name(idef->ipx_device);
++ dev = dev_get_by_name(&init_net, idef->ipx_device);
+ rc = -ENODEV;
+ if (!dev)
+ goto out;
+@@ -1094,7 +1097,7 @@
+ if (!dlink_type)
+ goto out;
+
+- dev = __dev_get_by_name(idef->ipx_device);
++ dev = __dev_get_by_name(&init_net, idef->ipx_device);
+ rc = -ENODEV;
+ if (!dev)
+ goto out;
+@@ -1189,7 +1192,7 @@
+ if (copy_from_user(&ifr, arg, sizeof(ifr)))
+ break;
+ sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
+- dev = __dev_get_by_name(ifr.ifr_name);
++ dev = __dev_get_by_name(&init_net, ifr.ifr_name);
+ rc = -ENODEV;
+ if (!dev)
+ break;
+@@ -1360,11 +1363,14 @@
+ .obj_size = sizeof(struct ipx_sock),
+ };
+
+-static int ipx_create(struct socket *sock, int protocol)
++static int ipx_create(struct net *net, struct socket *sock, int protocol)
+ {
+ int rc = -ESOCKTNOSUPPORT;
+ struct sock *sk;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ /*
+ * SPX support is not anymore in the kernel sources. If you want to
+ * ressurrect it, completing it and making it understand shared skbs,
+@@ -1375,7 +1381,7 @@
+ goto out;
+
+ rc = -ENOMEM;
+- sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1);
++ sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1);
+ if (!sk)
+ goto out;
+ #ifdef IPX_REFCNT_DEBUG
+@@ -1644,6 +1650,9 @@
+ u16 ipx_pktsize;
+ int rc = 0;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ /* Not ours */
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
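
af_ipx shows the socket-family half of the API change: the net_proto_family create hook now receives the struct net that socket(2) was called in, families that are not yet namespace-aware refuse anything but init_net with -EAFNOSUPPORT, and the namespace is threaded on through sk_alloc(). The converted shape, using the sk_alloc() signature seen throughout this patch (PF_EXAMPLE and example_proto are placeholders):

    static int example_create(struct net *net, struct socket *sock,
                              int protocol)
    {
            struct sock *sk;

            if (net != &init_net)
                    return -EAFNOSUPPORT;   /* family not namespace-aware yet */

            sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, 1);
            if (sk == NULL)
                    return -ENOMEM;

            /* ... family-specific socket setup ... */
            return 0;
    }
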
+diff -Nurb linux-2.6.22-570/net/ipx/ipx_proc.c linux-2.6.22-591/net/ipx/ipx_proc.c
+--- linux-2.6.22-570/net/ipx/ipx_proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipx/ipx_proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/spinlock.h>
+ #include <linux/seq_file.h>
++#include <net/net_namespace.h>
+ #include <net/tcp_states.h>
+ #include <net/ipx.h>
+
+@@ -353,7 +354,7 @@
+ struct proc_dir_entry *p;
+ int rc = -ENOMEM;
+
+- ipx_proc_dir = proc_mkdir("ipx", proc_net);
++ ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net);
+
+ if (!ipx_proc_dir)
+ goto out;
+@@ -381,7 +382,7 @@
+ out_route:
+ remove_proc_entry("interface", ipx_proc_dir);
+ out_interface:
+- remove_proc_entry("ipx", proc_net);
++ remove_proc_entry("ipx", init_net.proc_net);
+ goto out;
+ }
+
+@@ -390,7 +391,7 @@
+ remove_proc_entry("interface", ipx_proc_dir);
+ remove_proc_entry("route", ipx_proc_dir);
+ remove_proc_entry("socket", ipx_proc_dir);
+- remove_proc_entry("ipx", proc_net);
++ remove_proc_entry("ipx", init_net.proc_net);
+ }
+
+ #else /* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-570/net/irda/af_irda.c linux-2.6.22-591/net/irda/af_irda.c
+--- linux-2.6.22-570/net/irda/af_irda.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/irda/af_irda.c 2007-12-21 15:36:15.000000000 -0500
+@@ -60,7 +60,7 @@
+
+ #include <net/irda/af_irda.h>
+
+-static int irda_create(struct socket *sock, int protocol);
++static int irda_create(struct net *net, struct socket *sock, int protocol);
+
+ static const struct proto_ops irda_stream_ops;
+ static const struct proto_ops irda_seqpacket_ops;
+@@ -831,7 +831,7 @@
+
+ IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
+- err = irda_create(newsock, sk->sk_protocol);
++ err = irda_create(sk->sk_net, newsock, sk->sk_protocol);
+ if (err)
+ return err;
+
+@@ -1057,13 +1057,16 @@
+ * Create IrDA socket
+ *
+ */
+-static int irda_create(struct socket *sock, int protocol)
++static int irda_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct irda_sock *self;
+
+ IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ /* Check for valid socket type */
+ switch (sock->type) {
+ case SOCK_STREAM: /* For TTP connections with SAR disabled */
+@@ -1075,7 +1078,7 @@
+ }
+
+ /* Allocate networking socket */
+- sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1);
++ sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1);
+ if (sk == NULL)
+ return -ENOMEM;
+
+diff -Nurb linux-2.6.22-570/net/irda/irias_object.c linux-2.6.22-591/net/irda/irias_object.c
+--- linux-2.6.22-570/net/irda/irias_object.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/irda/irias_object.c 2007-12-21 15:36:12.000000000 -0500
+@@ -36,39 +36,6 @@
+ */
+ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}};
+
+-/*
+- * Function strndup (str, max)
+- *
+- * My own kernel version of strndup!
+- *
+- * Faster, check boundary... Jean II
+- */
+-static char *strndup(char *str, size_t max)
+-{
+- char *new_str;
+- int len;
+-
+- /* Check string */
+- if (str == NULL)
+- return NULL;
+- /* Check length, truncate */
+- len = strlen(str);
+- if(len > max)
+- len = max;
+-
+- /* Allocate new string */
+- new_str = kmalloc(len + 1, GFP_ATOMIC);
+- if (new_str == NULL) {
+- IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+- return NULL;
+- }
+-
+- /* Copy and truncate */
+- memcpy(new_str, str, len);
+- new_str[len] = '\0';
+-
+- return new_str;
+-}
+
+ /*
+ * Function ias_new_object (name, id)
+@@ -90,7 +57,7 @@
+ }
+
+ obj->magic = IAS_OBJECT_MAGIC;
+- obj->name = strndup(name, IAS_MAX_CLASSNAME);
++ obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC);
+ if (!obj->name) {
+ IRDA_WARNING("%s(), Unable to allocate name!\n",
+ __FUNCTION__);
+@@ -360,7 +327,7 @@
+ }
+
+ attrib->magic = IAS_ATTRIB_MAGIC;
+- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+
+ /* Insert value */
+ attrib->value = irias_new_integer_value(value);
+@@ -404,7 +371,7 @@
+ }
+
+ attrib->magic = IAS_ATTRIB_MAGIC;
+- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+
+ attrib->value = irias_new_octseq_value( octets, len);
+ if (!attrib->name || !attrib->value) {
+@@ -446,7 +413,7 @@
+ }
+
+ attrib->magic = IAS_ATTRIB_MAGIC;
+- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+
+ attrib->value = irias_new_string_value(value);
+ if (!attrib->name || !attrib->value) {
+@@ -506,7 +473,7 @@
+
+ value->type = IAS_STRING;
+ value->charset = CS_ASCII;
+- value->t.string = strndup(string, IAS_MAX_STRING);
++ value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC);
+ if (!value->t.string) {
+ IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+ kfree(value);
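
A small cleanup rides along in irias_object.c: the driver-private strndup() is deleted in favor of the kernel's kstrndup(), which has the same copy-at-most-n, always-NUL-terminate semantics plus an explicit gfp argument; every call site here passes GFP_ATOMIC because these paths may run where sleeping is not allowed. Equivalent usage:

    #include <linux/string.h>

    /* allocates min(strlen(name), IAS_MAX_CLASSNAME) + 1 bytes,
     * NUL-terminates, returns NULL on allocation failure */
    obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC);
    if (!obj->name)
            return NULL;
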
+diff -Nurb linux-2.6.22-570/net/irda/irlap_frame.c linux-2.6.22-591/net/irda/irlap_frame.c
+--- linux-2.6.22-570/net/irda/irlap_frame.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/irda/irlap_frame.c 2007-12-21 15:36:15.000000000 -0500
+@@ -1319,6 +1319,9 @@
+ int command;
+ __u8 control;
+
++ if (dev->nd_net != &init_net)
++ goto out;
++
+ /* FIXME: should we get our own field? */
+ self = (struct irlap_cb *) dev->atalk_ptr;
+
+diff -Nurb linux-2.6.22-570/net/irda/irproc.c linux-2.6.22-591/net/irda/irproc.c
+--- linux-2.6.22-570/net/irda/irproc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/irda/irproc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <net/net_namespace.h>
+
+ #include <net/irda/irda.h>
+ #include <net/irda/irlap.h>
+@@ -66,7 +67,7 @@
+ int i;
+ struct proc_dir_entry *d;
+
+- proc_irda = proc_mkdir("irda", proc_net);
++ proc_irda = proc_mkdir("irda", init_net.proc_net);
+ if (proc_irda == NULL)
+ return;
+ proc_irda->owner = THIS_MODULE;
+@@ -92,7 +93,7 @@
+ for (i=0; i<ARRAY_SIZE(irda_dirs); i++)
+ remove_proc_entry(irda_dirs[i].name, proc_irda);
+
+- remove_proc_entry("irda", proc_net);
++ remove_proc_entry("irda", init_net.proc_net);
+ proc_irda = NULL;
+ }
+ }
+diff -Nurb linux-2.6.22-570/net/key/af_key.c linux-2.6.22-591/net/key/af_key.c
+--- linux-2.6.22-570/net/key/af_key.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/key/af_key.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/init.h>
+ #include <net/xfrm.h>
+ #include <linux/audit.h>
++#include <net/net_namespace.h>
+
+ #include <net/sock.h>
+
+@@ -136,11 +137,14 @@
+ .obj_size = sizeof(struct pfkey_sock),
+ };
+
+-static int pfkey_create(struct socket *sock, int protocol)
++static int pfkey_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ int err;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (sock->type != SOCK_RAW)
+@@ -149,7 +153,7 @@
+ return -EPROTONOSUPPORT;
+
+ err = -ENOMEM;
+- sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1);
++ sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1);
+ if (sk == NULL)
+ goto out;
+
+@@ -3781,7 +3785,7 @@
+ static void __exit ipsec_pfkey_exit(void)
+ {
+ xfrm_unregister_km(&pfkeyv2_mgr);
+- remove_proc_entry("net/pfkey", NULL);
++ remove_proc_entry("pfkey", init_net.proc_net);
+ sock_unregister(PF_KEY);
+ proto_unregister(&key_proto);
+ }
+@@ -3798,7 +3802,7 @@
+ goto out_unregister_key_proto;
+ #ifdef CONFIG_PROC_FS
+ err = -ENOMEM;
+- if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)
++ if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL)
+ goto out_sock_unregister;
+ #endif
+ err = xfrm_register_km(&pfkeyv2_mgr);
+diff -Nurb linux-2.6.22-570/net/llc/af_llc.c linux-2.6.22-591/net/llc/af_llc.c
+--- linux-2.6.22-570/net/llc/af_llc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/llc/af_llc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -150,14 +150,17 @@
+ * socket type we have available.
+ * Returns 0 upon success, negative upon failure.
+ */
+-static int llc_ui_create(struct socket *sock, int protocol)
++static int llc_ui_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ int rc = -ESOCKTNOSUPPORT;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
+ rc = -ENOMEM;
+- sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto);
++ sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto);
+ if (sk) {
+ rc = 0;
+ llc_ui_sk_init(sock, sk);
+@@ -249,7 +252,7 @@
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out;
+ rc = -ENODEV;
+- llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd);
++ llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+ if (!llc->dev)
+ goto out;
+ rc = -EUSERS;
+@@ -300,7 +303,7 @@
+ goto out;
+ rc = -ENODEV;
+ rtnl_lock();
+- llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac);
++ llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac);
+ rtnl_unlock();
+ if (!llc->dev)
+ goto out;
+diff -Nurb linux-2.6.22-570/net/llc/llc_conn.c linux-2.6.22-591/net/llc/llc_conn.c
+--- linux-2.6.22-570/net/llc/llc_conn.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/llc/llc_conn.c 2007-12-21 15:36:15.000000000 -0500
+@@ -700,7 +700,7 @@
+ struct llc_addr *saddr,
+ struct llc_addr *daddr)
+ {
+- struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC,
++ struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC,
+ sk->sk_prot);
+ struct llc_sock *newllc, *llc = llc_sk(sk);
+
+@@ -867,9 +867,9 @@
+ * Allocates a LLC sock and initializes it. Returns the new LLC sock
+ * or %NULL if there's no memory available for one
+ */
+-struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot)
++struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
+ {
+- struct sock *sk = sk_alloc(family, priority, prot, 1);
++ struct sock *sk = sk_alloc(net, family, priority, prot, 1);
+
+ if (!sk)
+ goto out;
+diff -Nurb linux-2.6.22-570/net/llc/llc_core.c linux-2.6.22-591/net/llc/llc_core.c
+--- linux-2.6.22-570/net/llc/llc_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/llc/llc_core.c 2007-12-23 03:37:02.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+
+ LIST_HEAD(llc_sap_list);
+@@ -162,7 +163,8 @@
+ {
+ struct net_device *dev;
+
+- dev = first_net_device();
++ /* XXX sapan
++ dev = first_net_device(&init_net);
+ if (dev != NULL)
+ dev = next_net_device(dev);
+
+@@ -172,6 +174,7 @@
+ memset(llc_station_mac_sa, 0, ETH_ALEN);
+ dev_add_pack(&llc_packet_type);
+ dev_add_pack(&llc_tr_packet_type);
++ */
+ return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/net/llc/llc_core.c.orig linux-2.6.22-591/net/llc/llc_core.c.orig
+--- linux-2.6.22-570/net/llc/llc_core.c.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/net/llc/llc_core.c.orig 2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,197 @@
++/*
++ * llc_core.c - Minimum needed routines for sap handling and module init/exit
++ *
++ * Copyright (c) 1997 by Procom Technology, Inc.
++ * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
++ *
++ * This program can be redistributed or modified under the terms of the
++ * GNU General Public License as published by the Free Software Foundation.
++ * This program is distributed without any warranty or implied warranty
++ * of merchantability or fitness for a particular purpose.
++ *
++ * See the GNU General Public License for more details.
++ */
++
++#include <linux/module.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/netdevice.h>
++#include <linux/slab.h>
++#include <linux/string.h>
++#include <linux/init.h>
++#include <net/net_namespace.h>
++#include <net/llc.h>
++
++LIST_HEAD(llc_sap_list);
++DEFINE_RWLOCK(llc_sap_list_lock);
++
++unsigned char llc_station_mac_sa[ETH_ALEN];
++
++/**
++ * llc_sap_alloc - allocates and initializes sap.
++ *
++ * Allocates and initializes sap.
++ */
++static struct llc_sap *llc_sap_alloc(void)
++{
++ struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC);
++
++ if (sap) {
++ sap->state = LLC_SAP_STATE_ACTIVE;
++ memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN);
++ rwlock_init(&sap->sk_list.lock);
++ atomic_set(&sap->refcnt, 1);
++ }
++ return sap;
++}
++
++/**
++ * llc_add_sap - add sap to station list
++ * @sap: Address of the sap
++ *
++ * Adds a sap to the LLC's station sap list.
++ */
++static void llc_add_sap(struct llc_sap *sap)
++{
++ list_add_tail(&sap->node, &llc_sap_list);
++}
++
++/**
++ * llc_del_sap - del sap from station list
++ * @sap: Address of the sap
++ *
++ * Removes a sap to the LLC's station sap list.
++ */
++static void llc_del_sap(struct llc_sap *sap)
++{
++ write_lock_bh(&llc_sap_list_lock);
++ list_del(&sap->node);
++ write_unlock_bh(&llc_sap_list_lock);
++}
++
++static struct llc_sap *__llc_sap_find(unsigned char sap_value)
++{
++ struct llc_sap* sap;
++
++ list_for_each_entry(sap, &llc_sap_list, node)
++ if (sap->laddr.lsap == sap_value)
++ goto out;
++ sap = NULL;
++out:
++ return sap;
++}
++
++/**
++ * llc_sap_find - searchs a SAP in station
++ * @sap_value: sap to be found
++ *
++ * Searchs for a sap in the sap list of the LLC's station upon the sap ID.
++ * If the sap is found it will be refcounted and the user will have to do
++ * a llc_sap_put after use.
++ * Returns the sap or %NULL if not found.
++ */
++struct llc_sap *llc_sap_find(unsigned char sap_value)
++{
++ struct llc_sap* sap;
++
++ read_lock_bh(&llc_sap_list_lock);
++ sap = __llc_sap_find(sap_value);
++ if (sap)
++ llc_sap_hold(sap);
++ read_unlock_bh(&llc_sap_list_lock);
++ return sap;
++}
++
++/**
++ * llc_sap_open - open interface to the upper layers.
++ * @lsap: SAP number.
++ * @func: rcv func for datalink protos
++ *
++ * Interface function to upper layer. Each one who wants to get a SAP
++ * (for example NetBEUI) should call this function. Returns the opened
++ * SAP for success, NULL for failure.
++ */
++struct llc_sap *llc_sap_open(unsigned char lsap,
++ int (*func)(struct sk_buff *skb,
++ struct net_device *dev,
++ struct packet_type *pt,
++ struct net_device *orig_dev))
++{
++ struct llc_sap *sap = NULL;
++
++ write_lock_bh(&llc_sap_list_lock);
++ if (__llc_sap_find(lsap)) /* SAP already exists */
++ goto out;
++ sap = llc_sap_alloc();
++ if (!sap)
++ goto out;
++ sap->laddr.lsap = lsap;
++ sap->rcv_func = func;
++ llc_add_sap(sap);
++out:
++ write_unlock_bh(&llc_sap_list_lock);
++ return sap;
++}
++
++/**
++ * llc_sap_close - close interface for upper layers.
++ * @sap: SAP to be closed.
++ *
++ * Close interface function to upper layer. Each one who wants to
++ * close an open SAP (for example NetBEUI) should call this function.
++ * Removes this sap from the list of saps in the station and then
++ * frees the memory for this sap.
++ */
++void llc_sap_close(struct llc_sap *sap)
++{
++ WARN_ON(!hlist_empty(&sap->sk_list.list));
++ llc_del_sap(sap);
++ kfree(sap);
++}
++
++static struct packet_type llc_packet_type = {
++ .type = __constant_htons(ETH_P_802_2),
++ .func = llc_rcv,
++};
++
++static struct packet_type llc_tr_packet_type = {
++ .type = __constant_htons(ETH_P_TR_802_2),
++ .func = llc_rcv,
++};
++
++static int __init llc_init(void)
++{
++ struct net_device *dev;
++
++ dev = first_net_device(&init_net);
++ if (dev != NULL)
++ dev = next_net_device(dev);
++
++ if (dev != NULL)
++ memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN);
++ else
++ memset(llc_station_mac_sa, 0, ETH_ALEN);
++ dev_add_pack(&llc_packet_type);
++ dev_add_pack(&llc_tr_packet_type);
++ return 0;
++}
++
++static void __exit llc_exit(void)
++{
++ dev_remove_pack(&llc_packet_type);
++ dev_remove_pack(&llc_tr_packet_type);
++}
++
++module_init(llc_init);
++module_exit(llc_exit);
++
++EXPORT_SYMBOL(llc_station_mac_sa);
++EXPORT_SYMBOL(llc_sap_list);
++EXPORT_SYMBOL(llc_sap_list_lock);
++EXPORT_SYMBOL(llc_sap_find);
++EXPORT_SYMBOL(llc_sap_open);
++EXPORT_SYMBOL(llc_sap_close);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003");
++MODULE_DESCRIPTION("LLC IEEE 802.2 core support");
+diff -Nurb linux-2.6.22-570/net/llc/llc_input.c linux-2.6.22-591/net/llc/llc_input.c
+--- linux-2.6.22-570/net/llc/llc_input.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/llc/llc_input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -12,6 +12,7 @@
+ * See the GNU General Public License for more details.
+ */
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_pdu.h>
+ #include <net/llc_sap.h>
+@@ -145,6 +146,9 @@
+ int (*rcv)(struct sk_buff *, struct net_device *,
+ struct packet_type *, struct net_device *);
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ /*
+ * When the interface is in promisc. mode, drop all the crap that it
+ * receives, do not try to analyse it.
+diff -Nurb linux-2.6.22-570/net/llc/llc_proc.c linux-2.6.22-591/net/llc/llc_proc.c
+--- linux-2.6.22-570/net/llc/llc_proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/llc/llc_proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -18,6 +18,7 @@
+ #include <linux/errno.h>
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_c_ac.h>
+ #include <net/llc_c_ev.h>
+@@ -231,7 +232,7 @@
+ int rc = -ENOMEM;
+ struct proc_dir_entry *p;
+
+- llc_proc_dir = proc_mkdir("llc", proc_net);
++ llc_proc_dir = proc_mkdir("llc", init_net.proc_net);
+ if (!llc_proc_dir)
+ goto out;
+ llc_proc_dir->owner = THIS_MODULE;
+@@ -254,7 +255,7 @@
+ out_core:
+ remove_proc_entry("socket", llc_proc_dir);
+ out_socket:
+- remove_proc_entry("llc", proc_net);
++ remove_proc_entry("llc", init_net.proc_net);
+ goto out;
+ }
+
+@@ -262,5 +263,5 @@
+ {
+ remove_proc_entry("socket", llc_proc_dir);
+ remove_proc_entry("core", llc_proc_dir);
+- remove_proc_entry("llc", proc_net);
++ remove_proc_entry("llc", init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c
+--- linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -838,6 +838,29 @@
+ }
+
+
++static int ieee80211_ioctl_giwrate(struct net_device *dev,
++ struct iw_request_info *info,
++ struct iw_param *rate, char *extra)
++{
++ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
++ struct sta_info *sta;
++ struct ieee80211_sub_if_data *sdata;
++
++ sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++ if (sdata->type == IEEE80211_IF_TYPE_STA)
++ sta = sta_info_get(local, sdata->u.sta.bssid);
++ else
++ return -EOPNOTSUPP;
++ if (!sta)
++ return -ENODEV;
++ if (sta->txrate < local->oper_hw_mode->num_rates)
++ rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000;
++ else
++ rate->value = 0;
++ sta_info_put(sta);
++ return 0;
++}
++
+ static int ieee80211_ioctl_siwrts(struct net_device *dev,
+ struct iw_request_info *info,
+ struct iw_param *rts, char *extra)
+@@ -1779,7 +1802,7 @@
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) NULL, /* -- hole -- */
+ (iw_handler) NULL, /* SIOCSIWRATE */
+- (iw_handler) NULL, /* SIOCGIWRATE */
++ (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */
+ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */
+ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */
+ (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */
+diff -Nurb linux-2.6.22-570/net/netfilter/core.c linux-2.6.22-591/net/netfilter/core.c
+--- linux-2.6.22-570/net/netfilter/core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/core.c 2007-12-21 15:36:15.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+
+ #include "nf_internals.h"
+
+@@ -203,7 +204,9 @@
+ return 0;
+
+ /* Not exclusive use of packet? Must copy. */
+- if (skb_shared(*pskb) || skb_cloned(*pskb))
++ if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
++ goto copy_skb;
++ if (skb_shared(*pskb))
+ goto copy_skb;
+
+ return pskb_may_pull(*pskb, writable_len);
+@@ -278,8 +281,28 @@
+ #endif /* CONFIG_NF_CONNTRACK */
+
+ #ifdef CONFIG_PROC_FS
+-struct proc_dir_entry *proc_net_netfilter;
+-EXPORT_SYMBOL(proc_net_netfilter);
++static int netfilter_proc_init(struct net * net)
++{
++ int error = -ENOMEM;
++ net->proc_net_netfilter = proc_mkdir("netfilter", net->proc_net);
++
++ if (net->proc_net_netfilter) {
++ net->proc_net_netfilter->data = net;
++ error = 0;
++ }
++ return error;
++}
++
++static void netfilter_proc_exit(struct net *net)
++{
++ remove_proc_entry("netfilter", net->proc_net);
++}
++
++static struct pernet_operations netfilter_proc_ops = {
++ .init = netfilter_proc_init,
++ .exit = netfilter_proc_exit,
++};
++
+ #endif
+
+ void __init netfilter_init(void)
+@@ -291,8 +314,7 @@
+ }
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_netfilter = proc_mkdir("netfilter", proc_net);
+- if (!proc_net_netfilter)
++ if (register_pernet_subsys(&netfilter_proc_ops) < 0)
+ panic("cannot create netfilter proc entry");
+ #endif
+
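
netfilter/core.c is the first file in this stretch to go beyond hard-coding init_net: the global proc_net_netfilter becomes a per-namespace net->proc_net_netfilter, created and torn down by a pernet_operations pair registered once with register_pernet_subsys(). (The same file also tweaks skb_make_writable(): a cloned skb is now copied only when skb_clone_writable() says the requested length is not safely writable in place.) The pernet idiom in general form, with illustrative names:

    static int example_net_init(struct net *net)
    {
            /* called for init_net and for every namespace created later */
            return 0;
    }

    static void example_net_exit(struct net *net)
    {
            /* called as each namespace is torn down */
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
    };

    /* during subsystem init: */
    if (register_pernet_subsys(&example_net_ops) < 0)
            panic("cannot register example pernet operations");
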
+diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c
+--- linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c 2007-12-21 15:36:15.000000000 -0500
+@@ -724,6 +724,8 @@
+
+ memset(&fl1, 0, sizeof(fl1));
+ memset(&fl2, 0, sizeof(fl2));
++ fl1.fl_net = &init_net;
++ fl2.fl_net = &init_net;
+
+ switch (family) {
+ case AF_INET: {
+diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c
+--- linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/percpu.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ #ifdef CONFIG_SYSCTL
+ #include <linux/sysctl.h>
+ #endif
+@@ -419,14 +420,14 @@
+ return ret;
+
+ #ifdef CONFIG_PROC_FS
+- proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
++ proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops);
+ if (!proc) goto cleanup_init;
+
+- proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
++ proc_exp = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440,
+ &exp_file_ops);
+ if (!proc_exp) goto cleanup_proc;
+
+- proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
++ proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat);
+ if (!proc_stat)
+ goto cleanup_proc_exp;
+
+@@ -447,11 +448,11 @@
+ cleanup_proc_stat:
+ #endif
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("nf_conntrack", proc_net_stat);
++ remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
+ cleanup_proc_exp:
+- proc_net_remove("nf_conntrack_expect");
++ proc_net_remove(&init_net, "nf_conntrack_expect");
+ cleanup_proc:
+- proc_net_remove("nf_conntrack");
++ proc_net_remove(&init_net, "nf_conntrack");
+ cleanup_init:
+ #endif /* CONFIG_PROC_FS */
+ nf_conntrack_cleanup();
+@@ -464,9 +465,9 @@
+ unregister_sysctl_table(nf_ct_sysctl_header);
+ #endif
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("nf_conntrack", proc_net_stat);
+- proc_net_remove("nf_conntrack_expect");
+- proc_net_remove("nf_conntrack");
++ remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
++ proc_net_remove(&init_net, "nf_conntrack_expect");
++ proc_net_remove(&init_net, "nf_conntrack");
+ #endif /* CONFIG_PROC_FS */
+ nf_conntrack_cleanup();
+ }
+diff -Nurb linux-2.6.22-570/net/netfilter/nf_log.c linux-2.6.22-591/net/netfilter/nf_log.c
+--- linux-2.6.22-570/net/netfilter/nf_log.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nf_log.c 2007-12-21 15:36:15.000000000 -0500
+@@ -168,7 +168,8 @@
+ #ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde;
+
+- pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
++ pde = create_proc_entry("nf_log", S_IRUGO,
++ init_net.proc_net_netfilter);
+ if (!pde)
+ return -1;
+
+diff -Nurb linux-2.6.22-570/net/netfilter/nf_queue.c linux-2.6.22-591/net/netfilter/nf_queue.c
+--- linux-2.6.22-570/net/netfilter/nf_queue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nf_queue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -346,7 +346,7 @@
+ #ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde;
+
+- pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
++ pde = create_proc_entry("nf_queue", S_IRUGO, init_net.proc_net_netfilter);
+ if (!pde)
+ return -1;
+ pde->proc_fops = &nfqueue_file_ops;
+diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink.c linux-2.6.22-591/net/netfilter/nfnetlink.c
+--- linux-2.6.22-570/net/netfilter/nfnetlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nfnetlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -264,7 +264,7 @@
+ {
+ printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+
+- nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
++ nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
+ nfnetlink_rcv, NULL, THIS_MODULE);
+ if (!nfnl) {
+ printk(KERN_ERR "cannot initialize nfnetlink!\n");
+diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_log.c linux-2.6.22-591/net/netfilter/nfnetlink_log.c
+--- linux-2.6.22-570/net/netfilter/nfnetlink_log.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nfnetlink_log.c 2007-12-21 15:36:15.000000000 -0500
+@@ -705,7 +705,8 @@
+
+ hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+ UDEBUG("node = %p\n", inst);
+- if (n->pid == inst->peer_pid)
++ if ((n->net == &init_net) &&
++ (n->pid == inst->peer_pid))
+ __instance_destroy(inst);
+ }
+ }
+@@ -1023,7 +1024,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ proc_nful = create_proc_entry("nfnetlink_log", 0440,
+- proc_net_netfilter);
++ init_net.proc_net_netfilter);
+ if (!proc_nful)
+ goto cleanup_subsys;
+ proc_nful->proc_fops = &nful_file_ops;
+@@ -1043,7 +1044,7 @@
+ {
+ nf_log_unregister(&nfulnl_logger);
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("nfnetlink_log", proc_net_netfilter);
++ remove_proc_entry("nfnetlink_log", init_net.proc_net_netfilter);
+ #endif
+ nfnetlink_subsys_unregister(&nfulnl_subsys);
+ netlink_unregister_notifier(&nfulnl_rtnl_notifier);
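+
+The notifier hunk above (and its twin in nfnetlink_queue below) is the
+receiving side of the struct netlink_notify extension made in af_netlink.c
+later in this section, where netlink_release() now fills in a .net field.
+Since nfnetlink instances only exist in the initial namespace, socket
+releases reported from any other namespace are ignored. A condensed
+sketch, with hypothetical names for the handler and the teardown helper:
+
+	static void foo_destroy_instances(u32 pid)
+	{
+		/* hypothetical: drop all instances owned by this pid */
+	}
+
+	static int foo_rcv_nl_event(struct notifier_block *this,
+				    unsigned long event, void *ptr)
+	{
+		struct netlink_notify *n = ptr;
+
+		if (event == NETLINK_URELEASE &&
+		    n->protocol == NETLINK_NETFILTER &&
+		    n->net == &init_net)	/* ignore foreign namespaces */
+			foo_destroy_instances(n->pid);
+		return NOTIFY_DONE;
+	}
+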
+diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_queue.c linux-2.6.22-591/net/netfilter/nfnetlink_queue.c
+--- linux-2.6.22-570/net/netfilter/nfnetlink_queue.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/nfnetlink_queue.c 2007-12-21 15:36:15.000000000 -0500
+@@ -734,6 +734,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ /* Drop any packets associated with the downed device */
+ if (event == NETDEV_DOWN)
+ nfqnl_dev_drop(dev->ifindex);
+@@ -762,7 +765,8 @@
+ struct hlist_head *head = &instance_table[i];
+
+ hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+- if (n->pid == inst->peer_pid)
++ if ((n->net == &init_net) &&
++ (n->pid == inst->peer_pid))
+ __instance_destroy(inst);
+ }
+ }
+@@ -1106,7 +1110,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
+- proc_net_netfilter);
++ init_net.proc_net_netfilter);
+ if (!proc_nfqueue)
+ goto cleanup_subsys;
+ proc_nfqueue->proc_fops = &nfqnl_file_ops;
+@@ -1129,7 +1133,7 @@
+ nf_unregister_queue_handlers(&nfqh);
+ unregister_netdevice_notifier(&nfqnl_dev_notifier);
+ #ifdef CONFIG_PROC_FS
+- remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
++ remove_proc_entry("nfnetlink_queue", init_net.proc_net_netfilter);
+ #endif
+ nfnetlink_subsys_unregister(&nfqnl_subsys);
+ netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+diff -Nurb linux-2.6.22-570/net/netfilter/x_tables.c linux-2.6.22-591/net/netfilter/x_tables.c
+--- linux-2.6.22-570/net/netfilter/x_tables.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/x_tables.c 2007-12-21 15:36:15.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/vmalloc.h>
+ #include <linux/mutex.h>
+ #include <linux/mm.h>
++#include <net/net_namespace.h>
+
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_arp.h>
+@@ -37,11 +38,15 @@
+ struct mutex mutex;
+ struct list_head match;
+ struct list_head target;
+- struct list_head tables;
+ struct mutex compat_mutex;
+ };
+
+-static struct xt_af *xt;
++
++struct xt_af_pernet {
++ struct list_head tables;
++};
++
++static struct xt_af *xt;
+
+ #ifdef DEBUG_IP_FIREWALL_USER
+ #define duprintf(format, args...) printk(format , ## args)
+@@ -286,9 +292,9 @@
+ return 1;
+ }
+ if (target == 1)
+- have_rev = target_revfn(af, name, revision, &best);
++		have_rev = target_revfn(af, name, revision, &best);
+ else
+- have_rev = match_revfn(af, name, revision, &best);
++		have_rev = match_revfn(af, name, revision, &best);
+ mutex_unlock(&xt[af].mutex);
+
+ /* Nothing at all? Return 0 to try loading module. */
+@@ -533,14 +539,14 @@
+ EXPORT_SYMBOL(xt_free_table_info);
+
+ /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
+-struct xt_table *xt_find_table_lock(int af, const char *name)
++struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name)
+ {
+ struct xt_table *t;
+
+ if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+- list_for_each_entry(t, &xt[af].tables, list)
++ list_for_each_entry(t, &net->xtn[af].tables, list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+ mutex_unlock(&xt[af].mutex);
+@@ -596,7 +602,7 @@
+ }
+ EXPORT_SYMBOL_GPL(xt_replace_table);
+
+-int xt_register_table(struct xt_table *table,
++int xt_register_table(struct net *net, struct xt_table *table,
+ struct xt_table_info *bootstrap,
+ struct xt_table_info *newinfo)
+ {
+@@ -609,7 +615,7 @@
+ return ret;
+
+ /* Don't autoload: we'd eat our tail... */
+- list_for_each_entry(t, &xt[table->af].tables, list) {
++ list_for_each_entry(t, &net->xtn[table->af].tables, list) {
+ if (strcmp(t->name, table->name) == 0) {
+ ret = -EEXIST;
+ goto unlock;
+@@ -628,7 +634,7 @@
+ /* save number of initial entries */
+ private->initial_entries = private->number;
+
+- list_add(&table->list, &xt[table->af].tables);
++ list_add(&table->list, &net->xtn[table->af].tables);
+
+ ret = 0;
+ unlock:
+@@ -666,7 +672,7 @@
+ return pos ? NULL : head;
+ }
+
+-static struct list_head *type2list(u_int16_t af, u_int16_t type)
++static struct list_head *type2list(struct net *net, u_int16_t af, u_int16_t type)
+ {
+ struct list_head *list;
+
+@@ -678,7 +684,7 @@
+ list = &xt[af].match;
+ break;
+ case TABLE:
+- list = &xt[af].tables;
++ list = &net->xtn[af].tables;
+ break;
+ default:
+ list = NULL;
+@@ -691,6 +697,7 @@
+ static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private;
++ struct net *net = PDE_NET(pde);
+ u_int16_t af = (unsigned long)pde->data & 0xffff;
+ u_int16_t type = (unsigned long)pde->data >> 16;
+ struct list_head *list;
+@@ -698,7 +705,7 @@
+ if (af >= NPROTO)
+ return NULL;
+
+- list = type2list(af, type);
++ list = type2list(net, af, type);
+ if (!list)
+ return NULL;
+
+@@ -711,6 +718,7 @@
+ static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct proc_dir_entry *pde = seq->private;
++ struct net *net = PDE_NET(pde);
+ u_int16_t af = (unsigned long)pde->data & 0xffff;
+ u_int16_t type = (unsigned long)pde->data >> 16;
+ struct list_head *list;
+@@ -718,7 +726,7 @@
+ if (af >= NPROTO)
+ return NULL;
+
+- list = type2list(af, type);
++ list = type2list(net, af, type);
+ if (!list)
+ return NULL;
+
+@@ -759,6 +767,7 @@
+ if (!ret) {
+ struct seq_file *seq = file->private_data;
+ struct proc_dir_entry *pde = PDE(inode);
++ get_net(PROC_NET(inode));
+
+ seq->private = pde;
+ }
+@@ -766,12 +775,18 @@
+ return ret;
+ }
+
++static int xt_tgt_release(struct inode *inode, struct file *file)
++{
++ put_net(PROC_NET(inode));
++ return seq_release(inode, file);
++}
++
+ static const struct file_operations xt_file_ops = {
+ .owner = THIS_MODULE,
+ .open = xt_tgt_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = xt_tgt_release,
+ };
+
+ #define FORMAT_TABLES "_tables_names"
+@@ -794,7 +809,7 @@
+ #ifdef CONFIG_PROC_FS
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+- proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ if (!proc)
+ goto out;
+ proc->data = (void *) ((unsigned long) af | (TABLE << 16));
+@@ -802,14 +817,14 @@
+
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+- proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ if (!proc)
+ goto out_remove_tables;
+ proc->data = (void *) ((unsigned long) af | (MATCH << 16));
+
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+- proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ if (!proc)
+ goto out_remove_matches;
+ proc->data = (void *) ((unsigned long) af | (TARGET << 16));
+@@ -821,12 +836,12 @@
+ out_remove_matches:
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+- proc_net_remove(buf);
++ proc_net_remove(&init_net, buf);
+
+ out_remove_tables:
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+- proc_net_remove(buf);
++ proc_net_remove(&init_net, buf);
+ out:
+ return -1;
+ #endif
+@@ -840,19 +855,42 @@
+
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TABLES, sizeof(buf));
+- proc_net_remove(buf);
++ proc_net_remove(&init_net, buf);
+
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+- proc_net_remove(buf);
++ proc_net_remove(&init_net, buf);
+
+ strlcpy(buf, xt_prefix[af], sizeof(buf));
+ strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+- proc_net_remove(buf);
++ proc_net_remove(&init_net, buf);
+ #endif /*CONFIG_PROC_FS*/
+ }
+ EXPORT_SYMBOL_GPL(xt_proto_fini);
+
++static int xt_net_init(struct net *net)
++{
++ int i;
++
++ net->xtn = kmalloc(sizeof(struct xt_af_pernet) * NPROTO, GFP_KERNEL);
++ if (!net->xtn)
++ return -ENOMEM;
++
++ for (i = 0; i < NPROTO; i++) {
++ INIT_LIST_HEAD(&net->xtn[i].tables);
++ }
++ return 0;
++}
++
++static void xt_net_exit(struct net *net)
++{
++ kfree(net->xtn);
++}
++
++static struct pernet_operations xt_net_ops = {
++ .init = xt_net_init,
++ .exit = xt_net_exit,
++};
+
+ static int __init xt_init(void)
+ {
+@@ -869,13 +907,13 @@
+ #endif
+ INIT_LIST_HEAD(&xt[i].target);
+ INIT_LIST_HEAD(&xt[i].match);
+- INIT_LIST_HEAD(&xt[i].tables);
+ }
+- return 0;
++ return register_pernet_subsys(&xt_net_ops);
+ }
+
+ static void __exit xt_fini(void)
+ {
++ unregister_pernet_subsys(&xt_net_ops);
+ kfree(xt);
+ }
+
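+
+With tables now chained off net->xtn[af].tables while matches, targets and
+the per-family mutex stay global, every x_tables caller has to say which
+namespace it means. A sketch of the lookup as a caller such as ip_tables
+would now perform it (the helper is hypothetical; xt_table_unlock() is the
+existing unlock primitive):
+
+	static int foo_inspect_filter(struct net *net)
+	{
+		struct xt_table *t;
+
+		t = xt_find_table_lock(net, AF_INET, "filter");
+		if (t == NULL)
+			return -ENOENT;
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+
+		/* ... t->private is this namespace's ruleset only ... */
+
+		xt_table_unlock(t);
+		module_put(t->me);
+		return 0;
+	}
+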
+diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c linux-2.6.22-591/net/netfilter/xt_MARK.c
+--- linux-2.6.22-570/net/netfilter/xt_MARK.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/netfilter/xt_MARK.c 2007-12-21 15:36:15.000000000 -0500
+@@ -131,7 +131,7 @@
+ if ((*pskb)->sk)
+ connection_sk = (*pskb)->sk;
+ else {
+- connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
++			connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif, &init_net);
+ }
+
+ if (connection_sk) {
+diff -Nurb linux-2.6.22-570/net/netfilter/xt_hashlimit.c linux-2.6.22-591/net/netfilter/xt_hashlimit.c
+--- linux-2.6.22-570/net/netfilter/xt_hashlimit.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netfilter/xt_hashlimit.c 2007-12-21 15:36:15.000000000 -0500
+@@ -21,6 +21,7 @@
+ #include <linux/in.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
++#include <net/net_namespace.h>
+
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -736,13 +737,13 @@
+ printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
+ goto err2;
+ }
+- hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net);
++ hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
+ if (!hashlimit_procdir4) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+ goto err3;
+ }
+- hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net);
++ hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
+ if (!hashlimit_procdir6) {
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ "entry\n");
+@@ -750,7 +751,7 @@
+ }
+ return 0;
+ err4:
+- remove_proc_entry("ipt_hashlimit", proc_net);
++ remove_proc_entry("ipt_hashlimit", init_net.proc_net);
+ err3:
+ kmem_cache_destroy(hashlimit_cachep);
+ err2:
+@@ -762,8 +763,8 @@
+
+ static void __exit xt_hashlimit_fini(void)
+ {
+- remove_proc_entry("ipt_hashlimit", proc_net);
+- remove_proc_entry("ip6t_hashlimit", proc_net);
++ remove_proc_entry("ipt_hashlimit", init_net.proc_net);
++ remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
+ kmem_cache_destroy(hashlimit_cachep);
+ xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+ }
+diff -Nurb linux-2.6.22-570/net/netlink/af_netlink.c linux-2.6.22-591/net/netlink/af_netlink.c
+--- linux-2.6.22-570/net/netlink/af_netlink.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/netlink/af_netlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -63,6 +63,7 @@
+ #include <net/sock.h>
+ #include <net/scm.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+
+ #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
+
+@@ -212,7 +213,7 @@
+ wake_up(&nl_table_wait);
+ }
+
+-static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
++static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
+ {
+ struct nl_pid_hash *hash = &nl_table[protocol].hash;
+ struct hlist_head *head;
+@@ -222,7 +223,7 @@
+ read_lock(&nl_table_lock);
+ head = nl_pid_hashfn(hash, pid);
+ sk_for_each(sk, node, head) {
+- if (nlk_sk(sk)->pid == pid) {
++ if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
+ sock_hold(sk);
+ goto found;
+ }
+@@ -327,7 +328,7 @@
+ * makes sure updates are visible before bind or setsockopt return. */
+ }
+
+-static int netlink_insert(struct sock *sk, u32 pid)
++static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
+ {
+ struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct hlist_head *head;
+@@ -340,7 +341,7 @@
+ head = nl_pid_hashfn(hash, pid);
+ len = 0;
+ sk_for_each(osk, node, head) {
+- if (nlk_sk(osk)->pid == pid)
++ if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
+ break;
+ len++;
+ }
+@@ -383,15 +384,15 @@
+ .obj_size = sizeof(struct netlink_sock),
+ };
+
+-static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+- int protocol)
++static int __netlink_create(struct net *net, struct socket *sock,
++ struct mutex *cb_mutex, int protocol)
+ {
+ struct sock *sk;
+ struct netlink_sock *nlk;
+
+ sock->ops = &netlink_ops;
+
+- sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
++ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+@@ -411,7 +412,7 @@
+ return 0;
+ }
+
+-static int netlink_create(struct socket *sock, int protocol)
++static int netlink_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct module *module = NULL;
+ struct mutex *cb_mutex;
+@@ -440,7 +441,7 @@
+ cb_mutex = nl_table[protocol].cb_mutex;
+ netlink_unlock_table();
+
+- if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
++ if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
+ goto out_module;
+
+ nlk = nlk_sk(sock->sk);
+@@ -477,6 +478,7 @@
+
+ if (nlk->pid && !nlk->subscriptions) {
+ struct netlink_notify n = {
++ .net = sk->sk_net,
+ .protocol = sk->sk_protocol,
+ .pid = nlk->pid,
+ };
+@@ -505,6 +507,7 @@
+ static int netlink_autobind(struct socket *sock)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct hlist_head *head;
+ struct sock *osk;
+@@ -518,6 +521,8 @@
+ netlink_table_grab();
+ head = nl_pid_hashfn(hash, pid);
+ sk_for_each(osk, node, head) {
++		if (osk->sk_net != net)
++ continue;
+ if (nlk_sk(osk)->pid == pid) {
+ /* Bind collision, search negative pid values. */
+ pid = rover--;
+@@ -529,7 +534,7 @@
+ }
+ netlink_table_ungrab();
+
+- err = netlink_insert(sk, pid);
++ err = netlink_insert(sk, net, pid);
+ if (err == -EADDRINUSE)
+ goto retry;
+
+@@ -583,6 +588,7 @@
+ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+ int err;
+@@ -606,7 +612,7 @@
+ return -EINVAL;
+ } else {
+ err = nladdr->nl_pid ?
+- netlink_insert(sk, nladdr->nl_pid) :
++ netlink_insert(sk, net, nladdr->nl_pid) :
+ netlink_autobind(sock);
+ if (err)
+ return err;
+@@ -690,10 +696,12 @@
+ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
+ {
+ int protocol = ssk->sk_protocol;
++ struct net *net;
+ struct sock *sock;
+ struct netlink_sock *nlk;
+
+- sock = netlink_lookup(protocol, pid);
++ net = ssk->sk_net;
++ sock = netlink_lookup(net, protocol, pid);
+ if (!sock)
+ return ERR_PTR(-ECONNREFUSED);
+
+@@ -866,6 +874,7 @@
+
+ struct netlink_broadcast_data {
+ struct sock *exclude_sk;
++ struct net *net;
+ u32 pid;
+ u32 group;
+ int failure;
+@@ -888,6 +897,9 @@
+ !test_bit(p->group - 1, nlk->groups))
+ goto out;
+
++	if (sk->sk_net != p->net)
++ goto out;
++
+ if (p->failure) {
+ netlink_overrun(sk);
+ goto out;
+@@ -926,6 +938,7 @@
+ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+ u32 group, gfp_t allocation)
+ {
++ struct net *net = ssk->sk_net;
+ struct netlink_broadcast_data info;
+ struct hlist_node *node;
+ struct sock *sk;
+@@ -933,6 +946,7 @@
+ skb = netlink_trim(skb, allocation);
+
+ info.exclude_sk = ssk;
++ info.net = net;
+ info.pid = pid;
+ info.group = group;
+ info.failure = 0;
+@@ -981,6 +995,9 @@
+ if (sk == p->exclude_sk)
+ goto out;
+
++ if (sk->sk_net != p->exclude_sk->sk_net)
++ goto out;
++
+ if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+ !test_bit(p->group - 1, nlk->groups))
+ goto out;
+@@ -1276,7 +1293,7 @@
+ */
+
+ struct sock *
+-netlink_kernel_create(int unit, unsigned int groups,
++netlink_kernel_create(struct net *net, int unit, unsigned int groups,
+ void (*input)(struct sock *sk, int len),
+ struct mutex *cb_mutex, struct module *module)
+ {
+@@ -1293,7 +1310,7 @@
+ if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
+ return NULL;
+
+- if (__netlink_create(sock, cb_mutex, unit) < 0)
++ if (__netlink_create(net, sock, cb_mutex, unit) < 0)
+ goto out_sock_release;
+
+ if (groups < 32)
+@@ -1308,18 +1325,20 @@
+ if (input)
+ nlk_sk(sk)->data_ready = input;
+
+- if (netlink_insert(sk, 0))
++ if (netlink_insert(sk, net, 0))
+ goto out_sock_release;
+
+ nlk = nlk_sk(sk);
+ nlk->flags |= NETLINK_KERNEL_SOCKET;
+
+ netlink_table_grab();
++ if (!nl_table[unit].registered) {
+-	nl_table[unit].groups = groups;
+-	nl_table[unit].listeners = listeners;
+-	nl_table[unit].cb_mutex = cb_mutex;
+-	nl_table[unit].module = module;
+-	nl_table[unit].registered = 1;
++		nl_table[unit].groups = groups;
++		nl_table[unit].listeners = listeners;
++		nl_table[unit].cb_mutex = cb_mutex;
++		nl_table[unit].module = module;
++		nl_table[unit].registered = 1;
++ }
+ netlink_table_ungrab();
+
+ return sk;
+@@ -1420,7 +1439,7 @@
+ atomic_inc(&skb->users);
+ cb->skb = skb;
+
+- sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
++ sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid);
+ if (sk == NULL) {
+ netlink_destroy_callback(cb);
+ return -ECONNREFUSED;
+@@ -1462,7 +1481,8 @@
+ if (!skb) {
+ struct sock *sk;
+
+- sk = netlink_lookup(in_skb->sk->sk_protocol,
++ sk = netlink_lookup(in_skb->sk->sk_net,
++ in_skb->sk->sk_protocol,
+ NETLINK_CB(in_skb).pid);
+ if (sk) {
+ sk->sk_err = ENOBUFS;
+@@ -1613,6 +1633,7 @@
+
+ #ifdef CONFIG_PROC_FS
+ struct nl_seq_iter {
++ struct net *net;
+ int link;
+ int hash_idx;
+ };
+@@ -1630,6 +1651,8 @@
+
+ for (j = 0; j <= hash->mask; j++) {
+ sk_for_each(s, node, &hash->table[j]) {
++ if (iter->net != s->sk_net)
++ continue;
+ if (off == pos) {
+ iter->link = i;
+ iter->hash_idx = j;
+@@ -1659,11 +1682,14 @@
+ if (v == SEQ_START_TOKEN)
+ return netlink_seq_socket_idx(seq, 0);
+
+- s = sk_next(v);
++ iter = seq->private;
++ s = v;
++ do {
++ s = sk_next(s);
++ } while (s && (iter->net != s->sk_net));
+ if (s)
+ return s;
+
+- iter = seq->private;
+ i = iter->link;
+ j = iter->hash_idx + 1;
+
+@@ -1672,6 +1698,8 @@
+
+ for (; j <= hash->mask; j++) {
+ s = sk_head(&hash->table[j]);
++ while (s && (iter->net != s->sk_net))
++ s = sk_next(s);
+ if (s) {
+ iter->link = i;
+ iter->hash_idx = j;
+@@ -1742,15 +1770,24 @@
+
+ seq = file->private_data;
+ seq->private = iter;
++ iter->net = get_net(PROC_NET(inode));
+ return 0;
+ }
+
++static int netlink_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct nl_seq_iter *iter = seq->private;
++ put_net(iter->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations netlink_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = netlink_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = netlink_seq_release,
+ };
+
+ #endif
+@@ -1792,6 +1829,27 @@
+ .owner = THIS_MODULE, /* for consistency 8) */
+ };
+
++static int netlink_net_init(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++ if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
++ return -ENOMEM;
++#endif
++ return 0;
++}
++
++static void netlink_net_exit(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++ proc_net_remove(net, "netlink");
++#endif
++}
++
++static struct pernet_operations netlink_net_ops = {
++ .init = netlink_net_init,
++ .exit = netlink_net_exit,
++};
++
+ static int __init netlink_proto_init(void)
+ {
+ struct sk_buff *dummy_skb;
+@@ -1837,9 +1895,7 @@
+ }
+
+ sock_register(&netlink_family_ops);
+-#ifdef CONFIG_PROC_FS
+- proc_net_fops_create("netlink", 0, &netlink_seq_fops);
+-#endif
++ register_pernet_subsys(&netlink_net_ops);
+ /* The netlink device handler may be needed early. */
+ rtnetlink_init();
+ out:
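+
+One detail worth noting in the /proc/net/netlink conversion above: the
+open handler pins the namespace with get_net() and the new release handler
+drops it, so the iterator's net pointer stays valid for as long as the
+file is held open, even if the namespace is otherwise being torn down.
+Every per-net seq_file in this series follows the same pairing. A minimal
+sketch (foo_seq_ops is hypothetical; PROC_NET() is the accessor this patch
+set provides for the namespace of a /proc inode):
+
+	static const struct seq_operations foo_seq_ops;	/* hypothetical */
+
+	static int foo_seq_open(struct inode *inode, struct file *file)
+	{
+		int ret = seq_open(file, &foo_seq_ops);
+
+		if (!ret) {
+			struct seq_file *seq = file->private_data;
+			/* pin: the reference lives as long as the open file */
+			seq->private = get_net(PROC_NET(inode));
+		}
+		return ret;
+	}
+
+	static int foo_seq_release(struct inode *inode, struct file *file)
+	{
+		struct seq_file *seq = file->private_data;
+
+		put_net(seq->private);	/* drop the pin taken in open */
+		return seq_release(inode, file);
+	}
+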
+diff -Nurb linux-2.6.22-570/net/netlink/attr.c linux-2.6.22-591/net/netlink/attr.c
+--- linux-2.6.22-570/net/netlink/attr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netlink/attr.c 2007-12-21 15:36:12.000000000 -0500
+@@ -72,6 +72,17 @@
+ return -ERANGE;
+ break;
+
++ case NLA_NESTED_COMPAT:
++ if (attrlen < pt->len)
++ return -ERANGE;
++ if (attrlen < NLA_ALIGN(pt->len))
++ break;
++ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
++ return -ERANGE;
++ nla = nla_data(nla) + NLA_ALIGN(pt->len);
++ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
++ return -ERANGE;
++ break;
+ default:
+ if (pt->len)
+ minlen = pt->len;
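+
+NLA_NESTED_COMPAT, added above, validates an attribute that carries a
+fixed-size structure followed, optionally, by a stream of nested
+attributes. Reconstructing the payload layout from the checks (pt->len
+bytes of structure, alignment padding, then an ordinary attribute header):
+
+	/*
+	 *  <--- pt->len --->       <-- NLA_HDRLEN + nla_len(nla) -->
+	 * +-----------------+-----+--------------------------------+
+	 * |  fixed struct   | pad |       nested attributes        |
+	 * +-----------------+-----+--------------------------------+
+	 *                   ^ NLA_ALIGN(pt->len)
+	 */
+
+A policy entry for such an attribute would look like the following sketch
+(struct and attribute names are hypothetical):
+
+	struct foo_opts { __u32 flags; };	/* hypothetical payload */
+
+	enum { FOO_ATTR_UNSPEC, FOO_ATTR_OPTS, __FOO_ATTR_MAX };
+	#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)
+
+	static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
+		[FOO_ATTR_OPTS] = { .type = NLA_NESTED_COMPAT,
+				    .len  = sizeof(struct foo_opts) },
+	};
+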
+diff -Nurb linux-2.6.22-570/net/netlink/genetlink.c linux-2.6.22-591/net/netlink/genetlink.c
+--- linux-2.6.22-570/net/netlink/genetlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netlink/genetlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -557,8 +557,9 @@
+ goto errout_register;
+
+ netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
+- genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
+- genl_rcv, NULL, THIS_MODULE);
++ genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC,
++ GENL_MAX_ID, genl_rcv, NULL,
++ THIS_MODULE);
+ if (genl_sock == NULL)
+ panic("GENL: Cannot initialize generic netlink\n");
+
+diff -Nurb linux-2.6.22-570/net/netrom/af_netrom.c linux-2.6.22-591/net/netrom/af_netrom.c
+--- linux-2.6.22-570/net/netrom/af_netrom.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netrom/af_netrom.c 2007-12-21 15:36:15.000000000 -0500
+@@ -41,6 +41,7 @@
+ #include <net/ip.h>
+ #include <net/tcp_states.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ #include <linux/init.h>
+
+ static int nr_ndevs = 4;
+@@ -105,6 +106,9 @@
+ {
+ struct net_device *dev = (struct net_device *)ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+@@ -408,15 +412,18 @@
+ .obj_size = sizeof(struct nr_sock),
+ };
+
+-static int nr_create(struct socket *sock, int protocol)
++static int nr_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct nr_sock *nr;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (sock->type != SOCK_SEQPACKET || protocol != 0)
+ return -ESOCKTNOSUPPORT;
+
+- if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL)
++ if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL)
+ return -ENOMEM;
+
+ nr = nr_sk(sk);
+@@ -458,7 +465,7 @@
+ if (osk->sk_type != SOCK_SEQPACKET)
+ return NULL;
+
+- if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
++ if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+ return NULL;
+
+ nr = nr_sk(sk);
+@@ -1447,9 +1454,9 @@
+
+ nr_loopback_init();
+
+- proc_net_fops_create("nr", S_IRUGO, &nr_info_fops);
+- proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops);
+- proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops);
++ proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops);
++ proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops);
++ proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops);
+ out:
+ return rc;
+ fail:
+@@ -1477,9 +1484,9 @@
+ {
+ int i;
+
+- proc_net_remove("nr");
+- proc_net_remove("nr_neigh");
+- proc_net_remove("nr_nodes");
++ proc_net_remove(&init_net, "nr");
++ proc_net_remove(&init_net, "nr_neigh");
++ proc_net_remove(&init_net, "nr_nodes");
+ nr_loopback_clear();
+
+ nr_rt_free();
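+
+NET/ROM is treated the way every legacy protocol is treated in this
+series: pinned to the initial namespace rather than converted. The recipe
+has three parts, all visible above: refuse socket creation outside
+init_net, ignore netdevice notifications from foreign namespaces, and pass
+the namespace through sk_alloc() so it is recorded in sk->sk_net. A
+condensed sketch of the first two, for a hypothetical protocol "foo":
+
+	static int foo_create(struct net *net, struct socket *sock,
+			      int protocol)
+	{
+		if (net != &init_net)	/* not namespace-aware yet */
+			return -EAFNOSUPPORT;
+		/* ... normal socket setup continues, allocating with
+		 *     sk_alloc(net, ...) so sk->sk_net is recorded ... */
+		return 0;
+	}
+
+	static int foo_device_event(struct notifier_block *this,
+				    unsigned long event, void *ptr)
+	{
+		struct net_device *dev = ptr;
+
+		if (dev->nd_net != &init_net)	/* foreign device */
+			return NOTIFY_DONE;
+		/* ... handle NETDEV_DOWN etc. for init_net devices ... */
+		return NOTIFY_DONE;
+	}
+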
+diff -Nurb linux-2.6.22-570/net/netrom/nr_route.c linux-2.6.22-591/net/netrom/nr_route.c
+--- linux-2.6.22-570/net/netrom/nr_route.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/netrom/nr_route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -580,7 +580,7 @@
+ {
+ struct net_device *dev;
+
+- if ((dev = dev_get_by_name(devname)) == NULL)
++ if ((dev = dev_get_by_name(&init_net, devname)) == NULL)
+ return NULL;
+
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+@@ -598,7 +598,7 @@
+ struct net_device *dev, *first = NULL;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
+ if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+ first = dev;
+@@ -618,7 +618,7 @@
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
+ dev_hold(dev);
+ goto out;
+diff -Nurb linux-2.6.22-570/net/packet/af_packet.c linux-2.6.22-591/net/packet/af_packet.c
+--- linux-2.6.22-570/net/packet/af_packet.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/packet/af_packet.c 2007-12-21 15:36:15.000000000 -0500
+@@ -65,6 +65,7 @@
+ #include <net/protocol.h>
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <linux/errno.h>
+ #include <linux/timer.h>
+ #include <asm/system.h>
+@@ -135,10 +136,6 @@
+ packet classifier depends on it.
+ */
+
+-/* List of all packet sockets. */
+-static HLIST_HEAD(packet_sklist);
+-static DEFINE_RWLOCK(packet_sklist_lock);
+-
+ static atomic_t packet_socks_nr;
+
+
+@@ -273,6 +270,9 @@
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ goto out;
+
++ if (dev->nd_net != sk->sk_net)
++ goto out;
++
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ goto oom;
+
+@@ -344,7 +344,7 @@
+ */
+
+ saddr->spkt_device[13] = 0;
+- dev = dev_get_by_name(saddr->spkt_device);
++ dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
+ err = -ENODEV;
+ if (dev == NULL)
+ goto out_unlock;
+@@ -462,6 +462,9 @@
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (dev->nd_net != sk->sk_net)
++ goto drop;
++
+ skb->dev = dev;
+
+ if (dev->hard_header) {
+@@ -578,6 +581,9 @@
+ sk = pt->af_packet_priv;
+ po = pkt_sk(sk);
+
++ if (dev->nd_net != sk->sk_net)
++ goto drop;
++
+ if (dev->hard_header) {
+ if (sk->sk_type != SOCK_DGRAM)
+ skb_push(skb, skb->data - skb_mac_header(skb));
+@@ -738,7 +744,7 @@
+ }
+
+
+- dev = dev_get_by_index(ifindex);
++ dev = dev_get_by_index(sk->sk_net, ifindex);
+ err = -ENXIO;
+ if (dev == NULL)
+ goto out_unlock;
+@@ -811,15 +817,17 @@
+ {
+ struct sock *sk = sock->sk;
+ struct packet_sock *po;
++ struct net *net;
+
+ if (!sk)
+ return 0;
+
++ net = sk->sk_net;
+ po = pkt_sk(sk);
+
+- write_lock_bh(&packet_sklist_lock);
++ write_lock_bh(&net->packet_sklist_lock);
+ sk_del_node_init(sk);
+- write_unlock_bh(&packet_sklist_lock);
++ write_unlock_bh(&net->packet_sklist_lock);
+
+ /*
+ * Unhook packet receive handler.
+@@ -933,7 +941,7 @@
+ return -EINVAL;
+ strlcpy(name,uaddr->sa_data,sizeof(name));
+
+- dev = dev_get_by_name(name);
++ dev = dev_get_by_name(sk->sk_net, name);
+ if (dev) {
+ err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
+ dev_put(dev);
+@@ -960,7 +968,7 @@
+
+ if (sll->sll_ifindex) {
+ err = -ENODEV;
+- dev = dev_get_by_index(sll->sll_ifindex);
++ dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
+ if (dev == NULL)
+ goto out;
+ }
+@@ -982,7 +990,7 @@
+ * Create a packet of type SOCK_PACKET.
+ */
+
+-static int packet_create(struct socket *sock, int protocol)
++static int packet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct packet_sock *po;
+@@ -998,7 +1006,7 @@
+ sock->state = SS_UNCONNECTED;
+
+ err = -ENOBUFS;
+- sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
++ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1);
+ if (sk == NULL)
+ goto out;
+
+@@ -1034,9 +1042,9 @@
+ po->running = 1;
+ }
+
+- write_lock_bh(&packet_sklist_lock);
+- sk_add_node(sk, &packet_sklist);
+- write_unlock_bh(&packet_sklist_lock);
++ write_lock_bh(&net->packet_sklist_lock);
++ sk_add_node(sk, &net->packet_sklist);
++ write_unlock_bh(&net->packet_sklist_lock);
+ return(0);
+ out:
+ return err;
+@@ -1154,7 +1162,7 @@
+ return -EOPNOTSUPP;
+
+ uaddr->sa_family = AF_PACKET;
+- dev = dev_get_by_index(pkt_sk(sk)->ifindex);
++ dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
+ if (dev) {
+ strlcpy(uaddr->sa_data, dev->name, 15);
+ dev_put(dev);
+@@ -1179,7 +1187,7 @@
+ sll->sll_family = AF_PACKET;
+ sll->sll_ifindex = po->ifindex;
+ sll->sll_protocol = po->num;
+- dev = dev_get_by_index(po->ifindex);
++ dev = dev_get_by_index(sk->sk_net, po->ifindex);
+ if (dev) {
+ sll->sll_hatype = dev->type;
+ sll->sll_halen = dev->addr_len;
+@@ -1231,7 +1239,7 @@
+ rtnl_lock();
+
+ err = -ENODEV;
+- dev = __dev_get_by_index(mreq->mr_ifindex);
++ dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
+ if (!dev)
+ goto done;
+
+@@ -1285,7 +1293,7 @@
+ if (--ml->count == 0) {
+ struct net_device *dev;
+ *mlp = ml->next;
+- dev = dev_get_by_index(ml->ifindex);
++ dev = dev_get_by_index(sk->sk_net, ml->ifindex);
+ if (dev) {
+ packet_dev_mc(dev, ml, -1);
+ dev_put(dev);
+@@ -1313,7 +1321,7 @@
+ struct net_device *dev;
+
+ po->mclist = ml->next;
+- if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
++ if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
+ packet_dev_mc(dev, ml, -1);
+ dev_put(dev);
+ }
+@@ -1469,9 +1477,10 @@
+ struct sock *sk;
+ struct hlist_node *node;
+ struct net_device *dev = data;
++ struct net *net = dev->nd_net;
+
+- read_lock(&packet_sklist_lock);
+- sk_for_each(sk, node, &packet_sklist) {
++ read_lock(&net->packet_sklist_lock);
++ sk_for_each(sk, node, &net->packet_sklist) {
+ struct packet_sock *po = pkt_sk(sk);
+
+ switch (msg) {
+@@ -1510,7 +1519,7 @@
+ break;
+ }
+ }
+- read_unlock(&packet_sklist_lock);
++ read_unlock(&net->packet_sklist_lock);
+ return NOTIFY_DONE;
+ }
+
+@@ -1878,12 +1887,12 @@
+ };
+
+ #ifdef CONFIG_PROC_FS
+-static inline struct sock *packet_seq_idx(loff_t off)
++static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
+ {
+ struct sock *s;
+ struct hlist_node *node;
+
+- sk_for_each(s, node, &packet_sklist) {
++ sk_for_each(s, node, &net->packet_sklist) {
+ if (!off--)
+ return s;
+ }
+@@ -1892,21 +1901,24 @@
+
+ static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+- read_lock(&packet_sklist_lock);
+- return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
++ struct net *net = seq->private;
++ read_lock(&net->packet_sklist_lock);
++ return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
+ }
+
+ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++ struct net *net = seq->private;
+ ++*pos;
+ return (v == SEQ_START_TOKEN)
+- ? sk_head(&packet_sklist)
++ ? sk_head(&net->packet_sklist)
+ : sk_next((struct sock*)v) ;
+ }
+
+ static void packet_seq_stop(struct seq_file *seq, void *v)
+ {
+- read_unlock(&packet_sklist_lock);
++ struct net *net = seq->private;
++ read_unlock(&net->packet_sklist_lock);
+ }
+
+ static int packet_seq_show(struct seq_file *seq, void *v)
+@@ -1942,7 +1954,22 @@
+
+ static int packet_seq_open(struct inode *inode, struct file *file)
+ {
+- return seq_open(file, &packet_seq_ops);
++ struct seq_file *seq;
++ int res;
++ res = seq_open(file, &packet_seq_ops);
++ if (!res) {
++ seq = file->private_data;
++ seq->private = get_net(PROC_NET(inode));
++ }
++ return res;
++}
++
++static int packet_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++ struct net *net = seq->private;
++ put_net(net);
++ return seq_release(inode, file);
+ }
+
+ static const struct file_operations packet_seq_fops = {
+@@ -1950,15 +1977,36 @@
+ .open = packet_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = packet_seq_release,
+ };
+
+ #endif
+
++static int packet_net_init(struct net *net)
++{
++ rwlock_init(&net->packet_sklist_lock);
++ INIT_HLIST_HEAD(&net->packet_sklist);
++
++ if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
++ return -ENOMEM;
++
++ return 0;
++}
++
++static void packet_net_exit(struct net *net)
++{
++ proc_net_remove(net, "packet");
++}
++
++static struct pernet_operations packet_net_ops = {
++ .init = packet_net_init,
++ .exit = packet_net_exit,
++};
++
+ static void __exit packet_exit(void)
+ {
+- proc_net_remove("packet");
+ unregister_netdevice_notifier(&packet_netdev_notifier);
++ unregister_pernet_subsys(&packet_net_ops);
+ sock_unregister(PF_PACKET);
+ proto_unregister(&packet_proto);
+ }
+@@ -1971,8 +2020,8 @@
+ goto out;
+
+ sock_register(&packet_family_ops);
++ register_pernet_subsys(&packet_net_ops);
+ register_netdevice_notifier(&packet_netdev_notifier);
+- proc_net_fops_create("packet", 0, &packet_seq_fops);
+ out:
+ return rc;
+ }
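+
+The af_packet conversion is the most complete one in this section: the
+former globals packet_sklist and packet_sklist_lock disappear and the
+pernet init sets up per-namespace replacements. The companion hunk adding
+those fields to struct net lives elsewhere in this series; for reference
+it amounts to roughly the following sketch:
+
+	struct net {
+		/* ... existing members ... */
+		rwlock_t		packet_sklist_lock;
+		struct hlist_head	packet_sklist;
+	};
+
+Everything else follows mechanically: list walks take the namespace from
+the socket or the seq_file, and the netdevice notifier derives it from
+dev->nd_net, so each namespace only ever sees its own packet sockets.
+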
+diff -Nurb linux-2.6.22-570/net/rose/af_rose.c linux-2.6.22-591/net/rose/af_rose.c
+--- linux-2.6.22-570/net/rose/af_rose.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/rose/af_rose.c 2007-12-21 15:36:15.000000000 -0500
+@@ -45,6 +45,7 @@
+ #include <net/tcp_states.h>
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+
+ static int rose_ndevs = 10;
+
+@@ -196,6 +197,9 @@
+ {
+ struct net_device *dev = (struct net_device *)ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+@@ -498,15 +502,18 @@
+ .obj_size = sizeof(struct rose_sock),
+ };
+
+-static int rose_create(struct socket *sock, int protocol)
++static int rose_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct rose_sock *rose;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (sock->type != SOCK_SEQPACKET || protocol != 0)
+ return -ESOCKTNOSUPPORT;
+
+- if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
++ if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+ return -ENOMEM;
+
+ rose = rose_sk(sk);
+@@ -544,7 +551,7 @@
+ if (osk->sk_type != SOCK_SEQPACKET)
+ return NULL;
+
+- if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
++ if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+ return NULL;
+
+ rose = rose_sk(sk);
+@@ -1576,10 +1583,10 @@
+
+ rose_add_loopback_neigh();
+
+- proc_net_fops_create("rose", S_IRUGO, &rose_info_fops);
+- proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops);
+- proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops);
+- proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops);
++ proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops);
++ proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops);
++ proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops);
++ proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops);
+ out:
+ return rc;
+ fail:
+@@ -1606,10 +1613,10 @@
+ {
+ int i;
+
+- proc_net_remove("rose");
+- proc_net_remove("rose_neigh");
+- proc_net_remove("rose_nodes");
+- proc_net_remove("rose_routes");
++ proc_net_remove(&init_net, "rose");
++ proc_net_remove(&init_net, "rose_neigh");
++ proc_net_remove(&init_net, "rose_nodes");
++ proc_net_remove(&init_net, "rose_routes");
+ rose_loopback_clear();
+
+ rose_rt_free();
+diff -Nurb linux-2.6.22-570/net/rose/rose_route.c linux-2.6.22-591/net/rose/rose_route.c
+--- linux-2.6.22-570/net/rose/rose_route.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/rose/rose_route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -583,7 +583,7 @@
+ {
+ struct net_device *dev;
+
+- if ((dev = dev_get_by_name(devname)) == NULL)
++ if ((dev = dev_get_by_name(&init_net, devname)) == NULL)
+ return NULL;
+
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+@@ -601,7 +601,7 @@
+ struct net_device *dev, *first = NULL;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
+ if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+ first = dev;
+@@ -619,7 +619,7 @@
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
+ dev_hold(dev);
+ goto out;
+@@ -636,7 +636,7 @@
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
+ goto out;
+ }
+diff -Nurb linux-2.6.22-570/net/rxrpc/af_rxrpc.c linux-2.6.22-591/net/rxrpc/af_rxrpc.c
+--- linux-2.6.22-570/net/rxrpc/af_rxrpc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/rxrpc/af_rxrpc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/poll.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/af_rxrpc.h>
+ #include "ar-internal.h"
+@@ -605,13 +606,16 @@
+ /*
+ * create an RxRPC socket
+ */
+-static int rxrpc_create(struct socket *sock, int protocol)
++static int rxrpc_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ _enter("%p,%d", sock, protocol);
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ /* we support transport protocol UDP only */
+ if (protocol != PF_INET)
+ return -EPROTONOSUPPORT;
+@@ -622,7 +626,7 @@
+ sock->ops = &rxrpc_rpc_ops;
+ sock->state = SS_UNCONNECTED;
+
+- sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
++ sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+ if (!sk)
+ return -ENOMEM;
+
+@@ -829,8 +833,8 @@
+ }
+
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+- proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
++ proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops);
++ proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+ #endif
+ return 0;
+
+@@ -868,8 +872,8 @@
+
+ _debug("flush scheduled work");
+ flush_workqueue(rxrpc_workqueue);
+- proc_net_remove("rxrpc_conns");
+- proc_net_remove("rxrpc_calls");
++ proc_net_remove(&init_net, "rxrpc_conns");
++ proc_net_remove(&init_net, "rxrpc_calls");
+ destroy_workqueue(rxrpc_workqueue);
+ kmem_cache_destroy(rxrpc_call_jar);
+ _leave("");
+diff -Nurb linux-2.6.22-570/net/sched/act_api.c linux-2.6.22-591/net/sched/act_api.c
+--- linux-2.6.22-570/net/sched/act_api.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/act_api.c 2007-12-21 15:36:15.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/init.h>
+ #include <linux/kmod.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/sch_generic.h>
+ #include <net/act_api.h>
+@@ -675,7 +676,7 @@
+ return -EINVAL;
+ }
+
+- return rtnl_unicast(skb, pid);
++ return rtnl_unicast(skb, &init_net, pid);
+ }
+
+ static struct tc_action *
+@@ -796,7 +797,7 @@
+ nlh->nlmsg_flags |= NLM_F_ROOT;
+ module_put(a->ops->owner);
+ kfree(a);
+- err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ if (err > 0)
+ return 0;
+
+@@ -859,7 +860,7 @@
+
+ /* now do the delete */
+ tcf_action_destroy(head, 0);
+- ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
++ ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+ n->nlmsg_flags&NLM_F_ECHO);
+ if (ret > 0)
+ return 0;
+@@ -903,7 +904,7 @@
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ NETLINK_CB(skb).dst_group = RTNLGRP_TC;
+
+- err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
++ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
+@@ -941,10 +942,14 @@
+
+ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct rtattr **tca = arg;
+ u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ int ret = 0, ovr = 0;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ if (tca[TCA_ACT_TAB-1] == NULL) {
+ printk("tc_ctl_action: received NO action attribs\n");
+ return -EINVAL;
+@@ -1014,6 +1019,7 @@
+ static int
+ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct rtattr *x;
+@@ -1023,6 +1029,9 @@
+ struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+ struct rtattr *kind = find_dump_kind(cb->nlh);
+
++ if (net != &init_net)
++ return 0;
++
+ if (kind == NULL) {
+ printk("tc_dump_action: action bad kind\n");
+ return 0;
+diff -Nurb linux-2.6.22-570/net/sched/act_mirred.c linux-2.6.22-591/net/sched/act_mirred.c
+--- linux-2.6.22-570/net/sched/act_mirred.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/act_mirred.c 2007-12-21 15:36:15.000000000 -0500
+@@ -85,7 +85,7 @@
+ parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
+
+ if (parm->ifindex) {
+- dev = __dev_get_by_index(parm->ifindex);
++ dev = __dev_get_by_index(&init_net, parm->ifindex);
+ if (dev == NULL)
+ return -ENODEV;
+ switch (dev->type) {
+diff -Nurb linux-2.6.22-570/net/sched/cls_api.c linux-2.6.22-591/net/sched/cls_api.c
+--- linux-2.6.22-570/net/sched/cls_api.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/cls_api.c 2007-12-21 15:36:15.000000000 -0500
+@@ -129,6 +129,7 @@
+
+ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct rtattr **tca;
+ struct tcmsg *t;
+ u32 protocol;
+@@ -145,6 +146,9 @@
+ unsigned long fh;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ replay:
+ tca = arg;
+ t = NLMSG_DATA(n);
+@@ -164,7 +168,7 @@
+ /* Find head of filter chain. */
+
+ /* Find link */
+- if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ /* Find qdisc */
+@@ -365,7 +369,7 @@
+ return -EINVAL;
+ }
+
+- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ }
+
+ struct tcf_dump_args
+@@ -385,6 +389,7 @@
+
+ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int t;
+ int s_t;
+ struct net_device *dev;
+@@ -395,9 +400,12 @@
+ struct Qdisc_class_ops *cops;
+ struct tcf_dump_args arg;
+
++ if (net != &init_net)
++ return 0;
++
+ if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ return skb->len;
+- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return skb->len;
+
+ if (!tcm->tcm_parent)
+diff -Nurb linux-2.6.22-570/net/sched/em_meta.c linux-2.6.22-591/net/sched/em_meta.c
+--- linux-2.6.22-570/net/sched/em_meta.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/em_meta.c 2007-12-21 15:36:15.000000000 -0500
+@@ -291,7 +291,7 @@
+ } else {
+ struct net_device *dev;
+
+- dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
++ dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if);
+ *err = var_dev(dev, dst);
+ if (dev)
+ dev_put(dev);
+diff -Nurb linux-2.6.22-570/net/sched/sch_api.c linux-2.6.22-591/net/sched/sch_api.c
+--- linux-2.6.22-570/net/sched/sch_api.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/sched/sch_api.c 2007-12-21 15:36:15.000000000 -0500
+@@ -35,6 +35,7 @@
+ #include <linux/bitops.h>
+ #include <linux/hrtimer.h>
+
++#include <net/net_namespace.h>
+ #include <net/netlink.h>
+ #include <net/sock.h>
+ #include <net/pkt_sched.h>
+@@ -609,6 +610,7 @@
+
+ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct tcmsg *tcm = NLMSG_DATA(n);
+ struct rtattr **tca = arg;
+ struct net_device *dev;
+@@ -617,7 +619,10 @@
+ struct Qdisc *p = NULL;
+ int err;
+
+- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++ if (net != &init_net)
++ return -EINVAL;
++
++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ if (clid) {
+@@ -670,6 +675,7 @@
+
+ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct tcmsg *tcm;
+ struct rtattr **tca;
+ struct net_device *dev;
+@@ -677,6 +683,9 @@
+ struct Qdisc *q, *p;
+ int err;
+
++ if (net != &init_net)
++ return -EINVAL;
++
+ replay:
+ /* Reinit, just in case something touches this. */
+ tcm = NLMSG_DATA(n);
+@@ -684,7 +693,7 @@
+ clid = tcm->tcm_parent;
+ q = p = NULL;
+
+- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ if (clid) {
+@@ -873,7 +882,7 @@
+ }
+
+ if (skb->len)
+- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+
+ err_out:
+ kfree_skb(skb);
+@@ -882,16 +891,20 @@
+
+ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int idx, q_idx;
+ int s_idx, s_q_idx;
+ struct net_device *dev;
+ struct Qdisc *q;
+
++ if (net != &init_net)
++ return 0;
++
+ s_idx = cb->args[0];
+ s_q_idx = q_idx = cb->args[1];
+ read_lock(&dev_base_lock);
+ idx = 0;
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+@@ -930,6 +943,7 @@
+
+ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct tcmsg *tcm = NLMSG_DATA(n);
+ struct rtattr **tca = arg;
+ struct net_device *dev;
+@@ -942,7 +956,10 @@
+ u32 qid = TC_H_MAJ(clid);
+ int err;
+
+- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++ if (net != &init_net)
++ return -EINVAL;
++
++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ /*
+@@ -1096,7 +1113,7 @@
+ return -EINVAL;
+ }
+
+- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ }
+
+ struct qdisc_dump_args
+@@ -1116,6 +1133,7 @@
+
+ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++ struct net *net = skb->sk->sk_net;
+ int t;
+ int s_t;
+ struct net_device *dev;
+@@ -1123,9 +1141,12 @@
+ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+ struct qdisc_dump_args arg;
+
++ if (net != &init_net)
++ return 0;
++
+ if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ return 0;
+- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return 0;
+
+ s_t = cb->args[0];
+@@ -1252,7 +1273,7 @@
+ {
+ register_qdisc(&pfifo_qdisc_ops);
+ register_qdisc(&bfifo_qdisc_ops);
+- proc_net_fops_create("psched", 0, &psched_fops);
++ proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
+
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
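+
+The scheduler control path applies the init_net restriction in two
+flavors, repeated across act_api, cls_api and sch_api above: request
+handlers fail hard with -EINVAL, while dump handlers return an empty
+result so foreign namespaces simply see no qdiscs, classes or actions.
+Sketched with hypothetical handler names:
+
+	static int foo_doit(struct sk_buff *skb, struct nlmsghdr *n,
+			    void *arg)
+	{
+		struct net *net = skb->sk->sk_net;
+
+		if (net != &init_net)	/* modification request: reject */
+			return -EINVAL;
+		/* ... the real work would go here ... */
+		return 0;
+	}
+
+	static int foo_dumpit(struct sk_buff *skb,
+			      struct netlink_callback *cb)
+	{
+		struct net *net = skb->sk->sk_net;
+
+		if (net != &init_net)	/* dump request: show nothing */
+			return 0;
+		/* ... fill skb with this namespace's objects ... */
+		return skb->len;
+	}
+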
+diff -Nurb linux-2.6.22-570/net/sched/sch_generic.c linux-2.6.22-591/net/sched/sch_generic.c
+--- linux-2.6.22-570/net/sched/sch_generic.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/sch_generic.c 2007-12-21 15:36:12.000000000 -0500
+@@ -59,122 +59,143 @@
+ spin_unlock_bh(&dev->queue_lock);
+ }
+
+-/*
+- dev->queue_lock serializes queue accesses for this device
+- AND dev->qdisc pointer itself.
++static inline int qdisc_qlen(struct Qdisc *q)
++{
++ return q->q.qlen;
++}
+
+- netif_tx_lock serializes accesses to device driver.
++static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
++ struct Qdisc *q)
++{
++ if (unlikely(skb->next))
++ dev->gso_skb = skb;
++ else
++ q->ops->requeue(skb, q);
+
+- dev->queue_lock and netif_tx_lock are mutually exclusive,
+- if one is grabbed, another must be free.
+- */
++ netif_schedule(dev);
++ return 0;
++}
+
++static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
++ struct Qdisc *q)
++{
++ struct sk_buff *skb;
+
+-/* Kick device.
++ if ((skb = dev->gso_skb))
++ dev->gso_skb = NULL;
++ else
++ skb = q->dequeue(q);
+
+- Returns: 0 - queue is empty or throttled.
+- >0 - queue is not empty.
++ return skb;
++}
+
+- NOTE: Called under dev->queue_lock with locally disabled BH.
+-*/
++static inline int handle_dev_cpu_collision(struct sk_buff *skb,
++ struct net_device *dev,
++ struct Qdisc *q)
++{
++ int ret;
+
++ if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
++ /*
++ * Same CPU holding the lock. It may be a transient
++ * configuration error, when hard_start_xmit() recurses. We
++ * detect it by checking xmit owner and drop the packet when
++ * deadloop is detected. Return OK to try the next skb.
++ */
++ kfree_skb(skb);
++ if (net_ratelimit())
++ printk(KERN_WARNING "Dead loop on netdevice %s, "
++ "fix it urgently!\n", dev->name);
++ ret = qdisc_qlen(q);
++ } else {
++ /*
++ * Another cpu is holding lock, requeue & delay xmits for
++ * some time.
++ */
++ __get_cpu_var(netdev_rx_stat).cpu_collision++;
++ ret = dev_requeue_skb(skb, dev, q);
++ }
++
++ return ret;
++}
++
++/*
++ * NOTE: Called under dev->queue_lock with locally disabled BH.
++ *
++ * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
++ * device at a time. dev->queue_lock serializes queue accesses for
++ * this device AND dev->qdisc pointer itself.
++ *
++ * netif_tx_lock serializes accesses to device driver.
++ *
++ * dev->queue_lock and netif_tx_lock are mutually exclusive,
++ * if one is grabbed, another must be free.
++ *
++ * Note, that this procedure can be called by a watchdog timer
++ *
++ * Returns to the caller:
++ * 0 - queue is empty or throttled.
++ * >0 - queue is not empty.
++ *
++ */
+ static inline int qdisc_restart(struct net_device *dev)
+ {
+ struct Qdisc *q = dev->qdisc;
+ struct sk_buff *skb;
++ unsigned lockless;
++ int ret;
+
+ /* Dequeue packet */
+- if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
+- unsigned nolock = (dev->features & NETIF_F_LLTX);
+-
+- dev->gso_skb = NULL;
++ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
++ return 0;
+
+ /*
+- * When the driver has LLTX set it does its own locking
+- * in start_xmit. No need to add additional overhead by
+- * locking again. These checks are worth it because
+- * even uncongested locks can be quite expensive.
+- * The driver can do trylock like here too, in case
+- * of lock congestion it should return -1 and the packet
+- * will be requeued.
+- */
+- if (!nolock) {
+- if (!netif_tx_trylock(dev)) {
+- collision:
+- /* So, someone grabbed the driver. */
+-
+- /* It may be transient configuration error,
+- when hard_start_xmit() recurses. We detect
+- it by checking xmit owner and drop the
+- packet when deadloop is detected.
++ * When the driver has LLTX set, it does its own locking in
++ * start_xmit. These checks are worth it because even uncongested
++ * locks can be quite expensive. The driver can do a trylock, as
++ * is being done here; in case of lock contention it should return
++ * NETDEV_TX_LOCKED and the packet will be requeued.
+ */
+- if (dev->xmit_lock_owner == smp_processor_id()) {
+- kfree_skb(skb);
+- if (net_ratelimit())
+- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+- goto out;
+- }
+- __get_cpu_var(netdev_rx_stat).cpu_collision++;
+- goto requeue;
+- }
++ lockless = (dev->features & NETIF_F_LLTX);
++
++ if (!lockless && !netif_tx_trylock(dev)) {
++ /* Another CPU grabbed the driver tx lock */
++ return handle_dev_cpu_collision(skb, dev, q);
+ }
+
+- {
+ /* And release queue */
+ spin_unlock(&dev->queue_lock);
+
+- if (!netif_queue_stopped(dev)) {
+- int ret;
+-
+ ret = dev_hard_start_xmit(skb, dev);
+- if (ret == NETDEV_TX_OK) {
+- if (!nolock) {
+- netif_tx_unlock(dev);
+- }
+- spin_lock(&dev->queue_lock);
+- q = dev->qdisc;
+- goto out;
+- }
+- if (ret == NETDEV_TX_LOCKED && nolock) {
+- spin_lock(&dev->queue_lock);
+- q = dev->qdisc;
+- goto collision;
+- }
+- }
+
+- /* NETDEV_TX_BUSY - we need to requeue */
+- /* Release the driver */
+- if (!nolock) {
++ if (!lockless)
+ netif_tx_unlock(dev);
+- }
++
+ spin_lock(&dev->queue_lock);
+ q = dev->qdisc;
+- }
+
+- /* Device kicked us out :(
+- This is possible in three cases:
++ switch (ret) {
++ case NETDEV_TX_OK:
++ /* Driver sent out skb successfully */
++ ret = qdisc_qlen(q);
++ break;
+
+- 0. driver is locked
+- 1. fastroute is enabled
+- 2. device cannot determine busy state
+- before start of transmission (f.e. dialout)
+- 3. device is buggy (ppp)
+- */
++ case NETDEV_TX_LOCKED:
++ /* Driver try lock failed */
++ ret = handle_dev_cpu_collision(skb, dev, q);
++ break;
+
+-requeue:
+- if (unlikely(q == &noop_qdisc))
+- kfree_skb(skb);
+- else if (skb->next)
+- dev->gso_skb = skb;
+- else
+- q->ops->requeue(skb, q);
+- netif_schedule(dev);
++ default:
++ /* Driver returned NETDEV_TX_BUSY - requeue skb */
++		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
++ printk(KERN_WARNING "BUG %s code %d qlen %d\n",
++ dev->name, ret, q->q.qlen);
++
++ ret = dev_requeue_skb(skb, dev, q);
++ break;
+ }
+- return 0;
+
+-out:
+- BUG_ON((int) q->q.qlen < 0);
+- return q->q.qlen;
++ return ret;
+ }
+
+ void __qdisc_run(struct net_device *dev)
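+
+The qdisc_restart() rewrite above is behavior-preserving refactoring
+rather than namespace work: the goto-based flow is replaced by three
+helpers and a switch on the driver's return code. The resulting contract,
+condensed (this mirrors the code above and adds nothing new):
+
+	/*
+	 * qdisc_restart(dev):
+	 *   dev_dequeue_skb() == NULL        -> return 0 (queue empty)
+	 *   !LLTX && !netif_tx_trylock(dev)  -> handle_dev_cpu_collision()
+	 *   dev_hard_start_xmit(skb, dev):
+	 *     NETDEV_TX_OK     -> return qdisc_qlen(q)
+	 *     NETDEV_TX_LOCKED -> handle_dev_cpu_collision()
+	 *     otherwise (BUSY) -> dev_requeue_skb(), warn on unknown codes
+	 */
+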
+diff -Nurb linux-2.6.22-570/net/sched/sch_ingress.c linux-2.6.22-591/net/sched/sch_ingress.c
+--- linux-2.6.22-570/net/sched/sch_ingress.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/sch_ingress.c 2007-12-21 15:36:15.000000000 -0500
+@@ -243,6 +243,10 @@
+ struct net_device *dev = skb->dev;
+ int fwres=NF_ACCEPT;
+
++ /* Only filter packets in the initial network namespace */
++ if ((indev ? indev : outdev)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
+ skb->sk ? "(owned)" : "(unowned)",
+ skb->dev ? (*pskb)->dev->name : "(no dev)",
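
The two added lines above are the standard guard this series uses for code that is not yet namespace-aware: exactly one of indev/outdev is non-NULL at any given hook point, and its nd_net names the owning namespace. A hedged sketch of the same guard in a generic 2.6.22-style netfilter hook (example_hook is illustrative):

	static unsigned int example_hook(unsigned int hooknum,
					 struct sk_buff **pskb,
					 const struct net_device *indev,
					 const struct net_device *outdev,
					 int (*okfn)(struct sk_buff *))
	{
		/* Pass traffic from other namespaces through untouched. */
		if ((indev ? indev : outdev)->nd_net != &init_net)
			return NF_ACCEPT;

		/* ... init_net-only processing ... */
		return NF_ACCEPT;
	}
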
+diff -Nurb linux-2.6.22-570/net/sctp/input.c linux-2.6.22-591/net/sctp/input.c
+--- linux-2.6.22-570/net/sctp/input.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sctp/input.c 2007-12-21 15:36:15.000000000 -0500
+@@ -126,6 +126,10 @@
+ int family;
+ struct sctp_af *af;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (skb->pkt_type!=PACKET_HOST)
+ goto discard_it;
+
+@@ -509,6 +513,9 @@
+ sk_buff_data_t saveip, savesctp;
+ int err;
+
++ if (skb->dev->nd_net != &init_net)
++ return;
++
+ if (skb->len < ihlen + 8) {
+ ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ return;
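
Note the two different drop styles above: sctp_rcv() owns the skb, so a foreign-namespace packet must be freed and reported as consumed, while the ICMP error handler does not own the buffer and must simply return. A hedged sketch of the pattern (example_rcv and example_err are illustrative):

	static int example_rcv(struct sk_buff *skb)
	{
		if (skb->dev->nd_net != &init_net) {
			kfree_skb(skb);	/* receive path owns the skb */
			return 0;	/* report it as consumed */
		}
		/* ... normal receive processing ... */
		return 0;
	}

	static void example_err(struct sk_buff *skb, u32 info)
	{
		if (skb->dev->nd_net != &init_net)
			return;		/* ICMP layer still owns the skb */
		/* ... error processing ... */
	}
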
+diff -Nurb linux-2.6.22-570/net/sctp/ipv6.c linux-2.6.22-591/net/sctp/ipv6.c
+--- linux-2.6.22-570/net/sctp/ipv6.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/sctp/ipv6.c 2007-12-21 15:36:15.000000000 -0500
+@@ -189,6 +189,7 @@
+
+ memset(&fl, 0, sizeof(fl));
+
++ fl.fl_net = &init_net;
+ fl.proto = sk->sk_protocol;
+
+ /* Fill in the dest address from the route entry passed with the skb
+@@ -230,6 +231,7 @@
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
++ fl.fl_net = &init_net;
+ ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr);
+ if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ fl.oif = daddr->v6.sin6_scope_id;
+@@ -619,7 +621,7 @@
+ struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct sctp6_sock *newsctp6sk;
+
+- newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1);
++ newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1);
+ if (!newsk)
+ goto out;
+
+@@ -664,7 +666,7 @@
+ newinet->mc_index = 0;
+ newinet->mc_list = NULL;
+
+- if (ipv4_config.no_pmtu_disc)
++ if (init_net.sysctl_ipv4_no_pmtu_disc)
+ newinet->pmtudisc = IP_PMTUDISC_DONT;
+ else
+ newinet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -841,7 +843,7 @@
+ if (type & IPV6_ADDR_LINKLOCAL) {
+ if (!addr->v6.sin6_scope_id)
+ return 0;
+- dev = dev_get_by_index(addr->v6.sin6_scope_id);
++ dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
+ if (!dev)
+ return 0;
+ if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) {
+@@ -872,7 +874,7 @@
+ if (type & IPV6_ADDR_LINKLOCAL) {
+ if (!addr->v6.sin6_scope_id)
+ return 0;
+- dev = dev_get_by_index(addr->v6.sin6_scope_id);
++ dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
+ if (!dev)
+ return 0;
+ if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) {
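
Two recurring conversions run through these hunks: every struct flowi now carries the namespace it should be routed in (fl_net), and sk_alloc() takes the owning struct net as a new first argument so an accepted child socket inherits its parent's namespace. A hedged sketch of both, assuming the fields introduced by this series; example_fill_flow and example_clone are illustrative, and SCTP itself still pins fl_net to init_net because it is not namespace-aware yet:

	static void example_fill_flow(struct sock *sk, struct flowi *fl)
	{
		memset(fl, 0, sizeof(*fl));
		fl->fl_net = sk->sk_net;	/* route in the socket's namespace */
		fl->proto = sk->sk_protocol;
	}

	static struct sock *example_clone(struct sock *parent)
	{
		/* New first argument: the namespace the child socket lives in. */
		return sk_alloc(parent->sk_net, parent->sk_family,
				GFP_KERNEL, parent->sk_prot, 1);
	}
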
+diff -Nurb linux-2.6.22-570/net/sctp/protocol.c linux-2.6.22-591/net/sctp/protocol.c
+--- linux-2.6.22-570/net/sctp/protocol.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sctp/protocol.c 2007-12-21 15:36:15.000000000 -0500
+@@ -59,6 +59,7 @@
+ #include <net/addrconf.h>
+ #include <net/inet_common.h>
+ #include <net/inet_ecn.h>
++#include <net/net_namespace.h>
+
+ /* Global data structures. */
+ struct sctp_globals sctp_globals __read_mostly;
+@@ -93,7 +94,7 @@
+ {
+ if (!proc_net_sctp) {
+ struct proc_dir_entry *ent;
+- ent = proc_mkdir("net/sctp", NULL);
++ ent = proc_mkdir("sctp", init_net.proc_net);
+ if (ent) {
+ ent->owner = THIS_MODULE;
+ proc_net_sctp = ent;
+@@ -126,7 +127,7 @@
+
+ if (proc_net_sctp) {
+ proc_net_sctp = NULL;
+- remove_proc_entry("net/sctp", NULL);
++ remove_proc_entry("sctp", init_net.proc_net);
+ }
+ }
+
+@@ -170,7 +171,7 @@
+ struct sctp_af *af;
+
+ read_lock(&dev_base_lock);
+- for_each_netdev(dev) {
++ for_each_netdev(&init_net, dev) {
+ __list_for_each(pos, &sctp_address_families) {
+ af = list_entry(pos, struct sctp_af, list);
+ af->copy_addrlist(&sctp_local_addr_list, dev);
+@@ -354,13 +355,13 @@
+ /* Should this be available for binding? */
+ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
+ {
+- int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
++ int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
+
+
+ if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
+ ret != RTN_LOCAL &&
+ !sp->inet.freebind &&
+- !sysctl_ip_nonlocal_bind)
++ !init_net.sysctl_ip_nonlocal_bind)
+ return 0;
+
+ return 1;
+@@ -423,6 +424,7 @@
+ union sctp_addr dst_saddr;
+
+ memset(&fl, 0x0, sizeof(struct flowi));
++ fl.fl_net = &init_net;
+ fl.fl4_dst = daddr->v4.sin_addr.s_addr;
+ fl.proto = IPPROTO_SCTP;
+ if (asoc) {
+@@ -539,7 +541,7 @@
+ {
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+- struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1);
++ struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1);
+
+ if (!newsk)
+ goto out;
+@@ -1122,7 +1124,7 @@
+ }
+
+ spin_lock_init(&sctp_port_alloc_lock);
+- sctp_port_rover = sysctl_local_port_range[0] - 1;
++ sctp_port_rover = init_net.sysctl_local_port_range[0] - 1;
+
+ printk(KERN_INFO "SCTP: Hash tables configured "
+ "(established %d bind %d)\n",
+diff -Nurb linux-2.6.22-570/net/sctp/socket.c linux-2.6.22-591/net/sctp/socket.c
+--- linux-2.6.22-570/net/sctp/socket.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sctp/socket.c 2007-12-21 15:36:15.000000000 -0500
+@@ -5021,8 +5021,8 @@
+ * already in the hash table; if not, we use that; if
+ * it is, we try next.
+ */
+- int low = sysctl_local_port_range[0];
+- int high = sysctl_local_port_range[1];
++ int low = sk->sk_net->sysctl_local_port_range[0];
++ int high = sk->sk_net->sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+ int rover;
+ int index;
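
The port-range reads above are the same conversion in miniature: the former global sysctl_local_port_range[] becomes per-namespace state reached through the socket. A hedged sketch of an ephemeral-port pick driven by it (example_pick_port and the random starting point are illustrative):

	static int example_pick_port(struct sock *sk)
	{
		int low = sk->sk_net->sysctl_local_port_range[0];
		int high = sk->sk_net->sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover = low + (net_random() % remaining);

		/* ... probe rover, wrapping back to low after high ... */
		return rover;
	}
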
+diff -Nurb linux-2.6.22-570/net/socket.c linux-2.6.22-591/net/socket.c
+--- linux-2.6.22-570/net/socket.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/socket.c 2007-12-21 15:36:15.000000000 -0500
+@@ -84,6 +84,7 @@
+ #include <linux/kmod.h>
+ #include <linux/audit.h>
+ #include <linux/wireless.h>
++#include <linux/nsproxy.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+@@ -821,9 +822,9 @@
+ */
+
+ static DEFINE_MUTEX(br_ioctl_mutex);
+-static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
++static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
+
+-void brioctl_set(int (*hook) (unsigned int, void __user *))
++void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
+ {
+ mutex_lock(&br_ioctl_mutex);
+ br_ioctl_hook = hook;
+@@ -833,9 +834,9 @@
+ EXPORT_SYMBOL(brioctl_set);
+
+ static DEFINE_MUTEX(vlan_ioctl_mutex);
+-static int (*vlan_ioctl_hook) (void __user *arg);
++static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
+
+-void vlan_ioctl_set(int (*hook) (void __user *))
++void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
+ {
+ mutex_lock(&vlan_ioctl_mutex);
+ vlan_ioctl_hook = hook;
+@@ -864,16 +865,20 @@
+ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+ {
+ struct socket *sock;
++ struct sock *sk;
+ void __user *argp = (void __user *)arg;
+ int pid, err;
++ struct net *net;
+
+ sock = file->private_data;
++ sk = sock->sk;
++ net = sk->sk_net;
+ if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
+- err = dev_ioctl(cmd, argp);
++ err = dev_ioctl(net, cmd, argp);
+ } else
+ #ifdef CONFIG_WIRELESS_EXT
+ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+- err = dev_ioctl(cmd, argp);
++ err = dev_ioctl(net, cmd, argp);
+ } else
+ #endif /* CONFIG_WIRELESS_EXT */
+ switch (cmd) {
+@@ -899,7 +904,7 @@
+
+ mutex_lock(&br_ioctl_mutex);
+ if (br_ioctl_hook)
+- err = br_ioctl_hook(cmd, argp);
++ err = br_ioctl_hook(net, cmd, argp);
+ mutex_unlock(&br_ioctl_mutex);
+ break;
+ case SIOCGIFVLAN:
+@@ -910,7 +915,7 @@
+
+ mutex_lock(&vlan_ioctl_mutex);
+ if (vlan_ioctl_hook)
+- err = vlan_ioctl_hook(argp);
++ err = vlan_ioctl_hook(net, argp);
+ mutex_unlock(&vlan_ioctl_mutex);
+ break;
+ case SIOCADDDLCI:
+@@ -933,7 +938,7 @@
+ * to the NIC driver.
+ */
+ if (err == -ENOIOCTLCMD)
+- err = dev_ioctl(cmd, argp);
++ err = dev_ioctl(net, cmd, argp);
+ break;
+ }
+ return err;
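
sock_ioctl() now resolves the namespace once, from the socket it was invoked on, and threads it through dev_ioctl() and the bridge/VLAN hooks, whose function-pointer types grew a struct net * first parameter. A hedged sketch of a module-side hook matching the widened brioctl_set() signature (example_br_ioctl is illustrative):

	static int example_br_ioctl(struct net *net, unsigned int cmd,
				    void __user *arg)
	{
		if (net != &init_net)
			return -EOPNOTSUPP;	/* bridge is not per-net yet */
		/* ... existing ioctl handling ... */
		return 0;
	}

	static int __init example_init(void)
	{
		brioctl_set(example_br_ioctl);
		return 0;
	}
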
+@@ -1102,7 +1107,7 @@
+ return 0;
+ }
+
+-static int __sock_create(int family, int type, int protocol,
++static int __sock_create(struct net *net, int family, int type, int protocol,
+ struct socket **res, int kern)
+ {
+ int err;
+@@ -1185,7 +1190,7 @@
+ /* Now protected by module ref count */
+ rcu_read_unlock();
+
+- err = pf->create(sock, protocol);
++ err = pf->create(net, sock, protocol);
+ if (err < 0)
+ goto out_module_put;
+
+@@ -1224,12 +1229,12 @@
+
+ int sock_create(int family, int type, int protocol, struct socket **res)
+ {
+- return __sock_create(family, type, protocol, res, 0);
++ return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
+ }
+
+ int sock_create_kern(int family, int type, int protocol, struct socket **res)
+ {
+- return __sock_create(family, type, protocol, res, 1);
++ return __sock_create(&init_net, family, type, protocol, res, 1);
+ }
+
+ asmlinkage long sys_socket(int family, int type, int protocol)
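
The split above fixes the creation-time rule: sockets made on behalf of a user task join that task's namespace (current->nsproxy->net_ns), while kernel-internal sockets stay pinned to init_net. A hedged sketch of the caller-visible behaviour (example_open is illustrative):

	static int example_open(struct socket **user_sock, struct socket **kern_sock)
	{
		int err;

		/* Lands in the calling process's network namespace. */
		err = sock_create(AF_INET, SOCK_DGRAM, 0, user_sock);
		if (err < 0)
			return err;

		/* Always lands in the initial namespace. */
		err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, kern_sock);
		if (err < 0)
			sock_release(*user_sock);
		return err;
	}
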
+@@ -1389,8 +1394,6 @@
+ * ready for listening.
+ */
+
+-int sysctl_somaxconn __read_mostly = SOMAXCONN;
+-
+ asmlinkage long sys_listen(int fd, int backlog)
+ {
+ struct socket *sock;
+@@ -1398,8 +1401,9 @@
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (sock) {
+- if ((unsigned)backlog > sysctl_somaxconn)
+- backlog = sysctl_somaxconn;
++ struct net *net = sock->sk->sk_net;
++ if ((unsigned)backlog > net->sysctl_somaxconn)
++ backlog = net->sysctl_somaxconn;
+
+ err = security_socket_listen(sock, backlog);
+ if (!err)
+@@ -2189,6 +2193,16 @@
+ printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
+ }
+
++static int sock_pernet_init(struct net *net)
++{
++ net->sysctl_somaxconn = SOMAXCONN;
++ return 0;
++}
++
++static struct pernet_operations sock_net_ops = {
++ .init = sock_pernet_init,
++};
++
+ static int __init sock_init(void)
+ {
+ /*
+@@ -2217,6 +2231,8 @@
+ netfilter_init();
+ #endif
+
++ register_pernet_subsys(&sock_net_ops);
++
+ return 0;
+ }
+
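
The pernet_operations block above replaces the former global sysctl_somaxconn with per-namespace state: .init runs for init_net at boot and again for every namespace created afterwards. A hedged sketch of the full pattern, including the optional .exit hook defined by this series (example names are illustrative):

	static int example_pernet_init(struct net *net)
	{
		net->sysctl_somaxconn = SOMAXCONN;	/* per-net default */
		return 0;
	}

	static void example_pernet_exit(struct net *net)
	{
		/* release anything example_pernet_init() allocated */
	}

	static struct pernet_operations example_net_ops = {
		.init = example_pernet_init,
		.exit = example_pernet_exit,
	};

	static int __init example_module_init(void)
	{
		return register_pernet_subsys(&example_net_ops);
	}
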
+diff -Nurb linux-2.6.22-570/net/socket.c.orig linux-2.6.22-591/net/socket.c.orig
+--- linux-2.6.22-570/net/socket.c.orig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,2344 +0,0 @@
+-/*
+- * NET An implementation of the SOCKET network access protocol.
+- *
+- * Version: @(#)socket.c 1.1.93 18/02/95
+- *
+- * Authors: Orest Zborowski, <obz@Kodak.COM>
+- * Ross Biro
+- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+- *
+- * Fixes:
+- * Anonymous : NOTSOCK/BADF cleanup. Error fix in
+- * shutdown()
+- * Alan Cox : verify_area() fixes
+- * Alan Cox : Removed DDI
+- * Jonathan Kamens : SOCK_DGRAM reconnect bug
+- * Alan Cox : Moved a load of checks to the very
+- * top level.
+- * Alan Cox : Move address structures to/from user
+- * mode above the protocol layers.
+- * Rob Janssen : Allow 0 length sends.
+- * Alan Cox : Asynchronous I/O support (cribbed from the
+- * tty drivers).
+- * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
+- * Jeff Uphoff : Made max number of sockets command-line
+- * configurable.
+- * Matti Aarnio : Made the number of sockets dynamic,
+- * to be allocated when needed, and mr.
+- * Uphoff's max is used as max to be
+- * allowed to allocate.
+- * Linus : Argh. removed all the socket allocation
+- * altogether: it's in the inode now.
+- * Alan Cox : Made sock_alloc()/sock_release() public
+- * for NetROM and future kernel nfsd type
+- * stuff.
+- * Alan Cox : sendmsg/recvmsg basics.
+- * Tom Dyas : Export net symbols.
+- * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
+- * Alan Cox : Added thread locking to sys_* calls
+- * for sockets. May have errors at the
+- * moment.
+- * Kevin Buhr : Fixed the dumb errors in the above.
+- * Andi Kleen : Some small cleanups, optimizations,
+- * and fixed a copy_from_user() bug.
+- * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
+- * Tigran Aivazian : Made listen(2) backlog sanity checks
+- * protocol-independent
+- *
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License
+- * as published by the Free Software Foundation; either version
+- * 2 of the License, or (at your option) any later version.
+- *
+- *
+- * This module is effectively the top level interface to the BSD socket
+- * paradigm.
+- *
+- * Based upon Swansea University Computer Society NET3.039
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/socket.h>
+-#include <linux/file.h>
+-#include <linux/net.h>
+-#include <linux/interrupt.h>
+-#include <linux/rcupdate.h>
+-#include <linux/netdevice.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/mutex.h>
+-#include <linux/wanrouter.h>
+-#include <linux/if_bridge.h>
+-#include <linux/if_frad.h>
+-#include <linux/if_vlan.h>
+-#include <linux/init.h>
+-#include <linux/poll.h>
+-#include <linux/cache.h>
+-#include <linux/module.h>
+-#include <linux/highmem.h>
+-#include <linux/mount.h>
+-#include <linux/security.h>
+-#include <linux/syscalls.h>
+-#include <linux/compat.h>
+-#include <linux/kmod.h>
+-#include <linux/audit.h>
+-#include <linux/wireless.h>
+-
+-#include <asm/uaccess.h>
+-#include <asm/unistd.h>
+-
+-#include <net/compat.h>
+-
+-#include <net/sock.h>
+-#include <linux/netfilter.h>
+-
+-static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
+-static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+- unsigned long nr_segs, loff_t pos);
+-static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+- unsigned long nr_segs, loff_t pos);
+-static int sock_mmap(struct file *file, struct vm_area_struct *vma);
+-
+-static int sock_close(struct inode *inode, struct file *file);
+-static unsigned int sock_poll(struct file *file,
+- struct poll_table_struct *wait);
+-static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+-#ifdef CONFIG_COMPAT
+-static long compat_sock_ioctl(struct file *file,
+- unsigned int cmd, unsigned long arg);
+-#endif
+-static int sock_fasync(int fd, struct file *filp, int on);
+-static ssize_t sock_sendpage(struct file *file, struct page *page,
+- int offset, size_t size, loff_t *ppos, int more);
+-
+-/*
+- * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
+- * in the operation structures but are done directly via the socketcall() multiplexor.
+- */
+-
+-static const struct file_operations socket_file_ops = {
+- .owner = THIS_MODULE,
+- .llseek = no_llseek,
+- .aio_read = sock_aio_read,
+- .aio_write = sock_aio_write,
+- .poll = sock_poll,
+- .unlocked_ioctl = sock_ioctl,
+-#ifdef CONFIG_COMPAT
+- .compat_ioctl = compat_sock_ioctl,
+-#endif
+- .mmap = sock_mmap,
+- .open = sock_no_open, /* special open code to disallow open via /proc */
+- .release = sock_close,
+- .fasync = sock_fasync,
+- .sendpage = sock_sendpage,
+- .splice_write = generic_splice_sendpage,
+-};
+-
+-/*
+- * The protocol list. Each protocol is registered in here.
+- */
+-
+-static DEFINE_SPINLOCK(net_family_lock);
+-static const struct net_proto_family *net_families[NPROTO] __read_mostly;
+-
+-/*
+- * Statistics counters of the socket lists
+- */
+-
+-static DEFINE_PER_CPU(int, sockets_in_use) = 0;
+-
+-/*
+- * Support routines.
+- * Move socket addresses back and forth across the kernel/user
+- * divide and look after the messy bits.
+- */
+-
+-#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
+- 16 for IP, 16 for IPX,
+- 24 for IPv6,
+- about 80 for AX.25
+- must be at least one bigger than
+- the AF_UNIX size (see net/unix/af_unix.c
+- :unix_mkname()).
+- */
+-
+-/**
+- * move_addr_to_kernel - copy a socket address into kernel space
+- * @uaddr: Address in user space
+- * @kaddr: Address in kernel space
+- * @ulen: Length in user space
+- *
+- * The address is copied into kernel space. If the provided address is
+- * too long an error code of -EINVAL is returned. If the copy gives
+- * invalid addresses -EFAULT is returned. On a success 0 is returned.
+- */
+-
+-int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
+-{
+- if (ulen < 0 || ulen > MAX_SOCK_ADDR)
+- return -EINVAL;
+- if (ulen == 0)
+- return 0;
+- if (copy_from_user(kaddr, uaddr, ulen))
+- return -EFAULT;
+- return audit_sockaddr(ulen, kaddr);
+-}
+-
+-/**
+- * move_addr_to_user - copy an address to user space
+- * @kaddr: kernel space address
+- * @klen: length of address in kernel
+- * @uaddr: user space address
+- * @ulen: pointer to user length field
+- *
+- * The value pointed to by ulen on entry is the buffer length available.
+- * This is overwritten with the buffer space used. -EINVAL is returned
+- * if an overlong buffer is specified or a negative buffer size. -EFAULT
+- * is returned if either the buffer or the length field are not
+- * accessible.
+- * After copying the data up to the limit the user specifies, the true
+- * length of the data is written over the length limit the user
+- * specified. Zero is returned for a success.
+- */
+-
+-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+- int __user *ulen)
+-{
+- int err;
+- int len;
+-
+- err = get_user(len, ulen);
+- if (err)
+- return err;
+- if (len > klen)
+- len = klen;
+- if (len < 0 || len > MAX_SOCK_ADDR)
+- return -EINVAL;
+- if (len) {
+- if (audit_sockaddr(klen, kaddr))
+- return -ENOMEM;
+- if (copy_to_user(uaddr, kaddr, len))
+- return -EFAULT;
+- }
+- /*
+- * "fromlen shall refer to the value before truncation.."
+- * 1003.1g
+- */
+- return __put_user(klen, ulen);
+-}
+-
+-#define SOCKFS_MAGIC 0x534F434B
+-
+-static struct kmem_cache *sock_inode_cachep __read_mostly;
+-
+-static struct inode *sock_alloc_inode(struct super_block *sb)
+-{
+- struct socket_alloc *ei;
+-
+- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
+- if (!ei)
+- return NULL;
+- init_waitqueue_head(&ei->socket.wait);
+-
+- ei->socket.fasync_list = NULL;
+- ei->socket.state = SS_UNCONNECTED;
+- ei->socket.flags = 0;
+- ei->socket.ops = NULL;
+- ei->socket.sk = NULL;
+- ei->socket.file = NULL;
+-
+- return &ei->vfs_inode;
+-}
+-
+-static void sock_destroy_inode(struct inode *inode)
+-{
+- kmem_cache_free(sock_inode_cachep,
+- container_of(inode, struct socket_alloc, vfs_inode));
+-}
+-
+-static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
+-{
+- struct socket_alloc *ei = (struct socket_alloc *)foo;
+-
+- inode_init_once(&ei->vfs_inode);
+-}
+-
+-static int init_inodecache(void)
+-{
+- sock_inode_cachep = kmem_cache_create("sock_inode_cache",
+- sizeof(struct socket_alloc),
+- 0,
+- (SLAB_HWCACHE_ALIGN |
+- SLAB_RECLAIM_ACCOUNT |
+- SLAB_MEM_SPREAD),
+- init_once,
+- NULL);
+- if (sock_inode_cachep == NULL)
+- return -ENOMEM;
+- return 0;
+-}
+-
+-static struct super_operations sockfs_ops = {
+- .alloc_inode = sock_alloc_inode,
+- .destroy_inode =sock_destroy_inode,
+- .statfs = simple_statfs,
+-};
+-
+-static int sockfs_get_sb(struct file_system_type *fs_type,
+- int flags, const char *dev_name, void *data,
+- struct vfsmount *mnt)
+-{
+- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
+- mnt);
+-}
+-
+-static struct vfsmount *sock_mnt __read_mostly;
+-
+-static struct file_system_type sock_fs_type = {
+- .name = "sockfs",
+- .get_sb = sockfs_get_sb,
+- .kill_sb = kill_anon_super,
+-};
+-
+-static int sockfs_delete_dentry(struct dentry *dentry)
+-{
+- /*
+- * At creation time, we pretended this dentry was hashed
+- * (by clearing DCACHE_UNHASHED bit in d_flags)
+- * At delete time, we restore the truth : not hashed.
+- * (so that dput() can proceed correctly)
+- */
+- dentry->d_flags |= DCACHE_UNHASHED;
+- return 0;
+-}
+-
+-/*
+- * sockfs_dname() is called from d_path().
+- */
+-static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
+-{
+- return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
+- dentry->d_inode->i_ino);
+-}
+-
+-static struct dentry_operations sockfs_dentry_operations = {
+- .d_delete = sockfs_delete_dentry,
+- .d_dname = sockfs_dname,
+-};
+-
+-/*
+- * Obtains the first available file descriptor and sets it up for use.
+- *
+- * These functions create file structures and maps them to fd space
+- * of the current process. On success it returns file descriptor
+- * and file struct implicitly stored in sock->file.
+- * Note that another thread may close file descriptor before we return
+- * from this function. We use the fact that now we do not refer
+- * to socket after mapping. If one day we will need it, this
+- * function will increment ref. count on file by 1.
+- *
+- * In any case returned fd MAY BE not valid!
+- * This race condition is unavoidable
+- * with shared fd spaces, we cannot solve it inside kernel,
+- * but we take care of internal coherence yet.
+- */
+-
+-static int sock_alloc_fd(struct file **filep)
+-{
+- int fd;
+-
+- fd = get_unused_fd();
+- if (likely(fd >= 0)) {
+- struct file *file = get_empty_filp();
+-
+- *filep = file;
+- if (unlikely(!file)) {
+- put_unused_fd(fd);
+- return -ENFILE;
+- }
+- } else
+- *filep = NULL;
+- return fd;
+-}
+-
+-static int sock_attach_fd(struct socket *sock, struct file *file)
+-{
+- struct qstr name = { .name = "" };
+-
+- file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+- if (unlikely(!file->f_path.dentry))
+- return -ENOMEM;
+-
+- file->f_path.dentry->d_op = &sockfs_dentry_operations;
+- /*
+- * We dont want to push this dentry into global dentry hash table.
+- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
+- * This permits a working /proc/$pid/fd/XXX on sockets
+- */
+- file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
+- d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
+- file->f_path.mnt = mntget(sock_mnt);
+- file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
+-
+- sock->file = file;
+- file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
+- file->f_mode = FMODE_READ | FMODE_WRITE;
+- file->f_flags = O_RDWR;
+- file->f_pos = 0;
+- file->private_data = sock;
+-
+- return 0;
+-}
+-
+-int sock_map_fd(struct socket *sock)
+-{
+- struct file *newfile;
+- int fd = sock_alloc_fd(&newfile);
+-
+- if (likely(fd >= 0)) {
+- int err = sock_attach_fd(sock, newfile);
+-
+- if (unlikely(err < 0)) {
+- put_filp(newfile);
+- put_unused_fd(fd);
+- return err;
+- }
+- fd_install(fd, newfile);
+- }
+- return fd;
+-}
+-
+-static struct socket *sock_from_file(struct file *file, int *err)
+-{
+- if (file->f_op == &socket_file_ops)
+- return file->private_data; /* set in sock_map_fd */
+-
+- *err = -ENOTSOCK;
+- return NULL;
+-}
+-
+-/**
+- * sockfd_lookup - Go from a file number to its socket slot
+- * @fd: file handle
+- * @err: pointer to an error code return
+- *
+- * The file handle passed in is locked and the socket it is bound
+- * too is returned. If an error occurs the err pointer is overwritten
+- * with a negative errno code and NULL is returned. The function checks
+- * for both invalid handles and passing a handle which is not a socket.
+- *
+- * On a success the socket object pointer is returned.
+- */
+-
+-struct socket *sockfd_lookup(int fd, int *err)
+-{
+- struct file *file;
+- struct socket *sock;
+-
+- file = fget(fd);
+- if (!file) {
+- *err = -EBADF;
+- return NULL;
+- }
+-
+- sock = sock_from_file(file, err);
+- if (!sock)
+- fput(file);
+- return sock;
+-}
+-
+-static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
+-{
+- struct file *file;
+- struct socket *sock;
+-
+- *err = -EBADF;
+- file = fget_light(fd, fput_needed);
+- if (file) {
+- sock = sock_from_file(file, err);
+- if (sock)
+- return sock;
+- fput_light(file, *fput_needed);
+- }
+- return NULL;
+-}
+-
+-/**
+- * sock_alloc - allocate a socket
+- *
+- * Allocate a new inode and socket object. The two are bound together
+- * and initialised. The socket is then returned. If we are out of inodes
+- * NULL is returned.
+- */
+-
+-static struct socket *sock_alloc(void)
+-{
+- struct inode *inode;
+- struct socket *sock;
+-
+- inode = new_inode(sock_mnt->mnt_sb);
+- if (!inode)
+- return NULL;
+-
+- sock = SOCKET_I(inode);
+-
+- inode->i_mode = S_IFSOCK | S_IRWXUGO;
+- inode->i_uid = current->fsuid;
+- inode->i_gid = current->fsgid;
+-
+- get_cpu_var(sockets_in_use)++;
+- put_cpu_var(sockets_in_use);
+- return sock;
+-}
+-
+-/*
+- * In theory you can't get an open on this inode, but /proc provides
+- * a back door. Remember to keep it shut otherwise you'll let the
+- * creepy crawlies in.
+- */
+-
+-static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
+-{
+- return -ENXIO;
+-}
+-
+-const struct file_operations bad_sock_fops = {
+- .owner = THIS_MODULE,
+- .open = sock_no_open,
+-};
+-
+-/**
+- * sock_release - close a socket
+- * @sock: socket to close
+- *
+- * The socket is released from the protocol stack if it has a release
+- * callback, and the inode is then released if the socket is bound to
+- * an inode not a file.
+- */
+-
+-void sock_release(struct socket *sock)
+-{
+- if (sock->ops) {
+- struct module *owner = sock->ops->owner;
+-
+- sock->ops->release(sock);
+- sock->ops = NULL;
+- module_put(owner);
+- }
+-
+- if (sock->fasync_list)
+- printk(KERN_ERR "sock_release: fasync list not empty!\n");
+-
+- get_cpu_var(sockets_in_use)--;
+- put_cpu_var(sockets_in_use);
+- if (!sock->file) {
+- iput(SOCK_INODE(sock));
+- return;
+- }
+- sock->file = NULL;
+-}
+-
+-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+- struct msghdr *msg, size_t size)
+-{
+- struct sock_iocb *si = kiocb_to_siocb(iocb);
+- int err;
+-
+- si->sock = sock;
+- si->scm = NULL;
+- si->msg = msg;
+- si->size = size;
+-
+- err = security_socket_sendmsg(sock, msg, size);
+- if (err)
+- return err;
+-
+- return sock->ops->sendmsg(iocb, sock, msg, size);
+-}
+-
+-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+-{
+- struct kiocb iocb;
+- struct sock_iocb siocb;
+- int ret;
+-
+- init_sync_kiocb(&iocb, NULL);
+- iocb.private = &siocb;
+- ret = __sock_sendmsg(&iocb, sock, msg, size);
+- if (-EIOCBQUEUED == ret)
+- ret = wait_on_sync_kiocb(&iocb);
+- return ret;
+-}
+-
+-int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
+- struct kvec *vec, size_t num, size_t size)
+-{
+- mm_segment_t oldfs = get_fs();
+- int result;
+-
+- set_fs(KERNEL_DS);
+- /*
+- * the following is safe, since for compiler definitions of kvec and
+- * iovec are identical, yielding the same in-core layout and alignment
+- */
+- msg->msg_iov = (struct iovec *)vec;
+- msg->msg_iovlen = num;
+- result = sock_sendmsg(sock, msg, size);
+- set_fs(oldfs);
+- return result;
+-}
+-
+-/*
+- * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
+- */
+-void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+- struct sk_buff *skb)
+-{
+- ktime_t kt = skb->tstamp;
+-
+- if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+- struct timeval tv;
+- /* Race occurred between timestamp enabling and packet
+- receiving. Fill in the current time for now. */
+- if (kt.tv64 == 0)
+- kt = ktime_get_real();
+- skb->tstamp = kt;
+- tv = ktime_to_timeval(kt);
+- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+- } else {
+- struct timespec ts;
+- /* Race occurred between timestamp enabling and packet
+- receiving. Fill in the current time for now. */
+- if (kt.tv64 == 0)
+- kt = ktime_get_real();
+- skb->tstamp = kt;
+- ts = ktime_to_timespec(kt);
+- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+- }
+-}
+-
+-EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+-
+-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+- struct msghdr *msg, size_t size, int flags)
+-{
+- int err;
+- struct sock_iocb *si = kiocb_to_siocb(iocb);
+-
+- si->sock = sock;
+- si->scm = NULL;
+- si->msg = msg;
+- si->size = size;
+- si->flags = flags;
+-
+- err = security_socket_recvmsg(sock, msg, size, flags);
+- if (err)
+- return err;
+-
+- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+-}
+-
+-int sock_recvmsg(struct socket *sock, struct msghdr *msg,
+- size_t size, int flags)
+-{
+- struct kiocb iocb;
+- struct sock_iocb siocb;
+- int ret;
+-
+- init_sync_kiocb(&iocb, NULL);
+- iocb.private = &siocb;
+- ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
+- if (-EIOCBQUEUED == ret)
+- ret = wait_on_sync_kiocb(&iocb);
+- return ret;
+-}
+-
+-int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
+- struct kvec *vec, size_t num, size_t size, int flags)
+-{
+- mm_segment_t oldfs = get_fs();
+- int result;
+-
+- set_fs(KERNEL_DS);
+- /*
+- * the following is safe, since for compiler definitions of kvec and
+- * iovec are identical, yielding the same in-core layout and alignment
+- */
+- msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
+- result = sock_recvmsg(sock, msg, size, flags);
+- set_fs(oldfs);
+- return result;
+-}
+-
+-static void sock_aio_dtor(struct kiocb *iocb)
+-{
+- kfree(iocb->private);
+-}
+-
+-static ssize_t sock_sendpage(struct file *file, struct page *page,
+- int offset, size_t size, loff_t *ppos, int more)
+-{
+- struct socket *sock;
+- int flags;
+-
+- sock = file->private_data;
+-
+- flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
+- if (more)
+- flags |= MSG_MORE;
+-
+- return sock->ops->sendpage(sock, page, offset, size, flags);
+-}
+-
+-static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
+- struct sock_iocb *siocb)
+-{
+- if (!is_sync_kiocb(iocb)) {
+- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
+- if (!siocb)
+- return NULL;
+- iocb->ki_dtor = sock_aio_dtor;
+- }
+-
+- siocb->kiocb = iocb;
+- iocb->private = siocb;
+- return siocb;
+-}
+-
+-static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
+- struct file *file, const struct iovec *iov,
+- unsigned long nr_segs)
+-{
+- struct socket *sock = file->private_data;
+- size_t size = 0;
+- int i;
+-
+- for (i = 0; i < nr_segs; i++)
+- size += iov[i].iov_len;
+-
+- msg->msg_name = NULL;
+- msg->msg_namelen = 0;
+- msg->msg_control = NULL;
+- msg->msg_controllen = 0;
+- msg->msg_iov = (struct iovec *)iov;
+- msg->msg_iovlen = nr_segs;
+- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+-
+- return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
+-}
+-
+-static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
+- unsigned long nr_segs, loff_t pos)
+-{
+- struct sock_iocb siocb, *x;
+-
+- if (pos != 0)
+- return -ESPIPE;
+-
+- if (iocb->ki_left == 0) /* Match SYS5 behaviour */
+- return 0;
+-
+-
+- x = alloc_sock_iocb(iocb, &siocb);
+- if (!x)
+- return -ENOMEM;
+- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
+-}
+-
+-static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
+- struct file *file, const struct iovec *iov,
+- unsigned long nr_segs)
+-{
+- struct socket *sock = file->private_data;
+- size_t size = 0;
+- int i;
+-
+- for (i = 0; i < nr_segs; i++)
+- size += iov[i].iov_len;
+-
+- msg->msg_name = NULL;
+- msg->msg_namelen = 0;
+- msg->msg_control = NULL;
+- msg->msg_controllen = 0;
+- msg->msg_iov = (struct iovec *)iov;
+- msg->msg_iovlen = nr_segs;
+- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+- if (sock->type == SOCK_SEQPACKET)
+- msg->msg_flags |= MSG_EOR;
+-
+- return __sock_sendmsg(iocb, sock, msg, size);
+-}
+-
+-static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
+- unsigned long nr_segs, loff_t pos)
+-{
+- struct sock_iocb siocb, *x;
+-
+- if (pos != 0)
+- return -ESPIPE;
+-
+- x = alloc_sock_iocb(iocb, &siocb);
+- if (!x)
+- return -ENOMEM;
+-
+- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
+-}
+-
+-/*
+- * Atomic setting of ioctl hooks to avoid race
+- * with module unload.
+- */
+-
+-static DEFINE_MUTEX(br_ioctl_mutex);
+-static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
+-
+-void brioctl_set(int (*hook) (unsigned int, void __user *))
+-{
+- mutex_lock(&br_ioctl_mutex);
+- br_ioctl_hook = hook;
+- mutex_unlock(&br_ioctl_mutex);
+-}
+-
+-EXPORT_SYMBOL(brioctl_set);
+-
+-static DEFINE_MUTEX(vlan_ioctl_mutex);
+-static int (*vlan_ioctl_hook) (void __user *arg);
+-
+-void vlan_ioctl_set(int (*hook) (void __user *))
+-{
+- mutex_lock(&vlan_ioctl_mutex);
+- vlan_ioctl_hook = hook;
+- mutex_unlock(&vlan_ioctl_mutex);
+-}
+-
+-EXPORT_SYMBOL(vlan_ioctl_set);
+-
+-static DEFINE_MUTEX(dlci_ioctl_mutex);
+-static int (*dlci_ioctl_hook) (unsigned int, void __user *);
+-
+-void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
+-{
+- mutex_lock(&dlci_ioctl_mutex);
+- dlci_ioctl_hook = hook;
+- mutex_unlock(&dlci_ioctl_mutex);
+-}
+-
+-EXPORT_SYMBOL(dlci_ioctl_set);
+-
+-/*
+- * With an ioctl, arg may well be a user mode pointer, but we don't know
+- * what to do with it - that's up to the protocol still.
+- */
+-
+-static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+-{
+- struct socket *sock;
+- void __user *argp = (void __user *)arg;
+- int pid, err;
+-
+- sock = file->private_data;
+- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
+- err = dev_ioctl(cmd, argp);
+- } else
+-#ifdef CONFIG_WIRELESS_EXT
+- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+- err = dev_ioctl(cmd, argp);
+- } else
+-#endif /* CONFIG_WIRELESS_EXT */
+- switch (cmd) {
+- case FIOSETOWN:
+- case SIOCSPGRP:
+- err = -EFAULT;
+- if (get_user(pid, (int __user *)argp))
+- break;
+- err = f_setown(sock->file, pid, 1);
+- break;
+- case FIOGETOWN:
+- case SIOCGPGRP:
+- err = put_user(f_getown(sock->file),
+- (int __user *)argp);
+- break;
+- case SIOCGIFBR:
+- case SIOCSIFBR:
+- case SIOCBRADDBR:
+- case SIOCBRDELBR:
+- err = -ENOPKG;
+- if (!br_ioctl_hook)
+- request_module("bridge");
+-
+- mutex_lock(&br_ioctl_mutex);
+- if (br_ioctl_hook)
+- err = br_ioctl_hook(cmd, argp);
+- mutex_unlock(&br_ioctl_mutex);
+- break;
+- case SIOCGIFVLAN:
+- case SIOCSIFVLAN:
+- err = -ENOPKG;
+- if (!vlan_ioctl_hook)
+- request_module("8021q");
+-
+- mutex_lock(&vlan_ioctl_mutex);
+- if (vlan_ioctl_hook)
+- err = vlan_ioctl_hook(argp);
+- mutex_unlock(&vlan_ioctl_mutex);
+- break;
+- case SIOCADDDLCI:
+- case SIOCDELDLCI:
+- err = -ENOPKG;
+- if (!dlci_ioctl_hook)
+- request_module("dlci");
+-
+- if (dlci_ioctl_hook) {
+- mutex_lock(&dlci_ioctl_mutex);
+- err = dlci_ioctl_hook(cmd, argp);
+- mutex_unlock(&dlci_ioctl_mutex);
+- }
+- break;
+- default:
+- err = sock->ops->ioctl(sock, cmd, arg);
+-
+- /*
+- * If this ioctl is unknown try to hand it down
+- * to the NIC driver.
+- */
+- if (err == -ENOIOCTLCMD)
+- err = dev_ioctl(cmd, argp);
+- break;
+- }
+- return err;
+-}
+-
+-int sock_create_lite(int family, int type, int protocol, struct socket **res)
+-{
+- int err;
+- struct socket *sock = NULL;
+-
+- err = security_socket_create(family, type, protocol, 1);
+- if (err)
+- goto out;
+-
+- sock = sock_alloc();
+- if (!sock) {
+- err = -ENOMEM;
+- goto out;
+- }
+-
+- sock->type = type;
+- err = security_socket_post_create(sock, family, type, protocol, 1);
+- if (err)
+- goto out_release;
+-
+-out:
+- *res = sock;
+- return err;
+-out_release:
+- sock_release(sock);
+- sock = NULL;
+- goto out;
+-}
+-
+-/* No kernel lock held - perfect */
+-static unsigned int sock_poll(struct file *file, poll_table *wait)
+-{
+- struct socket *sock;
+-
+- /*
+- * We can't return errors to poll, so it's either yes or no.
+- */
+- sock = file->private_data;
+- return sock->ops->poll(file, sock, wait);
+-}
+-
+-static int sock_mmap(struct file *file, struct vm_area_struct *vma)
+-{
+- struct socket *sock = file->private_data;
+-
+- return sock->ops->mmap(file, sock, vma);
+-}
+-
+-static int sock_close(struct inode *inode, struct file *filp)
+-{
+- /*
+- * It was possible the inode is NULL we were
+- * closing an unfinished socket.
+- */
+-
+- if (!inode) {
+- printk(KERN_DEBUG "sock_close: NULL inode\n");
+- return 0;
+- }
+- sock_fasync(-1, filp, 0);
+- sock_release(SOCKET_I(inode));
+- return 0;
+-}
+-
+-/*
+- * Update the socket async list
+- *
+- * Fasync_list locking strategy.
+- *
+- * 1. fasync_list is modified only under process context socket lock
+- * i.e. under semaphore.
+- * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
+- * or under socket lock.
+- * 3. fasync_list can be used from softirq context, so that
+- * modification under socket lock have to be enhanced with
+- * write_lock_bh(&sk->sk_callback_lock).
+- * --ANK (990710)
+- */
+-
+-static int sock_fasync(int fd, struct file *filp, int on)
+-{
+- struct fasync_struct *fa, *fna = NULL, **prev;
+- struct socket *sock;
+- struct sock *sk;
+-
+- if (on) {
+- fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
+- if (fna == NULL)
+- return -ENOMEM;
+- }
+-
+- sock = filp->private_data;
+-
+- sk = sock->sk;
+- if (sk == NULL) {
+- kfree(fna);
+- return -EINVAL;
+- }
+-
+- lock_sock(sk);
+-
+- prev = &(sock->fasync_list);
+-
+- for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
+- if (fa->fa_file == filp)
+- break;
+-
+- if (on) {
+- if (fa != NULL) {
+- write_lock_bh(&sk->sk_callback_lock);
+- fa->fa_fd = fd;
+- write_unlock_bh(&sk->sk_callback_lock);
+-
+- kfree(fna);
+- goto out;
+- }
+- fna->fa_file = filp;
+- fna->fa_fd = fd;
+- fna->magic = FASYNC_MAGIC;
+- fna->fa_next = sock->fasync_list;
+- write_lock_bh(&sk->sk_callback_lock);
+- sock->fasync_list = fna;
+- write_unlock_bh(&sk->sk_callback_lock);
+- } else {
+- if (fa != NULL) {
+- write_lock_bh(&sk->sk_callback_lock);
+- *prev = fa->fa_next;
+- write_unlock_bh(&sk->sk_callback_lock);
+- kfree(fa);
+- }
+- }
+-
+-out:
+- release_sock(sock->sk);
+- return 0;
+-}
+-
+-/* This function may be called only under socket lock or callback_lock */
+-
+-int sock_wake_async(struct socket *sock, int how, int band)
+-{
+- if (!sock || !sock->fasync_list)
+- return -1;
+- switch (how) {
+- case 1:
+-
+- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+- break;
+- goto call_kill;
+- case 2:
+- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+- break;
+- /* fall through */
+- case 0:
+-call_kill:
+- __kill_fasync(sock->fasync_list, SIGIO, band);
+- break;
+- case 3:
+- __kill_fasync(sock->fasync_list, SIGURG, band);
+- }
+- return 0;
+-}
+-
+-static int __sock_create(int family, int type, int protocol,
+- struct socket **res, int kern)
+-{
+- int err;
+- struct socket *sock;
+- const struct net_proto_family *pf;
+-
+- /*
+- * Check protocol is in range
+- */
+- if (family < 0 || family >= NPROTO)
+- return -EAFNOSUPPORT;
+- if (type < 0 || type >= SOCK_MAX)
+- return -EINVAL;
+-
+- /* Compatibility.
+-
+- This uglymoron is moved from INET layer to here to avoid
+- deadlock in module load.
+- */
+- if (family == PF_INET && type == SOCK_PACKET) {
+- static int warned;
+- if (!warned) {
+- warned = 1;
+- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+- current->comm);
+- }
+- family = PF_PACKET;
+- }
+-
+- err = security_socket_create(family, type, protocol, kern);
+- if (err)
+- return err;
+-
+- /*
+- * Allocate the socket and allow the family to set things up. if
+- * the protocol is 0, the family is instructed to select an appropriate
+- * default.
+- */
+- sock = sock_alloc();
+- if (!sock) {
+- if (net_ratelimit())
+- printk(KERN_WARNING "socket: no more sockets\n");
+- return -ENFILE; /* Not exactly a match, but its the
+- closest posix thing */
+- }
+-
+- sock->type = type;
+-
+-#if defined(CONFIG_KMOD)
+- /* Attempt to load a protocol module if the find failed.
+- *
+- * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
+- * requested real, full-featured networking support upon configuration.
+- * Otherwise module support will break!
+- */
+- if (net_families[family] == NULL)
+- request_module("net-pf-%d", family);
+-#endif
+-
+- rcu_read_lock();
+- pf = rcu_dereference(net_families[family]);
+- err = -EAFNOSUPPORT;
+- if (!pf)
+- goto out_release;
+-
+- /*
+- * We will call the ->create function, that possibly is in a loadable
+- * module, so we have to bump that loadable module refcnt first.
+- */
+- if (!try_module_get(pf->owner))
+- goto out_release;
+-
+- /* Now protected by module ref count */
+- rcu_read_unlock();
+-
+- err = pf->create(sock, protocol);
+- if (err < 0)
+- goto out_module_put;
+-
+- /*
+- * Now to bump the refcnt of the [loadable] module that owns this
+- * socket at sock_release time we decrement its refcnt.
+- */
+- if (!try_module_get(sock->ops->owner))
+- goto out_module_busy;
+-
+- /*
+- * Now that we're done with the ->create function, the [loadable]
+- * module can have its refcnt decremented
+- */
+- module_put(pf->owner);
+- err = security_socket_post_create(sock, family, type, protocol, kern);
+- if (err)
+- goto out_sock_release;
+- *res = sock;
+-
+- return 0;
+-
+-out_module_busy:
+- err = -EAFNOSUPPORT;
+-out_module_put:
+- sock->ops = NULL;
+- module_put(pf->owner);
+-out_sock_release:
+- sock_release(sock);
+- return err;
+-
+-out_release:
+- rcu_read_unlock();
+- goto out_sock_release;
+-}
+-
+-int sock_create(int family, int type, int protocol, struct socket **res)
+-{
+- return __sock_create(family, type, protocol, res, 0);
+-}
+-
+-int sock_create_kern(int family, int type, int protocol, struct socket **res)
+-{
+- return __sock_create(family, type, protocol, res, 1);
+-}
+-
+-asmlinkage long sys_socket(int family, int type, int protocol)
+-{
+- int retval;
+- struct socket *sock;
+-
+- retval = sock_create(family, type, protocol, &sock);
+- if (retval < 0)
+- goto out;
+-
+- retval = sock_map_fd(sock);
+- if (retval < 0)
+- goto out_release;
+-
+-out:
+- /* It may be already another descriptor 8) Not kernel problem. */
+- return retval;
+-
+-out_release:
+- sock_release(sock);
+- return retval;
+-}
+-
+-/*
+- * Create a pair of connected sockets.
+- */
+-
+-asmlinkage long sys_socketpair(int family, int type, int protocol,
+- int __user *usockvec)
+-{
+- struct socket *sock1, *sock2;
+- int fd1, fd2, err;
+- struct file *newfile1, *newfile2;
+-
+- /*
+- * Obtain the first socket and check if the underlying protocol
+- * supports the socketpair call.
+- */
+-
+- err = sock_create(family, type, protocol, &sock1);
+- if (err < 0)
+- goto out;
+-
+- err = sock_create(family, type, protocol, &sock2);
+- if (err < 0)
+- goto out_release_1;
+-
+- err = sock1->ops->socketpair(sock1, sock2);
+- if (err < 0)
+- goto out_release_both;
+-
+- fd1 = sock_alloc_fd(&newfile1);
+- if (unlikely(fd1 < 0)) {
+- err = fd1;
+- goto out_release_both;
+- }
+-
+- fd2 = sock_alloc_fd(&newfile2);
+- if (unlikely(fd2 < 0)) {
+- err = fd2;
+- put_filp(newfile1);
+- put_unused_fd(fd1);
+- goto out_release_both;
+- }
+-
+- err = sock_attach_fd(sock1, newfile1);
+- if (unlikely(err < 0)) {
+- goto out_fd2;
+- }
+-
+- err = sock_attach_fd(sock2, newfile2);
+- if (unlikely(err < 0)) {
+- fput(newfile1);
+- goto out_fd1;
+- }
+-
+- err = audit_fd_pair(fd1, fd2);
+- if (err < 0) {
+- fput(newfile1);
+- fput(newfile2);
+- goto out_fd;
+- }
+-
+- fd_install(fd1, newfile1);
+- fd_install(fd2, newfile2);
+- /* fd1 and fd2 may be already another descriptors.
+- * Not kernel problem.
+- */
+-
+- err = put_user(fd1, &usockvec[0]);
+- if (!err)
+- err = put_user(fd2, &usockvec[1]);
+- if (!err)
+- return 0;
+-
+- sys_close(fd2);
+- sys_close(fd1);
+- return err;
+-
+-out_release_both:
+- sock_release(sock2);
+-out_release_1:
+- sock_release(sock1);
+-out:
+- return err;
+-
+-out_fd2:
+- put_filp(newfile1);
+- sock_release(sock1);
+-out_fd1:
+- put_filp(newfile2);
+- sock_release(sock2);
+-out_fd:
+- put_unused_fd(fd1);
+- put_unused_fd(fd2);
+- goto out;
+-}
+-
+-/*
+- * Bind a name to a socket. Nothing much to do here since it's
+- * the protocol's responsibility to handle the local address.
+- *
+- * We move the socket address to kernel space before we call
+- * the protocol layer (having also checked the address is ok).
+- */
+-
+-asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
+-{
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- int err, fput_needed;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock) {
+- err = move_addr_to_kernel(umyaddr, addrlen, address);
+- if (err >= 0) {
+- err = security_socket_bind(sock,
+- (struct sockaddr *)address,
+- addrlen);
+- if (!err)
+- err = sock->ops->bind(sock,
+- (struct sockaddr *)
+- address, addrlen);
+- }
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/*
+- * Perform a listen. Basically, we allow the protocol to do anything
+- * necessary for a listen, and if that works, we mark the socket as
+- * ready for listening.
+- */
+-
+-int sysctl_somaxconn __read_mostly = SOMAXCONN;
+-
+-asmlinkage long sys_listen(int fd, int backlog)
+-{
+- struct socket *sock;
+- int err, fput_needed;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock) {
+- if ((unsigned)backlog > sysctl_somaxconn)
+- backlog = sysctl_somaxconn;
+-
+- err = security_socket_listen(sock, backlog);
+- if (!err)
+- err = sock->ops->listen(sock, backlog);
+-
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/*
+- * For accept, we attempt to create a new socket, set up the link
+- * with the client, wake up the client, then return the new
+- * connected fd. We collect the address of the connector in kernel
+- * space and move it to user at the very end. This is unclean because
+- * we open the socket then return an error.
+- *
+- * 1003.1g adds the ability to recvmsg() to query connection pending
+- * status to recvmsg. We need to add that support in a way thats
+- * clean when we restucture accept also.
+- */
+-
+-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+- int __user *upeer_addrlen)
+-{
+- struct socket *sock, *newsock;
+- struct file *newfile;
+- int err, len, newfd, fput_needed;
+- char address[MAX_SOCK_ADDR];
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (!sock)
+- goto out;
+-
+- err = -ENFILE;
+- if (!(newsock = sock_alloc()))
+- goto out_put;
+-
+- newsock->type = sock->type;
+- newsock->ops = sock->ops;
+-
+- /*
+- * We don't need try_module_get here, as the listening socket (sock)
+- * has the protocol module (sock->ops->owner) held.
+- */
+- __module_get(newsock->ops->owner);
+-
+- newfd = sock_alloc_fd(&newfile);
+- if (unlikely(newfd < 0)) {
+- err = newfd;
+- sock_release(newsock);
+- goto out_put;
+- }
+-
+- err = sock_attach_fd(newsock, newfile);
+- if (err < 0)
+- goto out_fd_simple;
+-
+- err = security_socket_accept(sock, newsock);
+- if (err)
+- goto out_fd;
+-
+- err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+- if (err < 0)
+- goto out_fd;
+-
+- if (upeer_sockaddr) {
+- if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+- &len, 2) < 0) {
+- err = -ECONNABORTED;
+- goto out_fd;
+- }
+- err = move_addr_to_user(address, len, upeer_sockaddr,
+- upeer_addrlen);
+- if (err < 0)
+- goto out_fd;
+- }
+-
+- /* File flags are not inherited via accept() unlike another OSes. */
+-
+- fd_install(newfd, newfile);
+- err = newfd;
+-
+- security_socket_post_accept(sock, newsock);
+-
+-out_put:
+- fput_light(sock->file, fput_needed);
+-out:
+- return err;
+-out_fd_simple:
+- sock_release(newsock);
+- put_filp(newfile);
+- put_unused_fd(newfd);
+- goto out_put;
+-out_fd:
+- fput(newfile);
+- put_unused_fd(newfd);
+- goto out_put;
+-}
+-
+-/*
+- * Attempt to connect to a socket with the server address. The address
+- * is in user space so we verify it is OK and move it to kernel space.
+- *
+- * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
+- * break bindings
+- *
+- * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
+- * other SEQPACKET protocols that take time to connect() as it doesn't
+- * include the -EINPROGRESS status for such sockets.
+- */
+-
+-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+- int addrlen)
+-{
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- int err, fput_needed;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (!sock)
+- goto out;
+- err = move_addr_to_kernel(uservaddr, addrlen, address);
+- if (err < 0)
+- goto out_put;
+-
+- err =
+- security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+- if (err)
+- goto out_put;
+-
+- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
+- sock->file->f_flags);
+-out_put:
+- fput_light(sock->file, fput_needed);
+-out:
+- return err;
+-}
+-
+-/*
+- * Get the local address ('name') of a socket object. Move the obtained
+- * name to user space.
+- */
+-
+-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+- int __user *usockaddr_len)
+-{
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- int len, err, fput_needed;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (!sock)
+- goto out;
+-
+- err = security_socket_getsockname(sock);
+- if (err)
+- goto out_put;
+-
+- err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+- if (err)
+- goto out_put;
+- err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+-
+-out_put:
+- fput_light(sock->file, fput_needed);
+-out:
+- return err;
+-}
+-
+-/*
+- * Get the remote address ('name') of a socket object. Move the obtained
+- * name to user space.
+- */
+-
+-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+- int __user *usockaddr_len)
+-{
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- int len, err, fput_needed;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock != NULL) {
+- err = security_socket_getpeername(sock);
+- if (err) {
+- fput_light(sock->file, fput_needed);
+- return err;
+- }
+-
+- err =
+- sock->ops->getname(sock, (struct sockaddr *)address, &len,
+- 1);
+- if (!err)
+- err = move_addr_to_user(address, len, usockaddr,
+- usockaddr_len);
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/*
+- * Send a datagram to a given address. We move the address into kernel
+- * space and check the user space data area is readable before invoking
+- * the protocol.
+- */
+-
+-asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
+- unsigned flags, struct sockaddr __user *addr,
+- int addr_len)
+-{
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- int err;
+- struct msghdr msg;
+- struct iovec iov;
+- int fput_needed;
+- struct file *sock_file;
+-
+- sock_file = fget_light(fd, &fput_needed);
+- err = -EBADF;
+- if (!sock_file)
+- goto out;
+-
+- sock = sock_from_file(sock_file, &err);
+- if (!sock)
+- goto out_put;
+- iov.iov_base = buff;
+- iov.iov_len = len;
+- msg.msg_name = NULL;
+- msg.msg_iov = &iov;
+- msg.msg_iovlen = 1;
+- msg.msg_control = NULL;
+- msg.msg_controllen = 0;
+- msg.msg_namelen = 0;
+- if (addr) {
+- err = move_addr_to_kernel(addr, addr_len, address);
+- if (err < 0)
+- goto out_put;
+- msg.msg_name = address;
+- msg.msg_namelen = addr_len;
+- }
+- if (sock->file->f_flags & O_NONBLOCK)
+- flags |= MSG_DONTWAIT;
+- msg.msg_flags = flags;
+- err = sock_sendmsg(sock, &msg, len);
+-
+-out_put:
+- fput_light(sock_file, fput_needed);
+-out:
+- return err;
+-}
+-
+-/*
+- * Send a datagram down a socket.
+- */
+-
+-asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
+-{
+- return sys_sendto(fd, buff, len, flags, NULL, 0);
+-}
+-
+-/*
+- * Receive a frame from the socket and optionally record the address of the
+- * sender. We verify the buffers are writable and if needed move the
+- * sender address from kernel to user space.
+- */
+-
+-asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
+- unsigned flags, struct sockaddr __user *addr,
+- int __user *addr_len)
+-{
+- struct socket *sock;
+- struct iovec iov;
+- struct msghdr msg;
+- char address[MAX_SOCK_ADDR];
+- int err, err2;
+- struct file *sock_file;
+- int fput_needed;
+-
+- sock_file = fget_light(fd, &fput_needed);
+- err = -EBADF;
+- if (!sock_file)
+- goto out;
+-
+- sock = sock_from_file(sock_file, &err);
+- if (!sock)
+- goto out_put;
+-
+- msg.msg_control = NULL;
+- msg.msg_controllen = 0;
+- msg.msg_iovlen = 1;
+- msg.msg_iov = &iov;
+- iov.iov_len = size;
+- iov.iov_base = ubuf;
+- msg.msg_name = address;
+- msg.msg_namelen = MAX_SOCK_ADDR;
+- if (sock->file->f_flags & O_NONBLOCK)
+- flags |= MSG_DONTWAIT;
+- err = sock_recvmsg(sock, &msg, size, flags);
+-
+- if (err >= 0 && addr != NULL) {
+- err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+- if (err2 < 0)
+- err = err2;
+- }
+-out_put:
+- fput_light(sock_file, fput_needed);
+-out:
+- return err;
+-}
+-
+-/*
+- * Receive a datagram from a socket.
+- */
+-
+-asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
+- unsigned flags)
+-{
+- return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
+-}
+-
+-/*
+- * Set a socket option. Because we don't know the option lengths we have
+- * to pass the user mode parameter for the protocols to sort out.
+- */
+-
+-asmlinkage long sys_setsockopt(int fd, int level, int optname,
+- char __user *optval, int optlen)
+-{
+- int err, fput_needed;
+- struct socket *sock;
+-
+- if (optlen < 0)
+- return -EINVAL;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock != NULL) {
+- err = security_socket_setsockopt(sock, level, optname);
+- if (err)
+- goto out_put;
+-
+- if (level == SOL_SOCKET)
+- err =
+- sock_setsockopt(sock, level, optname, optval,
+- optlen);
+- else
+- err =
+- sock->ops->setsockopt(sock, level, optname, optval,
+- optlen);
+-out_put:
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/*
+- * Get a socket option. Because we don't know the option lengths we have
+- * to pass a user mode parameter for the protocols to sort out.
+- */
+-
+-asmlinkage long sys_getsockopt(int fd, int level, int optname,
+- char __user *optval, int __user *optlen)
+-{
+- int err, fput_needed;
+- struct socket *sock;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock != NULL) {
+- err = security_socket_getsockopt(sock, level, optname);
+- if (err)
+- goto out_put;
+-
+- if (level == SOL_SOCKET)
+- err =
+- sock_getsockopt(sock, level, optname, optval,
+- optlen);
+- else
+- err =
+- sock->ops->getsockopt(sock, level, optname, optval,
+- optlen);
+-out_put:
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/*
+- * Shutdown a socket.
+- */
+-
+-asmlinkage long sys_shutdown(int fd, int how)
+-{
+- int err, fput_needed;
+- struct socket *sock;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (sock != NULL) {
+- err = security_socket_shutdown(sock, how);
+- if (!err)
+- err = sock->ops->shutdown(sock, how);
+- fput_light(sock->file, fput_needed);
+- }
+- return err;
+-}
+-
+-/* A couple of helpful macros for getting the address of the 32/64 bit
+- * fields which are the same type (int / unsigned) on our platforms.
+- */
+-#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
+-#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
+-#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
+-
+-/*
+- * BSD sendmsg interface
+- */
+-
+-asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
+-{
+- struct compat_msghdr __user *msg_compat =
+- (struct compat_msghdr __user *)msg;
+- struct socket *sock;
+- char address[MAX_SOCK_ADDR];
+- struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+- unsigned char ctl[sizeof(struct cmsghdr) + 20]
+- __attribute__ ((aligned(sizeof(__kernel_size_t))));
+- /* 20 is size of ipv6_pktinfo */
+- unsigned char *ctl_buf = ctl;
+- struct msghdr msg_sys;
+- int err, ctl_len, iov_size, total_len;
+- int fput_needed;
+-
+- err = -EFAULT;
+- if (MSG_CMSG_COMPAT & flags) {
+- if (get_compat_msghdr(&msg_sys, msg_compat))
+- return -EFAULT;
+- }
+- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+- return -EFAULT;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (!sock)
+- goto out;
+-
+- /* do not move before msg_sys is valid */
+- err = -EMSGSIZE;
+- if (msg_sys.msg_iovlen > UIO_MAXIOV)
+- goto out_put;
+-
+- /* Check whether to allocate the iovec area */
+- err = -ENOMEM;
+- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+- if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+- if (!iov)
+- goto out_put;
+- }
+-
+- /* This will also move the address data into kernel space */
+- if (MSG_CMSG_COMPAT & flags) {
+- err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
+- } else
+- err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
+- if (err < 0)
+- goto out_freeiov;
+- total_len = err;
+-
+- err = -ENOBUFS;
+-
+- if (msg_sys.msg_controllen > INT_MAX)
+- goto out_freeiov;
+- ctl_len = msg_sys.msg_controllen;
+- if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
+- err =
+- cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+- sizeof(ctl));
+- if (err)
+- goto out_freeiov;
+- ctl_buf = msg_sys.msg_control;
+- ctl_len = msg_sys.msg_controllen;
+- } else if (ctl_len) {
+- if (ctl_len > sizeof(ctl)) {
+- ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
+- if (ctl_buf == NULL)
+- goto out_freeiov;
+- }
+- err = -EFAULT;
+- /*
+- * Careful! Before this, msg_sys.msg_control contains a user pointer.
+- * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
+- * checking falls down on this.
+- */
+- if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
+- ctl_len))
+- goto out_freectl;
+- msg_sys.msg_control = ctl_buf;
+- }
+- msg_sys.msg_flags = flags;
+-
+- if (sock->file->f_flags & O_NONBLOCK)
+- msg_sys.msg_flags |= MSG_DONTWAIT;
+- err = sock_sendmsg(sock, &msg_sys, total_len);
+-
+-out_freectl:
+- if (ctl_buf != ctl)
+- sock_kfree_s(sock->sk, ctl_buf, ctl_len);
+-out_freeiov:
+- if (iov != iovstack)
+- sock_kfree_s(sock->sk, iov, iov_size);
+-out_put:
+- fput_light(sock->file, fput_needed);
+-out:
+- return err;
+-}
+-
+-/*
+- * BSD recvmsg interface
+- */
+-
+-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
+- unsigned int flags)
+-{
+- struct compat_msghdr __user *msg_compat =
+- (struct compat_msghdr __user *)msg;
+- struct socket *sock;
+- struct iovec iovstack[UIO_FASTIOV];
+- struct iovec *iov = iovstack;
+- struct msghdr msg_sys;
+- unsigned long cmsg_ptr;
+- int err, iov_size, total_len, len;
+- int fput_needed;
+-
+- /* kernel mode address */
+- char addr[MAX_SOCK_ADDR];
+-
+- /* user mode address pointers */
+- struct sockaddr __user *uaddr;
+- int __user *uaddr_len;
+-
+- if (MSG_CMSG_COMPAT & flags) {
+- if (get_compat_msghdr(&msg_sys, msg_compat))
+- return -EFAULT;
+- }
+- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+- return -EFAULT;
+-
+- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+- if (!sock)
+- goto out;
+-
+- err = -EMSGSIZE;
+- if (msg_sys.msg_iovlen > UIO_MAXIOV)
+- goto out_put;
+-
+- /* Check whether to allocate the iovec area */
+- err = -ENOMEM;
+- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+- if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+- if (!iov)
+- goto out_put;
+- }
+-
+- /*
+- * Save the user-mode address (verify_iovec will change the
+- * kernel msghdr to use the kernel address space)
+- */
+-
+- uaddr = (void __user *)msg_sys.msg_name;
+- uaddr_len = COMPAT_NAMELEN(msg);
+- if (MSG_CMSG_COMPAT & flags) {
+- err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+- } else
+- err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
+- if (err < 0)
+- goto out_freeiov;
+- total_len = err;
+-
+- cmsg_ptr = (unsigned long)msg_sys.msg_control;
+- msg_sys.msg_flags = 0;
+- if (MSG_CMSG_COMPAT & flags)
+- msg_sys.msg_flags = MSG_CMSG_COMPAT;
+-
+- if (sock->file->f_flags & O_NONBLOCK)
+- flags |= MSG_DONTWAIT;
+- err = sock_recvmsg(sock, &msg_sys, total_len, flags);
+- if (err < 0)
+- goto out_freeiov;
+- len = err;
+-
+- if (uaddr != NULL) {
+- err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+- uaddr_len);
+- if (err < 0)
+- goto out_freeiov;
+- }
+- err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
+- COMPAT_FLAGS(msg));
+- if (err)
+- goto out_freeiov;
+- if (MSG_CMSG_COMPAT & flags)
+- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
+- &msg_compat->msg_controllen);
+- else
+- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
+- &msg->msg_controllen);
+- if (err)
+- goto out_freeiov;
+- err = len;
+-
+-out_freeiov:
+- if (iov != iovstack)
+- sock_kfree_s(sock->sk, iov, iov_size);
+-out_put:
+- fput_light(sock->file, fput_needed);
+-out:
+- return err;
+-}
+-
+-#ifdef __ARCH_WANT_SYS_SOCKETCALL
+-
+-/* Argument list sizes for sys_socketcall */
+-#define AL(x) ((x) * sizeof(unsigned long))
+-static const unsigned char nargs[18]={
+- AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+-};
+-
+-#undef AL
+-
+-/*
+- * System call vectors.
+- *
+- * Argument checking cleaned up. Saved 20% in size.
+- * This function doesn't need to set the kernel lock because
+- * it is set by the callees.
+- */
+-
+-asmlinkage long sys_socketcall(int call, unsigned long __user *args)
+-{
+- unsigned long a[6];
+- unsigned long a0, a1;
+- int err;
+-
+- if (call < 1 || call > SYS_RECVMSG)
+- return -EINVAL;
+-
+- /* copy_from_user should be SMP safe. */
+- if (copy_from_user(a, args, nargs[call]))
+- return -EFAULT;
+-
+- err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+- if (err)
+- return err;
+-
+- a0 = a[0];
+- a1 = a[1];
+-
+- switch (call) {
+- case SYS_SOCKET:
+- err = sys_socket(a0, a1, a[2]);
+- break;
+- case SYS_BIND:
+- err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+- break;
+- case SYS_CONNECT:
+- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+- break;
+- case SYS_LISTEN:
+- err = sys_listen(a0, a1);
+- break;
+- case SYS_ACCEPT:
+- err =
+- sys_accept(a0, (struct sockaddr __user *)a1,
+- (int __user *)a[2]);
+- break;
+- case SYS_GETSOCKNAME:
+- err =
+- sys_getsockname(a0, (struct sockaddr __user *)a1,
+- (int __user *)a[2]);
+- break;
+- case SYS_GETPEERNAME:
+- err =
+- sys_getpeername(a0, (struct sockaddr __user *)a1,
+- (int __user *)a[2]);
+- break;
+- case SYS_SOCKETPAIR:
+- err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+- break;
+- case SYS_SEND:
+- err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+- break;
+- case SYS_SENDTO:
+- err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+- (struct sockaddr __user *)a[4], a[5]);
+- break;
+- case SYS_RECV:
+- err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+- break;
+- case SYS_RECVFROM:
+- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+- (struct sockaddr __user *)a[4],
+- (int __user *)a[5]);
+- break;
+- case SYS_SHUTDOWN:
+- err = sys_shutdown(a0, a1);
+- break;
+- case SYS_SETSOCKOPT:
+- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+- break;
+- case SYS_GETSOCKOPT:
+- err =
+- sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+- (int __user *)a[4]);
+- break;
+- case SYS_SENDMSG:
+- err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+- break;
+- case SYS_RECVMSG:
+- err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+- break;
+- default:
+- err = -EINVAL;
+- break;
+- }
+- return err;
+-}
+-
+-#endif /* __ARCH_WANT_SYS_SOCKETCALL */
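
On architectures that take this path (e.g. i386, which defines
__ARCH_WANT_SYS_SOCKETCALL), userspace reaches every socket operation through
this single multiplexed entry point. A hedged userspace sketch, assuming an
i386-style ABI where __NR_socketcall exists; the kernel side copies
nargs[SYS_SOCKET] bytes (three longs) out of the args array before dispatching:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <sys/socket.h>
    #include <linux/net.h>          /* SYS_SOCKET, SYS_BIND, ... */

    int main(void)
    {
            /* Equivalent to socket(AF_INET, SOCK_STREAM, 0). */
            unsigned long args[3] = { AF_INET, SOCK_STREAM, 0 };
            long fd = syscall(__NR_socketcall, SYS_SOCKET, args);

            if (fd < 0) {
                    perror("socketcall(SYS_SOCKET)");
                    return 1;
            }
            close((int)fd);
            return 0;
    }
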
+-
+-/**
+- * sock_register - add a socket protocol handler
+- * @ops: description of protocol
+- *
+- * This function is called by a protocol handler that wants to
+- * advertise its address family, and have it linked into the
+- * socket interface. The value ops->family corresponds to the
+- * socket system call protocol family.
+- */
+-int sock_register(const struct net_proto_family *ops)
+-{
+- int err;
+-
+- if (ops->family >= NPROTO) {
+- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+- NPROTO);
+- return -ENOBUFS;
+- }
+-
+- spin_lock(&net_family_lock);
+- if (net_families[ops->family])
+- err = -EEXIST;
+- else {
+- net_families[ops->family] = ops;
+- err = 0;
+- }
+- spin_unlock(&net_family_lock);
+-
+- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
+- return err;
+-}
+-
+-/**
+- * sock_unregister - remove a protocol handler
+- * @family: protocol family to remove
+- *
+- * This function is called by a protocol handler that wants to
+- * remove its address family and have it unlinked from new
+- * socket creation.
+- *
+- * If the protocol handler is a module, it can use module reference
+- * counts to protect against new references. If the protocol handler
+- * is not a module, it must provide its own protection in the
+- * ops->create routine.
+- */
+-void sock_unregister(int family)
+-{
+- BUG_ON(family < 0 || family >= NPROTO);
+-
+- spin_lock(&net_family_lock);
+- net_families[family] = NULL;
+- spin_unlock(&net_family_lock);
+-
+- synchronize_rcu();
+-
+- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
+-}
+-
+-static int __init sock_init(void)
+-{
+- /*
+- * Initialize sock SLAB cache.
+- */
+-
+- sk_init();
+-
+- /*
+- * Initialize skbuff SLAB cache
+- */
+- skb_init();
+-
+- /*
+- * Initialize the protocols module.
+- */
+-
+- init_inodecache();
+- register_filesystem(&sock_fs_type);
+- sock_mnt = kern_mount(&sock_fs_type);
+-
+- /* The real protocol initialization is performed in later initcalls.
+- */
+-
+-#ifdef CONFIG_NETFILTER
+- netfilter_init();
+-#endif
+-
+- return 0;
+-}
+-
+-core_initcall(sock_init); /* early initcall */
+-
+-#ifdef CONFIG_PROC_FS
+-void socket_seq_show(struct seq_file *seq)
+-{
+- int cpu;
+- int counter = 0;
+-
+- for_each_possible_cpu(cpu)
+- counter += per_cpu(sockets_in_use, cpu);
+-
+- /* It can be negative, by the way. 8) */
+- if (counter < 0)
+- counter = 0;
+-
+- seq_printf(seq, "sockets: used %d\n", counter);
+-}
+-#endif /* CONFIG_PROC_FS */
+-
+-#ifdef CONFIG_COMPAT
+-static long compat_sock_ioctl(struct file *file, unsigned cmd,
+- unsigned long arg)
+-{
+- struct socket *sock = file->private_data;
+- int ret = -ENOIOCTLCMD;
+-
+- if (sock->ops->compat_ioctl)
+- ret = sock->ops->compat_ioctl(sock, cmd, arg);
+-
+- return ret;
+-}
+-#endif
+-
+-int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+-{
+- return sock->ops->bind(sock, addr, addrlen);
+-}
+-
+-int kernel_listen(struct socket *sock, int backlog)
+-{
+- return sock->ops->listen(sock, backlog);
+-}
+-
+-int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+-{
+- struct sock *sk = sock->sk;
+- int err;
+-
+- err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+- newsock);
+- if (err < 0)
+- goto done;
+-
+- err = sock->ops->accept(sock, *newsock, flags);
+- if (err < 0) {
+- sock_release(*newsock);
+- goto done;
+- }
+-
+- (*newsock)->ops = sock->ops;
+-
+-done:
+- return err;
+-}
+-
+-int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+- int flags)
+-{
+- return sock->ops->connect(sock, addr, addrlen, flags);
+-}
+-
+-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+- int *addrlen)
+-{
+- return sock->ops->getname(sock, addr, addrlen, 0);
+-}
+-
+-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+- int *addrlen)
+-{
+- return sock->ops->getname(sock, addr, addrlen, 1);
+-}
+-
+-int kernel_getsockopt(struct socket *sock, int level, int optname,
+- char *optval, int *optlen)
+-{
+- mm_segment_t oldfs = get_fs();
+- int err;
+-
+- set_fs(KERNEL_DS);
+- if (level == SOL_SOCKET)
+- err = sock_getsockopt(sock, level, optname, optval, optlen);
+- else
+- err = sock->ops->getsockopt(sock, level, optname, optval,
+- optlen);
+- set_fs(oldfs);
+- return err;
+-}
+-
+-int kernel_setsockopt(struct socket *sock, int level, int optname,
+- char *optval, int optlen)
+-{
+- mm_segment_t oldfs = get_fs();
+- int err;
+-
+- set_fs(KERNEL_DS);
+- if (level == SOL_SOCKET)
+- err = sock_setsockopt(sock, level, optname, optval, optlen);
+- else
+- err = sock->ops->setsockopt(sock, level, optname, optval,
+- optlen);
+- set_fs(oldfs);
+- return err;
+-}
+-
+-int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+- size_t size, int flags)
+-{
+- if (sock->ops->sendpage)
+- return sock->ops->sendpage(sock, page, offset, size, flags);
+-
+- return sock_no_sendpage(sock, page, offset, size, flags);
+-}
+-
+-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+-{
+- mm_segment_t oldfs = get_fs();
+- int err;
+-
+- set_fs(KERNEL_DS);
+- err = sock->ops->ioctl(sock, cmd, arg);
+- set_fs(oldfs);
+-
+- return err;
+-}
+-
+-/* ABI emulation layers need these two */
+-EXPORT_SYMBOL(move_addr_to_kernel);
+-EXPORT_SYMBOL(move_addr_to_user);
+-EXPORT_SYMBOL(sock_create);
+-EXPORT_SYMBOL(sock_create_kern);
+-EXPORT_SYMBOL(sock_create_lite);
+-EXPORT_SYMBOL(sock_map_fd);
+-EXPORT_SYMBOL(sock_recvmsg);
+-EXPORT_SYMBOL(sock_register);
+-EXPORT_SYMBOL(sock_release);
+-EXPORT_SYMBOL(sock_sendmsg);
+-EXPORT_SYMBOL(sock_unregister);
+-EXPORT_SYMBOL(sock_wake_async);
+-EXPORT_SYMBOL(sockfd_lookup);
+-EXPORT_SYMBOL(kernel_sendmsg);
+-EXPORT_SYMBOL(kernel_recvmsg);
+-EXPORT_SYMBOL(kernel_bind);
+-EXPORT_SYMBOL(kernel_listen);
+-EXPORT_SYMBOL(kernel_accept);
+-EXPORT_SYMBOL(kernel_connect);
+-EXPORT_SYMBOL(kernel_getsockname);
+-EXPORT_SYMBOL(kernel_getpeername);
+-EXPORT_SYMBOL(kernel_getsockopt);
+-EXPORT_SYMBOL(kernel_setsockopt);
+-EXPORT_SYMBOL(kernel_sendpage);
+-EXPORT_SYMBOL(kernel_sock_ioctl);
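
The kernel_* wrappers above give in-kernel users (NFS, CIFS, cluster code)
the BSD socket API without detouring through file descriptors or user-space
pointers. A minimal sketch of a module that sets up a TCP listener with them,
assuming the 2.6.22-era signatures shown above; the module, port, and backlog
are illustrative:

    #include <linux/module.h>
    #include <linux/net.h>
    #include <linux/in.h>
    #include <net/sock.h>

    static struct socket *listen_sock;

    static int __init demo_init(void)
    {
            struct sockaddr_in sin = {
                    .sin_family      = AF_INET,
                    .sin_addr.s_addr = htonl(INADDR_ANY),
                    .sin_port        = htons(12345),
            };
            int one = 1, err;

            err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP,
                                   &listen_sock);
            if (err < 0)
                    return err;

            /* SOL_SOCKET options route through sock_setsockopt(). */
            kernel_setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR,
                              (char *)&one, sizeof(one));

            err = kernel_bind(listen_sock, (struct sockaddr *)&sin,
                              sizeof(sin));
            if (err == 0)
                    err = kernel_listen(listen_sock, 5);
            if (err < 0)
                    sock_release(listen_sock);
            return err;
    }

    static void __exit demo_exit(void)
    {
            sock_release(listen_sock);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
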
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-591/net/sunrpc/auth.c
+--- linux-2.6.22-570/net/sunrpc/auth.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/sunrpc/auth.c 2007-12-21 15:36:12.000000000 -0500
+@@ -19,12 +19,16 @@
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+ #endif
+
+-static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = {
++static DEFINE_SPINLOCK(rpc_authflavor_lock);
++static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+ &authnull_ops, /* AUTH_NULL */
+ &authunix_ops, /* AUTH_UNIX */
+ NULL, /* others can be loadable modules */
+ };
+
++static LIST_HEAD(cred_unused);
++static unsigned long number_cred_unused;
++
+ static u32
+ pseudoflavor_to_flavor(u32 flavor) {
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+@@ -33,55 +37,67 @@
+ }
+
+ int
+-rpcauth_register(struct rpc_authops *ops)
++rpcauth_register(const struct rpc_authops *ops)
+ {
+ rpc_authflavor_t flavor;
++ int ret = -EPERM;
+
+ if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ return -EINVAL;
+- if (auth_flavors[flavor] != NULL)
+- return -EPERM; /* what else? */
++ spin_lock(&rpc_authflavor_lock);
++ if (auth_flavors[flavor] == NULL) {
+ auth_flavors[flavor] = ops;
+- return 0;
++ ret = 0;
++ }
++ spin_unlock(&rpc_authflavor_lock);
++ return ret;
+ }
+
+ int
+-rpcauth_unregister(struct rpc_authops *ops)
++rpcauth_unregister(const struct rpc_authops *ops)
+ {
+ rpc_authflavor_t flavor;
++ int ret = -EPERM;
+
+ if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ return -EINVAL;
+- if (auth_flavors[flavor] != ops)
+- return -EPERM; /* what else? */
++ spin_lock(&rpc_authflavor_lock);
++ if (auth_flavors[flavor] == ops) {
+ auth_flavors[flavor] = NULL;
+- return 0;
++ ret = 0;
++ }
++ spin_unlock(&rpc_authflavor_lock);
++ return ret;
+ }
+
+ struct rpc_auth *
+ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+ {
+ struct rpc_auth *auth;
+- struct rpc_authops *ops;
++ const struct rpc_authops *ops;
+ u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
+
+ auth = ERR_PTR(-EINVAL);
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ goto out;
+
+- /* FIXME - auth_flavors[] really needs an rw lock,
+- * and module refcounting. */
+ #ifdef CONFIG_KMOD
+ if ((ops = auth_flavors[flavor]) == NULL)
+ request_module("rpc-auth-%u", flavor);
+ #endif
+- if ((ops = auth_flavors[flavor]) == NULL)
++ spin_lock(&rpc_authflavor_lock);
++ ops = auth_flavors[flavor];
++ if (ops == NULL || !try_module_get(ops->owner)) {
++ spin_unlock(&rpc_authflavor_lock);
+ goto out;
++ }
++ spin_unlock(&rpc_authflavor_lock);
+ auth = ops->create(clnt, pseudoflavor);
++ module_put(ops->owner);
+ if (IS_ERR(auth))
+ return auth;
+ if (clnt->cl_auth)
+- rpcauth_destroy(clnt->cl_auth);
++ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = auth;
+
+ out:
+@@ -89,7 +105,7 @@
+ }
+
+ void
+-rpcauth_destroy(struct rpc_auth *auth)
++rpcauth_release(struct rpc_auth *auth)
+ {
+ if (!atomic_dec_and_test(&auth->au_count))
+ return;
+@@ -98,11 +114,31 @@
+
+ static DEFINE_SPINLOCK(rpc_credcache_lock);
+
++static void
++rpcauth_unhash_cred_locked(struct rpc_cred *cred)
++{
++ hlist_del_rcu(&cred->cr_hash);
++ smp_mb__before_clear_bit();
++ clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++}
++
++static void
++rpcauth_unhash_cred(struct rpc_cred *cred)
++{
++ spinlock_t *cache_lock;
++
++ cache_lock = &cred->cr_auth->au_credcache->lock;
++ spin_lock(cache_lock);
++ if (atomic_read(&cred->cr_count) == 0)
++ rpcauth_unhash_cred_locked(cred);
++ spin_unlock(cache_lock);
++}
++
+ /*
+ * Initialize RPC credential cache
+ */
+ int
+-rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
++rpcauth_init_credcache(struct rpc_auth *auth)
+ {
+ struct rpc_cred_cache *new;
+ int i;
+@@ -112,8 +148,7 @@
+ return -ENOMEM;
+ for (i = 0; i < RPC_CREDCACHE_NR; i++)
+ INIT_HLIST_HEAD(&new->hashtable[i]);
+- new->expire = expire;
+- new->nextgc = jiffies + (expire >> 1);
++ spin_lock_init(&new->lock);
+ auth->au_credcache = new;
+ return 0;
+ }
+@@ -122,13 +157,13 @@
+ * Destroy a list of credentials
+ */
+ static inline
+-void rpcauth_destroy_credlist(struct hlist_head *head)
++void rpcauth_destroy_credlist(struct list_head *head)
+ {
+ struct rpc_cred *cred;
+
+- while (!hlist_empty(head)) {
+- cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
+- hlist_del_init(&cred->cr_hash);
++ while (!list_empty(head)) {
++ cred = list_entry(head->next, struct rpc_cred, cr_lru);
++ list_del_init(&cred->cr_lru);
+ put_rpccred(cred);
+ }
+ }
+@@ -138,58 +173,95 @@
+ * that are not referenced.
+ */
+ void
+-rpcauth_free_credcache(struct rpc_auth *auth)
++rpcauth_clear_credcache(struct rpc_cred_cache *cache)
+ {
+- struct rpc_cred_cache *cache = auth->au_credcache;
+- HLIST_HEAD(free);
+- struct hlist_node *pos, *next;
++ LIST_HEAD(free);
++ struct hlist_head *head;
+ struct rpc_cred *cred;
+ int i;
+
+ spin_lock(&rpc_credcache_lock);
++ spin_lock(&cache->lock);
+ for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+- __hlist_del(&cred->cr_hash);
+- hlist_add_head(&cred->cr_hash, &free);
++ head = &cache->hashtable[i];
++ while (!hlist_empty(head)) {
++ cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
++ get_rpccred(cred);
++ if (!list_empty(&cred->cr_lru)) {
++ list_del(&cred->cr_lru);
++ number_cred_unused--;
+ }
++ list_add_tail(&cred->cr_lru, &free);
++ rpcauth_unhash_cred_locked(cred);
+ }
++ }
++ spin_unlock(&cache->lock);
+ spin_unlock(&rpc_credcache_lock);
+ rpcauth_destroy_credlist(&free);
+ }
+
+-static void
+-rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
++/*
++ * Destroy the RPC credential cache
++ */
++void
++rpcauth_destroy_credcache(struct rpc_auth *auth)
+ {
+- if (atomic_read(&cred->cr_count) != 1)
+- return;
+- if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+- if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
+- __hlist_del(&cred->cr_hash);
+- hlist_add_head(&cred->cr_hash, free);
++ struct rpc_cred_cache *cache = auth->au_credcache;
++
++ if (cache) {
++ auth->au_credcache = NULL;
++ rpcauth_clear_credcache(cache);
++ kfree(cache);
+ }
+ }
+
+ /*
+ * Remove stale credentials. Avoid sleeping inside the loop.
+ */
+-static void
+-rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
++static int
++rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
+ {
+- struct rpc_cred_cache *cache = auth->au_credcache;
+- struct hlist_node *pos, *next;
++ spinlock_t *cache_lock;
+ struct rpc_cred *cred;
+- int i;
+
+- dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
+- for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+- hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+- cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+- rpcauth_prune_expired(auth, cred, free);
++ while (!list_empty(&cred_unused)) {
++ cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
++ list_del_init(&cred->cr_lru);
++ number_cred_unused--;
++ if (atomic_read(&cred->cr_count) != 0)
++ continue;
++ cache_lock = &cred->cr_auth->au_credcache->lock;
++ spin_lock(cache_lock);
++ if (atomic_read(&cred->cr_count) == 0) {
++ get_rpccred(cred);
++ list_add_tail(&cred->cr_lru, free);
++ rpcauth_unhash_cred_locked(cred);
++ nr_to_scan--;
+ }
++ spin_unlock(cache_lock);
++ if (nr_to_scan == 0)
++ break;
+ }
+- cache->nextgc = jiffies + cache->expire;
++ return nr_to_scan;
++}
++
++/*
++ * Run memory cache shrinker.
++ */
++static int
++rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
++{
++ LIST_HEAD(free);
++ int res;
++
++ if (list_empty(&cred_unused))
++ return 0;
++ spin_lock(&rpc_credcache_lock);
++ nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
++ res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++ spin_unlock(&rpc_credcache_lock);
++ rpcauth_destroy_credlist(&free);
++ return res;
+ }
+
+ /*
+@@ -199,53 +271,56 @@
+ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
+ int flags)
+ {
++ LIST_HEAD(free);
+ struct rpc_cred_cache *cache = auth->au_credcache;
+- HLIST_HEAD(free);
+- struct hlist_node *pos, *next;
+- struct rpc_cred *new = NULL,
+- *cred = NULL;
++ struct hlist_node *pos;
++ struct rpc_cred *cred = NULL,
++ *entry, *new;
+ int nr = 0;
+
+ if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
+ nr = acred->uid & RPC_CREDCACHE_MASK;
+-retry:
+- spin_lock(&rpc_credcache_lock);
+- if (time_before(cache->nextgc, jiffies))
+- rpcauth_gc_credcache(auth, &free);
+- hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
+- struct rpc_cred *entry;
+- entry = hlist_entry(pos, struct rpc_cred, cr_hash);
+- if (entry->cr_ops->crmatch(acred, entry, flags)) {
+- hlist_del(&entry->cr_hash);
+- cred = entry;
+- break;
+- }
+- rpcauth_prune_expired(auth, entry, &free);
+- }
+- if (new) {
+- if (cred)
+- hlist_add_head(&new->cr_hash, &free);
+- else
+- cred = new;
++
++ rcu_read_lock();
++ hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
++ if (!entry->cr_ops->crmatch(acred, entry, flags))
++ continue;
++ spin_lock(&cache->lock);
++ if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
++ spin_unlock(&cache->lock);
++ continue;
+ }
+- if (cred) {
+- hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
+- get_rpccred(cred);
++ cred = get_rpccred(entry);
++ spin_unlock(&cache->lock);
++ break;
+ }
+- spin_unlock(&rpc_credcache_lock);
++ rcu_read_unlock();
+
+- rpcauth_destroy_credlist(&free);
++ if (cred != NULL)
++ goto found;
+
+- if (!cred) {
+ new = auth->au_ops->crcreate(auth, acred, flags);
+- if (!IS_ERR(new)) {
+-#ifdef RPC_DEBUG
+- new->cr_magic = RPCAUTH_CRED_MAGIC;
+-#endif
+- goto retry;
+- } else
++ if (IS_ERR(new)) {
++ cred = new;
++ goto out;
++ }
++
++ spin_lock(&cache->lock);
++ hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) {
++ if (!entry->cr_ops->crmatch(acred, entry, flags))
++ continue;
++ cred = get_rpccred(entry);
++ break;
++ }
++ if (cred == NULL) {
+ cred = new;
+- } else if ((cred->cr_flags & RPCAUTH_CRED_NEW)
++ set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++ hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
++ } else
++ list_add_tail(&new->cr_lru, &free);
++ spin_unlock(&cache->lock);
++found:
++ if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)
+ && cred->cr_ops->cr_init != NULL
+ && !(flags & RPCAUTH_LOOKUP_NEW)) {
+ int res = cred->cr_ops->cr_init(auth, cred);
+@@ -254,8 +329,9 @@
+ cred = ERR_PTR(res);
+ }
+ }
+-
+- return (struct rpc_cred *) cred;
++ rpcauth_destroy_credlist(&free);
++out:
++ return cred;
+ }
+
+ struct rpc_cred *
+@@ -277,6 +353,23 @@
+ return ret;
+ }
+
++void
++rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
++ struct rpc_auth *auth, const struct rpc_credops *ops)
++{
++ INIT_HLIST_NODE(&cred->cr_hash);
++ INIT_LIST_HEAD(&cred->cr_lru);
++ atomic_set(&cred->cr_count, 1);
++ cred->cr_auth = auth;
++ cred->cr_ops = ops;
++ cred->cr_expire = jiffies;
++#ifdef RPC_DEBUG
++ cred->cr_magic = RPCAUTH_CRED_MAGIC;
++#endif
++ cred->cr_uid = acred->uid;
++}
++EXPORT_SYMBOL(rpcauth_init_cred);
++
+ struct rpc_cred *
+ rpcauth_bindcred(struct rpc_task *task)
+ {
+@@ -317,9 +410,31 @@
+ void
+ put_rpccred(struct rpc_cred *cred)
+ {
+- cred->cr_expire = jiffies;
++ /* Fast path for unhashed credentials */
++ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
++ goto need_lock;
++
+ if (!atomic_dec_and_test(&cred->cr_count))
+ return;
++ goto out_destroy;
++need_lock:
++ if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
++ return;
++ if (!list_empty(&cred->cr_lru)) {
++ number_cred_unused--;
++ list_del_init(&cred->cr_lru);
++ }
++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
++ rpcauth_unhash_cred(cred);
++ else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
++ cred->cr_expire = jiffies;
++ list_add_tail(&cred->cr_lru, &cred_unused);
++ number_cred_unused++;
++ spin_unlock(&rpc_credcache_lock);
++ return;
++ }
++ spin_unlock(&rpc_credcache_lock);
++out_destroy:
+ cred->cr_ops->crdestroy(cred);
+ }
+
+@@ -404,17 +519,34 @@
+ void
+ rpcauth_invalcred(struct rpc_task *task)
+ {
++ struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
+ dprintk("RPC: %5u invalidating %s cred %p\n",
+- task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
+- spin_lock(&rpc_credcache_lock);
+- if (task->tk_msg.rpc_cred)
+- task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+- spin_unlock(&rpc_credcache_lock);
++ task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++ if (cred)
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ }
+
+ int
+ rpcauth_uptodatecred(struct rpc_task *task)
+ {
+- return !(task->tk_msg.rpc_cred) ||
+- (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
++ struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
++ return cred == NULL ||
++ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
++}
++
++
++static struct shrinker *rpc_cred_shrinker;
++
++void __init rpcauth_init_module(void)
++{
++ rpc_init_authunix();
++ rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker);
++}
++
++void __exit rpcauth_remove_module(void)
++{
++ if (rpc_cred_shrinker != NULL)
++ remove_shrinker(rpc_cred_shrinker);
+ }
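
The new shrinker ties the credential LRU into global VM pressure:
set_shrinker() registers a callback that page reclaim drives with an
nr_to_scan budget, and the return value advertises how many objects remain
freeable. A stripped-down sketch against the same pre-2.6.23 API; demo_cached
stands in for a real object cache, and only the set_shrinker()/
remove_shrinker() calls and the (nr_to_scan, gfp_mask) signature come from
this kernel version:

    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/mm.h>           /* set_shrinker(), DEFAULT_SEEKS */
    #include <linux/dcache.h>       /* sysctl_vfs_cache_pressure */

    static atomic_t demo_cached = ATOMIC_INIT(0);
    static struct shrinker *demo_shrinker;

    static int demo_shrink(int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan) {
                    /* Free up to nr_to_scan cached objects here. */
                    int n = min(nr_to_scan, atomic_read(&demo_cached));
                    atomic_sub(n, &demo_cached);
            }
            /* Advertise remaining freeable objects, scaled by the VFS
             * cache-pressure knob, as rpcauth_cache_shrinker() does. */
            return (atomic_read(&demo_cached) / 100) *
                    sysctl_vfs_cache_pressure;
    }

    static int __init demo_init(void)
    {
            demo_shrinker = set_shrinker(DEFAULT_SEEKS, demo_shrink);
            return demo_shrinker != NULL ? 0 : -ENOMEM;
    }

    static void __exit demo_exit(void)
    {
            remove_shrinker(demo_shrinker);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
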
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c 2007-12-21 15:36:12.000000000 -0500
+@@ -54,9 +54,9 @@
+ #include <linux/sunrpc/gss_api.h>
+ #include <asm/uaccess.h>
+
+-static struct rpc_authops authgss_ops;
++static const struct rpc_authops authgss_ops;
+
+-static struct rpc_credops gss_credops;
++static const struct rpc_credops gss_credops;
+
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+@@ -64,7 +64,6 @@
+
+ #define NFS_NGROUPS 16
+
+-#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */
+ #define GSS_CRED_SLACK 1024 /* XXX: unused */
+ /* length of a krb5 verifier (48), plus data added before arguments when
+ * using integrity (two 4-byte integers): */
+@@ -85,10 +84,8 @@
+ struct rpc_auth rpc_auth;
+ struct gss_api_mech *mech;
+ enum rpc_gss_svc service;
+- struct list_head upcalls;
+ struct rpc_clnt *client;
+ struct dentry *dentry;
+- spinlock_t lock;
+ };
+
+ static void gss_destroy_ctx(struct gss_cl_ctx *);
+@@ -116,8 +113,8 @@
+ write_lock(&gss_ctx_lock);
+ old = gss_cred->gc_ctx;
+ gss_cred->gc_ctx = ctx;
+- cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+- cred->cr_flags &= ~RPCAUTH_CRED_NEW;
++ set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
++ clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
+ write_unlock(&gss_ctx_lock);
+ if (old)
+ gss_put_ctx(old);
+@@ -130,7 +127,7 @@
+ int res = 0;
+
+ read_lock(&gss_ctx_lock);
+- if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx)
+ res = 1;
+ read_unlock(&gss_ctx_lock);
+ return res;
+@@ -269,10 +266,10 @@
+ }
+
+ static struct gss_upcall_msg *
+-__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
++__gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
+ {
+ struct gss_upcall_msg *pos;
+- list_for_each_entry(pos, &gss_auth->upcalls, list) {
++ list_for_each_entry(pos, &rpci->in_downcall, list) {
+ if (pos->uid != uid)
+ continue;
+ atomic_inc(&pos->count);
+@@ -290,24 +287,24 @@
+ static inline struct gss_upcall_msg *
+ gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
+ {
++ struct inode *inode = gss_auth->dentry->d_inode;
++ struct rpc_inode *rpci = RPC_I(inode);
+ struct gss_upcall_msg *old;
+
+- spin_lock(&gss_auth->lock);
+- old = __gss_find_upcall(gss_auth, gss_msg->uid);
++ spin_lock(&inode->i_lock);
++ old = __gss_find_upcall(rpci, gss_msg->uid);
+ if (old == NULL) {
+ atomic_inc(&gss_msg->count);
+- list_add(&gss_msg->list, &gss_auth->upcalls);
++ list_add(&gss_msg->list, &rpci->in_downcall);
+ } else
+ gss_msg = old;
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ return gss_msg;
+ }
+
+ static void
+ __gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+- if (list_empty(&gss_msg->list))
+- return;
+ list_del_init(&gss_msg->list);
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+ wake_up_all(&gss_msg->waitqueue);
+@@ -318,10 +315,14 @@
+ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+ struct gss_auth *gss_auth = gss_msg->auth;
++ struct inode *inode = gss_auth->dentry->d_inode;
+
+- spin_lock(&gss_auth->lock);
++ if (list_empty(&gss_msg->list))
++ return;
++ spin_lock(&inode->i_lock);
++ if (!list_empty(&gss_msg->list))
+ __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ }
+
+ static void
+@@ -330,16 +331,16 @@
+ struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+ struct gss_cred, gc_base);
+ struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
++ struct inode *inode = gss_msg->auth->dentry->d_inode;
+
+- BUG_ON(gss_msg == NULL);
+ if (gss_msg->ctx)
+ gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
+ else
+ task->tk_status = gss_msg->msg.errno;
+- spin_lock(&gss_msg->auth->lock);
++ spin_lock(&inode->i_lock);
+ gss_cred->gc_upcall = NULL;
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+- spin_unlock(&gss_msg->auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+ }
+
+@@ -386,11 +387,12 @@
+ gss_refresh_upcall(struct rpc_task *task)
+ {
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+- struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
++ struct gss_auth *gss_auth = container_of(cred->cr_auth,
+ struct gss_auth, rpc_auth);
+ struct gss_cred *gss_cred = container_of(cred,
+ struct gss_cred, gc_base);
+ struct gss_upcall_msg *gss_msg;
++ struct inode *inode = gss_auth->dentry->d_inode;
+ int err = 0;
+
+ dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
+@@ -400,7 +402,7 @@
+ err = PTR_ERR(gss_msg);
+ goto out;
+ }
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ if (gss_cred->gc_upcall != NULL)
+ rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
+ else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+@@ -411,7 +413,7 @@
+ rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
+ } else
+ err = gss_msg->msg.errno;
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+ out:
+ dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
+@@ -422,6 +424,7 @@
+ static inline int
+ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ {
++ struct inode *inode = gss_auth->dentry->d_inode;
+ struct rpc_cred *cred = &gss_cred->gc_base;
+ struct gss_upcall_msg *gss_msg;
+ DEFINE_WAIT(wait);
+@@ -435,12 +438,12 @@
+ }
+ for (;;) {
+ prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ break;
+ }
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ if (signalled()) {
+ err = -ERESTARTSYS;
+ goto out_intr;
+@@ -489,12 +492,11 @@
+ const void *p, *end;
+ void *buf;
+ struct rpc_clnt *clnt;
+- struct gss_auth *gss_auth;
+- struct rpc_cred *cred;
+ struct gss_upcall_msg *gss_msg;
++ struct inode *inode = filp->f_path.dentry->d_inode;
+ struct gss_cl_ctx *ctx;
+ uid_t uid;
+- int err = -EFBIG;
++ ssize_t err = -EFBIG;
+
+ if (mlen > MSG_BUF_MAXSIZE)
+ goto out;
+@@ -503,7 +505,7 @@
+ if (!buf)
+ goto out;
+
+- clnt = RPC_I(filp->f_path.dentry->d_inode)->private;
++ clnt = RPC_I(inode)->private;
+ err = -EFAULT;
+ if (copy_from_user(buf, src, mlen))
+ goto err;
+@@ -519,43 +521,38 @@
+ ctx = gss_alloc_context();
+ if (ctx == NULL)
+ goto err;
+- err = 0;
+- gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
+- p = gss_fill_context(p, end, ctx, gss_auth->mech);
++
++ err = -ENOENT;
++ /* Find a matching upcall */
++ spin_lock(&inode->i_lock);
++ gss_msg = __gss_find_upcall(RPC_I(inode), uid);
++ if (gss_msg == NULL) {
++ spin_unlock(&inode->i_lock);
++ goto err_put_ctx;
++ }
++ list_del_init(&gss_msg->list);
++ spin_unlock(&inode->i_lock);
++
++ p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+- if (err != -EACCES)
+- goto err_put_ctx;
++ gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN;
++ goto err_release_msg;
+ }
+- spin_lock(&gss_auth->lock);
+- gss_msg = __gss_find_upcall(gss_auth, uid);
+- if (gss_msg) {
+- if (err == 0 && gss_msg->ctx == NULL)
+ gss_msg->ctx = gss_get_ctx(ctx);
+- gss_msg->msg.errno = err;
++ err = mlen;
++
++err_release_msg:
++ spin_lock(&inode->i_lock);
+ __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+- } else {
+- struct auth_cred acred = { .uid = uid };
+- spin_unlock(&gss_auth->lock);
+- cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW);
+- if (IS_ERR(cred)) {
+- err = PTR_ERR(cred);
+- goto err_put_ctx;
+- }
+- gss_cred_set_ctx(cred, gss_get_ctx(ctx));
+- }
+- gss_put_ctx(ctx);
+- kfree(buf);
+- dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen);
+- return mlen;
+ err_put_ctx:
+ gss_put_ctx(ctx);
+ err:
+ kfree(buf);
+ out:
+- dprintk("RPC: gss_pipe_downcall returning %d\n", err);
++ dprintk("RPC: gss_pipe_downcall returning %Zd\n", err);
+ return err;
+ }
+
+@@ -563,27 +560,21 @@
+ gss_pipe_release(struct inode *inode)
+ {
+ struct rpc_inode *rpci = RPC_I(inode);
+- struct rpc_clnt *clnt;
+- struct rpc_auth *auth;
+- struct gss_auth *gss_auth;
+-
+- clnt = rpci->private;
+- auth = clnt->cl_auth;
+- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+- spin_lock(&gss_auth->lock);
+- while (!list_empty(&gss_auth->upcalls)) {
+ struct gss_upcall_msg *gss_msg;
+
+- gss_msg = list_entry(gss_auth->upcalls.next,
++ spin_lock(&inode->i_lock);
++ while (!list_empty(&rpci->in_downcall)) {
++
++ gss_msg = list_entry(rpci->in_downcall.next,
+ struct gss_upcall_msg, list);
+ gss_msg->msg.errno = -EPIPE;
+ atomic_inc(&gss_msg->count);
+ __gss_unhash_msg(gss_msg);
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+- spin_lock(&gss_auth->lock);
++ spin_lock(&inode->i_lock);
+ }
+- spin_unlock(&gss_auth->lock);
++ spin_unlock(&inode->i_lock);
+ }
+
+ static void
+@@ -637,8 +628,6 @@
+ gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
+ if (gss_auth->service == 0)
+ goto err_put_mech;
+- INIT_LIST_HEAD(&gss_auth->upcalls);
+- spin_lock_init(&gss_auth->lock);
+ auth = &gss_auth->rpc_auth;
+ auth->au_cslack = GSS_CRED_SLACK >> 2;
+ auth->au_rslack = GSS_VERF_SLACK >> 2;
+@@ -646,10 +635,6 @@
+ auth->au_flavor = flavor;
+ atomic_set(&auth->au_count, 1);
+
+- err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
+- if (err)
+- goto err_put_mech;
+-
+ gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+ clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(gss_auth->dentry)) {
+@@ -657,7 +642,13 @@
+ goto err_put_mech;
+ }
+
++ err = rpcauth_init_credcache(auth);
++ if (err)
++ goto err_unlink_pipe;
++
+ return auth;
++err_unlink_pipe:
++ rpc_unlink(gss_auth->dentry);
+ err_put_mech:
+ gss_mech_put(gss_auth->mech);
+ err_free:
+@@ -675,12 +666,13 @@
+ dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
+ auth, auth->au_flavor);
+
++ rpcauth_destroy_credcache(auth);
++
+ gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+ rpc_unlink(gss_auth->dentry);
+ gss_auth->dentry = NULL;
+ gss_mech_put(gss_auth->mech);
+
+- rpcauth_free_credcache(auth);
+ kfree(gss_auth);
+ module_put(THIS_MODULE);
+ }
+@@ -701,17 +693,27 @@
+ }
+
+ static void
+-gss_destroy_cred(struct rpc_cred *rc)
++gss_free_cred(struct gss_cred *cred)
+ {
+- struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
+-
+- dprintk("RPC: gss_destroy_cred \n");
+-
++ dprintk("RPC: gss_free_cred %p\n", cred);
+ if (cred->gc_ctx)
+ gss_put_ctx(cred->gc_ctx);
+ kfree(cred);
+ }
+
++static void
++gss_free_cred_callback(struct rcu_head *head)
++{
++ struct gss_cred *cred = container_of(head, struct gss_cred, gc_base.cr_rcu);
++ gss_free_cred(cred);
++}
++
++static void
++gss_destroy_cred(struct rpc_cred *rc)
++{
++ call_rcu(&rc->cr_rcu, gss_free_cred_callback);
++}
++
+ /*
+ * Lookup RPCSEC_GSS cred for the current process
+ */
+@@ -734,15 +736,12 @@
+ if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
+ goto out_err;
+
+- atomic_set(&cred->gc_count, 1);
+- cred->gc_uid = acred->uid;
++ rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
+ /*
+ * Note: in order to force a call to call_refresh(), we deliberately
+ * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
+ */
+- cred->gc_flags = 0;
+- cred->gc_base.cr_ops = &gss_credops;
+- cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
++ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
+ cred->gc_service = gss_auth->service;
+ return &cred->gc_base;
+
+@@ -774,7 +773,7 @@
+ * we don't really care if the credential has expired or not,
+ * since the caller should be prepared to reinitialise it.
+ */
+- if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW))
++ if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
+ goto out;
+ /* Don't match with creds that have expired. */
+ if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+@@ -830,7 +829,7 @@
+ mic.data = (u8 *)(p + 1);
+ maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ } else if (maj_stat != 0) {
+ printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
+ goto out_put_ctx;
+@@ -883,7 +882,7 @@
+
+ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat)
+ goto out_bad;
+ /* We leave it to unwrap to calculate au_rslack. For now we just
+@@ -937,7 +936,7 @@
+ maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ status = -EIO; /* XXX? */
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ else if (maj_stat)
+ return status;
+ q = xdr_encode_opaque(p, NULL, mic.len);
+@@ -1036,7 +1035,7 @@
+ /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+ * done anyway, so it's safe to put the request on the wire: */
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ else if (maj_stat)
+ return status;
+
+@@ -1123,7 +1122,7 @@
+
+ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat != GSS_S_COMPLETE)
+ return status;
+ return 0;
+@@ -1148,7 +1147,7 @@
+
+ maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ if (maj_stat != GSS_S_COMPLETE)
+ return status;
+ if (ntohl(*(*p)++) != rqstp->rq_seqno)
+@@ -1199,7 +1198,7 @@
+ return status;
+ }
+
+-static struct rpc_authops authgss_ops = {
++static const struct rpc_authops authgss_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_GSS,
+ #ifdef RPC_DEBUG
+@@ -1211,7 +1210,7 @@
+ .crcreate = gss_create_cred
+ };
+
+-static struct rpc_credops gss_credops = {
++static const struct rpc_credops gss_credops = {
+ .cr_name = "AUTH_GSS",
+ .crdestroy = gss_destroy_cred,
+ .cr_init = gss_cred_init,
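
gss_destroy_cred() now frees through call_rcu() because credential lookups
walk the hash chains under rcu_read_lock() alone: the memory must stay valid
until every such reader has finished its grace period. The pattern in
isolation, with hypothetical type and function names:

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct demo_cred {
            int             uid;
            struct rcu_head rcu;    /* embedded, like cr_rcu above */
    };

    static void demo_free_callback(struct rcu_head *head)
    {
            struct demo_cred *cred =
                    container_of(head, struct demo_cred, rcu);
            kfree(cred);
    }

    static void demo_destroy(struct demo_cred *cred)
    {
            /* Safe even while hlist_for_each_entry_rcu() walkers are
             * traversing the chain: reclamation is deferred until all
             * current RCU readers are done. */
            call_rcu(&cred->rcu, demo_free_callback);
    }
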
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-12-21 15:36:12.000000000 -0500
+@@ -201,7 +201,7 @@
+ kfree(kctx);
+ }
+
+-static struct gss_api_ops gss_kerberos_ops = {
++static const struct gss_api_ops gss_kerberos_ops = {
+ .gss_import_sec_context = gss_import_sec_context_kerberos,
+ .gss_get_mic = gss_get_mic_kerberos,
+ .gss_verify_mic = gss_verify_mic_kerberos,
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-12-21 15:36:12.000000000 -0500
+@@ -202,7 +202,7 @@
+ return err;
+ }
+
+-static struct gss_api_ops gss_spkm3_ops = {
++static const struct gss_api_ops gss_spkm3_ops = {
+ .gss_import_sec_context = gss_import_sec_context_spkm3,
+ .gss_get_mic = gss_get_mic_spkm3,
+ .gss_verify_mic = gss_verify_mic_spkm3,
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_null.c linux-2.6.22-591/net/sunrpc/auth_null.c
+--- linux-2.6.22-570/net/sunrpc/auth_null.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/auth_null.c 2007-12-21 15:36:12.000000000 -0500
+@@ -76,7 +76,7 @@
+ static int
+ nul_refresh(struct rpc_task *task)
+ {
+- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ return 0;
+ }
+
+@@ -101,7 +101,7 @@
+ return p;
+ }
+
+-struct rpc_authops authnull_ops = {
++const struct rpc_authops authnull_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_NULL,
+ #ifdef RPC_DEBUG
+@@ -122,7 +122,7 @@
+ };
+
+ static
+-struct rpc_credops null_credops = {
++const struct rpc_credops null_credops = {
+ .cr_name = "AUTH_NULL",
+ .crdestroy = nul_destroy_cred,
+ .crmatch = nul_match,
+@@ -133,9 +133,11 @@
+
+ static
+ struct rpc_cred null_cred = {
++ .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
++ .cr_auth = &null_auth,
+ .cr_ops = &null_credops,
+ .cr_count = ATOMIC_INIT(1),
+- .cr_flags = RPCAUTH_CRED_UPTODATE,
++ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
+ #ifdef RPC_DEBUG
+ .cr_magic = RPCAUTH_CRED_MAGIC,
+ #endif
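
The 1UL << RPCAUTH_CRED_UPTODATE initializer is the visible half of a
patch-wide change: cr_flags now holds bit numbers manipulated with atomic
set_bit()/clear_bit()/test_bit() rather than OR-ed mask values, which is what
lets flag updates drop the rpc_credcache_lock. In miniature (names
illustrative):

    #include <linux/bitops.h>

    #define DEMO_CRED_NEW      0    /* bit numbers, not masks */
    #define DEMO_CRED_UPTODATE 1

    struct demo_cred {
            unsigned long flags;    /* init with 1UL << DEMO_CRED_UPTODATE */
    };

    static void demo_mark_uptodate(struct demo_cred *cred)
    {
            /* Old style: cred->flags |= MASK;  (needed external locking)
             * New style: atomic read-modify-write on a single bit.      */
            set_bit(DEMO_CRED_UPTODATE, &cred->flags);
            clear_bit(DEMO_CRED_NEW, &cred->flags);
    }

    static int demo_is_uptodate(struct demo_cred *cred)
    {
            return test_bit(DEMO_CRED_UPTODATE, &cred->flags);
    }
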
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c linux-2.6.22-591/net/sunrpc/auth_unix.c
+--- linux-2.6.22-570/net/sunrpc/auth_unix.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/sunrpc/auth_unix.c 2007-12-23 02:13:00.000000000 -0500
+@@ -22,11 +22,6 @@
+ gid_t uc_gids[NFS_NGROUPS];
+ };
+ #define uc_uid uc_base.cr_uid
+-#define uc_count uc_base.cr_count
+-#define uc_flags uc_base.cr_flags
+-#define uc_expire uc_base.cr_expire
+-
+-#define UNX_CRED_EXPIRE (60 * HZ)
+
+ #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
+
+@@ -36,15 +31,14 @@
+
+ static struct rpc_auth unix_auth;
+ static struct rpc_cred_cache unix_cred_cache;
+-static struct rpc_credops unix_credops;
++static const struct rpc_credops unix_credops;
+
+ static struct rpc_auth *
+ unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+ {
+ dprintk("RPC: creating UNIX authenticator for client %p\n",
+ clnt);
+- if (atomic_inc_return(&unix_auth.au_count) == 0)
+- unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
++ atomic_inc(&unix_auth.au_count);
+ return &unix_auth;
+ }
+
+@@ -52,7 +46,7 @@
+ unx_destroy(struct rpc_auth *auth)
+ {
+ dprintk("RPC: destroying UNIX authenticator %p\n", auth);
+- rpcauth_free_credcache(auth);
++ rpcauth_clear_credcache(auth->au_credcache);
+ }
+
+ /*
+@@ -76,8 +70,8 @@
+ if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+- atomic_set(&cred->uc_count, 1);
+- cred->uc_flags = RPCAUTH_CRED_UPTODATE;
++ rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops);
++ cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
+ cred->uc_uid = 0;
+ cred->uc_gid = 0;
+@@ -88,7 +82,6 @@
+ if (groups > NFS_NGROUPS)
+ groups = NFS_NGROUPS;
+
+- cred->uc_uid = acred->uid;
+ cred->uc_gid = acred->gid;
+ cred->uc_tag = acred->tag;
+ for (i = 0; i < groups; i++)
+@@ -96,17 +89,31 @@
+ if (i < NFS_NGROUPS)
+ cred->uc_gids[i] = NOGROUP;
+ }
+- cred->uc_base.cr_ops = &unix_credops;
+
+- return (struct rpc_cred *) cred;
++ return &cred->uc_base;
+ }
+
+ static void
+-unx_destroy_cred(struct rpc_cred *cred)
++unx_free_cred(struct unx_cred *cred)
+ {
++ dprintk("RPC: unx_free_cred %p\n", cred);
+ kfree(cred);
+ }
+
++static void
++unx_free_cred_callback(struct rcu_head *head)
++{
++ struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
++ unx_free_cred(cred);
++}
++
++static void
++unx_destroy_cred(struct rpc_cred *cred)
++{
++ call_rcu(&cred->cr_rcu, unx_free_cred_callback);
++}
++
++
+ /*
+ * Match credentials against current process creds.
+ * The root_override argument takes care of cases where the caller may
+@@ -115,7 +122,7 @@
+ static int
+ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
+ {
+- struct unx_cred *cred = (struct unx_cred *) rcred;
++ struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
+ int i;
+
+ if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
+@@ -147,7 +154,7 @@
+ unx_marshal(struct rpc_task *task, __be32 *p)
+ {
+ struct rpc_clnt *clnt = task->tk_client;
+- struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
++ struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+ __be32 *base, *hold;
+ int i, tag;
+
+@@ -159,7 +166,6 @@
+ * Copy the UTS nodename captured when the client was created.
+ */
+ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
+- tag = task->tk_client->cl_tag;
+
+ *p++ = htonl((u32) TAGINO_UID(tag,
+ cred->uc_uid, cred->uc_tag));
+@@ -183,7 +189,7 @@
+ static int
+ unx_refresh(struct rpc_task *task)
+ {
+- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ return 0;
+ }
+
+@@ -212,7 +218,12 @@
+ return p;
+ }
+
+-struct rpc_authops authunix_ops = {
++void __init rpc_init_authunix(void)
++{
++ spin_lock_init(&unix_cred_cache.lock);
++}
++
++const struct rpc_authops authunix_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_UNIX,
+ #ifdef RPC_DEBUG
+@@ -226,7 +237,6 @@
+
+ static
+ struct rpc_cred_cache unix_cred_cache = {
+- .expire = UNX_CRED_EXPIRE,
+ };
+
+ static
+@@ -240,7 +250,7 @@
+ };
+
+ static
+-struct rpc_credops unix_credops = {
++const struct rpc_credops unix_credops = {
+ .cr_name = "AUTH_UNIX",
+ .crdestroy = unx_destroy_cred,
+ .crmatch = unx_match,
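
Note the cast-to-container_of() conversions above (unx_match(),
unx_marshal()): the old (struct unx_cred *)rcred cast silently depends on
uc_base being the first member, while container_of() computes the enclosing
structure from any member's offset. A two-struct sketch with hypothetical
names:

    #include <linux/kernel.h>

    struct demo_base {
            int refcount;
    };

    struct demo_derived {
            char             name[8];
            struct demo_base base;  /* no longer needs to be first */
    };

    static struct demo_derived *demo_from_base(struct demo_base *b)
    {
            /* Subtracts offsetof(struct demo_derived, base) from b;
             * a plain cast would return the wrong address here. */
            return container_of(b, struct demo_derived, base);
    }
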
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c.orig linux-2.6.22-591/net/sunrpc/auth_unix.c.orig
+--- linux-2.6.22-570/net/sunrpc/auth_unix.c.orig 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/net/sunrpc/auth_unix.c.orig 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,261 @@
++/*
++ * linux/net/sunrpc/auth_unix.c
++ *
++ * UNIX-style authentication; no AUTH_SHORT support
++ *
++ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
++ */
++
++#include <linux/types.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/auth.h>
++#include <linux/vs_tag.h>
++
++#define NFS_NGROUPS 16
++
++struct unx_cred {
++ struct rpc_cred uc_base;
++ gid_t uc_gid;
++ tag_t uc_tag;
++ gid_t uc_gids[NFS_NGROUPS];
++};
++#define uc_uid uc_base.cr_uid
++
++#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
++
++#ifdef RPC_DEBUG
++# define RPCDBG_FACILITY RPCDBG_AUTH
++#endif
++
++static struct rpc_auth unix_auth;
++static struct rpc_cred_cache unix_cred_cache;
++static const struct rpc_credops unix_credops;
++
++static struct rpc_auth *
++unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
++{
++ dprintk("RPC: creating UNIX authenticator for client %p\n",
++ clnt);
++ atomic_inc(&unix_auth.au_count);
++ return &unix_auth;
++}
++
++static void
++unx_destroy(struct rpc_auth *auth)
++{
++ dprintk("RPC: destroying UNIX authenticator %p\n", auth);
++ rpcauth_clear_credcache(auth->au_credcache);
++}
++
++/*
++ * Lookup AUTH_UNIX creds for current process
++ */
++static struct rpc_cred *
++unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
++{
++ return rpcauth_lookup_credcache(auth, acred, flags);
++}
++
++static struct rpc_cred *
++unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
++{
++ struct unx_cred *cred;
++ int i;
++
++ dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
++ acred->uid, acred->gid);
++
++ if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
++ return ERR_PTR(-ENOMEM);
++
++ rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops);
++ cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
++ if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
++ cred->uc_uid = 0;
++ cred->uc_gid = 0;
++ cred->uc_tag = dx_current_tag();
++ cred->uc_gids[0] = NOGROUP;
++ } else {
++ int groups = acred->group_info->ngroups;
++ if (groups > NFS_NGROUPS)
++ groups = NFS_NGROUPS;
++
++ cred->uc_gid = acred->gid;
++ cred->uc_tag = acred->tag;
++ for (i = 0; i < groups; i++)
++ cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
++ if (i < NFS_NGROUPS)
++ cred->uc_gids[i] = NOGROUP;
++ }
++
++ return &cred->uc_base;
++}
++
++static void
++unx_free_cred(struct unx_cred *cred)
++{
++ dprintk("RPC: unx_free_cred %p\n", cred);
++ kfree(cred);
++}
++
++static void
++unx_free_cred_callback(struct rcu_head *head)
++{
++ struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
++ unx_free_cred(cred);
++}
++
++static void
++unx_destroy_cred(struct rpc_cred *cred)
++{
++ call_rcu(&cred->cr_rcu, unx_free_cred_callback);
++}
++
++
++/*
++ * Match credentials against current process creds.
++ * The root_override argument takes care of cases where the caller may
++ * request root creds (e.g. for NFS swapping).
++ */
++static int
++unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
++{
++ struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
++ int i;
++
++ if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
++ int groups;
++
++ if (cred->uc_uid != acred->uid
++ || cred->uc_gid != acred->gid
++ || cred->uc_tag != acred->tag)
++ return 0;
++
++ groups = acred->group_info->ngroups;
++ if (groups > NFS_NGROUPS)
++ groups = NFS_NGROUPS;
++ for (i = 0; i < groups ; i++)
++ if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
++ return 0;
++ return 1;
++ }
++ return (cred->uc_uid == 0
++ && cred->uc_gid == 0
++ && cred->uc_gids[0] == (gid_t) NOGROUP);
++}
++
++/*
++ * Marshal credentials.
++ * Maybe we should keep a cached credential for performance reasons.
++ */
++static __be32 *
++unx_marshal(struct rpc_task *task, __be32 *p)
++{
++ struct rpc_clnt *clnt = task->tk_client;
++ struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
++ __be32 *base, *hold;
++ int i, tag;
++
++ *p++ = htonl(RPC_AUTH_UNIX);
++ base = p++;
++ *p++ = htonl(jiffies/HZ);
++
++ /*
++ * Copy the UTS nodename captured when the client was created.
++ */
++ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
++ tag = task->tk_client->cl_tag;
++
++ *p++ = htonl((u32) TAGINO_UID(tag,
++ cred->uc_uid, cred->uc_tag));
++ *p++ = htonl((u32) TAGINO_GID(tag,
++ cred->uc_gid, cred->uc_tag));
++ hold = p++;
++ for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
++ *p++ = htonl((u32) cred->uc_gids[i]);
++ *hold = htonl(p - hold - 1); /* gid array length */
++ *base = htonl((p - base - 1) << 2); /* cred length */
++
++ *p++ = htonl(RPC_AUTH_NULL);
++ *p++ = htonl(0);
++
++ return p;
++}
++
++/*
++ * Refresh credentials. This is a no-op for AUTH_UNIX
++ */
++static int
++unx_refresh(struct rpc_task *task)
++{
++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
++ return 0;
++}
++
++static __be32 *
++unx_validate(struct rpc_task *task, __be32 *p)
++{
++ rpc_authflavor_t flavor;
++ u32 size;
++
++ flavor = ntohl(*p++);
++ if (flavor != RPC_AUTH_NULL &&
++ flavor != RPC_AUTH_UNIX &&
++ flavor != RPC_AUTH_SHORT) {
++ printk("RPC: bad verf flavor: %u\n", flavor);
++ return NULL;
++ }
++
++ size = ntohl(*p++);
++ if (size > RPC_MAX_AUTH_SIZE) {
++ printk("RPC: giant verf size: %u\n", size);
++ return NULL;
++ }
++ task->tk_auth->au_rslack = (size >> 2) + 2;
++ p += (size >> 2);
++
++ return p;
++}
++
++void __init rpc_init_authunix(void)
++{
++ spin_lock_init(&unix_cred_cache.lock);
++}
++
++const struct rpc_authops authunix_ops = {
++ .owner = THIS_MODULE,
++ .au_flavor = RPC_AUTH_UNIX,
++#ifdef RPC_DEBUG
++ .au_name = "UNIX",
++#endif
++ .create = unx_create,
++ .destroy = unx_destroy,
++ .lookup_cred = unx_lookup_cred,
++ .crcreate = unx_create_cred,
++};
++
++static
++struct rpc_cred_cache unix_cred_cache = {
++};
++
++static
++struct rpc_auth unix_auth = {
++ .au_cslack = UNX_WRITESLACK,
++ .au_rslack = 2, /* assume AUTH_NULL verf */
++ .au_ops = &authunix_ops,
++ .au_flavor = RPC_AUTH_UNIX,
++ .au_count = ATOMIC_INIT(0),
++ .au_credcache = &unix_cred_cache,
++};
++
++static
++const struct rpc_credops unix_credops = {
++ .cr_name = "AUTH_UNIX",
++ .crdestroy = unx_destroy_cred,
++ .crmatch = unx_match,
++ .crmarshal = unx_marshal,
++ .crrefresh = unx_refresh,
++ .crvalidate = unx_validate,
++};
+diff -Nurb linux-2.6.22-570/net/sunrpc/clnt.c linux-2.6.22-591/net/sunrpc/clnt.c
+--- linux-2.6.22-570/net/sunrpc/clnt.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/sunrpc/clnt.c 2007-12-21 15:36:12.000000000 -0500
+@@ -45,6 +45,12 @@
+ dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \
+ __FUNCTION__, t->tk_status)
+
++/*
++ * All RPC clients are linked into this list
++ */
++static LIST_HEAD(all_clients);
++static DEFINE_SPINLOCK(rpc_client_lock);
++
+ static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
+
+
+@@ -67,6 +73,21 @@
+ static __be32 * call_header(struct rpc_task *task);
+ static __be32 * call_verify(struct rpc_task *task);
+
++static int rpc_ping(struct rpc_clnt *clnt, int flags);
++
++static void rpc_register_client(struct rpc_clnt *clnt)
++{
++ spin_lock(&rpc_client_lock);
++ list_add(&clnt->cl_clients, &all_clients);
++ spin_unlock(&rpc_client_lock);
++}
++
++static void rpc_unregister_client(struct rpc_clnt *clnt)
++{
++ spin_lock(&rpc_client_lock);
++ list_del(&clnt->cl_clients);
++ spin_unlock(&rpc_client_lock);
++}
+
+ static int
+ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
+@@ -112,6 +133,9 @@
+ dprintk("RPC: creating %s client for %s (xprt %p)\n",
+ program->name, servname, xprt);
+
++ err = rpciod_up();
++ if (err)
++ goto out_no_rpciod;
+ err = -EINVAL;
+ if (!xprt)
+ goto out_no_xprt;
+@@ -122,8 +146,6 @@
+ clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
+ if (!clnt)
+ goto out_err;
+- atomic_set(&clnt->cl_users, 0);
+- atomic_set(&clnt->cl_count, 1);
+ clnt->cl_parent = clnt;
+
+ clnt->cl_server = clnt->cl_inline_name;
+@@ -149,6 +171,8 @@
+ if (clnt->cl_metrics == NULL)
+ goto out_no_stats;
+ clnt->cl_program = program;
++ INIT_LIST_HEAD(&clnt->cl_tasks);
++ spin_lock_init(&clnt->cl_lock);
+
+ if (!xprt_bound(clnt->cl_xprt))
+ clnt->cl_autobind = 1;
+@@ -156,6 +180,8 @@
+ clnt->cl_rtt = &clnt->cl_rtt_default;
+ rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+
++ kref_init(&clnt->cl_kref);
++
+ err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
+ if (err < 0)
+ goto out_no_path;
+@@ -173,6 +199,7 @@
+ if (clnt->cl_nodelen > UNX_MAXNODENAME)
+ clnt->cl_nodelen = UNX_MAXNODENAME;
+ memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
++ rpc_register_client(clnt);
+ return clnt;
+
+ out_no_auth:
+@@ -189,6 +216,8 @@
+ out_err:
+ xprt_put(xprt);
+ out_no_xprt:
++ rpciod_down();
++out_no_rpciod:
+ return ERR_PTR(err);
+ }
+
+@@ -246,8 +275,6 @@
+ clnt->cl_intr = 1;
+ if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+ clnt->cl_autobind = 1;
+- if (args->flags & RPC_CLNT_CREATE_ONESHOT)
+- clnt->cl_oneshot = 1;
+ if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
+ clnt->cl_discrtry = 1;
+ /* TODO: handle RPC_CLNT_CREATE_TAGGED
+@@ -271,24 +298,25 @@
+ new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
+ if (!new)
+ goto out_no_clnt;
+- atomic_set(&new->cl_count, 1);
+- atomic_set(&new->cl_users, 0);
++ new->cl_parent = clnt;
++ /* Turn off autobind on clones */
++ new->cl_autobind = 0;
++ INIT_LIST_HEAD(&new->cl_tasks);
++ spin_lock_init(&new->cl_lock);
++ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ new->cl_metrics = rpc_alloc_iostats(clnt);
+ if (new->cl_metrics == NULL)
+ goto out_no_stats;
++ kref_init(&new->cl_kref);
+ err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
+ if (err != 0)
+ goto out_no_path;
+- new->cl_parent = clnt;
+- atomic_inc(&clnt->cl_count);
+- new->cl_xprt = xprt_get(clnt->cl_xprt);
+- /* Turn off autobind on clones */
+- new->cl_autobind = 0;
+- new->cl_oneshot = 0;
+- new->cl_dead = 0;
+- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ if (new->cl_auth)
+ atomic_inc(&new->cl_auth->au_count);
++ xprt_get(clnt->cl_xprt);
++ kref_get(&clnt->cl_kref);
++ rpc_register_client(new);
++ rpciod_up();
+ return new;
+ out_no_path:
+ rpc_free_iostats(new->cl_metrics);
+@@ -301,52 +329,34 @@
+
+ /*
+ * Properly shut down an RPC client, terminating all outstanding
+- * requests. Note that we must be certain that cl_oneshot and
+- * cl_dead are cleared, or else the client would be destroyed
+- * when the last task releases it.
++ * requests.
+ */
+-int
+-rpc_shutdown_client(struct rpc_clnt *clnt)
++void rpc_shutdown_client(struct rpc_clnt *clnt)
+ {
+- dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
+- clnt->cl_protname, clnt->cl_server,
+- atomic_read(&clnt->cl_users));
+-
+- while (atomic_read(&clnt->cl_users) > 0) {
+- /* Don't let rpc_release_client destroy us */
+- clnt->cl_oneshot = 0;
+- clnt->cl_dead = 0;
++ dprintk("RPC: shutting down %s client for %s\n",
++ clnt->cl_protname, clnt->cl_server);
++
++ while (!list_empty(&clnt->cl_tasks)) {
+ rpc_killall_tasks(clnt);
+ wait_event_timeout(destroy_wait,
+- !atomic_read(&clnt->cl_users), 1*HZ);
+- }
+-
+- if (atomic_read(&clnt->cl_users) < 0) {
+- printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
+- clnt, atomic_read(&clnt->cl_users));
+-#ifdef RPC_DEBUG
+- rpc_show_tasks();
+-#endif
+- BUG();
++ list_empty(&clnt->cl_tasks), 1*HZ);
+ }
+
+- return rpc_destroy_client(clnt);
++ rpc_release_client(clnt);
+ }
+
+ /*
+- * Delete an RPC client
++ * Free an RPC client
+ */
+-int
+-rpc_destroy_client(struct rpc_clnt *clnt)
++static void
++rpc_free_client(struct kref *kref)
+ {
+- if (!atomic_dec_and_test(&clnt->cl_count))
+- return 1;
+- BUG_ON(atomic_read(&clnt->cl_users) != 0);
++ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
+
+ dprintk("RPC: destroying %s client for %s\n",
+ clnt->cl_protname, clnt->cl_server);
+ if (clnt->cl_auth) {
+- rpcauth_destroy(clnt->cl_auth);
++ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = NULL;
+ }
+ if (!IS_ERR(clnt->cl_dentry)) {
+@@ -354,33 +364,31 @@
+ rpc_put_mount();
+ }
+ if (clnt->cl_parent != clnt) {
+- rpc_destroy_client(clnt->cl_parent);
++ rpc_release_client(clnt->cl_parent);
+ goto out_free;
+ }
+ if (clnt->cl_server != clnt->cl_inline_name)
+ kfree(clnt->cl_server);
+ out_free:
++ rpc_unregister_client(clnt);
+ rpc_free_iostats(clnt->cl_metrics);
+ clnt->cl_metrics = NULL;
+ xprt_put(clnt->cl_xprt);
++ rpciod_down();
+ kfree(clnt);
+- return 0;
+ }
+
+ /*
+- * Release an RPC client
++ * Release reference to the RPC client
+ */
+ void
+ rpc_release_client(struct rpc_clnt *clnt)
+ {
+- dprintk("RPC: rpc_release_client(%p, %d)\n",
+- clnt, atomic_read(&clnt->cl_users));
++ dprintk("RPC: rpc_release_client(%p)\n", clnt);
+
+- if (!atomic_dec_and_test(&clnt->cl_users))
+- return;
++ if (list_empty(&clnt->cl_tasks))
+ wake_up(&destroy_wait);
+- if (clnt->cl_oneshot || clnt->cl_dead)
+- rpc_destroy_client(clnt);
++ kref_put(&clnt->cl_kref, rpc_free_client);
+ }
+
+ /**
+@@ -471,82 +479,96 @@
+ rpc_restore_sigmask(oldset);
+ }
+
+-/*
+- * New rpc_call implementation
+- */
+-int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
++static
++struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
++ struct rpc_message *msg,
++ int flags,
++ const struct rpc_call_ops *ops,
++ void *data)
+ {
+- struct rpc_task *task;
++ struct rpc_task *task, *ret;
+ sigset_t oldset;
+- int status;
+-
+- /* If this client is slain all further I/O fails */
+- if (clnt->cl_dead)
+- return -EIO;
+-
+- BUG_ON(flags & RPC_TASK_ASYNC);
+
+- task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
+- if (task == NULL)
+- return -ENOMEM;
++ task = rpc_new_task(clnt, flags, ops, data);
++ if (task == NULL) {
++ rpc_release_calldata(ops, data);
++ return ERR_PTR(-ENOMEM);
++ }
+
+- /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
++ /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
+ rpc_task_sigmask(task, &oldset);
+-
+- /* Set up the call info struct and execute the task */
++ if (msg != NULL) {
+ rpc_call_setup(task, msg, 0);
+- if (task->tk_status == 0) {
++ if (task->tk_status != 0) {
++ ret = ERR_PTR(task->tk_status);
++ rpc_put_task(task);
++ goto out;
++ }
++ }
+ atomic_inc(&task->tk_count);
+ rpc_execute(task);
+- }
++ ret = task;
++out:
++ rpc_restore_sigmask(&oldset);
++ return ret;
++}
++
++/**
++ * rpc_call_sync - Perform a synchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
++ */
++int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
++{
++ struct rpc_task *task;
++ int status;
++
++ BUG_ON(flags & RPC_TASK_ASYNC);
++
++ task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+- rpc_restore_sigmask(&oldset);
+ return status;
+ }
+
+-/*
+- * New rpc_call implementation
++/**
++ * rpc_call_async - Perform an asynchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
++ * @ops: RPC call ops
++ * @data: user call data
+ */
+ int
+ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+ const struct rpc_call_ops *tk_ops, void *data)
+ {
+ struct rpc_task *task;
+- sigset_t oldset;
+- int status;
+
+- /* If this client is slain all further I/O fails */
+- status = -EIO;
+- if (clnt->cl_dead)
+- goto out_release;
+-
+- flags |= RPC_TASK_ASYNC;
+-
+- /* Create/initialize a new RPC task */
+- status = -ENOMEM;
+- if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
+- goto out_release;
+-
+- /* Mask signals on GSS_AUTH upcalls */
+- rpc_task_sigmask(task, &oldset);
+-
+- rpc_call_setup(task, msg, 0);
+-
+- /* Set up the call info struct and execute the task */
+- status = task->tk_status;
+- if (status == 0)
+- rpc_execute(task);
+- else
++ task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
++ if (IS_ERR(task))
++ return PTR_ERR(task);
+ rpc_put_task(task);
+-
+- rpc_restore_sigmask(&oldset);
+- return status;
+-out_release:
+- rpc_release_calldata(tk_ops, data);
+- return status;
++ return 0;
+ }
+
++/**
++ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
++ * @clnt: pointer to RPC client
++ * @flags: RPC flags
++ * @ops: RPC call ops
++ * @data: user call data
++ */
++struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
++ const struct rpc_call_ops *tk_ops,
++ void *data)
++{
++ return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
++}
++EXPORT_SYMBOL(rpc_run_task);
+
+ void
+ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+@@ -1424,7 +1446,7 @@
+ .p_decode = rpcproc_decode_null,
+ };
+
+-int rpc_ping(struct rpc_clnt *clnt, int flags)
++static int rpc_ping(struct rpc_clnt *clnt, int flags)
+ {
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null,
+@@ -1435,3 +1457,51 @@
+ put_rpccred(msg.rpc_cred);
+ return err;
+ }
++
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
++{
++ struct rpc_message msg = {
++ .rpc_proc = &rpcproc_null,
++ .rpc_cred = cred,
++ };
++ return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
++}
++EXPORT_SYMBOL(rpc_call_null);
++
++#ifdef RPC_DEBUG
++void rpc_show_tasks(void)
++{
++ struct rpc_clnt *clnt;
++ struct rpc_task *t;
++
++ spin_lock(&rpc_client_lock);
++ if (list_empty(&all_clients))
++ goto out;
++ printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
++ "-rpcwait -action- ---ops--\n");
++ list_for_each_entry(clnt, &all_clients, cl_clients) {
++ if (list_empty(&clnt->cl_tasks))
++ continue;
++ spin_lock(&clnt->cl_lock);
++ list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
++ const char *rpc_waitq = "none";
++
++ if (RPC_IS_QUEUED(t))
++ rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
++
++ printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
++ t->tk_pid,
++ (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
++ t->tk_flags, t->tk_status,
++ t->tk_client,
++ (t->tk_client ? t->tk_client->cl_prog : 0),
++ t->tk_rqstp, t->tk_timeout,
++ rpc_waitq,
++ t->tk_action, t->tk_ops);
++ }
++ spin_unlock(&clnt->cl_lock);
++ }
++out:
++ spin_unlock(&rpc_client_lock);
++}
++#endif
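
The clnt.c hunks above replace the old cl_users/cl_count counter pair with a single kref (cl_kref): each task takes a reference with kref_get(), and the final kref_put() invokes rpc_free_client(), which recovers the client with container_of() and frees it. Below is a minimal user-space sketch of that idiom, built on C11 atomics rather than the kernel's struct kref; the client type and all names are illustrative, not the kernel API.

#include <stddef.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct kref { atomic_int count; };

static void kref_init(struct kref *k) { atomic_init(&k->count, 1); }
static void kref_get(struct kref *k)  { atomic_fetch_add(&k->count, 1); }

/* Drop a reference; run release() when the count hits zero. */
static void kref_put(struct kref *k, void (*release)(struct kref *))
{
	if (atomic_fetch_sub(&k->count, 1) == 1)
		release(k);
}

struct client {
	struct kref ref;
	const char *name;
};

/* Analogue of rpc_free_client(): recover the enclosing object. */
static void client_free(struct kref *k)
{
	struct client *c =
		(struct client *)((char *)k - offsetof(struct client, ref));
	printf("destroying client %s\n", c->name);
	free(c);
}

int main(void)
{
	struct client *c = malloc(sizeof(*c));
	kref_init(&c->ref);
	c->name = "demo";
	kref_get(&c->ref);              /* e.g. a task takes a reference */
	kref_put(&c->ref, client_free); /* the task releases it */
	kref_put(&c->ref, client_free); /* final put frees the client */
	return 0;
}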
+diff -Nurb linux-2.6.22-570/net/sunrpc/rpc_pipe.c linux-2.6.22-591/net/sunrpc/rpc_pipe.c
+--- linux-2.6.22-570/net/sunrpc/rpc_pipe.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/rpc_pipe.c 2007-12-21 15:36:12.000000000 -0500
+@@ -14,7 +14,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/mount.h>
+ #include <linux/namei.h>
+-#include <linux/dnotify.h>
++#include <linux/fsnotify.h>
+ #include <linux/kernel.h>
+
+ #include <asm/ioctls.h>
+@@ -344,7 +344,7 @@
+ mutex_lock(&inode->i_mutex);
+ clnt = RPC_I(inode)->private;
+ if (clnt) {
+- atomic_inc(&clnt->cl_users);
++ kref_get(&clnt->cl_kref);
+ m->private = clnt;
+ } else {
+ single_release(inode, file);
+@@ -448,6 +448,15 @@
+ simple_release_fs(&rpc_mount, &rpc_mount_count);
+ }
+
++static int rpc_delete_dentry(struct dentry *dentry)
++{
++ return 1;
++}
++
++static struct dentry_operations rpc_dentry_operations = {
++ .d_delete = rpc_delete_dentry,
++};
++
+ static int
+ rpc_lookup_parent(char *path, struct nameidata *nd)
+ {
+@@ -506,7 +515,7 @@
+ * FIXME: This probably has races.
+ */
+ static void
+-rpc_depopulate(struct dentry *parent)
++rpc_depopulate(struct dentry *parent, int start, int eof)
+ {
+ struct inode *dir = parent->d_inode;
+ struct list_head *pos, *next;
+@@ -518,6 +527,10 @@
+ spin_lock(&dcache_lock);
+ list_for_each_safe(pos, next, &parent->d_subdirs) {
+ dentry = list_entry(pos, struct dentry, d_u.d_child);
++ if (!dentry->d_inode ||
++ dentry->d_inode->i_ino < start ||
++ dentry->d_inode->i_ino >= eof)
++ continue;
+ spin_lock(&dentry->d_lock);
+ if (!d_unhashed(dentry)) {
+ dget_locked(dentry);
+@@ -533,11 +546,11 @@
+ if (n) {
+ do {
+ dentry = dvec[--n];
+- if (dentry->d_inode) {
+- rpc_close_pipes(dentry->d_inode);
++ if (S_ISREG(dentry->d_inode->i_mode))
+ simple_unlink(dir, dentry);
+- }
+- inode_dir_notify(dir, DN_DELETE);
++ else if (S_ISDIR(dentry->d_inode->i_mode))
++ simple_rmdir(dir, dentry);
++ d_delete(dentry);
+ dput(dentry);
+ } while (n);
+ goto repeat;
+@@ -560,6 +573,7 @@
+ dentry = d_alloc_name(parent, files[i].name);
+ if (!dentry)
+ goto out_bad;
++ dentry->d_op = &rpc_dentry_operations;
+ mode = files[i].mode;
+ inode = rpc_get_inode(dir->i_sb, mode);
+ if (!inode) {
+@@ -574,6 +588,7 @@
+ if (S_ISDIR(mode))
+ inc_nlink(dir);
+ d_add(dentry, inode);
++ fsnotify_create(dir, dentry);
+ }
+ mutex_unlock(&dir->i_mutex);
+ return 0;
+@@ -595,7 +610,7 @@
+ inode->i_ino = iunique(dir->i_sb, 100);
+ d_instantiate(dentry, inode);
+ inc_nlink(dir);
+- inode_dir_notify(dir, DN_CREATE);
++ fsnotify_mkdir(dir, dentry);
+ return 0;
+ out_err:
+ printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+@@ -607,21 +622,14 @@
+ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+ int error;
+-
+- shrink_dcache_parent(dentry);
+- if (d_unhashed(dentry))
+- return 0;
+- if ((error = simple_rmdir(dir, dentry)) != 0)
++ error = simple_rmdir(dir, dentry);
++ if (!error)
++ d_delete(dentry);
+ return error;
+- if (!error) {
+- inode_dir_notify(dir, DN_DELETE);
+- d_drop(dentry);
+- }
+- return 0;
+ }
+
+ static struct dentry *
+-rpc_lookup_create(struct dentry *parent, const char *name, int len)
++rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive)
+ {
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+@@ -630,7 +638,9 @@
+ dentry = lookup_one_len(name, parent, len);
+ if (IS_ERR(dentry))
+ goto out_err;
+- if (dentry->d_inode) {
++ if (!dentry->d_inode)
++ dentry->d_op = &rpc_dentry_operations;
++ else if (exclusive) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ goto out_err;
+@@ -649,7 +659,7 @@
+
+ if ((error = rpc_lookup_parent(path, nd)) != 0)
+ return ERR_PTR(error);
+- dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
++ dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1);
+ if (IS_ERR(dentry))
+ rpc_release_path(nd);
+ return dentry;
+@@ -681,7 +691,7 @@
+ rpc_release_path(&nd);
+ return dentry;
+ err_depopulate:
+- rpc_depopulate(dentry);
++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ __rpc_rmdir(dir, dentry);
+ err_dput:
+ dput(dentry);
+@@ -701,7 +711,7 @@
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+- rpc_depopulate(dentry);
++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ error = __rpc_rmdir(dir, dentry);
+ dput(dentry);
+ mutex_unlock(&dir->i_mutex);
+@@ -716,10 +726,21 @@
+ struct inode *dir, *inode;
+ struct rpc_inode *rpci;
+
+- dentry = rpc_lookup_create(parent, name, strlen(name));
++ dentry = rpc_lookup_create(parent, name, strlen(name), 0);
+ if (IS_ERR(dentry))
+ return dentry;
+ dir = parent->d_inode;
++ if (dentry->d_inode) {
++ rpci = RPC_I(dentry->d_inode);
++ if (rpci->private != private ||
++ rpci->ops != ops ||
++ rpci->flags != flags) {
++ dput (dentry);
++ dentry = ERR_PTR(-EBUSY);
++ }
++ rpci->nkern_readwriters++;
++ goto out;
++ }
+ inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
+ if (!inode)
+ goto err_dput;
+@@ -730,7 +751,8 @@
+ rpci->private = private;
+ rpci->flags = flags;
+ rpci->ops = ops;
+- inode_dir_notify(dir, DN_CREATE);
++ rpci->nkern_readwriters = 1;
++ fsnotify_create(dir, dentry);
+ dget(dentry);
+ out:
+ mutex_unlock(&dir->i_mutex);
+@@ -754,13 +776,11 @@
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+- if (!d_unhashed(dentry)) {
+- d_drop(dentry);
+- if (dentry->d_inode) {
++ if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) {
+ rpc_close_pipes(dentry->d_inode);
+ error = simple_unlink(dir, dentry);
+- }
+- inode_dir_notify(dir, DN_DELETE);
++ if (!error)
++ d_delete(dentry);
+ }
+ dput(dentry);
+ mutex_unlock(&dir->i_mutex);
+@@ -833,6 +853,7 @@
+ rpci->nreaders = 0;
+ rpci->nwriters = 0;
+ INIT_LIST_HEAD(&rpci->in_upcall);
++ INIT_LIST_HEAD(&rpci->in_downcall);
+ INIT_LIST_HEAD(&rpci->pipe);
+ rpci->pipelen = 0;
+ init_waitqueue_head(&rpci->waitq);
+diff -Nurb linux-2.6.22-570/net/sunrpc/rpcb_clnt.c linux-2.6.22-591/net/sunrpc/rpcb_clnt.c
+--- linux-2.6.22-570/net/sunrpc/rpcb_clnt.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/rpcb_clnt.c 2007-12-21 15:36:12.000000000 -0500
+@@ -184,8 +184,7 @@
+ .program = &rpcb_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+- .flags = (RPC_CLNT_CREATE_ONESHOT |
+- RPC_CLNT_CREATE_NOPING),
++ .flags = RPC_CLNT_CREATE_NOPING,
+ };
+
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+@@ -238,6 +237,7 @@
+
+ error = rpc_call_sync(rpcb_clnt, &msg, 0);
+
++ rpc_shutdown_client(rpcb_clnt);
+ if (error < 0)
+ printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ "server (errno %d).\n", -error);
+@@ -286,6 +286,7 @@
+ return PTR_ERR(rpcb_clnt);
+
+ status = rpc_call_sync(rpcb_clnt, &msg, 0);
++ rpc_shutdown_client(rpcb_clnt);
+
+ if (status >= 0) {
+ if (map.r_port != 0)
+@@ -379,6 +380,7 @@
+ }
+
+ child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
++ rpc_release_client(rpcb_clnt);
+ if (IS_ERR(child)) {
+ status = -EIO;
+ dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
+diff -Nurb linux-2.6.22-570/net/sunrpc/sched.c linux-2.6.22-591/net/sunrpc/sched.c
+--- linux-2.6.22-570/net/sunrpc/sched.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/sched.c 2007-12-21 15:36:12.000000000 -0500
+@@ -25,7 +25,6 @@
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY RPCDBG_SCHED
+ #define RPC_TASK_MAGIC_ID 0xf00baa
+-static int rpc_task_id;
+ #endif
+
+ /*
+@@ -40,7 +39,6 @@
+ static mempool_t *rpc_buffer_mempool __read_mostly;
+
+ static void __rpc_default_timer(struct rpc_task *task);
+-static void rpciod_killall(void);
+ static void rpc_async_schedule(struct work_struct *);
+ static void rpc_release_task(struct rpc_task *task);
+
+@@ -50,23 +48,13 @@
+ static RPC_WAITQ(delay_queue, "delayq");
+
+ /*
+- * All RPC tasks are linked into this list
+- */
+-static LIST_HEAD(all_tasks);
+-
+-/*
+ * rpciod-related stuff
+ */
+ static DEFINE_MUTEX(rpciod_mutex);
+-static unsigned int rpciod_users;
++static atomic_t rpciod_users = ATOMIC_INIT(0);
+ struct workqueue_struct *rpciod_workqueue;
+
+ /*
+- * Spinlock for other critical sections of code.
+- */
+-static DEFINE_SPINLOCK(rpc_sched_lock);
+-
+-/*
+ * Disable the timer for a given RPC task. Should be called with
+ * queue->lock and bh_disabled in order to avoid races within
+ * rpc_run_timer().
+@@ -267,18 +255,33 @@
+ return 0;
+ }
+
++#ifdef RPC_DEBUG
++static void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++ static atomic_t rpc_pid;
++
++ task->tk_magic = RPC_TASK_MAGIC_ID;
++ task->tk_pid = atomic_inc_return(&rpc_pid);
++}
++#else
++static inline void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++}
++#endif
++
+ static void rpc_set_active(struct rpc_task *task)
+ {
++ struct rpc_clnt *clnt;
+ if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
+ return;
+- spin_lock(&rpc_sched_lock);
+-#ifdef RPC_DEBUG
+- task->tk_magic = RPC_TASK_MAGIC_ID;
+- task->tk_pid = rpc_task_id++;
+-#endif
++ rpc_task_set_debuginfo(task);
+ /* Add to global list of all tasks */
+- list_add_tail(&task->tk_task, &all_tasks);
+- spin_unlock(&rpc_sched_lock);
++ clnt = task->tk_client;
++ if (clnt != NULL) {
++ spin_lock(&clnt->cl_lock);
++ list_add_tail(&task->tk_task, &clnt->cl_tasks);
++ spin_unlock(&clnt->cl_lock);
++ }
+ }
+
+ /*
+@@ -818,6 +821,7 @@
+ if (tk_ops->rpc_call_prepare != NULL)
+ task->tk_action = rpc_prepare_task;
+ task->tk_calldata = calldata;
++ INIT_LIST_HEAD(&task->tk_task);
+
+ /* Initialize retry counters */
+ task->tk_garb_retry = 2;
+@@ -830,7 +834,7 @@
+ task->tk_workqueue = rpciod_workqueue;
+
+ if (clnt) {
+- atomic_inc(&clnt->cl_users);
++ kref_get(&clnt->cl_kref);
+ if (clnt->cl_softrtry)
+ task->tk_flags |= RPC_TASK_SOFT;
+ if (!clnt->cl_intr)
+@@ -860,9 +864,7 @@
+ }
+
+ /*
+- * Create a new task for the specified client. We have to
+- * clean up after an allocation failure, as the client may
+- * have specified "oneshot".
++ * Create a new task for the specified client.
+ */
+ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+ {
+@@ -870,7 +872,7 @@
+
+ task = rpc_alloc_task();
+ if (!task)
+- goto cleanup;
++ goto out;
+
+ rpc_init_task(task, clnt, flags, tk_ops, calldata);
+
+@@ -878,16 +880,6 @@
+ task->tk_flags |= RPC_TASK_DYNAMIC;
+ out:
+ return task;
+-
+-cleanup:
+- /* Check whether to release the client */
+- if (clnt) {
+- printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
+- atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+- atomic_inc(&clnt->cl_users); /* pretend we were used ... */
+- rpc_release_client(clnt);
+- }
+- goto out;
+ }
+
+
+@@ -920,11 +912,13 @@
+ #endif
+ dprintk("RPC: %5u release task\n", task->tk_pid);
+
+- /* Remove from global task list */
+- spin_lock(&rpc_sched_lock);
++ if (!list_empty(&task->tk_task)) {
++ struct rpc_clnt *clnt = task->tk_client;
++ /* Remove from client task list */
++ spin_lock(&clnt->cl_lock);
+ list_del(&task->tk_task);
+- spin_unlock(&rpc_sched_lock);
+-
++ spin_unlock(&clnt->cl_lock);
++ }
+ BUG_ON (RPC_IS_QUEUED(task));
+
+ /* Synchronously delete any running timer */
+@@ -939,29 +933,6 @@
+ rpc_put_task(task);
+ }
+
+-/**
+- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+- * @clnt: pointer to RPC client
+- * @flags: RPC flags
+- * @ops: RPC call ops
+- * @data: user call data
+- */
+-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+- const struct rpc_call_ops *ops,
+- void *data)
+-{
+- struct rpc_task *task;
+- task = rpc_new_task(clnt, flags, ops, data);
+- if (task == NULL) {
+- rpc_release_calldata(ops, data);
+- return ERR_PTR(-ENOMEM);
+- }
+- atomic_inc(&task->tk_count);
+- rpc_execute(task);
+- return task;
+-}
+-EXPORT_SYMBOL(rpc_run_task);
+-
+ /*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+@@ -969,44 +940,25 @@
+ void rpc_killall_tasks(struct rpc_clnt *clnt)
+ {
+ struct rpc_task *rovr;
+- struct list_head *le;
+
+- dprintk("RPC: killing all tasks for client %p\n", clnt);
+
++ if (list_empty(&clnt->cl_tasks))
++ return;
++ dprintk("RPC: killing all tasks for client %p\n", clnt);
+ /*
+ * Spin lock all_tasks to prevent changes...
+ */
+- spin_lock(&rpc_sched_lock);
+- alltask_for_each(rovr, le, &all_tasks) {
++ spin_lock(&clnt->cl_lock);
++ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+ if (! RPC_IS_ACTIVATED(rovr))
+ continue;
+- if (!clnt || rovr->tk_client == clnt) {
++ if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+ rovr->tk_flags |= RPC_TASK_KILLED;
+ rpc_exit(rovr, -EIO);
+ rpc_wake_up_task(rovr);
+ }
+ }
+- spin_unlock(&rpc_sched_lock);
+-}
+-
+-static void rpciod_killall(void)
+-{
+- unsigned long flags;
+-
+- while (!list_empty(&all_tasks)) {
+- clear_thread_flag(TIF_SIGPENDING);
+- rpc_killall_tasks(NULL);
+- flush_workqueue(rpciod_workqueue);
+- if (!list_empty(&all_tasks)) {
+- dprintk("RPC: rpciod_killall: waiting for tasks "
+- "to exit\n");
+- yield();
+- }
+- }
+-
+- spin_lock_irqsave(&current->sighand->siglock, flags);
+- recalc_sigpending();
+- spin_unlock_irqrestore(&current->sighand->siglock, flags);
++ spin_unlock(&clnt->cl_lock);
+ }
+
+ /*
+@@ -1018,28 +970,27 @@
+ struct workqueue_struct *wq;
+ int error = 0;
+
++ if (atomic_inc_not_zero(&rpciod_users))
++ return 0;
++
+ mutex_lock(&rpciod_mutex);
+- dprintk("RPC: rpciod_up: users %u\n", rpciod_users);
+- rpciod_users++;
+- if (rpciod_workqueue)
+- goto out;
+- /*
+- * If there's no pid, we should be the first user.
+- */
+- if (rpciod_users > 1)
+- printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users);
++
++ /* Guard against races with rpciod_down() */
++ if (rpciod_workqueue != NULL)
++ goto out_ok;
+ /*
+ * Create the rpciod thread and wait for it to start.
+ */
++ dprintk("RPC: creating workqueue rpciod\n");
+ error = -ENOMEM;
+ wq = create_workqueue("rpciod");
+- if (wq == NULL) {
+- printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
+- rpciod_users--;
++ if (wq == NULL)
+ goto out;
+- }
++
+ rpciod_workqueue = wq;
+ error = 0;
++out_ok:
++ atomic_inc(&rpciod_users);
+ out:
+ mutex_unlock(&rpciod_mutex);
+ return error;
+@@ -1048,58 +999,18 @@
+ void
+ rpciod_down(void)
+ {
+- mutex_lock(&rpciod_mutex);
+- dprintk("RPC: rpciod_down sema %u\n", rpciod_users);
+- if (rpciod_users) {
+- if (--rpciod_users)
+- goto out;
+- } else
+- printk(KERN_WARNING "rpciod_down: no users??\n");
++ if (!atomic_dec_and_test(&rpciod_users))
++ return;
+
+- if (!rpciod_workqueue) {
+- dprintk("RPC: rpciod_down: Nothing to do!\n");
+- goto out;
+- }
+- rpciod_killall();
++ mutex_lock(&rpciod_mutex);
++ dprintk("RPC: destroying workqueue rpciod\n");
+
++ if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
+ destroy_workqueue(rpciod_workqueue);
+ rpciod_workqueue = NULL;
+- out:
+- mutex_unlock(&rpciod_mutex);
+-}
+-
+-#ifdef RPC_DEBUG
+-void rpc_show_tasks(void)
+-{
+- struct list_head *le;
+- struct rpc_task *t;
+-
+- spin_lock(&rpc_sched_lock);
+- if (list_empty(&all_tasks)) {
+- spin_unlock(&rpc_sched_lock);
+- return;
+- }
+- printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+- "-rpcwait -action- ---ops--\n");
+- alltask_for_each(t, le, &all_tasks) {
+- const char *rpc_waitq = "none";
+-
+- if (RPC_IS_QUEUED(t))
+- rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+-
+- printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
+- t->tk_pid,
+- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+- t->tk_flags, t->tk_status,
+- t->tk_client,
+- (t->tk_client ? t->tk_client->cl_prog : 0),
+- t->tk_rqstp, t->tk_timeout,
+- rpc_waitq,
+- t->tk_action, t->tk_ops);
+ }
+- spin_unlock(&rpc_sched_lock);
++ mutex_unlock(&rpciod_mutex);
+ }
+-#endif
+
+ void
+ rpc_destroy_mempool(void)
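
The sched.c hunks above convert rpciod_users to an atomic_t so rpciod_up() can take a lock-free fast path (atomic_inc_not_zero) when the workqueue already exists, falling back to the mutex only for first-user setup, while rpciod_down() tears down only once the count truly reaches zero under the same mutex. A user-space analogue of that pattern, with a pthread mutex standing in for rpciod_mutex and a flag standing in for the workqueue; all names are illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int users;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int resource_up;         /* stands in for rpciod_workqueue */

/* Analogue of atomic_inc_not_zero(): increment only if nonzero. */
static int inc_not_zero(atomic_int *v)
{
	int old = atomic_load(v);
	while (old != 0)
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return 1;
	return 0;
}

static int resource_get(void)
{
	if (inc_not_zero(&users))       /* fast path: already up */
		return 0;
	pthread_mutex_lock(&lock);
	if (!resource_up)               /* guard against races with put */
		resource_up = 1;        /* expensive setup goes here */
	atomic_fetch_add(&users, 1);
	pthread_mutex_unlock(&lock);
	return 0;
}

static void resource_put(void)
{
	if (atomic_fetch_sub(&users, 1) != 1)
		return;                 /* not the last user */
	pthread_mutex_lock(&lock);
	if (atomic_load(&users) == 0 && resource_up)
		resource_up = 0;        /* expensive teardown goes here */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	resource_get();
	resource_get();
	resource_put();
	resource_put();
	printf("up=%d users=%d\n", resource_up, atomic_load(&users));
	return 0;
}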
+diff -Nurb linux-2.6.22-570/net/sunrpc/stats.c linux-2.6.22-591/net/sunrpc/stats.c
+--- linux-2.6.22-570/net/sunrpc/stats.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/stats.c 2007-12-21 15:36:15.000000000 -0500
+@@ -21,6 +21,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/svcsock.h>
+ #include <linux/sunrpc/metrics.h>
++#include <net/net_namespace.h>
+
+ #define RPCDBG_FACILITY RPCDBG_MISC
+
+@@ -265,7 +266,7 @@
+ dprintk("RPC: registering /proc/net/rpc\n");
+ if (!proc_net_rpc) {
+ struct proc_dir_entry *ent;
+- ent = proc_mkdir("rpc", proc_net);
++ ent = proc_mkdir("rpc", init_net.proc_net);
+ if (ent) {
+ ent->owner = THIS_MODULE;
+ proc_net_rpc = ent;
+@@ -279,7 +280,7 @@
+ dprintk("RPC: unregistering /proc/net/rpc\n");
+ if (proc_net_rpc) {
+ proc_net_rpc = NULL;
+- remove_proc_entry("net/rpc", NULL);
++ remove_proc_entry("rpc", init_net.proc_net);
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/net/sunrpc/sunrpc_syms.c linux-2.6.22-591/net/sunrpc/sunrpc_syms.c
+--- linux-2.6.22-570/net/sunrpc/sunrpc_syms.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/sunrpc_syms.c 2007-12-21 15:36:12.000000000 -0500
+@@ -28,15 +28,11 @@
+ EXPORT_SYMBOL(rpc_sleep_on);
+ EXPORT_SYMBOL(rpc_wake_up_next);
+ EXPORT_SYMBOL(rpc_wake_up_task);
+-EXPORT_SYMBOL(rpciod_down);
+-EXPORT_SYMBOL(rpciod_up);
+-EXPORT_SYMBOL(rpc_new_task);
+ EXPORT_SYMBOL(rpc_wake_up_status);
+
+ /* RPC client functions */
+ EXPORT_SYMBOL(rpc_clone_client);
+ EXPORT_SYMBOL(rpc_bind_new_program);
+-EXPORT_SYMBOL(rpc_destroy_client);
+ EXPORT_SYMBOL(rpc_shutdown_client);
+ EXPORT_SYMBOL(rpc_killall_tasks);
+ EXPORT_SYMBOL(rpc_call_sync);
+@@ -61,7 +57,7 @@
+ EXPORT_SYMBOL(rpcauth_create);
+ EXPORT_SYMBOL(rpcauth_lookupcred);
+ EXPORT_SYMBOL(rpcauth_lookup_credcache);
+-EXPORT_SYMBOL(rpcauth_free_credcache);
++EXPORT_SYMBOL(rpcauth_destroy_credcache);
+ EXPORT_SYMBOL(rpcauth_init_credcache);
+ EXPORT_SYMBOL(put_rpccred);
+
+@@ -156,6 +152,7 @@
+ cache_register(&ip_map_cache);
+ cache_register(&unix_gid_cache);
+ init_socket_xprt();
++ rpcauth_init_module();
+ out:
+ return err;
+ }
+@@ -163,6 +160,7 @@
+ static void __exit
+ cleanup_sunrpc(void)
+ {
++ rpcauth_remove_module();
+ cleanup_socket_xprt();
+ unregister_rpc_pipefs();
+ rpc_destroy_mempool();
+diff -Nurb linux-2.6.22-570/net/sunrpc/xprt.c linux-2.6.22-591/net/sunrpc/xprt.c
+--- linux-2.6.22-570/net/sunrpc/xprt.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/xprt.c 2007-12-21 15:36:12.000000000 -0500
+@@ -127,7 +127,7 @@
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+ } else
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ }
+
+ /*
+@@ -515,7 +515,7 @@
+ if (xprt_connecting(xprt))
+ xprt_release_write(xprt, NULL);
+ else
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ return;
+ out_abort:
+ spin_unlock(&xprt->transport_lock);
+diff -Nurb linux-2.6.22-570/net/sunrpc/xprtsock.c linux-2.6.22-591/net/sunrpc/xprtsock.c
+--- linux-2.6.22-570/net/sunrpc/xprtsock.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sunrpc/xprtsock.c 2007-12-21 15:36:12.000000000 -0500
+@@ -653,8 +653,7 @@
+
+ dprintk("RPC: xs_destroy xprt %p\n", xprt);
+
+- cancel_delayed_work(&transport->connect_worker);
+- flush_scheduled_work();
++ cancel_rearming_delayed_work(&transport->connect_worker);
+
+ xprt_disconnect(xprt);
+ xs_close(xprt);
+@@ -1001,7 +1000,7 @@
+ /* Try to schedule an autoclose RPC calls */
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+- schedule_work(&xprt->task_cleanup);
++ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ default:
+ xprt_disconnect(xprt);
+ }
+@@ -1410,18 +1409,16 @@
+ dprintk("RPC: xs_connect delayed xprt %p for %lu "
+ "seconds\n",
+ xprt, xprt->reestablish_timeout / HZ);
+- schedule_delayed_work(&transport->connect_worker,
++ queue_delayed_work(rpciod_workqueue,
++ &transport->connect_worker,
+ xprt->reestablish_timeout);
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
+ } else {
+ dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
+- schedule_delayed_work(&transport->connect_worker, 0);
+-
+- /* flush_scheduled_work can sleep... */
+- if (!RPC_IS_ASYNC(task))
+- flush_scheduled_work();
++ queue_delayed_work(rpciod_workqueue,
++ &transport->connect_worker, 0);
+ }
+ }
+
+diff -Nurb linux-2.6.22-570/net/sysctl_net.c linux-2.6.22-591/net/sysctl_net.c
+--- linux-2.6.22-570/net/sysctl_net.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sysctl_net.c 2007-12-21 15:36:15.000000000 -0500
+@@ -54,3 +54,31 @@
+ #endif
+ { 0 },
+ };
++
++struct ctl_table multi_net_table[] = {
++ {
++ .ctl_name = NET_CORE,
++ .procname = "core",
++ .mode = 0555,
++ .child = multi_core_table,
++ },
++#ifdef CONFIG_INET
++ {
++ .ctl_name = NET_IPV4,
++ .procname = "ipv4",
++ .mode = 0555,
++ .child = multi_ipv4_table,
++ },
++#endif
++ {},
++};
++
++struct ctl_table net_root_table[] = {
++ {
++ .ctl_name = CTL_NET,
++ .procname = "net",
++ .mode = 0555,
++ .child = multi_net_table,
++ },
++ {},
++};
+diff -Nurb linux-2.6.22-570/net/tipc/eth_media.c linux-2.6.22-591/net/tipc/eth_media.c
+--- linux-2.6.22-570/net/tipc/eth_media.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/tipc/eth_media.c 2007-12-21 15:36:15.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/tipc/eth_media.c: Ethernet bearer support for TIPC
+ *
+- * Copyright (c) 2001-2006, Ericsson AB
+- * Copyright (c) 2005-2006, Wind River Systems
++ * Copyright (c) 2001-2007, Ericsson AB
++ * Copyright (c) 2005-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -38,6 +38,7 @@
+ #include <net/tipc/tipc_bearer.h>
+ #include <net/tipc/tipc_msg.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+
+ #define MAX_ETH_BEARERS 2
+ #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI
+@@ -87,6 +88,9 @@
+ /**
+ * recv_msg - handle incoming TIPC message from an Ethernet interface
+ *
++ * Accepts only packets explicitly sent to this node or broadcast packets;
++ * ignores packets sent using Ethernet multicast, and traffic sent to other
++ * nodes (which can happen if the interface is running in promiscuous mode).
+ * Routine truncates any Ethernet padding/CRC appended to the message,
+ * and ensures message size matches actual length
+ */
+@@ -97,10 +101,13 @@
+ struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
+ u32 size;
+
++ if (dev->nd_net != &init_net) {
++ kfree_skb(buf);
++ return 0;
++ }
++
+ if (likely(eb_ptr->bearer)) {
+- if (likely(!dev->promiscuity) ||
+- !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+- !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
++ if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
+ size = msg_size((struct tipc_msg *)buf->data);
+ skb_trim(buf, size);
+ if (likely(buf->len == size)) {
+@@ -128,7 +135,7 @@
+
+ /* Find device with specified name */
+
+- for_each_netdev(pdev){
++ for_each_netdev(&init_net, pdev){
+ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
+ dev = pdev;
+ break;
+@@ -191,6 +198,9 @@
+ struct eth_bearer *eb_ptr = &eth_bearers[0];
+ struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ while ((eb_ptr->dev != dev)) {
+ if (++eb_ptr == stop)
+ return NOTIFY_DONE; /* couldn't find device */
+diff -Nurb linux-2.6.22-570/net/tipc/link.c linux-2.6.22-591/net/tipc/link.c
+--- linux-2.6.22-570/net/tipc/link.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/tipc/link.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/tipc/link.c: TIPC link code
+ *
+- * Copyright (c) 1996-2006, Ericsson AB
+- * Copyright (c) 2004-2006, Wind River Systems
++ * Copyright (c) 1996-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -1260,7 +1260,7 @@
+ * (Must not hold any locks while building message.)
+ */
+
+- res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
++ res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
+ !sender->user_port, &buf);
+
+ read_lock_bh(&tipc_net_lock);
+@@ -1271,7 +1271,7 @@
+ if (likely(l_ptr)) {
+ if (likely(buf)) {
+ res = link_send_buf_fast(l_ptr, buf,
+- &sender->max_pkt);
++ &sender->publ.max_pkt);
+ if (unlikely(res < 0))
+ buf_discard(buf);
+ exit:
+@@ -1299,12 +1299,12 @@
+ * then re-try fast path or fragment the message
+ */
+
+- sender->max_pkt = link_max_pkt(l_ptr);
++ sender->publ.max_pkt = link_max_pkt(l_ptr);
+ tipc_node_unlock(node);
+ read_unlock_bh(&tipc_net_lock);
+
+
+- if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
++ if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
+ goto again;
+
+ return link_send_sections_long(sender, msg_sect,
+@@ -1357,7 +1357,7 @@
+
+ again:
+ fragm_no = 1;
+- max_pkt = sender->max_pkt - INT_H_SIZE;
++ max_pkt = sender->publ.max_pkt - INT_H_SIZE;
+ /* leave room for tunnel header in case of link changeover */
+ fragm_sz = max_pkt - INT_H_SIZE;
+ /* leave room for fragmentation header in each fragment */
+@@ -1463,7 +1463,7 @@
+ goto reject;
+ }
+ if (link_max_pkt(l_ptr) < max_pkt) {
+- sender->max_pkt = link_max_pkt(l_ptr);
++ sender->publ.max_pkt = link_max_pkt(l_ptr);
+ tipc_node_unlock(node);
+ for (; buf_chain; buf_chain = buf) {
+ buf = buf_chain->next;
+diff -Nurb linux-2.6.22-570/net/tipc/port.c linux-2.6.22-591/net/tipc/port.c
+--- linux-2.6.22-570/net/tipc/port.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/tipc/port.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/tipc/port.c: TIPC port code
+ *
+- * Copyright (c) 1992-2006, Ericsson AB
+- * Copyright (c) 2004-2005, Wind River Systems
++ * Copyright (c) 1992-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -239,6 +239,8 @@
+ }
+
+ tipc_port_lock(ref);
++ p_ptr->publ.usr_handle = usr_handle;
++ p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
+ p_ptr->publ.ref = ref;
+ msg = &p_ptr->publ.phdr;
+ msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0);
+@@ -248,11 +250,9 @@
+ msg_set_importance(msg,importance);
+ p_ptr->last_in_seqno = 41;
+ p_ptr->sent = 1;
+- p_ptr->publ.usr_handle = usr_handle;
+ INIT_LIST_HEAD(&p_ptr->wait_list);
+ INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
+ p_ptr->congested_link = NULL;
+- p_ptr->max_pkt = MAX_PKT_DEFAULT;
+ p_ptr->dispatcher = dispatcher;
+ p_ptr->wakeup = wakeup;
+ p_ptr->user_port = NULL;
+@@ -1243,7 +1243,7 @@
+ res = TIPC_OK;
+ exit:
+ tipc_port_unlock(p_ptr);
+- p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
++ p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
+ return res;
+ }
+
+diff -Nurb linux-2.6.22-570/net/tipc/port.h linux-2.6.22-591/net/tipc/port.h
+--- linux-2.6.22-570/net/tipc/port.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/tipc/port.h 2007-12-21 15:36:12.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/tipc/port.h: Include file for TIPC port code
+ *
+- * Copyright (c) 1994-2006, Ericsson AB
+- * Copyright (c) 2004-2005, Wind River Systems
++ * Copyright (c) 1994-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -81,7 +81,6 @@
+ * @acked:
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+- * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @probing_state:
+ * @probing_interval:
+ * @last_in_seqno:
+@@ -102,7 +101,6 @@
+ u32 acked;
+ struct list_head publications;
+ u32 pub_count;
+- u32 max_pkt;
+ u32 probing_state;
+ u32 probing_interval;
+ u32 last_in_seqno;
+diff -Nurb linux-2.6.22-570/net/tipc/socket.c linux-2.6.22-591/net/tipc/socket.c
+--- linux-2.6.22-570/net/tipc/socket.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/tipc/socket.c 2007-12-21 15:36:15.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+ * net/tipc/socket.c: TIPC socket API
+ *
+- * Copyright (c) 2001-2006, Ericsson AB
+- * Copyright (c) 2004-2006, Wind River Systems
++ * Copyright (c) 2001-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+@@ -162,13 +162,16 @@
+ *
+ * Returns 0 on success, errno otherwise
+ */
+-static int tipc_create(struct socket *sock, int protocol)
++static int tipc_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct tipc_sock *tsock;
+ struct tipc_port *port;
+ struct sock *sk;
+ u32 ref;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (unlikely(protocol != 0))
+ return -EPROTONOSUPPORT;
+
+@@ -198,7 +201,7 @@
+ return -EPROTOTYPE;
+ }
+
+- sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
++ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
+ if (!sk) {
+ tipc_deleteport(ref);
+ return -ENOMEM;
+@@ -607,23 +610,24 @@
+ static int send_stream(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
+ {
++ struct tipc_port *tport;
+ struct msghdr my_msg;
+ struct iovec my_iov;
+ struct iovec *curr_iov;
+ int curr_iovlen;
+ char __user *curr_start;
++ u32 hdr_size;
+ int curr_left;
+ int bytes_to_send;
+ int bytes_sent;
+ int res;
+
+- if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
+- return send_packet(iocb, sock, m, total_len);
+-
+- /* Can only send large data streams if already connected */
++ /* Handle special cases where there is no connection */
+
+ if (unlikely(sock->state != SS_CONNECTED)) {
+- if (sock->state == SS_DISCONNECTING)
++ if (sock->state == SS_UNCONNECTED)
++ return send_packet(iocb, sock, m, total_len);
++ else if (sock->state == SS_DISCONNECTING)
+ return -EPIPE;
+ else
+ return -ENOTCONN;
+@@ -648,17 +652,25 @@
+ my_msg.msg_name = NULL;
+ bytes_sent = 0;
+
++ tport = tipc_sk(sock->sk)->p;
++ hdr_size = msg_hdr_sz(&tport->phdr);
++
+ while (curr_iovlen--) {
+ curr_start = curr_iov->iov_base;
+ curr_left = curr_iov->iov_len;
+
+ while (curr_left) {
+- bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE)
+- ? curr_left : TIPC_MAX_USER_MSG_SIZE;
++ bytes_to_send = tport->max_pkt - hdr_size;
++ if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
++ bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
++ if (curr_left < bytes_to_send)
++ bytes_to_send = curr_left;
+ my_iov.iov_base = curr_start;
+ my_iov.iov_len = bytes_to_send;
+ if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
+- return bytes_sent ? bytes_sent : res;
++ if (bytes_sent != 0)
++ res = bytes_sent;
++ return res;
+ }
+ curr_left -= bytes_to_send;
+ curr_start += bytes_to_send;
+@@ -1363,7 +1375,7 @@
+ }
+ buf = skb_peek(&sock->sk->sk_receive_queue);
+
+- res = tipc_create(newsock, 0);
++ res = tipc_create(sock->sk->sk_net, newsock, 0);
+ if (!res) {
+ struct tipc_sock *new_tsock = tipc_sk(newsock->sk);
+ struct tipc_portid id;
+@@ -1600,33 +1612,6 @@
+ }
+
+ /**
+- * Placeholders for non-implemented functionality
+- *
+- * Returns error code (POSIX-compliant where defined)
+- */
+-
+-static int ioctl(struct socket *s, u32 cmd, unsigned long arg)
+-{
+- return -EINVAL;
+-}
+-
+-static int no_mmap(struct file *file, struct socket *sock,
+- struct vm_area_struct *vma)
+-{
+- return -EINVAL;
+-}
+-static ssize_t no_sendpage(struct socket *sock, struct page *page,
+- int offset, size_t size, int flags)
+-{
+- return -EINVAL;
+-}
+-
+-static int no_skpair(struct socket *s1, struct socket *s2)
+-{
+- return -EOPNOTSUPP;
+-}
+-
+-/**
+ * Protocol switches for the various types of TIPC sockets
+ */
+
+@@ -1636,19 +1621,19 @@
+ .release = release,
+ .bind = bind,
+ .connect = connect,
+- .socketpair = no_skpair,
++ .socketpair = sock_no_socketpair,
+ .accept = accept,
+ .getname = get_name,
+ .poll = poll,
+- .ioctl = ioctl,
++ .ioctl = sock_no_ioctl,
+ .listen = listen,
+ .shutdown = shutdown,
+ .setsockopt = setsockopt,
+ .getsockopt = getsockopt,
+ .sendmsg = send_msg,
+ .recvmsg = recv_msg,
+- .mmap = no_mmap,
+- .sendpage = no_sendpage
++ .mmap = sock_no_mmap,
++ .sendpage = sock_no_sendpage
+ };
+
+ static struct proto_ops packet_ops = {
+@@ -1657,19 +1642,19 @@
+ .release = release,
+ .bind = bind,
+ .connect = connect,
+- .socketpair = no_skpair,
++ .socketpair = sock_no_socketpair,
+ .accept = accept,
+ .getname = get_name,
+ .poll = poll,
+- .ioctl = ioctl,
++ .ioctl = sock_no_ioctl,
+ .listen = listen,
+ .shutdown = shutdown,
+ .setsockopt = setsockopt,
+ .getsockopt = getsockopt,
+ .sendmsg = send_packet,
+ .recvmsg = recv_msg,
+- .mmap = no_mmap,
+- .sendpage = no_sendpage
++ .mmap = sock_no_mmap,
++ .sendpage = sock_no_sendpage
+ };
+
+ static struct proto_ops stream_ops = {
+@@ -1678,19 +1663,19 @@
+ .release = release,
+ .bind = bind,
+ .connect = connect,
+- .socketpair = no_skpair,
++ .socketpair = sock_no_socketpair,
+ .accept = accept,
+ .getname = get_name,
+ .poll = poll,
+- .ioctl = ioctl,
++ .ioctl = sock_no_ioctl,
+ .listen = listen,
+ .shutdown = shutdown,
+ .setsockopt = setsockopt,
+ .getsockopt = getsockopt,
+ .sendmsg = send_stream,
+ .recvmsg = recv_stream,
+- .mmap = no_mmap,
+- .sendpage = no_sendpage
++ .mmap = sock_no_mmap,
++ .sendpage = sock_no_sendpage
+ };
+
+ static struct net_proto_family tipc_family_ops = {
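
The send_stream() rewrite above stops hard-coding TIPC_MAX_USER_MSG_SIZE as the chunk size and instead sizes each send_packet() call from the connection's MTU hint (max_pkt) minus the message header, clamped to the protocol maximum and to the bytes remaining in the current iovec. A sketch of just that computation; the 1500/24 figures are example values, and the TIPC_MAX_USER_MSG_SIZE constant is quoted from the TIPC headers of this era:

#include <stdio.h>

#define TIPC_MAX_USER_MSG_SIZE 66000u  /* from include/linux/tipc.h */

/* Bytes per send_packet() call, as computed in the new send_stream():
 * link MTU hint minus the message header, clamped to the protocol
 * maximum and to what is left in the current iovec. */
static unsigned chunk(unsigned max_pkt, unsigned hdr_size, unsigned left)
{
	unsigned n = max_pkt - hdr_size;
	if (n > TIPC_MAX_USER_MSG_SIZE)
		n = TIPC_MAX_USER_MSG_SIZE;
	if (left < n)
		n = left;
	return n;
}

int main(void)
{
	/* e.g. a 1500-byte MTU hint with a 24-byte header: a 4000-byte
	 * write goes out as 1476 + 1476 + 1048. */
	unsigned left = 4000;
	while (left) {
		unsigned n = chunk(1500, 24, left);
		printf("send %u bytes\n", n);
		left -= n;
	}
	return 0;
}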
+diff -Nurb linux-2.6.22-570/net/unix/af_unix.c linux-2.6.22-591/net/unix/af_unix.c
+--- linux-2.6.22-570/net/unix/af_unix.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/unix/af_unix.c 2007-12-21 15:36:15.000000000 -0500
+@@ -117,8 +117,8 @@
+ #include <linux/security.h>
+ #include <linux/vs_context.h>
+ #include <linux/vs_limit.h>
++#include <net/net_namespace.h>
+
+-int sysctl_unix_max_dgram_qlen __read_mostly = 10;
+
+ struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+ DEFINE_SPINLOCK(unix_table_lock);
+@@ -245,7 +245,8 @@
+ spin_unlock(&unix_table_lock);
+ }
+
+-static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
++static struct sock *__unix_find_socket_byname(struct net *net,
++ struct sockaddr_un *sunname,
+ int len, int type, unsigned hash)
+ {
+ struct sock *s;
+@@ -254,7 +255,7 @@
+ sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+ struct unix_sock *u = unix_sk(s);
+
+- if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT) || (s->sk_net != net))
+ continue;
+ if (u->addr->len == len &&
+ !memcmp(u->addr->name, sunname, len))
+@@ -265,21 +266,22 @@
+ return s;
+ }
+
+-static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
++static inline struct sock *unix_find_socket_byname(struct net *net,
++ struct sockaddr_un *sunname,
+ int len, int type,
+ unsigned hash)
+ {
+ struct sock *s;
+
+ spin_lock(&unix_table_lock);
+- s = __unix_find_socket_byname(sunname, len, type, hash);
++ s = __unix_find_socket_byname(net, sunname, len, type, hash);
+ if (s)
+ sock_hold(s);
+ spin_unlock(&unix_table_lock);
+ return s;
+ }
+
+-static struct sock *unix_find_socket_byinode(struct inode *i)
++static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+ {
+ struct sock *s;
+ struct hlist_node *node;
+@@ -289,6 +291,9 @@
+ &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
+ struct dentry *dentry = unix_sk(s)->dentry;
+
++ if (s->sk_net != net)
++ continue;
++
+ if(dentry && dentry->d_inode == i)
+ {
+ sock_hold(s);
+@@ -571,7 +576,7 @@
+ */
+ static struct lock_class_key af_unix_sk_receive_queue_lock_key;
+
+-static struct sock * unix_create1(struct socket *sock)
++static struct sock * unix_create1(struct net *net, struct socket *sock)
+ {
+ struct sock *sk = NULL;
+ struct unix_sock *u;
+@@ -579,7 +584,7 @@
+ if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
+ goto out;
+
+- sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
++ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1);
+ if (!sk)
+ goto out;
+
+@@ -590,7 +595,7 @@
+ &af_unix_sk_receive_queue_lock_key);
+
+ sk->sk_write_space = unix_write_space;
+- sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
++ sk->sk_max_ack_backlog = net->sysctl_unix_max_dgram_qlen;
+ sk->sk_destruct = unix_sock_destructor;
+ u = unix_sk(sk);
+ u->dentry = NULL;
+@@ -604,7 +609,7 @@
+ return sk;
+ }
+
+-static int unix_create(struct socket *sock, int protocol)
++static int unix_create(struct net *net, struct socket *sock, int protocol)
+ {
+ if (protocol && protocol != PF_UNIX)
+ return -EPROTONOSUPPORT;
+@@ -631,7 +636,7 @@
+ return -ESOCKTNOSUPPORT;
+ }
+
+- return unix_create1(sock) ? 0 : -ENOMEM;
++ return unix_create1(net, sock) ? 0 : -ENOMEM;
+ }
+
+ static int unix_release(struct socket *sock)
+@@ -649,6 +654,7 @@
+ static int unix_autobind(struct socket *sock)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct unix_sock *u = unix_sk(sk);
+ static u32 ordernum = 1;
+ struct unix_address * addr;
+@@ -675,7 +681,7 @@
+ spin_lock(&unix_table_lock);
+ ordernum = (ordernum+1)&0xFFFFF;
+
+- if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
++ if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
+ addr->hash)) {
+ spin_unlock(&unix_table_lock);
+ /* Sanity yield. It is unusual case, but yet... */
+@@ -695,7 +701,8 @@
+ return err;
+ }
+
+-static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
++static struct sock *unix_find_other(struct net *net,
++ struct sockaddr_un *sunname, int len,
+ int type, unsigned hash, int *error)
+ {
+ struct sock *u;
+@@ -713,7 +720,7 @@
+ err = -ECONNREFUSED;
+ if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
+ goto put_fail;
+- u=unix_find_socket_byinode(nd.dentry->d_inode);
++ u=unix_find_socket_byinode(net, nd.dentry->d_inode);
+ if (!u)
+ goto put_fail;
+
+@@ -729,7 +736,7 @@
+ }
+ } else {
+ err = -ECONNREFUSED;
+- u=unix_find_socket_byname(sunname, len, type, hash);
++ u=unix_find_socket_byname(net, sunname, len, type, hash);
+ if (u) {
+ struct dentry *dentry;
+ dentry = unix_sk(u)->dentry;
+@@ -751,6 +758,7 @@
+ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct unix_sock *u = unix_sk(sk);
+ struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ struct dentry * dentry = NULL;
+@@ -825,7 +833,7 @@
+
+ if (!sunaddr->sun_path[0]) {
+ err = -EADDRINUSE;
+- if (__unix_find_socket_byname(sunaddr, addr_len,
++ if (__unix_find_socket_byname(net, sunaddr, addr_len,
+ sk->sk_type, hash)) {
+ unix_release_addr(addr);
+ goto out_unlock;
+@@ -891,6 +899,7 @@
+ int alen, int flags)
+ {
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
+ struct sock *other;
+ unsigned hash;
+@@ -907,7 +916,7 @@
+ goto out;
+
+ restart:
+- other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
++ other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
+ if (!other)
+ goto out;
+
+@@ -987,6 +996,7 @@
+ {
+ struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct unix_sock *u = unix_sk(sk), *newu, *otheru;
+ struct sock *newsk = NULL;
+ struct sock *other = NULL;
+@@ -1015,7 +1025,7 @@
+ err = -ENOMEM;
+
+ /* create new sock for complete connection */
+- newsk = unix_create1(NULL);
++ newsk = unix_create1(sk->sk_net, NULL);
+ if (newsk == NULL)
+ goto out;
+
+@@ -1026,7 +1036,7 @@
+
+ restart:
+ /* Find listening sock. */
+- other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
++ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
+ if (!other)
+ goto out;
+
+@@ -1305,6 +1315,7 @@
+ {
+ struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+ struct sock *sk = sock->sk;
++ struct net *net = sk->sk_net;
+ struct unix_sock *u = unix_sk(sk);
+ struct sockaddr_un *sunaddr=msg->msg_name;
+ struct sock *other = NULL;
+@@ -1368,7 +1379,7 @@
+ if (sunaddr == NULL)
+ goto out_free;
+
+- other = unix_find_other(sunaddr, namelen, sk->sk_type,
++ other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
+ hash, &err);
+ if (other==NULL)
+ goto out_free;
+@@ -1974,12 +1985,18 @@
+
+
+ #ifdef CONFIG_PROC_FS
+-static struct sock *unix_seq_idx(int *iter, loff_t pos)
++struct unix_iter_state {
++ struct net *net;
++ int i;
++};
++static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
+ {
+ loff_t off = 0;
+ struct sock *s;
+
+- for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
++ for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
++ if (s->sk_net != iter->net)
++ continue;
+ if (off == pos)
+ return s;
+ ++off;
+@@ -1990,17 +2007,24 @@
+
+ static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++ struct unix_iter_state *iter = seq->private;
+ spin_lock(&unix_table_lock);
+- return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
++ return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
+ }
+
+ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++ struct unix_iter_state *iter = seq->private;
++ struct sock *sk = v;
+ ++*pos;
+
+ if (v == (void *)1)
+- return first_unix_socket(seq->private);
+- return next_unix_socket(seq->private, v);
++ sk = first_unix_socket(&iter->i);
++ else
++ sk = next_unix_socket(&iter->i, sk);
++ while (sk && (sk->sk_net != iter->net))
++ sk = next_unix_socket(&iter->i, sk);
++ return sk;
+ }
+
+ static void unix_seq_stop(struct seq_file *seq, void *v)
+@@ -2064,7 +2088,7 @@
+ {
+ struct seq_file *seq;
+ int rc = -ENOMEM;
+- int *iter = kmalloc(sizeof(int), GFP_KERNEL);
++ struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+
+ if (!iter)
+ goto out;
+@@ -2075,7 +2099,8 @@
+
+ seq = file->private_data;
+ seq->private = iter;
+- *iter = 0;
++ iter->net = get_net(PROC_NET(inode));
++ iter->i = 0;
+ out:
+ return rc;
+ out_kfree:
+@@ -2083,12 +2108,20 @@
+ goto out;
+ }
+
++static int unix_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct unix_iter_state *iter = seq->private;
++ put_net(iter->net);
++ return seq_release_private(inode, file);
++}
++
+ static const struct file_operations unix_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = unix_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release_private,
++ .release = unix_seq_release,
+ };
+
+ #endif
+@@ -2099,6 +2132,33 @@
+ .owner = THIS_MODULE,
+ };
+
++
++static int unix_net_init(struct net *net)
++{
++ int error = -ENOMEM;
++
++ net->sysctl_unix_max_dgram_qlen = 10;
++#ifdef CONFIG_PROC_FS
++ if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
++ goto out;
++#endif
++ unix_sysctl_register(net);
++ error = 0;
++out:
++ return error;
++}
++
++static void unix_net_exit(struct net *net)
++{
++ unix_sysctl_unregister(net);
++ proc_net_remove(net, "unix");
++}
++
++static struct pernet_operations unix_net_ops = {
++ .init = unix_net_init,
++ .exit = unix_net_exit,
++};
++
+ static int __init af_unix_init(void)
+ {
+ int rc = -1;
+@@ -2114,10 +2174,7 @@
+ }
+
+ sock_register(&unix_family_ops);
+-#ifdef CONFIG_PROC_FS
+- proc_net_fops_create("unix", 0, &unix_seq_fops);
+-#endif
+- unix_sysctl_register();
++ register_pernet_subsys(&unix_net_ops);
+ out:
+ return rc;
+ }
+@@ -2125,9 +2182,8 @@
+ static void __exit af_unix_exit(void)
+ {
+ sock_unregister(PF_UNIX);
+- unix_sysctl_unregister();
+- proc_net_remove("unix");
+ proto_unregister(&unix_proto);
++ unregister_pernet_subsys(&unix_net_ops);
+ }
+
+ module_init(af_unix_init);
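
The af_unix.c hunks above move module-global setup (the /proc/net/unix entry, the sysctl registration, the max_dgram_qlen default) behind struct pernet_operations, so every network namespace gets its own init/exit pass instead of a single module-wide registration. A toy user-space model of that callback shape, assuming a two-field struct net; everything here is illustrative rather than the kernel API:

#include <stdio.h>

/* Analogue of struct pernet_operations: a subsystem supplies init/exit
 * hooks that run once per namespace, not once per module load. */
struct net { int id; int max_dgram_qlen; };

struct pernet_ops {
	int  (*init)(struct net *);
	void (*exit)(struct net *);
};

static int unix_init(struct net *net)
{
	net->max_dgram_qlen = 10;  /* per-namespace default, as in the patch */
	printf("net %d: unix up, qlen=%d\n", net->id, net->max_dgram_qlen);
	return 0;
}

static void unix_exit(struct net *net)
{
	printf("net %d: unix down\n", net->id);
}

static const struct pernet_ops unix_ops = { unix_init, unix_exit };

int main(void)
{
	struct net nets[2] = { { .id = 0 }, { .id = 1 } };
	for (int i = 0; i < 2; i++)     /* namespace creation */
		unix_ops.init(&nets[i]);
	for (int i = 1; i >= 0; i--)    /* namespace teardown */
		unix_ops.exit(&nets[i]);
	return 0;
}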
+diff -Nurb linux-2.6.22-570/net/unix/sysctl_net_unix.c linux-2.6.22-591/net/unix/sysctl_net_unix.c
+--- linux-2.6.22-570/net/unix/sysctl_net_unix.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/unix/sysctl_net_unix.c 2007-12-21 15:36:15.000000000 -0500
+@@ -14,47 +14,71 @@
+
+ #include <net/af_unix.h>
+
+-static ctl_table unix_table[] = {
++static struct unix_sysctl_table {
++ struct ctl_table_header *sysctl_header;
++ struct ctl_table unix_table[2];
++ struct ctl_table unix_net_table[2];
++ struct ctl_table unix_root_table[2];
++} unix_sysctl = {
++ .unix_table = {
+ {
+ .ctl_name = NET_UNIX_MAX_DGRAM_QLEN,
+ .procname = "max_dgram_qlen",
+- .data = &sysctl_unix_max_dgram_qlen,
++ .data = &init_net.sysctl_unix_max_dgram_qlen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+- { .ctl_name = 0 }
+-};
+-
+-static ctl_table unix_net_table[] = {
++ {}
++ },
++ .unix_net_table = {
+ {
+ .ctl_name = NET_UNIX,
+ .procname = "unix",
+ .mode = 0555,
+- .child = unix_table
++ .child = unix_sysctl.unix_table
+ },
+- { .ctl_name = 0 }
+-};
+-
+-static ctl_table unix_root_table[] = {
++ {}
++ },
++ .unix_root_table = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+- .child = unix_net_table
++ .child = unix_sysctl.unix_net_table
+ },
+- { .ctl_name = 0 }
++ {}
++ }
+ };
+
+-static struct ctl_table_header * unix_sysctl_header;
+-
+-void unix_sysctl_register(void)
++void unix_sysctl_register(struct net *net)
+ {
+- unix_sysctl_header = register_sysctl_table(unix_root_table);
++ struct unix_sysctl_table *table;
++ int i;
++
++ table = kmemdup(&unix_sysctl, sizeof(*table), GFP_KERNEL);
++ if (!table)
++ return;
++ for (i = 0; i < ARRAY_SIZE(table->unix_table) - 1; i++)
++ table->unix_table[i].data += (char *)net - (char *)&init_net;
++
++ table->unix_net_table[0].child = table->unix_table;
++ table->unix_root_table[0].child = table->unix_net_table;
++
++ table->sysctl_header =
++ register_net_sysctl_table(net, table->unix_root_table);
++ if (!table->sysctl_header) {
++ kfree(table);
++ return;
++ }
++ net->unix_sysctl = table;
+ }
+
+-void unix_sysctl_unregister(void)
++void unix_sysctl_unregister(struct net *net)
+ {
+- unregister_sysctl_table(unix_sysctl_header);
++ struct unix_sysctl_table *table = net->unix_sysctl;
++ if (table)
++ unregister_net_sysctl_table(table->sysctl_header);
++ kfree(table);
+ }
+
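
The sysctl rework above clones a template table with kmemdup() and then rebases each .data pointer by the byte distance between the new namespace and init_net, so one table layout serves every namespace. The sketch below shows why the arithmetic works: each .data points at a field of init_net, and that field sits at the same offset in every struct net. Note the idiom steps outside strict ISO C pointer rules, though it is reliable on real platforms; all names are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct net { int id; int sysctl_qlen; };

static struct net init_net = { .id = 0, .sysctl_qlen = 10 };

struct ctl { const char *name; void *data; };

/* Template table: .data points into init_net, as in the patch. */
static struct ctl template_table[1] = {
	{ "max_dgram_qlen", &init_net.sysctl_qlen },
};

int main(void)
{
	struct net other = { .id = 1, .sysctl_qlen = 10 };
	struct ctl *t = malloc(sizeof(template_table));
	memcpy(t, template_table, sizeof(template_table)); /* kmemdup() */

	/* Rebase the data pointer into 'other':
	 * data += (char *)net - (char *)&init_net; */
	t[0].data = (char *)t[0].data + ((char *)&other - (char *)&init_net);

	*(int *)t[0].data = 42;  /* writes other.sysctl_qlen, not init_net's */
	printf("init=%d other=%d\n", init_net.sysctl_qlen, other.sysctl_qlen);
	free(t);
	return 0;
}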
+diff -Nurb linux-2.6.22-570/net/wanrouter/wanproc.c linux-2.6.22-591/net/wanrouter/wanproc.c
+--- linux-2.6.22-570/net/wanrouter/wanproc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/wanrouter/wanproc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/wanrouter.h> /* WAN router API definitions */
+ #include <linux/seq_file.h>
+ #include <linux/smp_lock.h>
++#include <net/net_namespace.h>
+
+ #include <asm/io.h>
+
+@@ -287,7 +288,7 @@
+ int __init wanrouter_proc_init(void)
+ {
+ struct proc_dir_entry *p;
+- proc_router = proc_mkdir(ROUTER_NAME, proc_net);
++ proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net);
+ if (!proc_router)
+ goto fail;
+
+@@ -303,7 +304,7 @@
+ fail_stat:
+ remove_proc_entry("config", proc_router);
+ fail_config:
+- remove_proc_entry(ROUTER_NAME, proc_net);
++ remove_proc_entry(ROUTER_NAME, init_net.proc_net);
+ fail:
+ return -ENOMEM;
+ }
+@@ -316,7 +317,7 @@
+ {
+ remove_proc_entry("config", proc_router);
+ remove_proc_entry("status", proc_router);
+- remove_proc_entry(ROUTER_NAME, proc_net);
++ remove_proc_entry(ROUTER_NAME, init_net.proc_net);
+ }
+
+ /*
+diff -Nurb linux-2.6.22-570/net/wireless/wext.c linux-2.6.22-591/net/wireless/wext.c
+--- linux-2.6.22-570/net/wireless/wext.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/wireless/wext.c 2007-12-21 15:36:15.000000000 -0500
+@@ -95,6 +95,7 @@
+ #include <linux/interrupt.h>
+
+ #include <linux/wireless.h> /* Pretty obvious */
++#include <net/net_namespace.h>
+ #include <net/iw_handler.h> /* New driver API */
+ #include <net/netlink.h>
+ #include <net/wext.h>
+@@ -672,7 +673,22 @@
+
+ static int wireless_seq_open(struct inode *inode, struct file *file)
+ {
+- return seq_open(file, &wireless_seq_ops);
++ struct seq_file *seq;
++ int res;
++ res = seq_open(file, &wireless_seq_ops);
++ if (!res) {
++ seq = file->private_data;
++ seq->private = get_net(PROC_NET(inode));
++ }
++ return res;
++}
++
++static int wireless_seq_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct net *net = seq->private;
++ put_net(net);
++ return seq_release(inode, file);
+ }
+
+ static const struct file_operations wireless_seq_fops = {
+@@ -680,17 +696,22 @@
+ .open = wireless_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = wireless_seq_release,
+ };
+
+-int __init wext_proc_init(void)
++int wext_proc_init(struct net *net)
+ {
+ /* Create /proc/net/wireless entry */
+- if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
++ if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+ }
++
++void wext_proc_exit(struct net *net)
++{
++ proc_net_remove(net, "wireless");
++}
+ #endif /* CONFIG_PROC_FS */
+
+ /************************** IOCTL SUPPORT **************************/
+@@ -1010,7 +1031,7 @@
+ * Main IOCTL dispatcher.
+ * Check the type of IOCTL and call the appropriate wrapper...
+ */
+-static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
++static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd)
+ {
+ struct net_device *dev;
+ iw_handler handler;
+@@ -1019,7 +1040,7 @@
+ * The copy_to/from_user() of ifr is also dealt with in there */
+
+ /* Make sure the device exists */
+- if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
++ if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+ return -ENODEV;
+
+ /* A bunch of special cases, then the generic case...
+@@ -1053,7 +1074,7 @@
+ }
+
+ /* entry point from dev ioctl */
+-int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ void __user *arg)
+ {
+ int ret;
+@@ -1065,9 +1086,9 @@
+ && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+- dev_load(ifr->ifr_name);
++ dev_load(net, ifr->ifr_name);
+ rtnl_lock();
+- ret = wireless_process_ioctl(ifr, cmd);
++ ret = wireless_process_ioctl(net, ifr, cmd);
+ rtnl_unlock();
+ if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+@@ -1111,8 +1132,13 @@
+ {
+ struct sk_buff *skb;
+
+- while ((skb = skb_dequeue(&wireless_nlevent_queue)))
+- rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
++ while ((skb = skb_dequeue(&wireless_nlevent_queue))) {
++ struct net_device *dev = skb->dev;
++ struct net *net = dev->nd_net;
++ skb->dev = NULL;
++ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
++ dev_put(dev);
++ }
+ }
+
+ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
+@@ -1173,6 +1199,9 @@
+ kfree_skb(skb);
+ return;
+ }
++ /* Remember the device until we are in process context */
++ dev_hold(dev);
++ skb->dev = dev;
+ NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ skb_queue_tail(&wireless_nlevent_queue, skb);
+ tasklet_schedule(&wireless_nlevent_tasklet);
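
The wext proc changes above pin the network namespace for exactly as long as the seq_file is open: get_net() in the open path, put_net() in the matching release. The same acquire-in-open, drop-in-release pairing, sketched as self-contained userspace C with hypothetical names:

/* Sketch of the open/release refcount pairing: the context grabbed at
 * open time must be dropped by the matching release, never by the
 * reader. Illustrative only. */
#include <assert.h>
#include <stdio.h>

struct ctx { int refs; };

static struct ctx *ctx_get(struct ctx *c) { c->refs++; return c; }
static void ctx_put(struct ctx *c) { assert(--c->refs >= 0); }

struct handle { struct ctx *private; };

static void handle_open(struct handle *h, struct ctx *c)
{
    h->private = ctx_get(c);   /* pin for the handle's lifetime */
}

static void handle_release(struct handle *h)
{
    ctx_put(h->private);       /* drop exactly once, on release */
    h->private = NULL;
}

int main(void)
{
    struct ctx c = { .refs = 1 };
    struct handle h;

    handle_open(&h, &c);
    printf("refs while open: %d\n", c.refs);    /* 2 */
    handle_release(&h);
    printf("refs after release: %d\n", c.refs); /* 1 */
    return 0;
}
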
+diff -Nurb linux-2.6.22-570/net/x25/af_x25.c linux-2.6.22-591/net/x25/af_x25.c
+--- linux-2.6.22-570/net/x25/af_x25.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/net/x25/af_x25.c 2007-12-21 15:36:15.000000000 -0500
+@@ -191,6 +191,9 @@
+ struct net_device *dev = ptr;
+ struct x25_neigh *nb;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (dev->type == ARPHRD_X25
+ #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+ || dev->type == ARPHRD_ETHER
+@@ -466,10 +469,10 @@
+ .obj_size = sizeof(struct x25_sock),
+ };
+
+-static struct sock *x25_alloc_socket(void)
++static struct sock *x25_alloc_socket(struct net *net)
+ {
+ struct x25_sock *x25;
+- struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1);
++ struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1);
+
+ if (!sk)
+ goto out;
+@@ -485,17 +488,20 @@
+ return sk;
+ }
+
+-static int x25_create(struct socket *sock, int protocol)
++static int x25_create(struct net *net, struct socket *sock, int protocol)
+ {
+ struct sock *sk;
+ struct x25_sock *x25;
+ int rc = -ESOCKTNOSUPPORT;
+
++ if (net != &init_net)
++ return -EAFNOSUPPORT;
++
+ if (sock->type != SOCK_SEQPACKET || protocol)
+ goto out;
+
+ rc = -ENOMEM;
+- if ((sk = x25_alloc_socket()) == NULL)
++ if ((sk = x25_alloc_socket(net)) == NULL)
+ goto out;
+
+ x25 = x25_sk(sk);
+@@ -546,7 +552,7 @@
+ if (osk->sk_type != SOCK_SEQPACKET)
+ goto out;
+
+- if ((sk = x25_alloc_socket()) == NULL)
++ if ((sk = x25_alloc_socket(osk->sk_net)) == NULL)
+ goto out;
+
+ x25 = x25_sk(sk);
+diff -Nurb linux-2.6.22-570/net/x25/x25_dev.c linux-2.6.22-591/net/x25/x25_dev.c
+--- linux-2.6.22-570/net/x25/x25_dev.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/x25/x25_dev.c 2007-12-21 15:36:15.000000000 -0500
+@@ -95,6 +95,9 @@
+ struct sk_buff *nskb;
+ struct x25_neigh *nb;
+
++ if (dev->nd_net != &init_net)
++ goto drop;
++
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ if (!nskb)
+ goto drop;
+diff -Nurb linux-2.6.22-570/net/x25/x25_proc.c linux-2.6.22-591/net/x25/x25_proc.c
+--- linux-2.6.22-570/net/x25/x25_proc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/x25/x25_proc.c 2007-12-21 15:36:15.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/init.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/x25.h>
+
+@@ -301,7 +302,7 @@
+ struct proc_dir_entry *p;
+ int rc = -ENOMEM;
+
+- x25_proc_dir = proc_mkdir("x25", proc_net);
++ x25_proc_dir = proc_mkdir("x25", init_net.proc_net);
+ if (!x25_proc_dir)
+ goto out;
+
+@@ -328,7 +329,7 @@
+ out_socket:
+ remove_proc_entry("route", x25_proc_dir);
+ out_route:
+- remove_proc_entry("x25", proc_net);
++ remove_proc_entry("x25", init_net.proc_net);
+ goto out;
+ }
+
+@@ -337,7 +338,7 @@
+ remove_proc_entry("forward", x25_proc_dir);
+ remove_proc_entry("route", x25_proc_dir);
+ remove_proc_entry("socket", x25_proc_dir);
+- remove_proc_entry("x25", proc_net);
++ remove_proc_entry("x25", init_net.proc_net);
+ }
+
+ #else /* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-570/net/x25/x25_route.c linux-2.6.22-591/net/x25/x25_route.c
+--- linux-2.6.22-570/net/x25/x25_route.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/x25/x25_route.c 2007-12-21 15:36:15.000000000 -0500
+@@ -129,7 +129,7 @@
+ */
+ struct net_device *x25_dev_get(char *devname)
+ {
+- struct net_device *dev = dev_get_by_name(devname);
++ struct net_device *dev = dev_get_by_name(&init_net, devname);
+
+ if (dev &&
+ (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25
+diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_policy.c linux-2.6.22-591/net/xfrm/xfrm_policy.c
+--- linux-2.6.22-570/net/xfrm/xfrm_policy.c 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/net/xfrm/xfrm_policy.c 2007-12-21 15:36:15.000000000 -0500
+@@ -30,8 +30,6 @@
+
+ #include "xfrm_hash.h"
+
+-int sysctl_xfrm_larval_drop __read_mostly;
+-
+ DEFINE_MUTEX(xfrm_cfg_mutex);
+ EXPORT_SYMBOL(xfrm_cfg_mutex);
+
+@@ -1570,7 +1568,7 @@
+
+ if (unlikely(nx<0)) {
+ err = nx;
+- if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
++ if (err == -EAGAIN && init_net.sysctl_xfrm_larval_drop) {
+ /* EREMOTE tells the caller to generate
+ * a one-shot blackhole route.
+ */
+@@ -1954,8 +1952,8 @@
+ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
+ {
+ while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+- dst->dev = &loopback_dev;
+- dev_hold(&loopback_dev);
++ dst->dev = &init_net.loopback_dev;
++ dev_hold(dst->dev);
+ dev_put(dev);
+ }
+ }
+@@ -2357,6 +2355,11 @@
+
+ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
++ struct net_device *dev = ptr;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_DOWN:
+ xfrm_flush_bundles();
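
The x25, SELinux netif, and xfrm hunks in this patch all gain the same prologue: a global netdevice notifier returns NOTIFY_DONE early for devices owned by any namespace other than init_net. The filter-at-the-top shape in a compilable sketch with invented types:

/* Sketch of the namespace guard: bail out before doing any work on a
 * device that belongs to a foreign namespace. Illustrative names. */
#include <stdio.h>

struct ns { int id; };
struct device { struct ns *owner; const char *name; };

static struct ns init_ns = { .id = 0 };

#define NOTIFY_DONE 0
#define NOTIFY_OK   1

static int dev_event(struct device *dev, int event)
{
    if (dev->owner != &init_ns)
        return NOTIFY_DONE;    /* not ours: ignore, let others run */

    printf("handling event %d on %s\n", event, dev->name);
    return NOTIFY_OK;
}

int main(void)
{
    struct ns other = { .id = 1 };
    struct device a = { &init_ns, "eth0" }, b = { &other, "eth0" };

    dev_event(&a, 1);          /* handled */
    dev_event(&b, 1);          /* silently skipped */
    return 0;
}
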
+diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_state.c linux-2.6.22-591/net/xfrm/xfrm_state.c
+--- linux-2.6.22-570/net/xfrm/xfrm_state.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/xfrm/xfrm_state.c 2007-12-21 15:36:15.000000000 -0500
+@@ -28,14 +28,6 @@
+ struct sock *xfrm_nl;
+ EXPORT_SYMBOL(xfrm_nl);
+
+-u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
+-EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
+-
+-u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
+-EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
+-
+-u32 sysctl_xfrm_acq_expires __read_mostly = 30;
+-
+ /* Each xfrm_state may be linked to two tables:
+
+ 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
+@@ -665,8 +657,8 @@
+ h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+ hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+ }
+- x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
+- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
++ x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires;
++ x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ;
+ add_timer(&x->timer);
+ xfrm_state_num++;
+ xfrm_hash_grow_check(x->bydst.next != NULL);
+@@ -815,9 +807,9 @@
+ x->props.family = family;
+ x->props.mode = mode;
+ x->props.reqid = reqid;
+- x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
++ x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires;
+ xfrm_state_hold(x);
+- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
++ x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ;
+ add_timer(&x->timer);
+ hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+ h = xfrm_src_hash(daddr, saddr, family);
+@@ -1775,6 +1767,19 @@
+
+ EXPORT_SYMBOL(xfrm_init_state);
+
++
++static int xfrm_state_pernet_init(struct net *net)
++{
++ net->sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
++ net->sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
++ net->sysctl_xfrm_acq_expires = 30;
++ return 0;
++}
++
++static struct pernet_operations xfrm_state_net_ops = {
++ .init = xfrm_state_pernet_init,
++};
++
+ void __init xfrm_state_init(void)
+ {
+ unsigned int sz;
+@@ -1789,5 +1794,7 @@
+ xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
+
+ INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
++
++ register_pernet_subsys(&xfrm_state_net_ops);
+ }
+
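
xfrm_state now seeds its former file-scope globals from a pernet init hook, so every freshly created namespace starts with its own tunable copies. A compilable sketch of that registered-initializer shape (the struct, constants, and names below are stand-ins, not the kernel's):

/* Each new namespace object is passed through every registered
 * initializer, replacing what used to be global defaults. */
#include <stdio.h>

struct ns { unsigned aevent_etime, aevent_rseqth, acq_expires; };

typedef int (*ns_init_fn)(struct ns *);

static int xfrm_defaults_init(struct ns *ns)
{
    ns->aevent_etime  = 10;    /* stand-ins for XFRM_AE_ETIME etc. */
    ns->aevent_rseqth = 2;
    ns->acq_expires   = 30;
    return 0;
}

static ns_init_fn initializers[] = { xfrm_defaults_init };

static int ns_create(struct ns *ns)
{
    for (unsigned i = 0; i < sizeof(initializers) / sizeof(initializers[0]); i++)
        if (initializers[i](ns))
            return -1;
    return 0;
}

int main(void)
{
    struct ns a, b;

    ns_create(&a);
    ns_create(&b);
    b.acq_expires = 60;        /* tunable per namespace, independently */
    printf("a=%u b=%u\n", a.acq_expires, b.acq_expires);
    return 0;
}
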
+diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_user.c linux-2.6.22-591/net/xfrm/xfrm_user.c
+--- linux-2.6.22-570/net/xfrm/xfrm_user.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/xfrm/xfrm_user.c 2007-12-21 15:36:15.000000000 -0500
+@@ -374,7 +374,8 @@
+ return err;
+ }
+
+-static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
++static struct xfrm_state *xfrm_state_construct(struct net *net,
++ struct xfrm_usersa_info *p,
+ struct rtattr **xfrma,
+ int *errp)
+ {
+@@ -410,9 +411,9 @@
+ goto error;
+
+ x->km.seq = p->seq;
+- x->replay_maxdiff = sysctl_xfrm_aevent_rseqth;
++ x->replay_maxdiff = net->sysctl_xfrm_aevent_rseqth;
+ /* sysctl_xfrm_aevent_etime is in 100ms units */
+- x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M;
++ x->replay_maxage = (net->sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M;
+ x->preplay.bitmap = 0;
+ x->preplay.seq = x->replay.seq+x->replay_maxdiff;
+ x->preplay.oseq = x->replay.oseq +x->replay_maxdiff;
+@@ -436,6 +437,7 @@
+ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
+ {
++ struct net *net = skb->sk->sk_net;
+ struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+ struct xfrm_state *x;
+ int err;
+@@ -445,7 +447,7 @@
+ if (err)
+ return err;
+
+- x = xfrm_state_construct(p, xfrma, &err);
++ x = xfrm_state_construct(net, p, xfrma, &err);
+ if (!x)
+ return err;
+
+@@ -2559,7 +2561,7 @@
+
+ printk(KERN_INFO "Initializing XFRM netlink socket\n");
+
+- nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
++ nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX,
+ xfrm_netlink_rcv, NULL, THIS_MODULE);
+ if (nlsk == NULL)
+ return -ENOMEM;
+diff -Nurb linux-2.6.22-570/rej linux-2.6.22-591/rej
+--- linux-2.6.22-570/rej 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/rej 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,28 @@
++vi -o ./drivers/dma/ioatdma.c ./drivers/dma/ioatdma.c.rej
++vi -o ./fs/nfs/super.c ./fs/nfs/super.c.rej
++vi -o ./fs/ocfs2/aops.c ./fs/ocfs2/aops.c.rej
++vi -o ./fs/ocfs2/file.c ./fs/ocfs2/file.c.rej
++vi -o ./fs/ocfs2/super.c ./fs/ocfs2/super.c.rej
++vi -o ./fs/proc/base.c ./fs/proc/base.c.rej
++vi -o ./fs/sysfs/file.c ./fs/sysfs/file.c.rej
++vi -o ./fs/sync.c ./fs/sync.c.rej
++vi -o ./include/acpi/processor.h ./include/acpi/processor.h.rej
++vi -o ./include/linux/sunrpc/clnt.h ./include/linux/sunrpc/clnt.h.rej
++vi -o ./include/linux/syscalls.h ./include/linux/syscalls.h.rej
++vi -o ./include/linux/nfs_mount.h ./include/linux/nfs_mount.h.rej
++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej
++vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej
++vi -o ./include/linux/fs.h ./include/linux/fs.h.rej
++vi -o ./kernel/timer.c ./kernel/timer.c.rej
++vi -o ./kernel/fork.c ./kernel/fork.c.rej
++vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej
++vi -o ./kernel/sys.c ./kernel/sys.c.rej
++vi -o ./kernel/user.c ./kernel/user.c.rej
++vi -o ./kernel/utsname.c ./kernel/utsname.c.rej
++vi -o ./kernel/sched.c ./kernel/sched.c.rej
++vi -o ./kernel/container.c ./kernel/container.c.rej
++vi -o ./mm/memory.c ./mm/memory.c.rej
++vi -o ./mm/hugetlb.c ./mm/hugetlb.c.rej
++vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej
++vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej
++vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej
+diff -Nurb linux-2.6.22-570/scripts/Makefile.build.orig linux-2.6.22-591/scripts/Makefile.build.orig
+--- linux-2.6.22-570/scripts/Makefile.build.orig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/scripts/Makefile.build.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,348 +0,0 @@
+-# ==========================================================================
+-# Building
+-# ==========================================================================
+-
+-src := $(obj)
+-
+-PHONY := __build
+-__build:
+-
+-# Read .config if it exist, otherwise ignore
+--include include/config/auto.conf
+-
+-include scripts/Kbuild.include
+-
+-# The filename Kbuild has precedence over Makefile
+-kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
+-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile)
+-
+-include scripts/Makefile.lib
+-
+-ifdef host-progs
+-ifneq ($(hostprogs-y),$(host-progs))
+-$(warning kbuild: $(obj)/Makefile - Usage of host-progs is deprecated. Please replace with hostprogs-y!)
+-hostprogs-y += $(host-progs)
+-endif
+-endif
+-
+-# Do not include host rules unles needed
+-ifneq ($(hostprogs-y)$(hostprogs-m),)
+-include scripts/Makefile.host
+-endif
+-
+-ifneq ($(KBUILD_SRC),)
+-# Create output directory if not already present
+-_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj))
+-
+-# Create directories for object files if directory does not exist
+-# Needed when obj-y := dir/file.o syntax is used
+-_dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d)))
+-endif
+-
+-
+-ifdef EXTRA_TARGETS
+-$(warning kbuild: $(obj)/Makefile - Usage of EXTRA_TARGETS is obsolete in 2.6. Please fix!)
+-endif
+-
+-ifdef build-targets
+-$(warning kbuild: $(obj)/Makefile - Usage of build-targets is obsolete in 2.6. Please fix!)
+-endif
+-
+-ifdef export-objs
+-$(warning kbuild: $(obj)/Makefile - Usage of export-objs is obsolete in 2.6. Please fix!)
+-endif
+-
+-ifdef O_TARGET
+-$(warning kbuild: $(obj)/Makefile - Usage of O_TARGET := $(O_TARGET) is obsolete in 2.6. Please fix!)
+-endif
+-
+-ifdef L_TARGET
+-$(error kbuild: $(obj)/Makefile - Use of L_TARGET is replaced by lib-y in 2.6. Please fix!)
+-endif
+-
+-ifdef list-multi
+-$(warning kbuild: $(obj)/Makefile - list-multi := $(list-multi) is obsolete in 2.6. Please fix!)
+-endif
+-
+-ifndef obj
+-$(warning kbuild: Makefile.build is included improperly)
+-endif
+-
+-# ===========================================================================
+-
+-ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
+-lib-target := $(obj)/lib.a
+-endif
+-
+-ifneq ($(strip $(obj-y) $(obj-m) $(obj-n) $(obj-) $(lib-target)),)
+-builtin-target := $(obj)/built-in.o
+-endif
+-
+-# We keep a list of all modules in $(MODVERDIR)
+-
+-__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \
+- $(if $(KBUILD_MODULES),$(obj-m)) \
+- $(subdir-ym) $(always)
+- @:
+-
+-# Linus' kernel sanity checking tool
+-ifneq ($(KBUILD_CHECKSRC),0)
+- ifeq ($(KBUILD_CHECKSRC),2)
+- quiet_cmd_force_checksrc = CHECK $<
+- cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ;
+- else
+- quiet_cmd_checksrc = CHECK $<
+- cmd_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ;
+- endif
+-endif
+-
+-
+-# Compile C sources (.c)
+-# ---------------------------------------------------------------------------
+-
+-# Default is built-in, unless we know otherwise
+-modkern_cflags := $(CFLAGS_KERNEL)
+-quiet_modtag := $(empty) $(empty)
+-
+-$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE)
+-$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE)
+-$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE)
+-$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE)
+-
+-$(real-objs-m) : quiet_modtag := [M]
+-$(real-objs-m:.o=.i) : quiet_modtag := [M]
+-$(real-objs-m:.o=.s) : quiet_modtag := [M]
+-$(real-objs-m:.o=.lst): quiet_modtag := [M]
+-
+-$(obj-m) : quiet_modtag := [M]
+-
+-# Default for not multi-part modules
+-modname = $(basetarget)
+-
+-$(multi-objs-m) : modname = $(modname-multi)
+-$(multi-objs-m:.o=.i) : modname = $(modname-multi)
+-$(multi-objs-m:.o=.s) : modname = $(modname-multi)
+-$(multi-objs-m:.o=.lst) : modname = $(modname-multi)
+-$(multi-objs-y) : modname = $(modname-multi)
+-$(multi-objs-y:.o=.i) : modname = $(modname-multi)
+-$(multi-objs-y:.o=.s) : modname = $(modname-multi)
+-$(multi-objs-y:.o=.lst) : modname = $(modname-multi)
+-
+-quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
+-cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $<
+-
+-$(obj)/%.s: $(src)/%.c FORCE
+- $(call if_changed_dep,cc_s_c)
+-
+-quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@
+-cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $<
+-
+-$(obj)/%.i: $(src)/%.c FORCE
+- $(call if_changed_dep,cc_i_c)
+-
+-quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
+-cmd_cc_symtypes_c = \
+- $(CPP) -D__GENKSYMS__ $(c_flags) $< \
+- | $(GENKSYMS) -T $@ >/dev/null; \
+- test -s $@ || rm -f $@
+-
+-$(obj)/%.symtypes : $(src)/%.c FORCE
+- $(call if_changed_dep,cc_symtypes_c)
+-
+-# C (.c) files
+-# The C file is compiled and updated dependency information is generated.
+-# (See cmd_cc_o_c + relevant part of rule_cc_o_c)
+-
+-quiet_cmd_cc_o_c = CC $(quiet_modtag) $@
+-
+-ifndef CONFIG_MODVERSIONS
+-cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+-
+-else
+-# When module versioning is enabled the following steps are executed:
+-# o compile a .tmp_<file>.o from <file>.c
+-# o if .tmp_<file>.o doesn't contain a __ksymtab version, i.e. does
+-# not export symbols, we just rename .tmp_<file>.o to <file>.o and
+-# are done.
+-# o otherwise, we calculate symbol versions using the good old
+-# genksyms on the preprocessed source and postprocess them in a way
+-# that they are usable as a linker script
+-# o generate <file>.o from .tmp_<file>.o using the linker to
+-# replace the unresolved symbols __crc_exported_symbol with
+-# the actual value of the checksum generated by genksyms
+-
+-cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
+-cmd_modversions = \
+- if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
+- $(CPP) -D__GENKSYMS__ $(c_flags) $< \
+- | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \
+- -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \
+- > $(@D)/.tmp_$(@F:.o=.ver); \
+- \
+- $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \
+- -T $(@D)/.tmp_$(@F:.o=.ver); \
+- rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \
+- else \
+- mv -f $(@D)/.tmp_$(@F) $@; \
+- fi;
+-endif
+-
+-define rule_cc_o_c
+- $(call echo-cmd,checksrc) $(cmd_checksrc) \
+- $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \
+- $(cmd_modversions) \
+- scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \
+- $(dot-target).tmp; \
+- rm -f $(depfile); \
+- mv -f $(dot-target).tmp $(dot-target).cmd
+-endef
+-
+-# Built-in and composite module parts
+-$(obj)/%.o: $(src)/%.c FORCE
+- $(call cmd,force_checksrc)
+- $(call if_changed_rule,cc_o_c)
+-
+-# Single-part modules are special since we need to mark them in $(MODVERDIR)
+-
+-$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE
+- $(call cmd,force_checksrc)
+- $(call if_changed_rule,cc_o_c)
+- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
+-
+-quiet_cmd_cc_lst_c = MKLST $@
+- cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
+- $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \
+- System.map $(OBJDUMP) > $@
+-
+-$(obj)/%.lst: $(src)/%.c FORCE
+- $(call if_changed_dep,cc_lst_c)
+-
+-# Compile assembler sources (.S)
+-# ---------------------------------------------------------------------------
+-
+-modkern_aflags := $(AFLAGS_KERNEL)
+-
+-$(real-objs-m) : modkern_aflags := $(AFLAGS_MODULE)
+-$(real-objs-m:.o=.s): modkern_aflags := $(AFLAGS_MODULE)
+-
+-quiet_cmd_as_s_S = CPP $(quiet_modtag) $@
+-cmd_as_s_S = $(CPP) $(a_flags) -o $@ $<
+-
+-$(obj)/%.s: $(src)/%.S FORCE
+- $(call if_changed_dep,as_s_S)
+-
+-quiet_cmd_as_o_S = AS $(quiet_modtag) $@
+-cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+-
+-$(obj)/%.o: $(src)/%.S FORCE
+- $(call if_changed_dep,as_o_S)
+-
+-targets += $(real-objs-y) $(real-objs-m) $(lib-y)
+-targets += $(extra-y) $(MAKECMDGOALS) $(always)
+-
+-# Linker scripts preprocessor (.lds.S -> .lds)
+-# ---------------------------------------------------------------------------
+-quiet_cmd_cpp_lds_S = LDS $@
+- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $<
+-
+-$(obj)/%.lds: $(src)/%.lds.S FORCE
+- $(call if_changed_dep,cpp_lds_S)
+-
+-# Build the compiled-in targets
+-# ---------------------------------------------------------------------------
+-
+-# To build objects in subdirs, we need to descend into the directories
+-$(sort $(subdir-obj-y)): $(subdir-ym) ;
+-
+-#
+-# Rule to compile a set of .o files into one .o file
+-#
+-ifdef builtin-target
+-quiet_cmd_link_o_target = LD $@
+-# If the list of objects to link is empty, just create an empty built-in.o
+-cmd_link_o_target = $(if $(strip $(obj-y)),\
+- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^),\
+- rm -f $@; $(AR) rcs $@)
+-
+-$(builtin-target): $(obj-y) FORCE
+- $(call if_changed,link_o_target)
+-
+-targets += $(builtin-target)
+-endif # builtin-target
+-
+-#
+-# Rule to compile a set of .o files into one .a file
+-#
+-ifdef lib-target
+-quiet_cmd_link_l_target = AR $@
+-cmd_link_l_target = rm -f $@; $(AR) $(EXTRA_ARFLAGS) rcs $@ $(lib-y)
+-
+-$(lib-target): $(lib-y) FORCE
+- $(call if_changed,link_l_target)
+-
+-targets += $(lib-target)
+-endif
+-
+-#
+-# Rule to link composite objects
+-#
+-# Composite objects are specified in kbuild makefile as follows:
+-# <composite-object>-objs := <list of .o files>
+-# or
+-# <composite-object>-y := <list of .o files>
+-link_multi_deps = \
+-$(filter $(addprefix $(obj)/, \
+-$($(subst $(obj)/,,$(@:.o=-objs))) \
+-$($(subst $(obj)/,,$(@:.o=-y)))), $^)
+-
+-quiet_cmd_link_multi-y = LD $@
+-cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps)
+-
+-quiet_cmd_link_multi-m = LD [M] $@
+-cmd_link_multi-m = $(LD) $(ld_flags) $(LDFLAGS_MODULE) -o $@ $(link_multi_deps)
+-
+-# We would rather have a list of rules like
+-# foo.o: $(foo-objs)
+-# but that's not so easy, so we rather make all composite objects depend
+-# on the set of all their parts
+-$(multi-used-y) : %.o: $(multi-objs-y) FORCE
+- $(call if_changed,link_multi-y)
+-
+-$(multi-used-m) : %.o: $(multi-objs-m) FORCE
+- $(call if_changed,link_multi-m)
+- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod)
+-
+-targets += $(multi-used-y) $(multi-used-m)
+-
+-
+-# Descending
+-# ---------------------------------------------------------------------------
+-
+-PHONY += $(subdir-ym)
+-$(subdir-ym):
+- $(Q)$(MAKE) $(build)=$@
+-
+-# Add FORCE to the prequisites of a target to force it to be always rebuilt.
+-# ---------------------------------------------------------------------------
+-
+-PHONY += FORCE
+-
+-FORCE:
+-
+-# Read all saved command lines and dependencies for the $(targets) we
+-# may be building above, using $(if_changed{,_dep}). As an
+-# optimization, we don't need to read them if the target does not
+-# exist, we will rebuild anyway in that case.
+-
+-targets := $(wildcard $(sort $(targets)))
+-cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+-
+-ifneq ($(cmd_files),)
+- include $(cmd_files)
+-endif
+-
+-
+-# Declare the contents of the .PHONY variable as phony. We keep that
+-# information in a variable se we can use it in if_changed and friends.
+-
+-.PHONY: $(PHONY)
+diff -Nurb linux-2.6.22-570/scripts/Makefile.modpost.orig linux-2.6.22-591/scripts/Makefile.modpost.orig
+--- linux-2.6.22-570/scripts/Makefile.modpost.orig 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/scripts/Makefile.modpost.orig 1969-12-31 19:00:00.000000000 -0500
+@@ -1,132 +0,0 @@
+-# ===========================================================================
+-# Module versions
+-# ===========================================================================
+-#
+-# Stage one of module building created the following:
+-# a) The individual .o files used for the module
+-# b) A <module>.o file which is the .o files above linked together
+-# c) A <module>.mod file in $(MODVERDIR)/, listing the name of the
+-# the preliminary <module>.o file, plus all .o files
+-
+-# Stage 2 is handled by this file and does the following
+-# 1) Find all modules from the files listed in $(MODVERDIR)/
+-# 2) modpost is then used to
+-# 3) create one <module>.mod.c file pr. module
+-# 4) create one Module.symvers file with CRC for all exported symbols
+-# 5) compile all <module>.mod.c files
+-# 6) final link of the module to a <module.ko> file
+-
+-# Step 3 is used to place certain information in the module's ELF
+-# section, including information such as:
+-# Version magic (see include/vermagic.h for full details)
+-# - Kernel release
+-# - SMP is CONFIG_SMP
+-# - PREEMPT is CONFIG_PREEMPT
+-# - GCC Version
+-# Module info
+-# - Module version (MODULE_VERSION)
+-# - Module alias'es (MODULE_ALIAS)
+-# - Module license (MODULE_LICENSE)
+-# - See include/linux/module.h for more details
+-
+-# Step 4 is solely used to allow module versioning in external modules,
+-# where the CRC of each module is retrieved from the Module.symers file.
+-
+-# KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined
+-# symbols in the final module linking stage
+-# KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
+-# This is solely usefull to speed up test compiles
+-PHONY := _modpost
+-_modpost: __modpost
+-
+-include include/config/auto.conf
+-include scripts/Kbuild.include
+-include scripts/Makefile.lib
+-
+-kernelsymfile := $(objtree)/Module.symvers
+-modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+-
+-# Step 1), find all modules listed in $(MODVERDIR)/
+-__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
+-modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+-
+-# Stop after building .o files if NOFINAL is set. Makes compile tests quicker
+-_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules))
+-
+-
+-# Step 2), invoke modpost
+-# Includes step 3,4
+-quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
+- cmd_modpost = scripts/mod/modpost \
+- $(if $(CONFIG_MODVERSIONS),-m) \
+- $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \
+- $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \
+- $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \
+- $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
+- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
+-
+-PHONY += __modpost
+-__modpost: $(modules:.ko=.o) FORCE
+- $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^)
+-
+-quiet_cmd_kernel-mod = MODPOST $@
+- cmd_kernel-mod = $(cmd_modpost) $(KBUILD_VMLINUX_OBJS)
+-
+-PHONY += vmlinux
+-vmlinux: FORCE
+- $(call cmd,kernel-mod)
+-
+-# Declare generated files as targets for modpost
+-$(symverfile): __modpost ;
+-$(modules:.ko=.mod.c): __modpost ;
+-
+-
+-# Step 5), compile all *.mod.c files
+-
+-# modname is set to make c_flags define KBUILD_MODNAME
+-modname = $(notdir $(@:.mod.o=))
+-
+-quiet_cmd_cc_o_c = CC $@
+- cmd_cc_o_c = $(CC) $(c_flags) $(CFLAGS_MODULE) \
+- -c -o $@ $<
+-
+-$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
+- $(call if_changed_dep,cc_o_c)
+-
+-targets += $(modules:.ko=.mod.o)
+-
+-# Step 6), final link of the modules
+-quiet_cmd_ld_ko_o = LD [M] $@
+- cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \
+- $(filter-out FORCE,$^)
+-
+-$(modules): %.ko :%.o %.mod.o FORCE
+- $(call if_changed,ld_ko_o)
+-
+-targets += $(modules)
+-
+-
+-# Add FORCE to the prequisites of a target to force it to be always rebuilt.
+-# ---------------------------------------------------------------------------
+-
+-PHONY += FORCE
+-
+-FORCE:
+-
+-# Read all saved command lines and dependencies for the $(targets) we
+-# may be building above, using $(if_changed{,_dep}). As an
+-# optimization, we don't need to read them if the target does not
+-# exist, we will rebuild anyway in that case.
+-
+-targets := $(wildcard $(sort $(targets)))
+-cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+-
+-ifneq ($(cmd_files),)
+- include $(cmd_files)
+-endif
+-
+-
+-# Declare the contents of the .PHONY variable as phony. We keep that
+-# information in a variable se we can use it in if_changed and friends.
+-
+-.PHONY: $(PHONY)
+diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-591/security/commoncap.c
+--- linux-2.6.22-570/security/commoncap.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/security/commoncap.c 2007-12-21 15:36:13.000000000 -0500
+@@ -150,7 +150,7 @@
+
+ if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
+ !cap_issubset (new_permitted, current->cap_permitted)) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+
+ if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
+ if (!capable(CAP_SETUID)) {
+diff -Nurb linux-2.6.22-570/security/dummy.c linux-2.6.22-591/security/dummy.c
+--- linux-2.6.22-570/security/dummy.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/security/dummy.c 2007-12-21 15:36:13.000000000 -0500
+@@ -131,7 +131,7 @@
+ static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
+ {
+ if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) {
+- current->mm->dumpable = suid_dumpable;
++ set_dumpable(current->mm, suid_dumpable);
+
+ if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) {
+ bprm->e_uid = current->uid;
+@@ -421,8 +421,12 @@
+
+ static int dummy_file_mmap (struct file *file, unsigned long reqprot,
+ unsigned long prot,
+- unsigned long flags)
++ unsigned long flags,
++ unsigned long addr,
++ unsigned long addr_only)
+ {
++ if (addr < mmap_min_addr)
++ return -EACCES;
+ return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/security/keys/request_key.c linux-2.6.22-591/security/keys/request_key.c
+--- linux-2.6.22-570/security/keys/request_key.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/keys/request_key.c 2007-12-21 15:36:13.000000000 -0500
+@@ -108,7 +108,8 @@
+ argv[i] = NULL;
+
+ /* do it */
+- ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1);
++ ret = call_usermodehelper_keys(argv[0], argv, envp, keyring,
++ UMH_WAIT_PROC);
+
+ error_link:
+ key_put(keyring);
+diff -Nurb linux-2.6.22-570/security/security.c linux-2.6.22-591/security/security.c
+--- linux-2.6.22-570/security/security.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/security.c 2007-12-21 15:36:13.000000000 -0500
+@@ -24,6 +24,7 @@
+ extern void security_fixup_ops(struct security_operations *ops);
+
+ struct security_operations *security_ops; /* Initialized to NULL */
++unsigned long mmap_min_addr; /* 0 means no protection */
+
+ static inline int verify(struct security_operations *ops)
+ {
+@@ -176,4 +177,5 @@
+ EXPORT_SYMBOL_GPL(unregister_security);
+ EXPORT_SYMBOL_GPL(mod_reg_security);
+ EXPORT_SYMBOL_GPL(mod_unreg_security);
++EXPORT_SYMBOL_GPL(mmap_min_addr);
+ EXPORT_SYMBOL(security_ops);
+diff -Nurb linux-2.6.22-570/security/selinux/avc.c linux-2.6.22-591/security/selinux/avc.c
+--- linux-2.6.22-570/security/selinux/avc.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/avc.c 2007-12-21 15:36:13.000000000 -0500
+@@ -586,7 +586,7 @@
+ }
+ }
+ if (inode)
+- audit_log_format(ab, " dev=%s ino=%ld",
++ audit_log_format(ab, " dev=%s ino=%lu",
+ inode->i_sb->s_id,
+ inode->i_ino);
+ break;
+@@ -832,6 +832,7 @@
+ * @tsid: target security identifier
+ * @tclass: target security class
+ * @requested: requested permissions, interpreted based on @tclass
++ * @flags: AVC_STRICT or 0
+ * @avd: access vector decisions
+ *
+ * Check the AVC to determine whether the @requested permissions are granted
+@@ -847,6 +848,7 @@
+ */
+ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
+ u16 tclass, u32 requested,
++ unsigned flags,
+ struct av_decision *avd)
+ {
+ struct avc_node *node;
+@@ -874,7 +876,7 @@
+ denied = requested & ~(p_ae->avd.allowed);
+
+ if (!requested || denied) {
+- if (selinux_enforcing)
++ if (selinux_enforcing || (flags & AVC_STRICT))
+ rc = -EACCES;
+ else
+ if (node)
+@@ -909,7 +911,7 @@
+ struct av_decision avd;
+ int rc;
+
+- rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, &avd);
++ rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
+ avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
+ return rc;
+ }
+diff -Nurb linux-2.6.22-570/security/selinux/hooks.c linux-2.6.22-591/security/selinux/hooks.c
+--- linux-2.6.22-570/security/selinux/hooks.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/security/selinux/hooks.c 2007-12-21 15:36:15.000000000 -0500
+@@ -111,6 +111,9 @@
+ /* Original (dummy) security module. */
+ static struct security_operations *original_ops = NULL;
+
++/* Did we enable minimum mmap address checking? */
++static int enabled_mmap_min_addr;
++
+ /* Minimal support for a secondary security module,
+ just to allow the use of the dummy or capability modules.
+ The owlsm module can alternatively be used as a secondary
+@@ -1593,6 +1596,7 @@
+ rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
+ SECCLASS_CAPABILITY,
+ CAP_TO_MASK(CAP_SYS_ADMIN),
++ 0,
+ NULL);
+
+ if (rc == 0)
+@@ -2570,12 +2574,16 @@
+ }
+
+ static int selinux_file_mmap(struct file *file, unsigned long reqprot,
+- unsigned long prot, unsigned long flags)
++ unsigned long prot, unsigned long flags,
++ unsigned long addr, unsigned long addr_only)
+ {
+- int rc;
++ int rc = 0;
++ u32 sid = ((struct task_security_struct*)(current->security))->sid;
+
+- rc = secondary_ops->file_mmap(file, reqprot, prot, flags);
+- if (rc)
++ if (addr < mmap_min_addr)
++ rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
++ MEMPROTECT__MMAP_ZERO, NULL);
++ if (rc || addr_only)
+ return rc;
+
+ if (selinux_checkreqprot)
+@@ -3223,8 +3231,8 @@
+ /* Range of port numbers used to automatically bind.
+ Need to determine whether we should perform a name_bind
+ permission check between the socket and the port number. */
+-#define ip_local_port_range_0 sysctl_local_port_range[0]
+-#define ip_local_port_range_1 sysctl_local_port_range[1]
++#define ip_local_port_range_0 (sk->sk_net->sysctl_local_port_range[0])
++#define ip_local_port_range_1 (sk->sk_net->sysctl_local_port_range[1])
+
+ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+ {
+@@ -3968,6 +3976,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET);
+ }
+
+@@ -3979,6 +3991,10 @@
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+ {
++ /* Only filter packets in the initial network namespace */
++ if ((in?in:out)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6);
+ }
+
+@@ -4628,7 +4644,7 @@
+ if (p->ptrace & PT_PTRACED) {
+ error = avc_has_perm_noaudit(tsec->ptrace_sid, sid,
+ SECCLASS_PROCESS,
+- PROCESS__PTRACE, &avd);
++ PROCESS__PTRACE, 0, &avd);
+ if (!error)
+ tsec->sid = sid;
+ task_unlock(p);
+@@ -4910,6 +4926,16 @@
+ sel_inode_cache = kmem_cache_create("selinux_inode_security",
+ sizeof(struct inode_security_struct),
+ 0, SLAB_PANIC, NULL, NULL);
++
++ /*
++ * Tasks cannot mmap below this without the mmap_zero permission.
++ * If not enabled already, do so by setting it to 64KB.
++ */
++ if (mmap_min_addr == 0) {
++ enabled_mmap_min_addr = 1;
++ mmap_min_addr = 65536;
++ }
++
+ avc_init();
+
+ original_ops = secondary_ops = security_ops;
+@@ -5060,6 +5086,10 @@
+ selinux_disabled = 1;
+ selinux_enabled = 0;
+
++ /* Disable minimum mmap address check only if we enabled it */
++ if (enabled_mmap_min_addr)
++ mmap_min_addr = 0;
++
+ /* Reset security_ops to the secondary module, dummy or capability. */
+ security_ops = secondary_ops;
+
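
dummy.c and the SELinux mmap hook above both gate low mappings on mmap_min_addr, which selinux_init() raises to 64KB when nothing else has set it (and lowers again if SELinux is later disabled). The gate reduces to a simple floor check; a sketch with assumed values:

/* Sketch of the minimum-mmap-address floor: requests below the floor
 * need an extra privilege, everything else passes. Illustrative. */
#include <stdio.h>

static unsigned long mmap_min_addr = 65536;  /* 64KB default set at init */

static int may_mmap(unsigned long addr, int privileged)
{
    if (addr < mmap_min_addr && !privileged)
        return -13;            /* -EACCES */
    return 0;
}

int main(void)
{
    printf("map at 0x0, unprivileged: %d\n", may_mmap(0x0, 0));
    printf("map at 0x0, privileged:   %d\n", may_mmap(0x0, 1));
    printf("map at 1MB, unprivileged: %d\n", may_mmap(1UL << 20, 0));
    return 0;
}
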
+diff -Nurb linux-2.6.22-570/security/selinux/include/av_perm_to_string.h linux-2.6.22-591/security/selinux/include/av_perm_to_string.h
+--- linux-2.6.22-570/security/selinux/include/av_perm_to_string.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/av_perm_to_string.h 2007-12-21 15:36:13.000000000 -0500
+@@ -158,3 +158,4 @@
+ S_(SECCLASS_KEY, KEY__CREATE, "create")
+ S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind")
+ S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect")
++ S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero")
+diff -Nurb linux-2.6.22-570/security/selinux/include/av_permissions.h linux-2.6.22-591/security/selinux/include/av_permissions.h
+--- linux-2.6.22-570/security/selinux/include/av_permissions.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/av_permissions.h 2007-12-21 15:36:13.000000000 -0500
+@@ -823,3 +823,4 @@
+ #define DCCP_SOCKET__NAME_BIND 0x00200000UL
+ #define DCCP_SOCKET__NODE_BIND 0x00400000UL
+ #define DCCP_SOCKET__NAME_CONNECT 0x00800000UL
++#define MEMPROTECT__MMAP_ZERO 0x00000001UL
+diff -Nurb linux-2.6.22-570/security/selinux/include/avc.h linux-2.6.22-591/security/selinux/include/avc.h
+--- linux-2.6.22-570/security/selinux/include/avc.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/avc.h 2007-12-21 15:36:13.000000000 -0500
+@@ -102,8 +102,10 @@
+ u16 tclass, u32 requested,
+ struct av_decision *avd, int result, struct avc_audit_data *auditdata);
+
++#define AVC_STRICT 1 /* Ignore permissive mode. */
+ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
+ u16 tclass, u32 requested,
++ unsigned flags,
+ struct av_decision *avd);
+
+ int avc_has_perm(u32 ssid, u32 tsid,
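
The new AVC_STRICT flag lets a caller insist on the computed decision even while SELinux is globally permissive; the services.c hunk later in this patch passes it when filtering user SIDs. A toy model of the flag's semantics:

/* A denial is normally forgiven in permissive mode, but a caller
 * passing the strict flag gets the hard error either way. Sketch. */
#include <stdio.h>

#define STRICT 1
static int enforcing = 0;      /* stand-in for selinux_enforcing */

static int check(unsigned requested, unsigned allowed, unsigned flags)
{
    unsigned denied = requested & ~allowed;

    if (denied && (enforcing || (flags & STRICT)))
        return -1;             /* -EACCES in the real code */
    return 0;                  /* permissive: log-and-allow */
}

int main(void)
{
    printf("permissive, lenient: %d\n", check(0x4, 0x1, 0));      /*  0 */
    printf("permissive, strict:  %d\n", check(0x4, 0x1, STRICT)); /* -1 */
    return 0;
}
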
+diff -Nurb linux-2.6.22-570/security/selinux/include/class_to_string.h linux-2.6.22-591/security/selinux/include/class_to_string.h
+--- linux-2.6.22-570/security/selinux/include/class_to_string.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/class_to_string.h 2007-12-21 15:36:13.000000000 -0500
+@@ -63,3 +63,4 @@
+ S_("key")
+ S_(NULL)
+ S_("dccp_socket")
++ S_("memprotect")
+diff -Nurb linux-2.6.22-570/security/selinux/include/flask.h linux-2.6.22-591/security/selinux/include/flask.h
+--- linux-2.6.22-570/security/selinux/include/flask.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/flask.h 2007-12-21 15:36:13.000000000 -0500
+@@ -49,6 +49,7 @@
+ #define SECCLASS_PACKET 57
+ #define SECCLASS_KEY 58
+ #define SECCLASS_DCCP_SOCKET 60
++#define SECCLASS_MEMPROTECT 61
+
+ /*
+ * Security identifier indices for initial entities
+diff -Nurb linux-2.6.22-570/security/selinux/include/security.h linux-2.6.22-591/security/selinux/include/security.h
+--- linux-2.6.22-570/security/selinux/include/security.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/include/security.h 2007-12-21 15:36:13.000000000 -0500
+@@ -41,6 +41,7 @@
+
+ int security_load_policy(void * data, size_t len);
+
++#define SEL_VEC_MAX 32
+ struct av_decision {
+ u32 allowed;
+ u32 decided;
+@@ -87,6 +88,9 @@
+
+ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
+
++int security_get_classes(char ***classes, int *nclasses);
++int security_get_permissions(char *class, char ***perms, int *nperms);
++
+ #define SECURITY_FS_USE_XATTR 1 /* use xattr */
+ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */
+ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */
+diff -Nurb linux-2.6.22-570/security/selinux/netif.c linux-2.6.22-591/security/selinux/netif.c
+--- linux-2.6.22-570/security/selinux/netif.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/netif.c 2007-12-21 15:36:15.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/notifier.h>
+ #include <linux/netdevice.h>
+ #include <linux/rcupdate.h>
++#include <net/net_namespace.h>
+
+ #include "security.h"
+ #include "objsec.h"
+@@ -234,6 +235,9 @@
+ {
+ struct net_device *dev = ptr;
+
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event == NETDEV_DOWN)
+ sel_netif_kill(dev);
+
+diff -Nurb linux-2.6.22-570/security/selinux/netlink.c linux-2.6.22-591/security/selinux/netlink.c
+--- linux-2.6.22-570/security/selinux/netlink.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/netlink.c 2007-12-21 15:36:15.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/selinux_netlink.h>
++#include <net/net_namespace.h>
+
+ static struct sock *selnl;
+
+@@ -104,8 +105,8 @@
+
+ static int __init selnl_init(void)
+ {
+- selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
+- THIS_MODULE);
++ selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX,
++ SELNLGRP_MAX, NULL, NULL, THIS_MODULE);
+ if (selnl == NULL)
+ panic("SELinux: Cannot create netlink socket.");
+ netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);
+diff -Nurb linux-2.6.22-570/security/selinux/selinuxfs.c linux-2.6.22-591/security/selinux/selinuxfs.c
+--- linux-2.6.22-570/security/selinux/selinuxfs.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/selinuxfs.c 2007-12-21 15:36:13.000000000 -0500
+@@ -67,6 +67,10 @@
+ static int bool_num = 0;
+ static int *bool_pending_values = NULL;
+
++/* global data for classes */
++static struct dentry *class_dir = NULL;
++static unsigned long last_class_ino;
++
+ extern void selnl_notify_setenforce(int val);
+
+ /* Check whether a task is allowed to use a security operation. */
+@@ -106,6 +110,7 @@
+
+ #define SEL_INITCON_INO_OFFSET 0x01000000
+ #define SEL_BOOL_INO_OFFSET 0x02000000
++#define SEL_CLASS_INO_OFFSET 0x04000000
+ #define SEL_INO_MASK 0x00ffffff
+
+ #define TMPBUFLEN 12
+@@ -237,6 +242,11 @@
+
+ /* declaration for sel_write_load */
+ static int sel_make_bools(void);
++static int sel_make_classes(void);
++
++/* declaration for sel_make_class_dirs */
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++ unsigned long *ino);
+
+ static ssize_t sel_read_mls(struct file *filp, char __user *buf,
+ size_t count, loff_t *ppos)
+@@ -287,10 +297,18 @@
+ goto out;
+
+ ret = sel_make_bools();
++ if (ret) {
++ length = ret;
++ goto out1;
++ }
++
++ ret = sel_make_classes();
+ if (ret)
+ length = ret;
+ else
+ length = count;
++
++out1:
+ audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
+ "policy loaded auid=%u",
+ audit_get_loginuid(current->audit_context));
+@@ -940,9 +958,8 @@
+ .write = sel_commit_bools_write,
+ };
+
+-/* delete booleans - partial revoke() from
+- * fs/proc/generic.c proc_kill_inodes */
+-static void sel_remove_bools(struct dentry *de)
++/* partial revoke() from fs/proc/generic.c proc_kill_inodes */
++static void sel_remove_entries(struct dentry *de)
+ {
+ struct list_head *p, *node;
+ struct super_block *sb = de->d_sb;
+@@ -998,7 +1015,7 @@
+ kfree(bool_pending_values);
+ bool_pending_values = NULL;
+
+- sel_remove_bools(dir);
++ sel_remove_entries(dir);
+
+ if (!(page = (char*)get_zeroed_page(GFP_KERNEL)))
+ return -ENOMEM;
+@@ -1048,7 +1065,7 @@
+ return ret;
+ err:
+ kfree(values);
+- sel_remove_bools(dir);
++ sel_remove_entries(dir);
+ ret = -ENOMEM;
+ goto out;
+ }
+@@ -1294,7 +1311,227 @@
+ return ret;
+ }
+
+-static int sel_make_dir(struct inode *dir, struct dentry *dentry)
++static inline unsigned int sel_div(unsigned long a, unsigned long b)
++{
++ return a / b - (a % b < 0);
++}
++
++static inline unsigned long sel_class_to_ino(u16 class)
++{
++ return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u16 sel_ino_to_class(unsigned long ino)
++{
++ return sel_div(ino & SEL_INO_MASK, SEL_VEC_MAX + 1);
++}
++
++static inline unsigned long sel_perm_to_ino(u16 class, u32 perm)
++{
++ return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u32 sel_ino_to_perm(unsigned long ino)
++{
++ return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
++}
++
++static ssize_t sel_read_class(struct file * file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ ssize_t rc, len;
++ char *page;
++ unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++ page = (char *)__get_free_page(GFP_KERNEL);
++ if (!page) {
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino));
++ rc = simple_read_from_buffer(buf, count, ppos, page, len);
++ free_page((unsigned long)page);
++out:
++ return rc;
++}
++
++static const struct file_operations sel_class_ops = {
++ .read = sel_read_class,
++};
++
++static ssize_t sel_read_perm(struct file * file, char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ ssize_t rc, len;
++ char *page;
++ unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++ page = (char *)__get_free_page(GFP_KERNEL);
++ if (!page) {
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino));
++ rc = simple_read_from_buffer(buf, count, ppos, page, len);
++ free_page((unsigned long)page);
++out:
++ return rc;
++}
++
++static const struct file_operations sel_perm_ops = {
++ .read = sel_read_perm,
++};
++
++static int sel_make_perm_files(char *objclass, int classvalue,
++ struct dentry *dir)
++{
++ int i, rc = 0, nperms;
++ char **perms;
++
++ rc = security_get_permissions(objclass, &perms, &nperms);
++ if (rc)
++ goto out;
++
++ for (i = 0; i < nperms; i++) {
++ struct inode *inode;
++ struct dentry *dentry;
++
++ dentry = d_alloc_name(dir, perms[i]);
++ if (!dentry) {
++ rc = -ENOMEM;
++ goto out1;
++ }
++
++ inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++ if (!inode) {
++ rc = -ENOMEM;
++ goto out1;
++ }
++ inode->i_fop = &sel_perm_ops;
++ /* i+1 since perm values are 1-indexed */
++ inode->i_ino = sel_perm_to_ino(classvalue, i+1);
++ d_add(dentry, inode);
++ }
++
++out1:
++ for (i = 0; i < nperms; i++)
++ kfree(perms[i]);
++ kfree(perms);
++out:
++ return rc;
++}
++
++static int sel_make_class_dir_entries(char *classname, int index,
++ struct dentry *dir)
++{
++ struct dentry *dentry = NULL;
++ struct inode *inode = NULL;
++ int rc;
++
++ dentry = d_alloc_name(dir, "index");
++ if (!dentry) {
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++ if (!inode) {
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ inode->i_fop = &sel_class_ops;
++ inode->i_ino = sel_class_to_ino(index);
++ d_add(dentry, inode);
++
++ dentry = d_alloc_name(dir, "perms");
++ if (!dentry) {
++ rc = -ENOMEM;
++ goto out;
++ }
++
++ rc = sel_make_dir(dir->d_inode, dentry, &last_class_ino);
++ if (rc)
++ goto out;
++
++ rc = sel_make_perm_files(classname, index, dentry);
++
++out:
++ return rc;
++}
++
++static void sel_remove_classes(void)
++{
++ struct list_head *class_node;
++
++ list_for_each(class_node, &class_dir->d_subdirs) {
++ struct dentry *class_subdir = list_entry(class_node,
++ struct dentry, d_u.d_child);
++ struct list_head *class_subdir_node;
++
++ list_for_each(class_subdir_node, &class_subdir->d_subdirs) {
++ struct dentry *d = list_entry(class_subdir_node,
++ struct dentry, d_u.d_child);
++
++ if (d->d_inode)
++ if (d->d_inode->i_mode & S_IFDIR)
++ sel_remove_entries(d);
++ }
++
++ sel_remove_entries(class_subdir);
++ }
++
++ sel_remove_entries(class_dir);
++}
++
++static int sel_make_classes(void)
++{
++ int rc = 0, nclasses, i;
++ char **classes;
++
++ /* delete any existing entries */
++ sel_remove_classes();
++
++ rc = security_get_classes(&classes, &nclasses);
++ if (rc < 0)
++ goto out;
++
++ /* +2 since classes are 1-indexed */
++ last_class_ino = sel_class_to_ino(nclasses+2);
++
++ for (i = 0; i < nclasses; i++) {
++ struct dentry *class_name_dir;
++
++ class_name_dir = d_alloc_name(class_dir, classes[i]);
++ if (!class_name_dir) {
++ rc = -ENOMEM;
++ goto out1;
++ }
++
++ rc = sel_make_dir(class_dir->d_inode, class_name_dir,
++ &last_class_ino);
++ if (rc)
++ goto out1;
++
++ /* i+1 since class values are 1-indexed */
++ rc = sel_make_class_dir_entries(classes[i], i+1,
++ class_name_dir);
++ if (rc)
++ goto out1;
++ }
++
++out1:
++ for (i = 0; i < nclasses; i++)
++ kfree(classes[i]);
++ kfree(classes);
++out:
++ return rc;
++}
++
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++ unsigned long *ino)
+ {
+ int ret = 0;
+ struct inode *inode;
+@@ -1306,7 +1543,7 @@
+ }
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+- inode->i_ino = ++sel_last_ino;
++ inode->i_ino = ++(*ino);
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_add(dentry, inode);
+@@ -1352,7 +1589,7 @@
+ goto err;
+ }
+
+- ret = sel_make_dir(root_inode, dentry);
++ ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ if (ret)
+ goto err;
+
+@@ -1385,7 +1622,7 @@
+ goto err;
+ }
+
+- ret = sel_make_dir(root_inode, dentry);
++ ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ if (ret)
+ goto err;
+
+@@ -1399,7 +1636,7 @@
+ goto err;
+ }
+
+- ret = sel_make_dir(root_inode, dentry);
++ ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ if (ret)
+ goto err;
+
+@@ -1407,6 +1644,18 @@
+ if (ret)
+ goto err;
+
++ dentry = d_alloc_name(sb->s_root, "class");
++ if (!dentry) {
++ ret = -ENOMEM;
++ goto err;
++ }
++
++ ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
++ if (ret)
++ goto err;
++
++ class_dir = dentry;
++
+ out:
+ return ret;
+ err:
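
The class directory code above multiplexes (class, permission) pairs into selinuxfs inode numbers: each class occupies a run of SEL_VEC_MAX + 1 inodes under SEL_CLASS_INO_OFFSET, with remainder 0 reserved for the class's own "index" node and remainders 1..SEL_VEC_MAX carrying the 1-indexed permission values. A quick round-trip check of that encoding, reusing the same constants:

/* Verifies the encode/decode pair used by sel_class_to_ino(),
 * sel_perm_to_ino(), sel_ino_to_class() and sel_ino_to_perm(). */
#include <assert.h>
#include <stdio.h>

#define SEL_VEC_MAX 32
#define SEL_CLASS_INO_OFFSET 0x04000000UL
#define SEL_INO_MASK 0x00ffffffUL

static unsigned long perm_to_ino(unsigned cls, unsigned perm)
{
    return (cls * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
}

static unsigned ino_to_class(unsigned long ino)
{
    return (ino & SEL_INO_MASK) / (SEL_VEC_MAX + 1);
}

static unsigned ino_to_perm(unsigned long ino)
{
    return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
}

int main(void)
{
    for (unsigned c = 1; c < 64; c++)
        for (unsigned p = 1; p <= SEL_VEC_MAX; p++) {
            unsigned long ino = perm_to_ino(c, p);
            assert(ino_to_class(ino) == c);
            assert(ino_to_perm(ino) == p);
        }
    printf("round-trip OK\n");
    return 0;
}
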
+diff -Nurb linux-2.6.22-570/security/selinux/ss/policydb.c linux-2.6.22-591/security/selinux/ss/policydb.c
+--- linux-2.6.22-570/security/selinux/ss/policydb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/ss/policydb.c 2007-12-21 15:36:13.000000000 -0500
+@@ -21,6 +21,7 @@
+ */
+
+ #include <linux/kernel.h>
++#include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/errno.h>
+@@ -598,6 +599,7 @@
+ struct range_trans *rt, *lrt = NULL;
+
+ for (i = 0; i < SYM_NUM; i++) {
++ cond_resched();
+ hashtab_map(p->symtab[i].table, destroy_f[i], NULL);
+ hashtab_destroy(p->symtab[i].table);
+ }
+@@ -612,6 +614,7 @@
+ avtab_destroy(&p->te_avtab);
+
+ for (i = 0; i < OCON_NUM; i++) {
++ cond_resched();
+ c = p->ocontexts[i];
+ while (c) {
+ ctmp = c;
+@@ -623,6 +626,7 @@
+
+ g = p->genfs;
+ while (g) {
++ cond_resched();
+ kfree(g->fstype);
+ c = g->head;
+ while (c) {
+@@ -639,18 +643,21 @@
+ cond_policydb_destroy(p);
+
+ for (tr = p->role_tr; tr; tr = tr->next) {
++ cond_resched();
+ kfree(ltr);
+ ltr = tr;
+ }
+ kfree(ltr);
+
+ for (ra = p->role_allow; ra; ra = ra -> next) {
++ cond_resched();
+ kfree(lra);
+ lra = ra;
+ }
+ kfree(lra);
+
+ for (rt = p->range_tr; rt; rt = rt -> next) {
++ cond_resched();
+ if (lrt) {
+ ebitmap_destroy(&lrt->target_range.level[0].cat);
+ ebitmap_destroy(&lrt->target_range.level[1].cat);
+diff -Nurb linux-2.6.22-570/security/selinux/ss/services.c linux-2.6.22-591/security/selinux/ss/services.c
+--- linux-2.6.22-570/security/selinux/ss/services.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/ss/services.c 2007-12-21 15:36:13.000000000 -0500
+@@ -1587,19 +1587,18 @@
+ u32 *nel)
+ {
+ struct context *fromcon, usercon;
+- u32 *mysids, *mysids2, sid;
++ u32 *mysids = NULL, *mysids2, sid;
+ u32 mynel = 0, maxnel = SIDS_NEL;
+ struct user_datum *user;
+ struct role_datum *role;
+- struct av_decision avd;
+ struct ebitmap_node *rnode, *tnode;
+ int rc = 0, i, j;
+
+- if (!ss_initialized) {
+ *sids = NULL;
+ *nel = 0;
++
++ if (!ss_initialized)
+ goto out;
+- }
+
+ POLICY_RDLOCK;
+
+@@ -1635,17 +1634,9 @@
+ if (mls_setup_user_range(fromcon, user, &usercon))
+ continue;
+
+- rc = context_struct_compute_av(fromcon, &usercon,
+- SECCLASS_PROCESS,
+- PROCESS__TRANSITION,
+- &avd);
+- if (rc || !(avd.allowed & PROCESS__TRANSITION))
+- continue;
+ rc = sidtab_context_to_sid(&sidtab, &usercon, &sid);
+- if (rc) {
+- kfree(mysids);
++ if (rc)
+ goto out_unlock;
+- }
+ if (mynel < maxnel) {
+ mysids[mynel++] = sid;
+ } else {
+@@ -1653,7 +1644,6 @@
+ mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC);
+ if (!mysids2) {
+ rc = -ENOMEM;
+- kfree(mysids);
+ goto out_unlock;
+ }
+ memcpy(mysids2, mysids, mynel * sizeof(*mysids2));
+@@ -1664,11 +1654,32 @@
+ }
+ }
+
+- *sids = mysids;
+- *nel = mynel;
+-
+ out_unlock:
+ POLICY_RDUNLOCK;
++ if (rc || !mynel) {
++ kfree(mysids);
++ goto out;
++ }
++
++ mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL);
++ if (!mysids2) {
++ rc = -ENOMEM;
++ kfree(mysids);
++ goto out;
++ }
++ for (i = 0, j = 0; i < mynel; i++) {
++ rc = avc_has_perm_noaudit(fromsid, mysids[i],
++ SECCLASS_PROCESS,
++ PROCESS__TRANSITION, AVC_STRICT,
++ NULL);
++ if (!rc)
++ mysids2[j++] = mysids[i];
++ cond_resched();
++ }
++ rc = 0;
++ kfree(mysids);
++ *sids = mysids2;
++ *nel = j;
+ out:
+ return rc;
+ }
+@@ -1996,6 +2007,101 @@
+ return rc;
+ }
+
++static int get_classes_callback(void *k, void *d, void *args)
++{
++ struct class_datum *datum = d;
++ char *name = k, **classes = args;
++ int value = datum->value - 1;
++
++ classes[value] = kstrdup(name, GFP_ATOMIC);
++ if (!classes[value])
++ return -ENOMEM;
++
++ return 0;
++}
++
++int security_get_classes(char ***classes, int *nclasses)
++{
++ int rc = -ENOMEM;
++
++ POLICY_RDLOCK;
++
++ *nclasses = policydb.p_classes.nprim;
++ *classes = kcalloc(*nclasses, sizeof(*classes), GFP_ATOMIC);
++ if (!*classes)
++ goto out;
++
++ rc = hashtab_map(policydb.p_classes.table, get_classes_callback,
++ *classes);
++ if (rc < 0) {
++ int i;
++ for (i = 0; i < *nclasses; i++)
++ kfree((*classes)[i]);
++ kfree(*classes);
++ }
++
++out:
++ POLICY_RDUNLOCK;
++ return rc;
++}
++
++static int get_permissions_callback(void *k, void *d, void *args)
++{
++ struct perm_datum *datum = d;
++ char *name = k, **perms = args;
++ int value = datum->value - 1;
++
++ perms[value] = kstrdup(name, GFP_ATOMIC);
++ if (!perms[value])
++ return -ENOMEM;
++
++ return 0;
++}
++
++int security_get_permissions(char *class, char ***perms, int *nperms)
++{
++ int rc = -ENOMEM, i;
++ struct class_datum *match;
++
++ POLICY_RDLOCK;
++
++ match = hashtab_search(policydb.p_classes.table, class);
++ if (!match) {
++ printk(KERN_ERR "%s: unrecognized class %s\n",
++ __FUNCTION__, class);
++ rc = -EINVAL;
++ goto out;
++ }
++
++ *nperms = match->permissions.nprim;
++ *perms = kcalloc(*nperms, sizeof(*perms), GFP_ATOMIC);
++ if (!*perms)
++ goto out;
++
++ if (match->comdatum) {
++ rc = hashtab_map(match->comdatum->permissions.table,
++ get_permissions_callback, *perms);
++ if (rc < 0)
++ goto err;
++ }
++
++ rc = hashtab_map(match->permissions.table, get_permissions_callback,
++ *perms);
++ if (rc < 0)
++ goto err;
++
++out:
++ POLICY_RDUNLOCK;
++ return rc;
++
++err:
++ POLICY_RDUNLOCK;
++ for (i = 0; i < *nperms; i++)
++ kfree((*perms)[i]);
++ kfree(*perms);
++ return rc;
++}
++
+ struct selinux_audit_rule {
+ u32 au_seqno;
+ struct context au_ctxt;
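
security_get_classes() and security_get_permissions() above share one shape: hashtab_map() walks a policy table with a callback that copies each name into an output slot chosen by its 1-indexed datum value. The walk-with-callback pattern in isolation, with a plain array standing in for the hash table:

/* Sketch of the callback fill: slot = value - 1, so the array ends up
 * ordered by policy value regardless of walk order. Illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct datum { const char *name; int value; };  /* value is 1-indexed */

static int fill_slot(struct datum *d, char **out)
{
    out[d->value - 1] = strdup(d->name);
    return out[d->value - 1] ? 0 : -1;
}

int main(void)
{
    struct datum table[] = { {"file", 2}, {"dir", 1}, {"memprotect", 3} };
    int n = 3, i;
    char **classes = calloc(n, sizeof(*classes));

    if (!classes)
        return 1;
    for (i = 0; i < n; i++)        /* stand-in for hashtab_map() */
        if (fill_slot(&table[i], classes))
            return 1;

    for (i = 0; i < n; i++) {
        printf("class %d = %s\n", i + 1, classes[i]);
        free(classes[i]);
    }
    free(classes);
    return 0;
}
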
+diff -Nurb linux-2.6.22-570/toapply linux-2.6.22-591/toapply
+--- linux-2.6.22-570/toapply 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/toapply 2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,51 @@
++cat ../broken-out/cpuidle-fix-the-uninitialized-variable-in-sysfs-routine.patch | patch -p1
++cat ../broken-out/cpuidle-make-cpuidle-sysfs-driver-governor-switch-off-by-default.patch | patch -p1
++cat ../broken-out/acpi-video-dont-export-sysfs-backlight-interface-if-query-_bcl-fail.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rules.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-root-sysfs_dirent-root-dentry-association.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link-fix.patch | patch -p1
++cat ../broken-out/git-scsi-misc-vs-greg-sysfs-stuff.patch | patch -p1
++cat ../broken-out/gregkh-usb-usb-cxacru-cleanup-sysfs-attribute-code.patch | patch -p1
++cat ../broken-out/gregkh-usb-usb-add-iad-support-to-usbfs-and-sysfs.patch | patch -p1
++cat ../broken-out/x86_64-mm-xen-add-the-xenbus-sysfs-and-virtual-device-hotplug-driver.patch | patch -p1
++cat ../broken-out/drivers-edac-mc-sysfs-add-missing-mem-types.patch | patch -p1
++cat ../broken-out/drivers-edac-edac_device-sysfs-cleanup.patch | patch -p1
++cat ../broken-out/drivers-edac-add-device-sysfs-attributes.patch | patch -p1
+diff -Nurb linux-2.6.22-570/trellis-mm1-1.sh linux-2.6.22-591/trellis-mm1-1.sh
+--- linux-2.6.22-570/trellis-mm1-1.sh 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/trellis-mm1-1.sh 2007-12-21 15:36:13.000000000 -0500
+@@ -0,0 +1,142 @@
++cat ../broken-out/origin.patch | patch -p1
++cat ../broken-out/ioatdma-fix-section-mismatches.patch | patch -p1
++cat ../broken-out/introduce-fixed-sys_sync_file_range2-syscall-implement-on.patch | patch -p1
++cat ../broken-out/git-acpi.patch | patch -p1
++cat ../broken-out/agk-dm-dm-netlink.patch | patch -p1
++cat ../broken-out/git-powerpc.patch | patch -p1
++cat ../broken-out/make-drivers-char-hvc_consoleckhvcd-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1
++cat ../broken-out/gregkh-driver-driver-core-make-devt_attr-and-uevent_attr-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1
++cat ../broken-out/gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/revert-gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1
++cat ../broken-out/git-md-accel.patch | patch -p1
++cat ../broken-out/git-mmc.patch | patch -p1
++cat ../broken-out/git-net.patch | patch -p1
++cat ../broken-out/tun-tap-allow-group-ownership-of-tun-tap-devices.patch | patch -p1
++cat ../broken-out/git-nfs.patch | patch -p1
++cat ../broken-out/git-ocfs2.patch | patch -p1
++cat ../broken-out/git-selinux.patch | patch -p1
++cat ../broken-out/revert-acpi-change-for-scsi.patch | patch -p1
++cat ../broken-out/git-scsi-misc.patch | patch -p1
++cat ../broken-out/git-unionfs.patch | patch -p1
++cat ../broken-out/x86_64-mm-unwinder.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-kstrndup.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-argv_split.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-split-usermodehelper-setup-from-execution.patch | patch -p1
++cat ../broken-out/x86_64-mm-add-common-orderly_poweroff.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-tidy-up-usermode-helper-waiting-a-bit.patch | patch -p1
++cat ../broken-out/x86_64-mm-xen-add-the-xen-virtual-network-device-driver.patch | patch -p1
++cat ../broken-out/i386-show-unhandled-signals.patch | patch -p1
++cat ../broken-out/git-kgdb.patch | patch -p1
++cat ../broken-out/hugetlb-remove-unnecessary-nid-initialization.patch | patch -p1
++cat ../broken-out/mm-alloc_large_system_hash-can-free-some-memory-for.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings-fix.patch | patch -p1
++cat ../broken-out/mm-merge-populate-and-nopage-into-fault-fixes-nonlinear.patch | patch -p1
++cat ../broken-out/add-a-bitmap-that-is-used-to-track-flags-affecting-a-block-of-pages.patch | patch -p1
++cat ../broken-out/add-__gfp_movable-for-callers-to-flag-allocations-from-high-memory-that-may-be-migrated.patch | patch -p1
++cat ../broken-out/split-the-free-lists-for-movable-and-unmovable-allocations.patch | patch -p1
++cat ../broken-out/choose-pages-from-the-per-cpu-list-based-on-migration-type.patch | patch -p1
++cat ../broken-out/add-a-configure-option-to-group-pages-by-mobility.patch | patch -p1
++cat ../broken-out/move-free-pages-between-lists-on-steal.patch | patch -p1
++cat ../broken-out/group-short-lived-and-reclaimable-kernel-allocations.patch | patch -p1
++cat ../broken-out/allow-huge-page-allocations-to-use-gfp_high_movable.patch | patch -p1
++cat ../broken-out/maps2-uninline-some-functions-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-eliminate-the-pmd_walker-struct-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-remove-vma-from-args-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-propagate-errors-from-callback-in-page-walker.patch | patch -p1
++cat ../broken-out/maps2-add-callbacks-for-each-level-to-page-walker.patch | patch -p1
++cat ../broken-out/maps2-move-the-page-walker-code-to-lib.patch | patch -p1
++cat ../broken-out/maps2-simplify-interdependence-of-proc-pid-maps-and-smaps.patch | patch -p1
++cat ../broken-out/maps2-move-clear_refs-code-to-task_mmuc.patch | patch -p1
++cat ../broken-out/maps2-regroup-task_mmu-by-interface.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-smaps-optional-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-add-proc-pid-pagemap-interface.patch | patch -p1
++cat ../broken-out/have-kswapd-keep-a-minimum-order-free-other-than-order-0.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix-2.patch | patch -p1
++cat ../broken-out/uml-use-get_free_pages-to-allocate-kernel-stacks.patch | patch -p1
++cat ../broken-out/add-generic-exit-time-stack-depth-checking-to-config_debug_stack_usage.patch | patch -p1
++cat ../broken-out/cpuset-remove-sched-domain-hooks-from-cpusets.patch | patch -p1
++cat ../broken-out/clone-flag-clone_parent_tidptr-leaves-invalid-results-in-memory.patch | patch -p1
++cat ../broken-out/use-boot-based-time-for-process-start-time-and-boot-time.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock-fix.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters-fix.patch | patch -p1
++cat ../broken-out/remove-config_uts_ns-and-config_ipc_ns.patch | patch -p1
++cat ../broken-out/user-namespace-add-the-framework.patch | patch -p1
++cat ../broken-out/user-namespace-add-unshare.patch | patch -p1
++cat ../broken-out/mm-fix-create_new_namespaces-return-value.patch | patch -p1
++cat ../broken-out/add-a-kmem_cache-for-nsproxy-objects.patch | patch -p1
++cat ../broken-out/namespace-ensure-clone_flags-are-always-stored-in-an-unsigned-long.patch | patch -p1
++cat ../broken-out/sysctlc-add-text-telling-people-to-use-ctl_unnumbered.patch | patch -p1
++cat ../broken-out/proper-prototype-for-proc_nr_files.patch | patch -p1
++cat ../broken-out/move-seccomp-from-proc-to-a-prctl.patch | patch -p1
++cat ../broken-out/uninline-check_signature.patch | patch -p1
++cat ../broken-out/revoke-core-code.patch | patch -p1
++cat ../broken-out/revoke-wire-up-i386-system-calls.patch | patch -p1
++cat ../broken-out/fallocate-implementation-on-i86-x86_64-and-powerpc.patch | patch -p1
++cat ../broken-out/coredump-masking-reimplementation-of-dumpable-using-two-flags.patch | patch -p1
++cat ../broken-out/coredump-masking-add-an-interface-for-core-dump-filter.patch | patch -p1
++cat ../broken-out/cpuset-zero-malloc-revert-the-old-cpuset-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix-for-bad-lock-balance-in-containers.patch | patch -p1
++cat ../broken-out/containersv10-example-cpu-accounting-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix-2.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-container_clone-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-make-cpusets-a-client-of-containers.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships-cpuset-zero-malloc-fix-for-new-containers.patch | patch -p1
++cat ../broken-out/containersv10-simple-debug-info-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-support-for-automatic-userspace-release-agents.patch | patch -p1
++cat ../broken-out/containers-implement-subsys-post_clone.patch | patch -p1
++cat ../broken-out/containers-implement-namespace-tracking-subsystem-v3.patch | patch -p1
++cat ../broken-out/keep-track-of-network-interface-renaming.patch | patch -p1
++cat ../broken-out/v2.6.22-rc6-mm1-netns23.patch | patch -p1